Files
testing123/streamlit_dashboard/cached_function_wrappers/trainings_cached_functions.py
2026-05-21 08:40:24 -04:00

61 lines
3.3 KiB
Python

from typing import List
import datetime
from .shared import cached_csv_url_to_dataframe
from section_1_datasets_module import generate_cleaned_trainings_dataset, generate_center_trainings_count_statistics
from constants_module import NEOSERRA_COLUMNS, OUT_COLUMNS, Constants
import pandas as pd
import streamlit as st
@st.cache_data
def cached_generate_center_trainings_count_statistics(export_url:str, reportable_only:bool, include_future_events:bool, include_on_demand:bool, allowed_centers:List[str] | None = None):
    """Build the per-center trainings count statistics, cached by Streamlit.

    The cleaned trainings dataset is obtained through
    ``cached_generate_cleaned_trainings_dataset`` using the same filter
    options, then handed to ``generate_center_trainings_count_statistics``
    together with the subset of rows whose attendee total is zero.

    Args:
        export_url: URL of the CSV export to load.
        reportable_only: Forwarded to the cleaned-dataset builder.
        include_future_events: Forwarded to the cleaned-dataset builder.
        include_on_demand: Forwarded to the cleaned-dataset builder.
        allowed_centers: Optional list of centers forwarded to the
            cleaned-dataset builder; ``None`` means no center filter.

    Returns:
        Whatever ``generate_center_trainings_count_statistics`` returns for
        the cleaned dataset.
    """
    # NOTE: the argument passing shape (one positional, then these keywords in
    # this order) is kept stable on purpose, since the callee is itself cached.
    cleaned_df = cached_generate_cleaned_trainings_dataset(
        export_url,
        reportable_only=reportable_only,
        allowed_centers=allowed_centers,
        include_future_events=include_future_events,
        include_on_demand=include_on_demand,
    )

    # Attendee totals that are missing or not parseable as numbers are
    # coerced to NaN and then counted as 0.
    attendee_counts = pd.to_numeric(
        cleaned_df[NEOSERRA_COLUMNS.attendees_total],
        errors='coerce',
    ).fillna(0)
    zero_attendance_mask = attendee_counts == 0
    zero_attendance_df = cleaned_df[zero_attendance_mask]

    return generate_center_trainings_count_statistics(
        full_df=cleaned_df,
        filtered_df=zero_attendance_df,
        funding_source_group=['Core Services', 'LEXNET', 'PDA', 'NAP'],
        col_primary_topic=NEOSERRA_COLUMNS.primary_training_topic,
        col_center=NEOSERRA_COLUMNS.center,
        col_funding_source=NEOSERRA_COLUMNS.funding_source,
        col_attendees_total=NEOSERRA_COLUMNS.attendees_total,
        col_is_preplanning=OUT_COLUMNS.is_preplanning,
    )
@st.cache_data
def cached_generate_cleaned_trainings_dataset(export_url:str, reportable_only:bool, include_future_events:bool, include_on_demand:bool, allowed_centers:List[str] | None = None) -> pd.DataFrame:
    """Load, clean and filter the trainings export, cached by Streamlit.

    The CSV at ``export_url`` is loaded via ``cached_csv_url_to_dataframe``,
    cleaned by ``generate_cleaned_trainings_dataset``, and then narrowed by
    the requested filters. The start-date column is always converted to
    datetimes, whether or not the future-events filter is applied.

    Args:
        export_url: URL of the CSV export to load.
        reportable_only: When true, keep only rows whose reportable column
            equals 1.
        include_future_events: When false, keep only rows whose start date
            is strictly before today's local date (events starting today
            are dropped as well).
        include_on_demand: When false, drop rows whose program format equals
            ``Constants.ON_DEMAND_VALUE.value``.
        allowed_centers: Optional list of centers to keep; ``None`` disables
            the center filter.

    Returns:
        The cleaned, filtered trainings DataFrame with the start-date column
        parsed as datetimes.

    Raises:
        ValueError: If a start-date value does not match ``%m/%d/%Y``
            (``pd.to_datetime`` is called with its default error handling).
    """
    trainings_df = cached_csv_url_to_dataframe(export_url)
    trainings_df = generate_cleaned_trainings_dataset(
        trainings_df,
        col_neo_event_title=NEOSERRA_COLUMNS.event_title,
        col_neo_primary_topic=NEOSERRA_COLUMNS.primary_training_topic,
        col_neo_training_topics=NEOSERRA_COLUMNS.training_topics,
        col_neo_center=NEOSERRA_COLUMNS.center,
        col_is_preplanning=OUT_COLUMNS.is_preplanning,
        col_neo_attendees_total=NEOSERRA_COLUMNS.attendees_total,
        col_out_attendees_range=OUT_COLUMNS.attendees_range,
    )

    if allowed_centers is not None:
        trainings_df = trainings_df[trainings_df[NEOSERRA_COLUMNS.center].isin(allowed_centers)]

    if reportable_only:
        trainings_df = trainings_df[trainings_df[NEOSERRA_COLUMNS.reportable] == 1]

    # Take an explicit copy before writing a column: the frame may be the
    # result of boolean-mask filtering above (assigning into it would trigger
    # pandas' SettingWithCopyWarning), or it may still be the very object the
    # cleaning step returned, which should not be mutated in place.
    trainings_df = trainings_df.copy()

    # Convert the start date to an actual datetime, then filter out all
    # future events if they are not desired by the user.
    trainings_df[NEOSERRA_COLUMNS.start_date] = pd.to_datetime(
        trainings_df[NEOSERRA_COLUMNS.start_date],
        format="%m/%d/%Y",
    )
    if not include_future_events:
        trainings_df = trainings_df[trainings_df[NEOSERRA_COLUMNS.start_date].dt.date < datetime.date.today()]

    if not include_on_demand:
        trainings_df = trainings_df[trainings_df[NEOSERRA_COLUMNS.program_format] != Constants.ON_DEMAND_VALUE.value]

    return trainings_df