from typing import List
import datetime

import pandas as pd
import streamlit as st

from .shared import cached_csv_url_to_dataframe
from section_1_datasets_module import generate_cleaned_trainings_dataset, generate_center_trainings_count_statistics
from constants_module import NEOSERRA_COLUMNS, OUT_COLUMNS, Constants


@st.cache_data
def cached_generate_center_trainings_count_statistics(
    export_url: str,
    reportable_only: bool,
    include_future_events: bool,
    include_on_demand: bool,
    allowed_centers: List[str] | None = None,
):
    """Return the per-center training-count statistics for a filtered export.

    The cleaned/filtered trainings dataset is obtained from
    ``cached_generate_cleaned_trainings_dataset`` and then handed to
    ``generate_center_trainings_count_statistics`` together with the subset of
    rows whose attendee total is zero.

    Args:
        export_url: Forwarded to ``cached_generate_cleaned_trainings_dataset``.
        reportable_only: Forwarded; keeps only rows flagged as reportable.
        include_future_events: Forwarded; when False, events that have not
            started before today are dropped.
        include_on_demand: Forwarded; when False, on-demand rows are dropped.
        allowed_centers: Forwarded; optional whitelist of center values.

    Returns:
        Whatever ``generate_center_trainings_count_statistics`` returns
        (presumably a DataFrame of statistics -- confirm against that helper).
    """
    trainings_df = cached_generate_cleaned_trainings_dataset(
        export_url,
        reportable_only=reportable_only,
        allowed_centers=allowed_centers,
        include_future_events=include_future_events,
        include_on_demand=include_on_demand,
    )

    # Attendee totals may contain non-numeric values; those (and missing
    # values) are treated as 0 for the purpose of the zero-attendee mask.
    attendees_numeric = pd.to_numeric(
        trainings_df[NEOSERRA_COLUMNS.attendees_total], errors='coerce'
    ).fillna(0)

    stats_df = generate_center_trainings_count_statistics(
        full_df=trainings_df,
        # Subset of trainings with an attendee total of zero.
        filtered_df=trainings_df[attendees_numeric == 0],
        funding_source_group=['Core Services', 'LEXNET', 'PDA', 'NAP'],
        col_primary_topic=NEOSERRA_COLUMNS.primary_training_topic,
        col_center=NEOSERRA_COLUMNS.center,
        col_funding_source=NEOSERRA_COLUMNS.funding_source,
        col_attendees_total=NEOSERRA_COLUMNS.attendees_total,
        col_is_preplanning=OUT_COLUMNS.is_preplanning,
    )
    return stats_df


@st.cache_data
def cached_generate_cleaned_trainings_dataset(
    export_url: str,
    reportable_only: bool,
    include_future_events: bool,
    include_on_demand: bool,
    allowed_centers: List[str] | None = None,
):
    """Load the trainings export, clean it, and apply the user-selected filters.

    Steps, in order:
      1. Load the CSV via ``cached_csv_url_to_dataframe(export_url)``.
      2. Clean it with ``generate_cleaned_trainings_dataset``.
      3. Optionally restrict to ``allowed_centers``.
      4. Optionally keep only rows whose reportable column equals 1.
      5. Parse the start-date column (``%m/%d/%Y``) into datetimes.
      6. Optionally drop events that do not start strictly before today.
      7. Optionally drop rows whose program format is the on-demand value.

    Args:
        export_url: Location passed to ``cached_csv_url_to_dataframe``.
        reportable_only: When True, keep only rows where the reportable
            column equals 1.
        include_future_events: When False, keep only rows whose start date is
            strictly earlier than today's date.
        include_on_demand: When False, drop rows whose program format equals
            ``Constants.ON_DEMAND_VALUE.value``.
        allowed_centers: When not None, keep only rows whose center value is
            in this list.

    Returns:
        The filtered DataFrame, with the start-date column converted to
        datetimes.

    Raises:
        ValueError: Propagated from ``pd.to_datetime`` when a start-date value
            does not match the ``%m/%d/%Y`` format.
    """
    raw_df = cached_csv_url_to_dataframe(export_url)
    trainings_df = generate_cleaned_trainings_dataset(
        raw_df,
        col_neo_event_title=NEOSERRA_COLUMNS.event_title,
        col_neo_primary_topic=NEOSERRA_COLUMNS.primary_training_topic,
        col_neo_training_topics=NEOSERRA_COLUMNS.training_topics,
        col_neo_center=NEOSERRA_COLUMNS.center,
        col_is_preplanning=OUT_COLUMNS.is_preplanning,
        col_neo_attendees_total=NEOSERRA_COLUMNS.attendees_total,
        col_out_attendees_range=OUT_COLUMNS.attendees_range,
    )

    if allowed_centers is not None:
        trainings_df = trainings_df[trainings_df[NEOSERRA_COLUMNS.center].isin(allowed_centers)]

    if reportable_only:
        trainings_df = trainings_df[trainings_df[NEOSERRA_COLUMNS.reportable] == 1]

    # The frame may be a boolean-mask slice at this point. Take an explicit
    # copy before writing a column so pandas does not emit
    # SettingWithCopyWarning and the write can never reach the parent frame.
    trainings_df = trainings_df.copy()

    # Convert the start date to an actual datetime, then filter out all future
    # events if they are not desired by the user.
    trainings_df[NEOSERRA_COLUMNS.start_date] = pd.to_datetime(
        trainings_df[NEOSERRA_COLUMNS.start_date], format="%m/%d/%Y"
    )
    if not include_future_events:
        # NOTE: the comparison is strict, so events starting today are
        # excluded along with future ones.
        trainings_df = trainings_df[
            trainings_df[NEOSERRA_COLUMNS.start_date].dt.date < datetime.date.today()
        ]

    if not include_on_demand:
        trainings_df = trainings_df[
            trainings_df[NEOSERRA_COLUMNS.program_format] != Constants.ON_DEMAND_VALUE.value
        ]

    return trainings_df