from typing import List

import streamlit as st

from constants_module import NEOSERRA_COLUMNS, OUT_COLUMNS
from milestone_attribution_dataset_module import sanitize_nbs_data
from cached_function_wrappers.shared import cached_csv_url_to_dataframe
from pasbdc_data_cleaning import remove_duplicate_client_records


@st.cache_data
def cached_get_nbs_data(
    export_url: str,
    reportable_only: bool,
    allowed_centers: List[str] | None = None,
):
    """Load, clean and optionally filter the NBS export, cached by Streamlit.

    The export is fetched with ``cached_csv_url_to_dataframe``, passed through
    ``sanitize_nbs_data`` using the column names configured on
    ``NEOSERRA_COLUMNS`` / ``OUT_COLUMNS``, and then de-duplicated with
    ``remove_duplicate_client_records``. The optional filters are applied
    afterwards, center filter first.

    Args:
        export_url: URL of the CSV export to load.
        reportable_only: When true, keep only rows whose
            ``NEOSERRA_COLUMNS.reportable`` column equals ``1``.
        allowed_centers: When not ``None``, keep only rows whose
            ``NEOSERRA_COLUMNS.center`` value is in this list. ``None``
            disables the center filter; an empty list is still applied as a
            filter.

    Returns:
        The cleaned (and possibly filtered) dataframe.
    """
    raw_export = cached_csv_url_to_dataframe(export_url)

    # Column names and marker values the sanitizer needs, gathered in one
    # place and forwarded as keyword arguments.
    sanitizer_options = {
        "col_neo_center": NEOSERRA_COLUMNS.center,
        "col_neo_client_id": NEOSERRA_COLUMNS.client_id,
        "col_neo_milestone_date": NEOSERRA_COLUMNS.milestone_date,
        "col_neo_attribution_date": NEOSERRA_COLUMNS.attribution_date,
        "col_neo_attribution_source": NEOSERRA_COLUMNS.milestone_attribution_source,
        "col_neo_affirmation": NEOSERRA_COLUMNS.milestone_affirmation,
        "col_neo_milestone_type": NEOSERRA_COLUMNS.milestone_type_name,
        "col_out_documentation_level": OUT_COLUMNS.milestone_documentation_level,
        "col_neo_reportable": NEOSERRA_COLUMNS.reportable,
        "business_start_impact_val": NEOSERRA_COLUMNS.business_start_impact_val,
        "business_established_val": NEOSERRA_COLUMNS.business_established_val,
    }
    cleaned = sanitize_nbs_data(raw_export, **sanitizer_options)
    result = remove_duplicate_client_records(cleaned)

    # `is not None` (not truthiness): an explicit empty list must still filter.
    if allowed_centers is not None:
        in_allowed_center = result[NEOSERRA_COLUMNS.center].isin(allowed_centers)
        result = result[in_allowed_center]

    if reportable_only:
        is_reportable = result[NEOSERRA_COLUMNS.reportable] == 1
        result = result[is_reportable]

    return result