# Python modules
import datetime
from typing import Dict, Any, List
import logging

from pygments.styles import default

# Third Party Libraries
from streamlit.delta_generator import DeltaGenerator
from fiscalyear import *
from plotly.graph_objects import Figure
import streamlit as st
import pandas as pd

# Imports from this module
from cached_function_wrappers.client_list_cached_functions import (
    cached_generate_client_naics_dataset,
    filter_df_by_naics_codes,
    cached_get_pa_naics_source_data,
    cached_get_bls_naics11_data,
    cached_csv_url_to_dataframe,
    cached_create_naics_census_percentage_table,
    cached_get_county_dataset,
)
from cached_function_wrappers.shared import get_df_centers
from streamlit_constants import USDA_API_KEY, CENSUS_YEAR, DASHBOARD_CONFIG_OBJECT_KEY
from utility_classes.base_report_page import BaseReportPage
from utility_classes.figure_with_max_y import FigureWithMaxY

# Imports from the script version of the reports
from constants_module import OUT_COLUMNS, NEOSERRA_COLUMNS
from section_1_graph_library_module import (
    make_census_naics_chart,
    make_client_census_comparison_graph,
    make_county_heatmap,
)
from utility_classes.dashboard_config_parser import DashboardConfig, ExportModulePair


class NaicsReportPage(BaseReportPage):
    """
    Report page comparing client NAICS distributions against census baselines.

    The page renders three figures (a client/census comparison graph, a census
    NAICS chart and a per-county heatmap of missing NAICS codes) together with
    the dataframes they were built from.

    The constructor takes no arguments; the values below are read from
    ``self.app_config`` (presumably provided by ``BaseReportPage`` -- confirm).

    :ivar usda_api_key: Key passed to the cached dataset loaders.
    :ivar census_year: Census year passed to the cached dataset loaders.
    :ivar fiscal_year: The current fiscal year at construction time.
    :ivar prev_fiscal_year: The fiscal year preceding ``fiscal_year``.
    :ivar fiscal_year_text: Label for ``fiscal_year`` of the form ``FYnn``.
    :ivar prev_fiscal_year_text: Label for ``prev_fiscal_year`` of the form ``FYnn``.
    """

    def __init__(self):
        """
        Initialize the page title, API settings and fiscal year labels.

        Reads the USDA API key and census year from the application
        configuration and records the current and previous fiscal years so the
        controls can offer both as dataset choices.
        """
        super().__init__("NAICS Report")
        self.usda_api_key = self.app_config.get_usda_api_key()
        self.census_year = self.app_config.get_census_year()

        # Anchor on the *current* fiscal year; the previous one is derived from it.
        self.fiscal_year = FiscalYear.current()
        self.prev_fiscal_year = self.fiscal_year.prev_fiscal_year

        # Labels keep only the characters after the first two of the year
        # number (e.g. 2025 -> "FY25").
        self.fiscal_year_text = f'FY{str(self.fiscal_year.fiscal_year)[2:]}'
        self.prev_fiscal_year_text = f'FY{str(self.prev_fiscal_year.fiscal_year)[2:]}'

    def get_syncable_figure_keys(self) -> List[str]:
        """
        Name the figures whose Y-axis may be synchronized by a caller.

        Only the comparison graph is listed; it is the only figure for which
        generate_figures computes a non-zero ``max_y``.

        :return: A list containing the output key of the comparison graph figure.
        :rtype: List[str]
        """
        return ["comparison_graph_fig"]

    def get_fiscal_year_export_url(self, selected_fiscal_year: str):
        """
        Pick the client list export URL matching the selected fiscal year.

        :param selected_fiscal_year: Fiscal year label chosen in the controls.
        :type selected_fiscal_year: str
        :return: The current-year URL when the label equals ``fiscal_year_text``;
            the previous-year URL for any other value.
        :rtype: str
        """
        export_urls: ExportModulePair = self.app_config.get_clients_list_urls()
        if selected_fiscal_year == self.fiscal_year_text:
            return export_urls.current_fy
        return export_urls.prev_fy

    def render_controls(self, container: DeltaGenerator) -> Dict[str, Any]:
        """
        Draw the report option widgets and collect their values.

        Two groups of widgets are drawn inside a "Report Options" expander:
        dataset options (fiscal year, centers) that change which data is
        loaded, and view options (NAICS codes) that only show or hide bars.

        If the list of centers cannot be fetched, the error is logged, an error
        message is shown on ``container`` and the script run is stopped.

        :param container: The Streamlit container to draw the widgets onto.
        :type container: DeltaGenerator
        :return: A dictionary with the keys ``selected_fiscal_year``,
            ``selected_centers`` and ``selected_naics_codes``.
        :rtype: Dict[str, Any]
        """
        report_settings_expander = container.expander(
            label="Report Options",
            expanded=True,
            key=self.get_widget_key("report_settings_expander"),
        )

        report_settings_expander.markdown("## Dataset Options")
        report_settings_expander.markdown(
            "These settings will modify the input dataset used to generate the graphs."
        )

        # index=1 preselects the current fiscal year (second entry of options).
        selected_fiscal_year = report_settings_expander.selectbox(
            label="Fiscal Year",
            options=[self.prev_fiscal_year_text, self.fiscal_year_text],
            index=1,
            key=self.get_widget_key("selected_fiscal_year_selectbox"),
        )
        export_url = self.get_fiscal_year_export_url(selected_fiscal_year)

        try:
            all_centers = get_df_centers(export_url)
        except Exception as e:
            self.logger.exception(f"Failed to fetch the dataset for this page: {e}")
            container.error(
                "Failed to get the list of all centers for the dataset for this page. "
                "A detailed error message has been added to the logs. "
                f"{self.app_config.get_errors_contact_string()}"
            )
            st.stop()

        selected_centers = report_settings_expander.multiselect(
            label="Centers",
            options=all_centers,
            default=all_centers,
            key=self.get_widget_key("selected_centers_multiselect"),
        )

        report_settings_expander.markdown("## View Options")
        report_settings_expander.markdown(
            "These settings WILL NOT modify the input dataset, but will show or hide bars on the graph."
        )

        # Load the dataframe from the export module.
        # NOTE(review): this call is not wrapped in error handling and runs
        # even when no centers are selected -- confirm the loader copes with
        # an empty selection.
        client_list_df = cached_generate_client_naics_dataset(
            export_url, self.usda_api_key, self.census_year, selected_centers
        )

        # Setup the filter that lets users show or hide bars from the graph
        all_naics_codes = client_list_df[OUT_COLUMNS.naics_2].unique()
        selected_naics_codes = report_settings_expander.multiselect(
            label="NAICS Codes",
            options=all_naics_codes,
            default=all_naics_codes,
            # ':g' drops the trailing ".0" when the codes are floats.
            format_func=lambda val: f'{val:g}',
            key=self.get_widget_key("selected_naics_multiselect"),
        )

        return {
            "selected_fiscal_year": selected_fiscal_year,
            "selected_centers": selected_centers,
            "selected_naics_codes": selected_naics_codes,
        }

    def generate_figures(self, parameters: Dict[str, Any]):
        """
        Load the datasets and build the three figures for this report.

        Each stage (dataset loading, comparison graph, census chart, county
        heatmap) has its own error handling: on failure the exception is
        logged, an error is shown to the user and the script run is stopped.
        When no centers are selected a warning is shown and the run is stopped
        before any data is loaded.

        :param parameters: The dictionary returned by render_controls.
        :type parameters: Dict[str, Any]
        :return: A dictionary with FigureWithMaxY entries under
            ``comparison_graph_fig``, ``census_naics_fig`` and ``heatmap_fig``,
            and dataframes under ``client_list_df``, ``naics_df``,
            ``filtered_df`` and ``county_df``.
        :rtype: Dict[str, Any]
        """
        selected_fiscal_year: str = parameters["selected_fiscal_year"]
        selected_centers: List[str] = parameters["selected_centers"]
        selected_naics_codes: List[int] = parameters["selected_naics_codes"]
        export_url = self.get_fiscal_year_export_url(selected_fiscal_year)

        # Guard clause: nothing can be plotted without at least one center.
        if not selected_centers:
            st.warning("At least 1 center must be selected!")
            st.stop()

        try:
            client_list_df = cached_generate_client_naics_dataset(
                export_url, self.usda_api_key, self.census_year, selected_centers
            )
            naics_df = cached_create_naics_census_percentage_table(
                self.usda_api_key, self.census_year
            )
            filtered_df = filter_df_by_naics_codes(client_list_df, selected_naics_codes)
            # Use the same configured key/year as the loaders above so every
            # figure on the page is built against one census baseline
            # (previously this call used the module-level constants).
            county_df = cached_get_county_dataset(
                export_url,
                self.usda_api_key,
                self.census_year,
                selected_centers,
            )
        except Exception as e:
            self.logger.exception(f"Failed to load the dataset for this page, got {e}")
            st.error(
                "Failed to load the dataset for this page. "
                "A detailed error has been added to the logs. "
                f"{self.app_config.get_errors_contact_string()}"
            )
            st.stop()

        try:
            comparison_graph_fig = make_client_census_comparison_graph(
                naics_df,
                filtered_df,
                title=f'Comparison between PA Census NAICS code distribution and PASBDC client NAICs distribution {selected_fiscal_year}',
                naics_df_naics_code_column_name=OUT_COLUMNS.unified_naics,
                naics_df_naics_label_column_name=OUT_COLUMNS.naics_label,
                naics_df_census_percentage_column_name=OUT_COLUMNS.census_pct,
                client_df_naics2_column_name=OUT_COLUMNS.naics_2,
                client_df_census_percentage=OUT_COLUMNS.pa_naics_pct,
                client_df_pasbdc_percentage=OUT_COLUMNS.pasbdc_pct,
            )
            comparison_graph_fig.update_layout(width=799, height=900)

            # max() is NaN for an empty/all-NaN column; report 0.0 instead so
            # callers comparing max_y values get a usable number.
            comparison_graph_max_y = filtered_df[OUT_COLUMNS.pasbdc_pct].max()
            if pd.isna(comparison_graph_max_y):
                comparison_graph_max_y = 0.0
        except Exception as e:
            self.logger.exception(
                f"Failed to generate the SBDC vs Census NAICS comparison figure for this page, got {e}"
            )
            st.error(
                "Failed to generate the SBDC vs Census NAICS comparison figure for this page. "
                "A detailed error has been added to the logs. "
                f"{self.app_config.get_errors_contact_string()}"
            )
            st.stop()

        try:
            census_naics_fig = make_census_naics_chart(
                naics_df,
                naics_column_name=OUT_COLUMNS.unified_naics,
                label_column_name=OUT_COLUMNS.naics_label,
                census_data_column_name=OUT_COLUMNS.census_pct,
            )
        except Exception as e:
            self.logger.exception(
                f"Failed to generate the PA census naics table figure for this page, got {e}"
            )
            st.error(
                "Failed to generate the PA census naics table figure for this page. "
                "A detailed error has been added to the logs. "
                f"{self.app_config.get_errors_contact_string()}"
            )
            st.stop()

        try:
            heatmap_fig = make_county_heatmap(
                county_df,
                value_column=OUT_COLUMNS.pct_missing_naics,
                title=f'Missing Client NAICS Codes Per County {selected_fiscal_year}',
            )
            heatmap_fig.update_layout(height=799)
        except Exception as e:
            self.logger.exception(
                f"Failed to generate the county missing NAICS heatmap figure for this page, got {e}"
            )
            st.error(
                "Failed to generate the county missing NAICS heatmap figure for this page. "
                "A detailed error has been added to the logs. "
                f"{self.app_config.get_errors_contact_string()}"
            )
            st.stop()

        # Only the comparison graph carries a meaningful max_y; the other two
        # figures are not listed by get_syncable_figure_keys and use 0.0.
        return {
            "comparison_graph_fig": FigureWithMaxY(figure=comparison_graph_fig, max_y=comparison_graph_max_y),
            "census_naics_fig": FigureWithMaxY(figure=census_naics_fig, max_y=0.0),
            "heatmap_fig": FigureWithMaxY(figure=heatmap_fig, max_y=0.0),
            "client_list_df": client_list_df,
            "naics_df": naics_df,
            "filtered_df": filtered_df,
            "county_df": county_df,
        }

    def render_figures(self, container: DeltaGenerator, output_data: Dict[str, Any]):
        """
        Draw the generated figures and their source dataframes.

        The three Plotly figures are drawn in order (comparison graph, census
        chart, county heatmap), followed by a "Source Datasets" expander that
        shows the client list, census and per-county dataframes.

        :param container: The Streamlit container to draw the visuals onto.
        :type container: DeltaGenerator
        :param output_data: The dictionary returned by generate_figures.
        :type output_data: Dict[str, Any]
        """
        # NOTE(review): entries are subscripted with ["figure"], so
        # FigureWithMaxY is assumed to be mapping-like -- confirm.
        comparison_graph_fig: Figure = output_data["comparison_graph_fig"]["figure"]
        census_naics_fig: Figure = output_data["census_naics_fig"]["figure"]
        heatmap_fig: Figure = output_data["heatmap_fig"]["figure"]
        client_list_df = output_data["client_list_df"]
        naics_df = output_data["naics_df"]
        county_df = output_data["county_df"]

        container.plotly_chart(comparison_graph_fig, key=self.get_widget_key("comparison_graph"))
        container.plotly_chart(census_naics_fig, key=self.get_widget_key("census_naics_table"))
        container.plotly_chart(heatmap_fig, key=self.get_widget_key("heatmap_graph"))

        dataset_expander = container.expander(
            label="Source Datasets",
            expanded=True,
            key=self.get_widget_key("dataset_expander"),
        )
        dataset_expander.markdown("## Source Data")
        dataset_expander.markdown("### Neoserra Client List")
        dataset_expander.write(client_list_df)
        dataset_expander.markdown("### PA Census Data")
        dataset_expander.write(naics_df)
        dataset_expander.markdown("### Per County Missing NAICS Data")
        dataset_expander.write(county_df)

    @staticmethod
    def get_page_name():
        """
        Return the human-readable name of this report.

        :return: The display name of the report.
        :rtype: str
        """
        return "NAICS Report"