298 lines
14 KiB
Python
298 lines
14 KiB
Python
# Python modules
|
|
import datetime
|
|
from typing import Dict, Any, List
|
|
import logging
|
|
|
|
from pygments.styles import default
|
|
# Third Party Libraries
|
|
from streamlit.delta_generator import DeltaGenerator
|
|
from fiscalyear import *
|
|
from plotly.graph_objects import Figure
|
|
import streamlit as st
|
|
import pandas as pd
|
|
|
|
# Imports from this module
|
|
from cached_function_wrappers.client_list_cached_functions import cached_generate_client_naics_dataset, filter_df_by_naics_codes, cached_get_pa_naics_source_data, cached_get_bls_naics11_data, cached_csv_url_to_dataframe, cached_create_naics_census_percentage_table, cached_get_county_dataset
|
|
from cached_function_wrappers.shared import get_df_centers
|
|
from streamlit_constants import USDA_API_KEY, CENSUS_YEAR, DASHBOARD_CONFIG_OBJECT_KEY
|
|
from utility_classes.base_report_page import BaseReportPage
|
|
from utility_classes.figure_with_max_y import FigureWithMaxY
|
|
|
|
# Imports from the script version of the reports
|
|
from constants_module import OUT_COLUMNS, NEOSERRA_COLUMNS
|
|
from section_1_graph_library_module import make_census_naics_chart, make_client_census_comparison_graph, make_county_heatmap
|
|
from utility_classes.dashboard_config_parser import DashboardConfig, ExportModulePair
|
|
|
|
class NaicsReportPage(BaseReportPage):
    """
    Report page comparing client NAICS distributions against census baselines.

    Loads the client list export for the selected fiscal year, the census NAICS
    percentage table and a per-county dataset, then renders a client/census
    comparison chart, a census NAICS chart and a county heatmap of missing
    client NAICS codes.

    The constructor takes no arguments. The values below are read from
    ``self.app_config``, which is presumably provided by ``BaseReportPage``
    (not visible in this file -- confirm there), as are ``self.logger`` and
    ``self.get_widget_key``.

    :ivar usda_api_key: Result of ``app_config.get_usda_api_key()``; forwarded to the cached dataset loaders.
    :ivar census_year: Result of ``app_config.get_census_year()``; forwarded to the cached dataset loaders.
    :ivar fiscal_year: ``FiscalYear.current()`` evaluated at construction time.
    :ivar prev_fiscal_year: The fiscal year immediately before ``fiscal_year``.
    :ivar fiscal_year_text: Label for the current fiscal year, e.g. ``FY25``.
    :ivar prev_fiscal_year_text: Label for the previous fiscal year, same format.
    """

    def __init__(self):
        """
        Reads the API settings from the application config and fixes the fiscal years on offer.

        The current and previous fiscal years (and their ``FYxx`` labels) are captured
        once here and reused by the controls and by the export URL lookup.
        """
        # Single source of truth for the page name: reuse the static accessor.
        super().__init__(self.get_page_name())
        self.usda_api_key = self.app_config.get_usda_api_key()
        self.census_year = self.app_config.get_census_year()

        # The report offers the current fiscal year and the one before it.
        self.fiscal_year = FiscalYear.current()
        self.prev_fiscal_year = self.fiscal_year.prev_fiscal_year

        # Labels keep everything after the first two characters of the year, e.g. 2025 -> "FY25".
        self.fiscal_year_text = f'FY{str(self.fiscal_year.fiscal_year)[2:]}'
        self.prev_fiscal_year_text = f'FY{str(self.prev_fiscal_year.fiscal_year)[2:]}'

    def _log_error_and_stop(self, target: Any, log_message: str, user_message: str) -> None:
        """
        Logs the active exception, shows an error to the user and halts the script run.

        Must be called from inside an ``except`` block so that ``logger.exception``
        can attach the traceback of the exception being handled.

        :param target: Object exposing ``.error(...)`` -- a Streamlit container or the ``st`` module.
        :type target: Any
        :param log_message: Fully formatted message written to the log.
        :type log_message: str
        :param user_message: Message shown on screen; the configured contact string is appended after a space.
        :type user_message: str
        """
        self.logger.exception(log_message)
        target.error(f"{user_message} {self.app_config.get_errors_contact_string()}")
        st.stop()

    def get_syncable_figure_keys(self) -> List[str]:
        """
        Names the figures whose Y-axis may be synchronized by an outside caller.

        Only the client/census comparison graph is exposed; the other figures are
        returned by ``generate_figures`` with ``max_y=0.0``.

        :return: A list containing the output key of the comparison graph figure.
        :rtype: List[str]
        """
        return ["comparison_graph_fig"]

    def get_fiscal_year_export_url(self, selected_fiscal_year) -> str:
        """
        Picks the client list export URL matching the selected fiscal year label.

        Any label other than the current fiscal year's label resolves to the
        previous fiscal year's URL.

        :param selected_fiscal_year: Fiscal year label as produced by the fiscal year selectbox (``FYxx``).
        :type selected_fiscal_year: str
        :return: The export URL for the chosen year (documented as ``str``; the
            actual attribute type lives in ``ExportModulePair`` -- confirm there).
        :rtype: str
        """
        export_urls: ExportModulePair = self.app_config.get_clients_list_urls()
        if selected_fiscal_year == self.fiscal_year_text:
            return export_urls.current_fy
        return export_urls.prev_fy

    def render_controls(self, container: DeltaGenerator) -> Dict[str, Any]:
        """
        Draws the report options and returns the user's selections.

        "Dataset Options" (fiscal year, centers) change which data is loaded;
        "View Options" (NAICS codes) only choose which codes are kept when the
        figures are generated. The NAICS code choices are derived from the client
        dataset, so that dataset is loaded here as well as in ``generate_figures``.

        If the center list or the client dataset cannot be loaded, the error is
        logged, shown in ``container`` and the script run is stopped.

        :param container: The Streamlit container to draw the widgets onto.
        :type container: DeltaGenerator
        :return: Dictionary with keys ``selected_fiscal_year``, ``selected_centers``
            and ``selected_naics_codes``.
        :rtype: Dict[str, Any]
        """
        report_settings_expander = container.expander(
            label="Report Options",
            expanded=True,
            key=self.get_widget_key("report_settings_expander")
        )

        report_settings_expander.markdown("## Dataset Options")
        report_settings_expander.markdown("These settings will modify the input dataset used to generate the graphs.")
        selected_fiscal_year = report_settings_expander.selectbox(
            label="Fiscal Year",
            options=[self.prev_fiscal_year_text, self.fiscal_year_text],
            index=1,  # default to the current fiscal year (second option)
            key=self.get_widget_key("selected_fiscal_year_selectbox")
        )

        export_url = self.get_fiscal_year_export_url(selected_fiscal_year)

        try:
            all_centers = get_df_centers(export_url)
        except Exception as e:
            self._log_error_and_stop(
                container,
                f"Failed to fetch the dataset for this page: {e}",
                "Failed to get the list of all centers for the dataset for this page. A detailed error message has been added to the logs.",
            )

        selected_centers = report_settings_expander.multiselect(
            label="Centers",
            options=all_centers,
            default=all_centers,
            key=self.get_widget_key("selected_centers_multiselect")
        )

        report_settings_expander.markdown("## View Options")
        report_settings_expander.markdown(
            "These settings WILL NOT modify the input dataset, but will show or hide bars on the graph.")

        # Load the dataframe from the export module. Guarded the same way as the
        # identical call in generate_figures so a loader failure yields a logged,
        # user-facing error instead of an unhandled exception.
        try:
            client_list_df = cached_generate_client_naics_dataset(
                export_url, self.usda_api_key, self.census_year, selected_centers)
        except Exception as e:
            self._log_error_and_stop(
                container,
                f"Failed to load the dataset for this page, got {e}",
                "Failed to load the dataset for this page. A detailed error has been added to the logs.",
            )

        # Setup the filter that lets users show or hide bars from the graph
        all_naics_codes = client_list_df[OUT_COLUMNS.naics_2].unique()
        selected_naics_codes = report_settings_expander.multiselect(
            label="NAICS Codes",
            options=all_naics_codes,
            default=all_naics_codes,
            # ':g' formatting assumes the codes are numeric -- TODO confirm the column dtype.
            format_func=lambda val: f'{val:g}',
            key=self.get_widget_key("selected_naics_multiselect")
        )

        return {
            "selected_fiscal_year": selected_fiscal_year,
            "selected_centers": selected_centers,
            "selected_naics_codes": selected_naics_codes,
        }

    def generate_figures(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
        """
        Loads the datasets for the given selections and builds the three figures.

        Stops the script run with a warning when no center is selected, and with a
        logged error when a dataset load or a figure build fails.

        The comparison graph's ``max_y`` is the maximum of the client (PASBDC)
        percentage column of the NAICS-filtered client data, or ``0.0`` when that
        maximum is NaN. The other two figures always report ``max_y=0.0``.

        :param parameters: The dictionary of user inputs returned by ``render_controls``.
        :type parameters: Dict[str, Any]
        :return: Dictionary with ``FigureWithMaxY`` entries under ``comparison_graph_fig``,
            ``census_naics_fig`` and ``heatmap_fig``, plus the dataframes under
            ``client_list_df``, ``naics_df``, ``filtered_df`` and ``county_df``.
        :rtype: Dict[str, Any]
        """
        selected_fiscal_year: str = parameters["selected_fiscal_year"]
        selected_centers: List[str] = parameters["selected_centers"]
        selected_naics_codes: List[int] = parameters["selected_naics_codes"]

        export_url = self.get_fiscal_year_export_url(selected_fiscal_year)

        # Guard clause: nothing can be computed without at least one center.
        if len(selected_centers) == 0:
            st.warning("At least 1 center must be selected!")
            st.stop()

        try:
            client_list_df = cached_generate_client_naics_dataset(
                export_url, self.usda_api_key, self.census_year, selected_centers)

            naics_df = cached_create_naics_census_percentage_table(self.usda_api_key, self.census_year)

            filtered_df = filter_df_by_naics_codes(client_list_df, selected_naics_codes)

            # Use the same configured key/year as the loaders above; the module-level
            # constants could disagree with the application config.
            county_df = cached_get_county_dataset(
                export_url,
                self.usda_api_key,
                self.census_year,
                selected_centers
            )
        except Exception as e:
            self._log_error_and_stop(
                st,
                f"Failed to load the dataset for this page, got {e}",
                "Failed to load the dataset for this page. A detailed error has been added to the logs.",
            )

        try:
            comparison_graph_fig = make_client_census_comparison_graph(
                naics_df,
                filtered_df,
                title=f'Comparison between PA Census NAICS code distribution and PASBDC client NAICs distribution {selected_fiscal_year}',
                naics_df_naics_code_column_name=OUT_COLUMNS.unified_naics,
                naics_df_naics_label_column_name=OUT_COLUMNS.naics_label,
                naics_df_census_percentage_column_name=OUT_COLUMNS.census_pct,
                client_df_naics2_column_name=OUT_COLUMNS.naics_2,
                client_df_census_percentage=OUT_COLUMNS.pa_naics_pct,
                client_df_pasbdc_percentage=OUT_COLUMNS.pasbdc_pct
            )
            comparison_graph_fig.update_layout(width=799, height=900)
            # Only the client percentage column feeds max_y; max() is NaN when the
            # column has no usable values (e.g. nothing left after filtering).
            comparison_graph_max_y = filtered_df[OUT_COLUMNS.pasbdc_pct].max()
            if pd.isna(comparison_graph_max_y):
                comparison_graph_max_y = 0.0
        except Exception as e:
            self._log_error_and_stop(
                st,
                f"Failed to generate the SBDC vs Census NAICS comparison figure for this page, got {e}",
                "Failed to generate the SBDC vs Census NAICS comparison figure for this page. A detailed error has been added to the logs.",
            )

        try:
            census_naics_fig = make_census_naics_chart(
                naics_df,
                naics_column_name=OUT_COLUMNS.unified_naics,
                label_column_name=OUT_COLUMNS.naics_label,
                census_data_column_name=OUT_COLUMNS.census_pct)
        except Exception as e:
            self._log_error_and_stop(
                st,
                f"Failed to generate the PA census naics table figure for this page, got {e}",
                "Failed to generate the PA census naics table figure for this page. A detailed error has been added to the logs.",
            )

        try:
            heatmap_fig = make_county_heatmap(
                county_df,
                value_column=OUT_COLUMNS.pct_missing_naics,
                title=f'Missing Client NAICS Codes Per County {selected_fiscal_year}',
            )
            heatmap_fig.update_layout(height=799)
        except Exception as e:
            self._log_error_and_stop(
                st,
                f"Failed to generate the county missing NAICS heatmap figure for this page, got {e}",
                "Failed to generate the county missing NAICS heatmap figure for this page. A detailed error has been added to the logs.",
            )

        return {
            "comparison_graph_fig": FigureWithMaxY(figure=comparison_graph_fig, max_y=comparison_graph_max_y),
            "census_naics_fig": FigureWithMaxY(figure=census_naics_fig, max_y=0.0),
            "heatmap_fig": FigureWithMaxY(figure=heatmap_fig, max_y=0.0),
            "client_list_df": client_list_df,
            "naics_df": naics_df,
            "filtered_df": filtered_df,
            "county_df": county_df
        }

    def render_figures(self, container: DeltaGenerator, output_data: Dict[str, Any]) -> None:
        """
        Draws the three figures and the source dataframes into the container.

        The figures are drawn in order (comparison graph, census NAICS chart,
        county heatmap), followed by an expanded "Source Datasets" section that
        shows the client list, census and county dataframes.

        :param container: The Streamlit container to draw the visuals onto.
        :type container: DeltaGenerator
        :param output_data: The dictionary of figures and dataframes returned by ``generate_figures``.
        :type output_data: Dict[str, Any]
        """
        # NOTE(review): item access assumes FigureWithMaxY is subscriptable
        # (e.g. a TypedDict) -- confirm against its definition.
        comparison_graph_fig: Figure = output_data["comparison_graph_fig"]["figure"]
        census_naics_fig: Figure = output_data["census_naics_fig"]["figure"]
        heatmap_fig: Figure = output_data["heatmap_fig"]["figure"]

        client_list_df = output_data["client_list_df"]
        naics_df = output_data["naics_df"]
        county_df = output_data["county_df"]

        container.plotly_chart(comparison_graph_fig, key=self.get_widget_key("comparison_graph"))
        container.plotly_chart(census_naics_fig, key=self.get_widget_key("census_naics_table"))
        container.plotly_chart(heatmap_fig, key=self.get_widget_key("heatmap_graph"))
        dataset_expander = container.expander(
            label="Source Datasets",
            expanded=True,
            key=self.get_widget_key("dataset_expander")
        )

        dataset_expander.markdown("## Source Data")
        dataset_expander.markdown("### Neoserra Client List")
        dataset_expander.write(client_list_df)
        dataset_expander.markdown("### PA Census Data")
        dataset_expander.write(naics_df)
        dataset_expander.markdown("### Per County Missing NAICS Data")
        dataset_expander.write(county_df)

    @staticmethod
    def get_page_name() -> str:
        """
        Returns the human-readable name of this report.

        Also used by ``__init__`` as the name handed to ``BaseReportPage``.

        :return: The display name of the report.
        :rtype: str
        """
        return "NAICS Report"