first commit

This commit is contained in:
2026-05-21 08:40:24 -04:00
commit b084545275
711 changed files with 3659856 additions and 0 deletions

View File

@@ -0,0 +1,14 @@
# scripts/graph_generation_library/pyproject.toml
[build-system]
requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "section_1_graph_library_module"
version = "0.1.0"
description = "Internal PASBDC graph creation functions used to generate figures for the network wide desk reviews."
[tool.setuptools]
packages = ["section_1_graph_library_module"]

View File

@@ -0,0 +1,4 @@
Metadata-Version: 2.4
Name: section_1_graph_library_module
Version: 0.1.0
Summary: Internal PASBDC graph creation functions used to generate figures for the network wide desk reviews.

View File

@@ -0,0 +1,12 @@
pyproject.toml
section_1_graph_library_module/__init__.py
section_1_graph_library_module/counselling_interval_analysis.py
section_1_graph_library_module/funding_analysis.py
section_1_graph_library_module/naics_census_analysis.py
section_1_graph_library_module/nbs_analysis.py
section_1_graph_library_module/satisfaction_survey_analysis.py
section_1_graph_library_module/trainings_analysis.py
section_1_graph_library_module.egg-info/PKG-INFO
section_1_graph_library_module.egg-info/SOURCES.txt
section_1_graph_library_module.egg-info/dependency_links.txt
section_1_graph_library_module.egg-info/top_level.txt

View File

@@ -0,0 +1 @@
section_1_graph_library_module

View File

@@ -0,0 +1,82 @@
from .naics_census_analysis import (
make_census_naics_chart,
make_client_census_comparison_graph,
make_county_heatmap
)
from .funding_analysis import (
make_funding_attribution_network_wide,
make_funding_attribution_rate_chart,
make_theoretical_funding_attribution_rate_chart,
make_funding_director_confirmed_graph,
)
from .satisfaction_survey_analysis import (
make_survey_response_count_graph,
make_average_survey_score_graph,
make_responses_per_client_graph,
make_nps_graph
)
from .trainings_analysis import (
StatChartVariants,
make_network_trainings_count_statistics_charts,
make_attendee_bins_statistics_charts,
make_primary_training_topic_statistics_charts,
make_center_attendee_statistics_charts,
make_center_event_count_charts,
make_center_attendee_range_charts,
make_primary_training_topic_pie_charts,
build_total_trainings_count_chart,
build_total_trainings_percent_chart,
build_no_first_steps_count_chart,
build_no_first_steps_percent_chart,
build_no_first_no_pre_count_chart,
build_no_first_no_pre_percent_chart,
build_first_pre_only_count_chart,
build_first_pre_only_percent_chart,
build_ondemand_count_chart,
build_ondemand_percent_chart,
build_ondemand_no_first_count_chart,
build_ondemand_no_first_percent_chart,
build_ondemand_no_first_no_pre_count_chart,
build_ondemand_no_first_no_pre_percent_chart,
)
from .nbs_analysis import (
make_nbs_attribution_network_wide,
make_attribution_rate_chart,
make_theoretical_attribution_rate_chart,
make_director_confirmed_graph,
)
from .counselling_interval_analysis import (
make_interval_snapshot_chart
)
# Public API of the package: every name imported above for re-export is listed
# here so that `from section_1_graph_library_module import *` exposes the same
# set of functions as explicit imports do.
__all__ = [
    # naics_census_analysis
    'make_census_naics_chart',
    'make_client_census_comparison_graph',
    'make_county_heatmap',
    # funding_analysis
    'make_funding_attribution_network_wide',
    'make_funding_attribution_rate_chart',
    'make_theoretical_funding_attribution_rate_chart',
    'make_funding_director_confirmed_graph',
    # satisfaction_survey_analysis
    'make_survey_response_count_graph',
    'make_average_survey_score_graph',
    'make_responses_per_client_graph',
    'make_nps_graph',
    # trainings_analysis
    'make_network_trainings_count_statistics_charts',
    'make_attendee_bins_statistics_charts',
    'make_primary_training_topic_statistics_charts',
    'make_center_attendee_statistics_charts',
    'make_center_event_count_charts',
    'make_center_attendee_range_charts',
    'make_primary_training_topic_pie_charts',
    # trainings_analysis chart builders (previously imported but not exported)
    'build_total_trainings_count_chart',
    'build_total_trainings_percent_chart',
    'build_no_first_steps_count_chart',
    'build_no_first_steps_percent_chart',
    'build_no_first_no_pre_count_chart',
    'build_no_first_no_pre_percent_chart',
    'build_first_pre_only_count_chart',
    'build_first_pre_only_percent_chart',
    'build_ondemand_count_chart',
    'build_ondemand_percent_chart',
    'build_ondemand_no_first_count_chart',
    'build_ondemand_no_first_percent_chart',
    'build_ondemand_no_first_no_pre_count_chart',
    'build_ondemand_no_first_no_pre_percent_chart',
    # nbs_analysis
    'make_nbs_attribution_network_wide',
    'make_attribution_rate_chart',
    'make_theoretical_attribution_rate_chart',
    'make_director_confirmed_graph',
    'StatChartVariants',
    # counselling_interval_analysis
    'make_interval_snapshot_chart',
]

View File

@@ -0,0 +1,76 @@
# FILE: counselling_interval_analysis.py
# CREATED: 12/31/25
# AUTHOR: Vincent Allen
# CONTACT: vincent@vtallen.com valle276@live.kutztown.edu
# PURPOSE:
# This file contains graph generation functions to visualize the data from the PASBDC Date Interval Snapshot
# scorecard in neoserra
# Third party libraries
from pandas.core.indexes.base import JoinHow
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
# Python modules
from enum import Enum
from typing import Dict, List
# Custom packages
from constants_module import NEOSERRA_COLUMNS, OUT_COLUMNS
def make_interval_snapshot_chart(
    df:pd.DataFrame,
    title:str,
    fiscal_year_tag:str,
    col_interval_data_value:str=NEOSERRA_COLUMNS.interval_data_value,
    col_neo_center:str=NEOSERRA_COLUMNS.center,
    ):
    """
    :param df: The input dataset
    :param title: The title of the chart
    :param fiscal_year_tag: The fiscal year label appended to the title
    :param col_interval_data_value: The column whose mean is taken for each center
    :param col_neo_center: The column of df that identifies the center
    :return: go.Figure - The constructed plotly graph
    :description:
        Draws one bar per center showing the mean of col_interval_data_value, a dashed
        horizontal line at the mean over every row of df, and an annotation with the
        number of rows in df (labelled "Total clients").
    """
    # Network-level numbers come from the un-grouped rows.
    network_mean = df[col_interval_data_value].mean()
    row_count = df.shape[0]

    # One row per center holding that center's mean value.
    center_means = df.groupby(col_neo_center)[col_interval_data_value].mean()
    center_means_df = center_means.reset_index(name=col_interval_data_value) #pyright:ignore

    chart_title = f"{title} {fiscal_year_tag}"
    fig = px.bar(
        center_means_df,
        x=col_neo_center,
        y=col_interval_data_value,
        text=col_interval_data_value,
        title=chart_title,
        width=1400,
        height=1000,
        text_auto='.1f', #pyright:ignore
    )

    fig.update_traces(marker_color='#73e0c6')
    fig.update_layout(
        font_family="Futura",
        title_font_family="Futura",
        yaxis_title='Days',
    )

    # Row-count note placed just above the top-left corner of the plot area.
    fig.add_annotation(
        xref='paper',
        yref='paper',
        x=0.0,
        y=1.01,
        showarrow=False,
        text=f"Total clients: {row_count}",
    )

    # Dashed reference line at the network-wide mean.
    fig.add_hline(
        y=network_mean,
        line_dash="dash",
        line_color="#004649",
        annotation_text=f"Network Average: {network_mean:.1f}",
        annotation_position="top right",
    )
    return fig

View File

@@ -0,0 +1,377 @@
# FILE: funding_analysis.py
# CREATED: 12/23/25
# AUTHOR: Vincent Allen
# CONTACT: vincent@vtallen.com valle276@live.kutztown.edu
# PURPOSE:
# Contains the functions used to generate the plotly graphs for the capital funding analysis in the network wide desk reviews.
# external libraries
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
#python modules
# Custom modules
from constants_module import NEOSERRA_COLUMNS, OUT_COLUMNS
def make_funding_attribution_network_wide(
        funding_df: pd.DataFrame,
        fiscal_year:str,
        title: str = "Capital Funding Attributions Per Center",
        network_label: str = "Network Wide",
        graph_note: str = "<b>NOTE: Documentation levels were determined as follows.</b><br><br>"
        "<b>Documented:</br>Will be submitted to Nexus as long as 'Director Verified is checked'</b></br>There is a non-blank, non-'Requested on eCenter' attribution source</br>AND Affirmation Statement was non-blank</br></br>"
        "<b>Affirmation Statement Missing:</br>Will NOT be submitted to Nexus</b></br>Attribution source is non-blank, non-'Requested on eCenter'</br>BUT affirmation statement was blank.</br></br>"
        "<b>Not Documented:</br>Will NOT be submitted to Nexus</b></br>There is a non-blank, non-'Requested on eCenter' attribution source </br>with a value in the affirmation column. If the attribution source is</br>eCenter,then no value is required in the affirmation column.",
        col_neo_center: str = NEOSERRA_COLUMNS.center,
        col_documentation_level: str = OUT_COLUMNS.milestone_documentation_level
    ) -> go.Figure:
    """
    parameters:
        funding_df:pd.DataFrame - The capital funding analysis data
        fiscal_year:str - The fiscal year label to place at the end of the title
        title:str - The title to place on the graph
        network_label:str - When different from "Network Wide", every "Network Wide" / "Network" in the
                            title is replaced by this label; if the title does not contain "Network" the
                            label is prefixed to the title instead
        graph_note:str - The note to place on the bottom right of the graph explaining how documentation
                         levels were derived. An empty note disables the annotation and the extra margin
        col_neo_center:str - The column of the funding_df at which the center can be found
        col_documentation_level:str - The column of the funding_df at which the documentation level for the milestone can be found
    returns: go.Figure - The constructed figure object
    description:
        Counts the milestones per center and documentation level and visualizes them as a stacked bar
        graph of the documentation levels per center.
    """
    # Function-scope import: only needed for the single-pass title substitution below.
    import re

    display_title = title
    if network_label != "Network Wide":
        if "Network" in display_title:
            # Substitute in ONE pass. Chaining two str.replace calls would substitute again inside
            # the label that was just inserted whenever the label itself contains "Network"
            # (e.g. "PA Network" -> "PA PA Network"). A callable replacement keeps any
            # backslashes in the label literal.
            display_title = re.sub(r"Network(?: Wide)?", lambda _match: network_label, display_title)
        else:
            display_title = f"{network_label} {display_title}"

    # One row per (center, documentation level) with the number of milestones in that group.
    funding_agg_df = funding_df.groupby([col_neo_center, col_documentation_level]).size().reset_index(name='Count') #pyright:ignore
    funding_agg_df = funding_agg_df.sort_values(col_neo_center)

    # NOTE(review): the level names below are hard-coded; presumably they match the values written
    # to col_documentation_level by the analysis step - confirm against OUT_COLUMNS.
    desired_order = ["Documented", "Affirmation Missing", "Not Documented"]
    fig = px.bar(
        funding_agg_df,
        x=col_neo_center,
        y='Count',
        color=col_documentation_level,
        text='Count',
        color_discrete_map={"Documented":"#71bf44", "Affirmation Missing":"#ffba31", "Not Documented":"#004649"},
        category_orders={col_documentation_level: desired_order}
    )
    fig.update_traces(
        textposition='inside',
        textfont_size=12
    )
    fig.update_layout(
        xaxis_title='Center',
        yaxis_title='Attribution Counts',
        title=f"{display_title} {fiscal_year}",
        height=700,
        width=1500,
    )

    if graph_note:
        # Widen the right margin so the explanatory note fits beside the plot area.
        fig.update_layout(margin=dict(r=470))
        fig.add_annotation(x=1.49, y=-0.1, xref='paper', yref='paper', showarrow=False, align='left',
                           text=graph_note)
    return fig
def make_funding_attribution_rate_chart(
        funding_df: pd.DataFrame,
        fiscal_year: str,
        source_data_export_path: str = "",
        documented_tag: str = OUT_COLUMNS.val_documented,
        col_neo_center: str = NEOSERRA_COLUMNS.center,
        col_documentation_level: str = OUT_COLUMNS.milestone_documentation_level
    ) -> go.Figure:
    """
    parameters:
        funding_df:pd.DataFrame - The capital funding analysis data
        fiscal_year:str - The fiscal year label placed at the end of the title
        source_data_export_path:str - If a csv path is provided, the intermediate dataset is exported there
        documented_tag:str - The documentation level value that counts a milestone as documented
        col_neo_center:str - The column of the dataset containing the center for a milestone
        col_documentation_level:str - The column containing the documentation level assigned to a milestone
    returns: go.Figure
    description:
        Builds a bar graph of the share of funding milestones per center whose documentation level equals
        documented_tag, with a dashed line at the network-wide share. The intermediate per-center dataset
        (plus a "Network Total" row) is written to source_data_export_path when one is given.
    """
    network_row_label = 'Network Total'

    # Milestone counts for every (center, documentation level) pair
    level_counts = funding_df.groupby([col_neo_center, col_documentation_level]).size().reset_index(name='Count') #pyright:ignore

    # Denominator: all milestones per center
    per_center_total = level_counts.groupby(col_neo_center)['Count'].sum()

    # Numerator: only the milestones tagged as documented
    is_documented = level_counts[col_documentation_level] == documented_tag
    per_center_documented = level_counts[is_documented].groupby(col_neo_center)['Count'].sum()

    rates_df = pd.DataFrame({
        'Total': per_center_total,
        'Documented Count': per_center_documented
    })
    # Centers without any documented milestone are absent from the numerator series
    rates_df['Documented Count'] = rates_df['Documented Count'].fillna(0)
    rates_df['Percent Documented'] = rates_df['Documented Count'] / rates_df['Total']
    rates_df = rates_df.reset_index()

    # Network-wide summary row
    milestone_count = funding_df.shape[0]
    documented_count = rates_df['Documented Count'].sum()
    network_row = pd.DataFrame({
        col_neo_center: [network_row_label],
        'Total' : [milestone_count],
        'Documented Count': [documented_count],
        'Percent Documented': [documented_count / milestone_count]
    })
    rates_df = pd.concat([rates_df, network_row], ignore_index=True)

    # Save the derived dataset only if the user wants it
    if source_data_export_path:
        rates_df.to_csv(source_data_export_path, index=False)

    # The summary row is drawn as a reference line, not as a bar
    is_network_row = rates_df[col_neo_center] == network_row_label
    fig = px.bar(
        rates_df[~is_network_row],
        x=col_neo_center,
        y='Percent Documented',
        text='Percent Documented',
        color_discrete_sequence=['#71bf44']
    )

    net_total = rates_df.loc[is_network_row, 'Percent Documented'].iloc[0] #pyright:ignore
    fig.add_hline(
        y=net_total,
        line_dash="dash",
        line_color="#004649",
        annotation_text=f"Network Total: {net_total * 100:.1f}%",
        annotation_position="top left",
        annotation={"xref": "paper", "x": 1.02, "xanchor": "left"}
    )

    fig.update_traces(
        textposition='inside',
        textfont_size=12,
        texttemplate="%{text:.0%}"
    )
    fig.update_layout(
        xaxis_title='Center',
        yaxis_title='Documented Percentage',
        yaxis_tickformat='.0%',
        title=f'Capital Funding Attribution Rates Per Center {fiscal_year}',
        height=700,
        width=1500,
        margin=dict(r=150)
    )
    return fig
def make_theoretical_funding_attribution_rate_chart(
        funding_df: pd.DataFrame,
        fiscal_year: str,
        title: str = 'Documented Percentage if All Funding Milestones With an Attribution Source had an Affirmation',
        source_data_export_path: str = "",
        documented_tag: str = OUT_COLUMNS.val_documented,
        affirmation_missing_tag: str = OUT_COLUMNS.val_affirmation_missing,
        col_neo_center: str = NEOSERRA_COLUMNS.center,
        col_documentation_level: str = OUT_COLUMNS.milestone_documentation_level
    ) -> go.Figure:
    """
    parameters:
        funding_df:pd.DataFrame - The capital funding analysis dataframe
        fiscal_year:str - The fiscal year label placed at the end of the title
        title:str - The title to place on the graph
        source_data_export_path:str - The path to save the intermediate dataset that produced the graph (if provided)
        documented_tag:str - The documentation level value that counts a milestone as documented
        affirmation_missing_tag:str - The documentation level value for milestones whose affirmation is missing;
                                      these are counted together with the documented ones
        col_neo_center:str - The column of the dataset that determines the center
        col_documentation_level:str - The column of the dataset containing the documentation level
    returns: go.Figure - The constructed figure object
    description:
        Generates a bar chart of the share of milestones per center whose documentation level is either
        documented_tag or affirmation_missing_tag, i.e. the rate that would be reached if every milestone
        with a missing affirmation had one. A dashed line marks the network-wide share.
    """
    network_row_label = 'Network Total'
    counted_levels = [documented_tag, affirmation_missing_tag]

    # Milestone counts for every (center, documentation level) pair
    level_counts = funding_df.groupby([col_neo_center, col_documentation_level]).size().reset_index(name='Count') #pyright:ignore

    # Per center: which of the counted levels occur and how many milestones they cover
    counted_rows = level_counts[level_counts[col_documentation_level].isin(counted_levels)]
    counted_per_center = counted_rows.groupby(col_neo_center).agg(
        Documentation_Levels_Combined=(col_documentation_level, ','.join),
        Documented_Count=('Count', 'sum')
    ).reset_index()

    # Per center: all milestones regardless of level
    total_per_center = level_counts.groupby(col_neo_center).agg(
        Grand_Total_Count=('Count', 'sum')
    ).reset_index()

    # Outer join keeps centers that have none of the counted levels
    rates_df = counted_per_center.merge(total_per_center, on=col_neo_center, how='outer')
    rates_df['Documented_Count'] = rates_df['Documented_Count'].fillna(0).astype(int)
    rates_df['Documentation_Levels_Combined'] = rates_df['Documentation_Levels_Combined'].fillna(0)
    rates_df['Percent_Documented'] = rates_df['Documented_Count'] / rates_df['Grand_Total_Count']

    # Network-wide summary row
    milestone_count = funding_df.shape[0]
    counted_total = rates_df['Documented_Count'].sum()
    network_row = pd.DataFrame({
        col_neo_center: [network_row_label],
        'Grand_Total_Count' : [milestone_count],
        'Documented_Count': [counted_total],
        'Percent_Documented': [counted_total / milestone_count]
    })
    rates_df = pd.concat([rates_df, network_row], ignore_index=True)

    if source_data_export_path:
        rates_df.to_csv(source_data_export_path, index=False)

    # The summary row is drawn as a reference line, not as a bar
    is_network_row = rates_df[col_neo_center] == network_row_label
    fig = px.bar(
        rates_df[~is_network_row],
        x=col_neo_center,
        y='Percent_Documented',
        text='Percent_Documented',
        color_discrete_sequence=['#71bf44']
    )

    net_total = rates_df.loc[is_network_row, 'Percent_Documented'].iloc[0] #pyright:ignore
    fig.add_hline(
        y=net_total,
        line_dash="dash",
        line_color="#004649",
        annotation_text=f"Network Total: {net_total * 100:.1f}%",
        annotation_position="top right",
        annotation={"xref": "paper", "x": 1.02, "xanchor": "left"}
    )

    fig.update_traces(
        textposition='inside',
        textfont_size=12,
        texttemplate="%{text:.0%}"
    )
    fig.update_layout(
        xaxis_title='Center',
        yaxis_title='Documented and Affirmation Missing Percentage',
        yaxis_tickformat='.0%',
        title=f"{title} {fiscal_year}",
        height=700,
        width=1500,
        margin=dict(r=150)
    )
    return fig
def make_funding_director_confirmed_graph(
        funding_df: pd.DataFrame,
        fiscal_year: str,
        title: str = 'Percentage of Director Confirmed Capital Funding Attributions Per Center',
        source_data_export_path: str = "",
        col_neo_center: str = NEOSERRA_COLUMNS.center,
        col_neo_attribution_source: str = NEOSERRA_COLUMNS.milestone_attribution_source
    ) -> go.Figure:
    """
    parameters:
        funding_df:pd.DataFrame - The capital funding analysis data
        fiscal_year:str - The fiscal year label placed at the end of the title
        title:str - The title to place on the graph
        source_data_export_path:str - The path + filename to save the intermediate calculation dataset to (if provided)
        col_neo_center:str - The column in the dataset to consider the center column
        col_neo_attribution_source:str - The column of the dataset that contains the attribution source for a milestone
    returns: go.Figure - The generated graph figure
    description:
        Generates a bar graph of the share of funding milestones per center whose attribution source
        contains the text "Director". Rows with a missing attribution source count as not director confirmed.
    """
    # Rows whose attribution source mentions "Director"; missing sources are treated as False
    mentions_director = funding_df[col_neo_attribution_source].str.contains("Director", na=False)

    per_center_all = funding_df.groupby(col_neo_center).size()
    per_center_director = funding_df[mentions_director].groupby(col_neo_center).size()

    director_share_df = pd.DataFrame({
        'Total Count': per_center_all,
        'Director Count': per_center_director
    })
    # Centers without any director-confirmed milestone are absent from the director series
    director_share_df['Director Count'] = director_share_df['Director Count'].fillna(0).astype(int)
    director_share_df['Percent Director'] = director_share_df['Director Count'] / director_share_df['Total Count']
    director_share_df = director_share_df.reset_index()

    if source_data_export_path:
        director_share_df.to_csv(source_data_export_path, index=False)

    fig = px.bar(
        director_share_df,
        x=col_neo_center,
        y='Percent Director',
        text='Percent Director',
        color_discrete_sequence=['#ffba31']
    )
    fig.update_traces(
        textposition='inside',
        textfont_size=12,
        texttemplate="%{text:.0%}"
    )
    fig.update_layout(
        xaxis_title='Center',
        yaxis_title='Percent of Director Confirmed Attributions',
        yaxis_tickformat='.0%',
        title=f'{title} {fiscal_year}',
        height=700,
        width=1500,
        margin=dict(r=150)
    )
    return fig

View File

@@ -0,0 +1,357 @@
# FILE: naics_census_analysis.py
# CREATED: 12/19/25
# AUTHOR: Vincent Allen
# CONTACT: vincent@vtallen.com valle276@live.kutztown.edu
# PURPOSE:
# Contains functions used to generate the figures for section 1.1 of the all centers desk review
# This section specifically contains an analysis of PASBDC client NAICs codes compared to the proportions
# found in the US census data for PA.
# It also visualizes which counties have PASBDC client profiles with missing NAICs codes
# Third party libraries
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
import geopandas as gpd
import certifi
# Python modules
import textwrap
from typing import List
import ssl
import urllib.request
# Custom packages
from constants_module import NEOSERRA_COLUMNS, OUT_COLUMNS
def make_census_naics_chart(
        naics_df:pd.DataFrame,
        naics_column_name:str=OUT_COLUMNS.unified_naics,
        label_column_name:str=OUT_COLUMNS.naics_label,
        census_data_column_name:str=OUT_COLUMNS.census_pct) -> go.Figure:
    """
    parameters:
        naics_df:pd.DataFrame - The input dataframe containing NAICS codes, the descriptions of those codes, and a percentage per code
        naics_column_name:str - The column of naics_df holding the NAICS codes
        label_column_name:str - The column of naics_df holding the NAICS code descriptions
        census_data_column_name:str - The column of naics_df holding the percentage shown in the third column
    description:
        Produces a three column plotly table: the NAICS code, its description (wrapped to 45 characters
        per line) and the percentage formatted with one decimal place and a trailing "%".
        The percentage must already be out of 100, not out of 1 (99.34 not 0.9934).
    returns: go.Figure - The constructed figure object
    """
    wrap_width = 45 # Maximum characters per line in the "Definition" column

    sector_codes = naics_df[naics_column_name]

    # Long descriptions are broken over several lines using plotly's <br> markup
    wrapped_definitions = []
    for definition in naics_df[label_column_name]:
        wrapped_definitions.append('<br>'.join(textwrap.wrap(definition, width=wrap_width)))

    census_shares = []
    for share in naics_df[census_data_column_name]:
        census_shares.append(f"{share:.1f}%")

    table = go.Table(
        header=dict(
            values=["Sector", "Definition", "PA Census Percentage"],
            fill_color='#6ebe4d'
        ),
        cells=dict(
            values=[sector_codes, wrapped_definitions, census_shares],
            align=['left', 'left', 'right'],
            font=dict(size=11, color='black'),
            fill_color='#bad4d6'
        ),
        columnwidth=[0.15, 0.5, 0.15],
    )
    fig = go.Figure(data=[table])
    fig.update_layout(
        height=575,
        width=500,
        margin=dict(l=10, r=10, t=10, b=10),
        font_family="Futura",
        title=None
    )
    return fig
def make_client_census_comparison_graph(
        naics_df:pd.DataFrame,
        client_df:pd.DataFrame,
        title:str="Comparison between PA Census NAICS code distribution and PASBDC client NAICs distribution FY 25",
        naics_df_naics_code_column_name:str=OUT_COLUMNS.unified_naics,
        naics_df_naics_label_column_name:str=OUT_COLUMNS.naics_label,
        naics_df_census_percentage_column_name:str=OUT_COLUMNS.census_pct,
        client_df_naics2_column_name:str=OUT_COLUMNS.naics_2,
        client_df_census_percentage:str=OUT_COLUMNS.pa_naics_pct,
        client_df_pasbdc_percentage:str=OUT_COLUMNS.pasbdc_pct
    ) -> go.Figure:
    """
    parameters:
        naics_df:pd.DataFrame - The dataframe containing NAICs codes and their descriptions
        client_df:pd.DataFrame - The dataframe containing PASBDC client data. It is not modified.
        title:str - The title to give the graph
        naics_df_naics_code_column_name:str - The column of naics_df holding 2 digit naics codes, either a single
                                              code ("11") or an inclusive range ("31-33")
        naics_df_naics_label_column_name:str - The column of naics_df at which the naics code descriptions can be found
        naics_df_census_percentage_column_name:str - The column of naics_df at which census percentages can be found (must be in the form 99.34 not 0.9934)
        client_df_naics2_column_name:str - The column of client_df where 2 digit naics codes can be found
        client_df_census_percentage:str - The column of client_df holding the census share of businesses with that code in PA
        client_df_pasbdc_percentage:str - The column of client_df holding the percentage of PASBDC businesses with that naics code
    returns:
        go.Figure - The constructed figure object
    description:
        Constructs a bar chart displaying what percentage of PASBDC businesses fall into each NAICS code.
        Overlayed on top of this is a line graph that displays the percentage of that NAICS code within the
        census data, looked up from naics_df.
        NAICS codes 31, 32, and 33 are combined in the final graph.
    """
    manufacturing_codes = (31, 32, 33)
    manufacturing_key = "31-33"

    # -------------------- Prepare NAICS mapping --------------------------------
    # Maps every integer NAICS code to its label and census percentage; a range such
    # as "31-33" is expanded so that each code in the range points at the same entry.
    naics_mapping = {}
    for _, row in naics_df.iterrows():
        entry = {
            "Industry": row[naics_df_naics_label_column_name],
            "PA_Percentage": float(row[naics_df_census_percentage_column_name])
        }
        bounds = str(row[naics_df_naics_code_column_name]).split('-')
        if len(bounds) == 2:
            for code in range(int(bounds[0]), int(bounds[1]) + 1):
                naics_mapping[code] = entry
        else:
            naics_mapping[int(row[naics_df_naics_code_column_name])] = entry

    # -------------------- Clean and Aggregate Client Data -----------------------
    # Normalise the codes to int on a copy so the caller's dataframe keeps its dtypes.
    # The float step accepts values such as "31.0"; a missing code still raises here.
    normalised_codes = client_df[client_df_naics2_column_name].astype(float).astype(int)
    working_df = client_df.assign(**{client_df_naics2_column_name: normalised_codes})
    agg_df = (
        working_df
        .groupby(client_df_naics2_column_name, as_index=False)
        .agg({
            client_df_pasbdc_percentage: "mean",
            client_df_census_percentage: "mean"
        })
        .sort_values(client_df_naics2_column_name)
    ) #pyright:ignore

    # -------------------- Combine NAICS 31, 32, 33 ------------------------------
    # Group the manufacturing codes under one key; every other code keeps its own key.
    agg_df["NAICS_Combined"] = agg_df[client_df_naics2_column_name].apply(
        lambda code: manufacturing_key if code in manufacturing_codes else str(code)
    )
    combined_df = agg_df.groupby("NAICS_Combined", as_index=False).agg({
        client_df_pasbdc_percentage: "sum", # Sum the percentages
        client_df_census_percentage: "sum",
        client_df_naics2_column_name: "first" # Keep first code for mapping
    })
    is_manufacturing = combined_df["NAICS_Combined"] == manufacturing_key
    # Pin the combined row to code 31 so the mapping lookups below are deterministic
    combined_df.loc[is_manufacturing, client_df_naics2_column_name] = 31

    # Add industry name from mapping, with a custom label for the combined row
    combined_df["Industry"] = combined_df[client_df_naics2_column_name].map(#pyright:ignore
        lambda code: naics_mapping.get(code, {}).get("Industry", "Unknown")
    )
    combined_df.loc[is_manufacturing, "Industry"] = "Manufacturing"

    # The census percentage is looked up rather than summed: all codes of a range
    # share one value in the mapping.
    combined_df["PA_Mapped_Percentage"] = combined_df[client_df_naics2_column_name].map(#pyright:ignore
        lambda code: naics_mapping.get(code, {}).get("PA_Percentage", None)
    ) #pyright:ignore

    # Combine label for x-axis
    combined_df["NAICS_Label"] = combined_df["NAICS_Combined"].astype(str) + " - " + combined_df["Industry"].astype(str) #pyright:ignore
    # Sort to maintain logical order
    combined_df = combined_df.sort_values("NAICS_Combined") #pyright:ignore

    # -------------------- Plot PASBDC Bars --------------------------------------
    fig = px.bar(
        combined_df,
        x="NAICS_Label",
        y=client_df_pasbdc_percentage,
        text=client_df_pasbdc_percentage,
        width=1500,
        height=1000,
        title=title
    )
    fig.update_traces(marker_color="#6ebe4d")
    fig.update_traces(
        name="PASBDC Percentage",
        texttemplate="%{text:.1f}%",
        textposition="outside",
        textfont_size=14
    )

    # -------------------- Overlay Census Line -----------------------------------
    fig.add_trace(go.Scatter(
        x=combined_df["NAICS_Label"],
        y=combined_df["PA_Mapped_Percentage"],
        name="PA Census Percentage",
        mode="lines+markers",
        line=dict(color="#27323a"),
        marker=dict(size=8)
    ))

    # Leave 15% head room above the tallest bar / point for the outside text labels
    max_y = combined_df[[client_df_pasbdc_percentage, "PA_Mapped_Percentage"]].max().max()
    if pd.isna(max_y):
        max_y = 0.0

    fig.update_layout(
        bargap=0.1,
        barmode="group",
        yaxis=dict(
            range=[0, max_y * 1.15],
            title="Percentage of PASBDC Businesses",
            tickformat=".0f"
        ),
        xaxis=dict(title="Industry (NAICS Code)"),
        legend=dict(x=0.02, y=0.98)
    )
    return fig
def make_county_heatmap(
        county_stats_df:pd.DataFrame,
        value_column:str,
        title:str,
        tick_suffix:str="%",
        color_continuous_scale:List[str]=['#cde0c3','#b4e09a', '#6dafb2', '#499699', '#2d797c', '#256e70', '#156264', '#094f51', '#004649', '#003234', '#002f30', '#002223', '#001111']
    ) -> go.Figure:
    """
    parameters:
        county_stats_df:pd.DataFrame - The dataframe to visualize. Besides value_column the function reads the
                                       columns 'fips', 'County', 'Total Clients' and 'Missing NAICS'
        value_column:str - The column whose value colors each county
        title:str - The title of the map
        tick_suffix:str - The suffix to place after the numbers on the color bar
        color_continuous_scale:List[str] - Used to set the color gradient of the key
    returns:
        go.Figure - The constructed figure object
    description:
        Creates a heatmap of all of the counties within PA based on an input dataframe and a selected
        value column. County names are drawn on top of the map and an annotation reports the share of
        client profiles that are missing a NAICS code network wide.
        The county shapes are downloaded from GitHub on every call, so network access is required.
    """
    # This file has the FIPS codes and the county shape (geometry)
    geojson_url = 'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json'

    # Certificate checking is switched off only while the shapes are downloaded and the previous
    # default is restored afterwards, so other HTTPS calls in the process stay verified.
    # NOTE(review): disabling verification was a workaround for certificate errors on Windows;
    # downloading with a certifi-backed SSL context would remove the need for it.
    previous_https_context = ssl._create_default_https_context
    ssl._create_default_https_context = ssl._create_unverified_context
    try:
        gdf = gpd.read_file(geojson_url)
    finally:
        ssl._create_default_https_context = previous_https_context

    # We merge your data *onto* the geospatial data
    merged_gdf = gdf.merge(county_stats_df, left_on='id', right_on='fips')
    # Filter for Pennsylvania (FIPS prefix '42')
    pa_gdf = merged_gdf[merged_gdf['fips'].str.startswith('42')].copy()

    # Calculate the centroid for each county's geometry; it is where the county name is drawn.
    # Note: This might warn about CRS, but for plotting it's usually fine.
    pa_gdf['centroid'] = pa_gdf.geometry.centroid
    pa_gdf['lon'] = pa_gdf.centroid.x
    pa_gdf['lat'] = pa_gdf.centroid.y
    pa_gdf = pa_gdf.drop(columns=['centroid'])

    # Plotly accepts html styling in text labels. The label holds the county name only;
    # the value was left out for readability.
    pa_gdf['label_text'] = '<span style="color:white; font-size:10px;">' + pa_gdf['County'].astype(str) + "</span>"

    # Create the Base Choropleth Map
    fig = px.choropleth(pa_gdf,
        geojson=pa_gdf,
        locations='fips',
        featureidkey='properties.fips',
        color=value_column, # Column for color
        hover_name='County',
        hover_data={value_column: ':.1f', 'fips': False},
        color_continuous_scale=color_continuous_scale,
        range_color=[0, 100]
    )
    # Add a text-only scatter layer with the county labels
    fig.add_trace(
        go.Scattergeo(
            lat=pa_gdf['lat'],
            lon=pa_gdf['lon'],
            text=pa_gdf['label_text'],
            mode='text',
            hoverinfo='none'
        )
    )
    fig.update_geos(
        fitbounds="locations",
        visible=False,
        projection_scale=60,
        center={"lat": pa_gdf['lat'].mean(), "lon": pa_gdf['lon'].mean()}
    )
    fig.update_coloraxes(
        colorbar=dict(
            title=value_column,
            ticksuffix=tick_suffix,
        )
    )
    # Set the layout for saving an image
    fig.update_layout(
        title=dict(
            text=title,
            font=dict(size=22, color='black'),
            x=0.5, # center the title horizontally (0=left, 1=right)
            xanchor='center',
            yanchor='top'
        ),
        margin={"r":0,"t":80,"l":0,"b":0},
        width=2000,
        height=1200,
    )

    # Derive the network wide missing naics code value
    total_clients = county_stats_df['Total Clients'].sum()
    missing_naics_total = county_stats_df['Missing NAICS'].sum()
    # Report 0.0 instead of nan/inf when there are no clients at all
    if total_clients:
        missing_pct = round((missing_naics_total/total_clients)*100, 2)
    else:
        missing_pct = 0.0
    fig.add_annotation(
        x=0.0,
        y=-0.0,
        xref="paper",
        yref="paper",
        text=f"Network Wide, {missing_pct}% of client profiles are missing NAICS codes.",
        showarrow=False,
    )
    return fig

View File

@@ -0,0 +1,381 @@
# FILE: nbs_analysis.py
# CREATED: 12/23/25
# AUTHOR: Vincent Allen
# CONTACT: vincent@vtallen.com valle276@live.kutztown.edu
# PURPOSE:
# Contains the functions used to generate the plotly graphs for the milestone analysis in the network wide desk reviews.
# external libraries
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
#python modules
# Custom modules
from constants_module import NEOSERRA_COLUMNS, OUT_COLUMNS
def make_nbs_attribution_network_wide(
    nbs_df:pd.DataFrame,
    title:str="New Business Start Attributions Per Center FY 25",
    network_label:str="Network",
    graph_note:str="<b>NOTE:</b>Documentation levels were determined as follows.<br><br>"
    "<b>Documented: </br>Will be submitted to Nexus as long as 'Director Verified' is checked</b></br>There is a non-blank, non-'Requested on eCenter' attribution source </br>AND Affirmation Statement was non-blank<br>\tNOTE: If the attribution source is eCenter, no affirmation is required.<br><br>"
    "<b>Affirmation Statement Missing:</br>Will NOT be submitted to Nexus</b></br>Attribution source is non-blank, non-'Requested on eCenter'</br>BUT affirmation statement was blank.</br></br>"
    "<b>Not Documented:</br>Will NOT be submitted to Nexus</b></br>There is a non-blank, non-'Requested on eCenter' attribution source </br>with a value in the affirmation column. If the attribution <br>source is eCenter, then no value is required in the affirmation column.",
    col_neo_center:str=NEOSERRA_COLUMNS.center,
    col_documentation_level:str=OUT_COLUMNS.milestone_documentation_level
):
    """
    parameters:
        nbs_df:pd.DataFrame - The new business starts analysis data, one row per milestone
        title:str - The title to place on the graph
        network_label:str - Replacement for the word 'Network' in the title. When the title does not
                            contain 'Network', the label is prefixed to the title instead.
                            The default value "Network" leaves the title untouched.
        graph_note:str - Explanatory note drawn to the right of the plot area. Pass "" to omit the note
                         (and the extra right margin reserved for it).
        col_neo_center:str - The column of nbs_df holding the center name
        col_documentation_level:str - The column of nbs_df holding the milestone's documentation level
    returns: go.Figure - The constructed figure object
    description:
        Counts milestones per (center, documentation level) pair and draws them as a stacked bar
        graph with one bar per center and one coloured segment per documentation level.
    """
    # Resolve the title shown on the graph
    if network_label == "Network":
        display_title = title
    elif "Network" in title:
        display_title = title.replace("Network", network_label)
    else:
        display_title = f"{network_label} {title}"

    # Colour per documentation level; the key order is also the stacking/legend order
    level_colors = {
        "Documented": "#71bf44",
        "Affirmation Missing": "#ffba31",
        "Not Documented": "#004649",
    }

    # One row per (center, documentation level) with the number of milestones in it
    counts_df = nbs_df.groupby([col_neo_center, col_documentation_level]).size().reset_index(name='Count') #pyright:ignore
    counts_df = counts_df.sort_values(col_neo_center)

    fig = px.bar(
        counts_df,
        x=col_neo_center,
        y='Count',
        color=col_documentation_level,
        text='Count',
        color_discrete_map=level_colors,
        category_orders={col_documentation_level: list(level_colors)}
    )
    fig.update_traces(textposition='inside', textfont_size=12)
    fig.update_layout(
        title=display_title,
        xaxis_title='Center',
        yaxis_title='Attribution Counts',
        xaxis={'categoryorder': 'category ascending'},
        width=1500,
        height=700
    )

    if graph_note != "":
        # Widen the right margin so the note fits beside the plot area
        fig.update_layout(margin=dict(r=470))
        fig.add_annotation(
            text=graph_note,
            x=1.49,
            y=0,
            xref='paper',
            yref='paper',
            align='left',
            showarrow=False
        )
    return fig
def make_attribution_rate_chart(
    nbs_df:pd.DataFrame,
    source_data_export_path:str="",
    fiscalyear:str="",
    documented_tag:str=OUT_COLUMNS.val_documented,
    col_neo_center:str=NEOSERRA_COLUMNS.center,
    col_documentation_level:str=OUT_COLUMNS.milestone_documentation_level
):
    """
    parameters:
        nbs_df:pd.DataFrame - The new business starts analysis data, one row per milestone
        source_data_export_path:str - If a csv path is provided, the intermediate dataset will be exported there
        fiscalyear:str - Text appended to the end of the graph title (for example a fiscal year label)
        documented_tag:str - The documentation level value that counts a milestone as documented
        col_neo_center:str - The column of the dataset containing the center for a milestone
        col_documentation_level:str - The column containing the documentation level assigned to a milestone
    returns: go.Figure
    description:
        Builds a bar graph of the share of NBS milestones per center whose documentation level equals
        documented_tag, with a dashed horizontal line marking the network-wide share.
        The intermediate dataset (Center, Total, Documented Count, Percent Documented, plus a final
        "Network Total" row) is written to source_data_export_path when one is given.
    """
    network_row_label = 'Network Total'

    # Milestone counts per (center, documentation level)
    level_counts = nbs_df.groupby([col_neo_center, col_documentation_level]).size().reset_index(name='Count') #pyright:ignore

    # Numerator: documented milestones per center. Denominator: all milestones per center.
    is_documented = level_counts[col_documentation_level] == documented_tag
    documented_per_center = level_counts[is_documented].groupby(col_neo_center)['Count'].sum()
    total_per_center = level_counts.groupby(col_neo_center)['Count'].sum()

    # Both series are indexed by center, so the frame constructor aligns them
    rates_df = pd.DataFrame({'Total': total_per_center, 'Documented Count': documented_per_center})
    # Centers without any documented milestone come through as NaN; treat them as zero
    rates_df['Documented Count'] = rates_df['Documented Count'].fillna(0)
    rates_df['Percent Documented'] = rates_df['Documented Count'] / rates_df['Total']
    rates_df = rates_df.reset_index()

    # Network-wide figures are appended as one extra row
    milestone_count = nbs_df.shape[0]
    documented_count = rates_df['Documented Count'].sum()
    network_row = pd.DataFrame({
        col_neo_center: [network_row_label],
        'Total' : [milestone_count],
        'Documented Count': [documented_count],
        'Percent Documented': [documented_count / milestone_count]
    })
    rates_df = pd.concat([rates_df, network_row], ignore_index=True)

    # Save the derived dataset only if the user wants it
    if source_data_export_path:
        rates_df.to_csv(source_data_export_path, index=False)

    is_network_row = rates_df[col_neo_center].isin([network_row_label])
    net_total = rates_df[is_network_row]['Percent Documented'].iloc[0] #pyright:ignore

    # Bars show the centers only; the network value is drawn as a line below
    fig = px.bar(
        rates_df[~is_network_row],
        x=col_neo_center,
        y='Percent Documented',
        text='Percent Documented',
        color_discrete_sequence=['#71bf44']
    )
    fig.add_hline(
        y=net_total,
        line_dash="dash",
        line_color="#004649",
        annotation_text=f"Network Total: {net_total * 100:.1f}%",
        annotation_position="top right",
        annotation=dict(
            xref="paper",
            x=1.02,
            xanchor="left",
        )
    )
    fig.update_traces(
        texttemplate="%{text:.0%}",
        textposition='inside',
        textfont_size=12
    )
    fig.update_layout(
        title=f'New Business Start Attribution Rates Per Center {fiscalyear}',
        xaxis_title='Center',
        yaxis_title='Documented Percentage',
        yaxis_tickformat='.0%',
        width=1500,
        height=700,
        margin=dict(r=150)
    )
    return fig
def make_theoretical_attribution_rate_chart(
    nbs_df:pd.DataFrame,
    title:str='Documented Percentage if All NBS Milestones With an Attribution Source had an Affirmation FY 25',
    source_data_export_path:str="",
    documented_tag:str=OUT_COLUMNS.val_documented,
    affirmation_missing_tag:str=OUT_COLUMNS.val_affirmation_missing,
    col_neo_center:str=NEOSERRA_COLUMNS.center,
    col_documentation_level:str=OUT_COLUMNS.milestone_documentation_level
) -> go.Figure:
    """
    parameters:
        nbs_df:pd.DataFrame - The new business starts analysis dataframe, one row per milestone
        title:str - The title to place on the graph
        source_data_export_path:str - The path to save the intermediate dataset that produced the graph (if provided)
        documented_tag:str - The documentation level value that counts a milestone as documented
        affirmation_missing_tag:str - The documentation level value for milestones that only lack an affirmation;
                                      these are counted as if they were documented
        col_neo_center:str - The column of the dataset that determines the center
        col_documentation_level:str - The column of the dataset containing the documentation level
    returns: go.Figure - The constructed figure object
    description:
        Generates a bar chart of what the documentation rate per center would be if every milestone
        tagged affirmation_missing_tag were counted together with those tagged documented_tag.
        A dashed horizontal line marks the same rate network wide.
    """
    # Group by center and documentation level, then count the number of milestones for each grouping
    temp_agg = nbs_df.groupby([col_neo_center, col_documentation_level]).size().reset_index(name='Count') #pyright:ignore

    # Keep only the groupings that count towards the theoretical "documented" total
    counted_levels_df = temp_agg[temp_agg[col_documentation_level].isin(
        [documented_tag, affirmation_missing_tag]
    )]

    # Regroup by center: concatenate the contributing level names for traceability
    # and sum the counts that fall into each center
    documented_agg_df = counted_levels_df.groupby(col_neo_center).agg(
        Documentation_Levels_Combined=(col_documentation_level, ','.join),
        Documented_Count=('Count', 'sum')
    ).reset_index()

    # Count up the milestones regardless of their documentation level
    total_agg_df = temp_agg.groupby(col_neo_center).agg(
        Grand_Total_Count=('Count', 'sum')
    ).reset_index()

    # Outer merge so centers with no counted milestones still get a row
    final_df = pd.merge(
        documented_agg_df,
        total_agg_df,
        on=col_neo_center,
        how='outer'
    )

    # Centers absent from documented_agg_df come through as NaN; fill so calculations do not fail
    final_df['Documented_Count'] = final_df['Documented_Count'].fillna(0).astype(int)
    final_df['Documentation_Levels_Combined'] = final_df['Documentation_Levels_Combined'].fillna(0)

    # Calculate the documentation percentage for each center
    final_df['Percent_Documented'] = final_df['Documented_Count'] / final_df['Grand_Total_Count']

    total_nbs_milestones = nbs_df.shape[0]
    total_group = final_df['Documented_Count'].sum()
    network_total = total_group / total_nbs_milestones

    # Append the network-wide values as an extra row.
    # The row must be keyed by col_neo_center (not a hard-coded 'Center') so it lands in the
    # same column as the per-center rows whatever that column is called.
    network_total_count = pd.DataFrame({
        col_neo_center: ["Network Total"],
        'Grand_Total_Count' : [total_nbs_milestones],
        'Documented_Count': [total_group],
        'Percent_Documented': [network_total]
    })
    final_df = pd.concat([final_df, network_total_count], ignore_index=True)

    if source_data_export_path:
        final_df.to_csv(source_data_export_path, index=False)

    # The network row is drawn as a line, not a bar
    net_total = network_total
    fig = px.bar(
        final_df[~final_df[col_neo_center].isin(['Network Total'])],
        x=col_neo_center,
        y='Percent_Documented',
        text='Percent_Documented',
        color_discrete_sequence=['#71bf44']
    )

    # Network total
    fig.add_hline(
        y=net_total,
        line_dash="dash",
        line_color="#004649",
        annotation_text=f"Network Total: {net_total * 100:.1f}%",
        annotation_position="top right",
        annotation=dict(
            xref="paper",
            x=1.02,
            xanchor="left",
        )
    )
    fig.update_traces(
        textposition='inside',
        textfont_size=12,
        texttemplate="%{text:.0%}"
    )
    fig.update_layout(
        xaxis_title='Center',
        yaxis_title='Documented and Affirmation Missing Percentage',
        yaxis_tickformat='.0%',
        title=title,
        height=700,
        width=1500,
        margin=dict(r=150)
    )
    return fig
def make_director_confirmed_graph(
    nbs_df:pd.DataFrame,
    title:str='Percentage of Director Confirmed NBS Attributions Per Center FY 25',
    source_data_export_path:str="",
    col_neo_center:str=NEOSERRA_COLUMNS.center,
    col_neo_attribution_source:str=NEOSERRA_COLUMNS.milestone_attribution_source
) -> go.Figure:
    """
    parameters:
        nbs_df:pd.DataFrame - The new business starts analysis data, one row per milestone
        title:str - The title to place on the graph
        source_data_export_path:str - The path + filename to save the intermediate calculation dataset to (if provided)
        col_neo_center:str - The column of the dataset holding the center name
        col_neo_attribution_source:str - The column of the dataset that contains the attribution source for a milestone
    returns: go.Figure - The generated graph figure
    description:
        Generates a bar graph of the share of each center's milestones whose attribution source
        contains the text "director" (case-insensitive). Milestones with a missing attribution
        source are not counted as director confirmed.
    """
    # Denominator: every milestone per center
    milestones_per_center = nbs_df.groupby(col_neo_center).size()

    # Numerator: milestones whose attribution source mentions "director"
    is_director_confirmed = nbs_df[col_neo_attribution_source].str.contains("director", case=False, na=False)
    director_per_center = nbs_df[is_director_confirmed].groupby(col_neo_center).size()

    # Both series are indexed by center, so the frame constructor aligns them
    rates_df = pd.DataFrame({
        'Total Count': milestones_per_center,
        'Director Count': director_per_center
    })
    # Centers with no director confirmed milestone come through as NaN
    rates_df['Director Count'] = rates_df['Director Count'].fillna(0).astype(int)
    rates_df['Percent Director'] = rates_df['Director Count'] / rates_df['Total Count']
    rates_df = rates_df.reset_index()

    if source_data_export_path:
        rates_df.to_csv(source_data_export_path, index=False)

    fig = px.bar(
        rates_df,
        x=col_neo_center,
        y='Percent Director',
        text='Percent Director',
        color_discrete_sequence=['#ffba31']
    )
    fig.update_traces(
        texttemplate="%{text:.0%}",
        textposition='inside',
        textfont_size=12
    )
    fig.update_layout(
        title=title,
        xaxis_title='Center',
        yaxis_title='Percent of Director Confirmed Attributions',
        yaxis_tickformat='.0%',
        width=1500,
        height=700
    )
    return fig

View File

@@ -0,0 +1,254 @@
# FILE: satisfaction_survey_analysis.py
# CREATED: 12/23/25
# AUTHOR: Vincent Allen
# CONTACT: vincent@vtallen.com valle276@live.kutztown.edu
# PURPOSE:
# Contains the functions used to generate the plotly graphs for the client satisfaction survey analysis.
# external libraries
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
#python modules
# Custom modules
from constants_module import NEOSERRA_COLUMNS, OUT_COLUMNS
def make_survey_response_count_graph(
    survey_df: pd.DataFrame,
    title: str = 'Client Satisfaction Survey Responses Per Center FY 25',
    source_data_export_path: str = "",
    col_neo_center: str = NEOSERRA_COLUMNS.center
) -> go.Figure:
    """
    parameters:
        survey_df:pd.DataFrame - The raw survey data, one row per response
        title:str - The title to place on the graph
        source_data_export_path:str - If a csv path is provided, the per-center counts will be exported there
        col_neo_center:str - The column of the dataset containing the center name
    returns: go.Figure - The generated graph figure
    description:
        Counts the survey rows per center and draws them as a bar graph. The sum over all
        centers is shown as a small annotation above the top-left corner of the plot area.
    """
    # One row per center with the number of survey rows it received
    responses_by_center = survey_df.groupby(col_neo_center).size().reset_index(name='Responses') # pyright: ignore

    if source_data_export_path:
        responses_by_center.to_csv(source_data_export_path, index=False)

    fig = px.bar(
        responses_by_center,
        x=col_neo_center,
        y='Responses',
        text='Responses',
        height=500
    )

    # Network-wide response count, placed just above the plot area
    grand_total = responses_by_center['Responses'].sum()
    fig.add_annotation(
        text=f"{grand_total} total responses",
        xref='paper',
        yref='paper',
        x=0.0,
        y=1.03,
        showarrow=False
    )

    fig.update_layout(
        title=title,
        xaxis_title='Center',
        yaxis_title='Survey Responses',
        width=1500,
        height=700,
    )
    fig.update_traces(marker_color="#71bf44", showlegend=False)
    return fig
def make_average_survey_score_graph(
    survey_df: pd.DataFrame,
    title: str = 'Average Score FY 25 - How likely is it that you would recommend the SBDC to a friend or colleague? (1-10 scale)',
    source_data_export_path: str = "",
    col_neo_center: str = NEOSERRA_COLUMNS.center,
    col_score: str = NEOSERRA_COLUMNS.satisfaction_score
) -> go.Figure:
    """
    parameters:
        survey_df:pd.DataFrame - The raw survey data, one row per response. It is not modified.
        title:str - The title to place on the graph
        source_data_export_path:str - If a csv path is provided, the per-center averages will be exported there
        col_neo_center:str - The column of the dataset containing the center name
        col_score:str - The column containing the satisfaction score (1-10). Values may be numbers
                        (int or float) or text that starts with the number.
    returns: go.Figure - The generated graph figure
    description:
        Generates a bar graph displaying the average satisfaction score for each center,
        along with a network-wide average line. Responses whose score cannot be read as a
        number (for example blanks) are left out of both averages.
    """
    local_df = survey_df.copy()

    # Clean up the answers: keep only the leading (at most two digit) integer of each value.
    # Going through text handles ints, float-typed scores such as 9.0 and answers that carry
    # trailing text after the number. Anything without a leading number becomes NaN and is
    # ignored by mean().
    leading_number = (
        local_df[col_score]
        .astype(str)
        .str.strip()
        .str.extract(r'^([+-]?\d{1,2})', expand=False)
    )
    local_df[col_score] = pd.to_numeric(leading_number, errors='coerce')

    average_q1_score = local_df.groupby(col_neo_center)[col_score].mean().reset_index()
    network_wide_q1_score = local_df[col_score].mean()

    if source_data_export_path:
        average_q1_score.to_csv(source_data_export_path, index=False)

    fig = px.bar(average_q1_score, x=col_neo_center, y=col_score, text=col_score)
    fig.update_layout(
        xaxis_title='Center',
        yaxis_title='Average',
        title=title,
        height=700,
        width=1500,
    )

    # Add a network wide value. The label is pinned at y=9.5 instead of sitting on the line.
    fig.add_hline(
        y=network_wide_q1_score,
        line_dash="dash",
        line_color="#73e0c6",
        annotation_text=f"Network Total: {network_wide_q1_score:.1f}",
        annotation_position="top right",
        annotation_y=9.5
    )
    fig.update_traces(
        showlegend=False,
        marker_color="#197f60",
        texttemplate='%{text:.1f}'
    )
    return fig
def make_responses_per_client_graph(
    survey_df: pd.DataFrame,
    client_list_df: pd.DataFrame,
    title: str = 'Survey Responses Per 100 Clients Served FY 25',
    source_data_export_path: str = "",
    col_neo_center: str = NEOSERRA_COLUMNS.center
) -> go.Figure:
    """
    parameters:
        survey_df:pd.DataFrame - The raw survey data, one row per response
        client_list_df:pd.DataFrame - The dataset containing the list of clients served, one row per client
        title:str - The title to place on the graph
        source_data_export_path:str - If a csv path is provided, the intermediate dataset will be exported there
        col_neo_center:str - The column holding the center name; it must exist in both datasets
    returns: go.Figure - The generated graph figure
    description:
        Generates a bar graph of survey responses per 100 clients served for each center.
        Only centers that appear in the survey data are shown; a center with survey responses
        but no rows in client_list_df gets an empty (NaN) value.
    """
    # Row counts per center for each dataset
    responses_by_center = survey_df.groupby(col_neo_center).size().reset_index(name='Responses') #pyright: ignore
    clients_by_center = client_list_df.groupby(col_neo_center).size().reset_index(name='Client Count') #pyright:ignore

    # Left merge keeps every center that has survey responses
    display_df = responses_by_center.merge(clients_by_center, on=col_neo_center, how='left')

    # Scale the per-client ratio up to "per 100 clients"
    responses_per_client = display_df['Responses'] / display_df['Client Count']
    display_df['Per Client Served'] = responses_per_client * 100

    if source_data_export_path:
        display_df.to_csv(source_data_export_path, index=False)

    fig = px.bar(
        display_df,
        x=col_neo_center,
        y='Per Client Served',
        text='Per Client Served',
        height=500
    )
    fig.update_layout(
        title=title,
        xaxis_title='Center',
        yaxis_title='Survey Responses Per 100 Clients Served',
        width=1500,
        height=700,
    )
    fig.update_traces(texttemplate="%{text:.1f}", marker_color="#71bf44", showlegend=False)
    return fig
def _nps_from_scores(scores: pd.Series) -> tuple:
    """Return (detractors, promoters, nps) for a series of numeric 1-10 scores; NaN scores are ignored."""
    detractors = int((scores <= 6).sum())
    promoters = int((scores >= 9).sum())
    # Passives (7-8) are neither promoters nor detractors but still count as respondents
    responses = int(scores.notna().sum())
    if responses == 0:
        return detractors, promoters, 0.0
    return detractors, promoters, ((promoters - detractors) / responses) * 100


def make_nps_graph(
    survey_df: pd.DataFrame,
    title: str = "Net Promoter Score (NPS) By Center FY 25",
    source_data_export_path: str = "",
    col_neo_center: str = NEOSERRA_COLUMNS.center,
    col_score: str = NEOSERRA_COLUMNS.satisfaction_score
) -> go.Figure:
    """
    parameters:
        survey_df:pd.DataFrame - The raw survey data, one row per response. It is not modified.
        title:str - The title to place on the graph
        source_data_export_path:str - If a csv path is provided, the per-center dataset
                                      (center, Detractors, Promoters, NPS) will be exported there
        col_neo_center:str - The column of the dataset containing the center name
        col_score:str - The column containing the satisfaction score (1-10). Values may be numbers
                        (int or float) or text that starts with the number.
    returns: go.Figure - The generated graph figure
    description:
        Generates a bar graph displaying the Net Promoter Score (NPS) for each center.
        Includes a line indicating the Network-wide NPS.
        NPS = (% of responses scoring 9-10) - (% of responses scoring 6 or lower), where the
        percentages are taken over every response with a readable score, including the
        passive 7-8 answers. A group without any readable score gets an NPS of 0.
    """
    local_df = survey_df.copy()

    # Clean up the answers: keep only the leading (at most two digit) integer of each value.
    # Going through text handles ints, float-typed scores such as 9.0 and answers that carry
    # trailing text after the number. Anything without a leading number becomes NaN and is
    # excluded from every count below.
    leading_number = (
        local_df[col_score]
        .astype(str)
        .str.strip()
        .str.extract(r'^([+-]?\d{1,2})', expand=False)
    )
    local_df[col_score] = pd.to_numeric(leading_number, errors='coerce')

    # Calculating the network wide NPS
    _, _, network_nps = _nps_from_scores(local_df[col_score])

    # Calculate per center; rows are collected first so the frame is built in one go
    center_rows = []
    for center_name, center_scores in local_df.groupby(col_neo_center)[col_score]:
        detractors_count, promoters_count, nps = _nps_from_scores(center_scores)
        center_rows.append({
            col_neo_center: center_name,
            "Detractors": detractors_count,
            "Promoters": promoters_count,
            "NPS": nps
        })
    # Explicit columns keep the frame well-formed even when there are no centers at all
    nps_df = pd.DataFrame(center_rows, columns=[col_neo_center, "Detractors", "Promoters", "NPS"])

    if source_data_export_path:
        nps_df.to_csv(source_data_export_path, index=False)

    fig = px.bar(nps_df, x=col_neo_center, y='NPS', text='NPS', title=title, height=600, width=1250)
    fig.update_traces(showlegend=False, marker_color="#73e0c6", texttemplate="%{text:.1f}")
    fig.add_hline(
        y=network_nps,
        line_dash="dash",
        line_color="#004649",
        annotation_text=f"Network NPS: {network_nps:.1f}",
        annotation_position="bottom right",
    )
    fig.add_annotation(
        xref='paper',
        yref='paper',
        x=0.0,
        y=1.08,
        showarrow=False,
        text='NOTE: NPS is calculated as the difference between promoter responses (9 or 10) and the % of detractor responses (1-6).<br> Participents are responding to the question "How likely is it that you would recommend the SBDC to a friend or colleague? (1-10 scale)"',
        align='left'
    )
    return fig