first commit

This commit is contained in:
2026-05-21 08:40:24 -04:00
commit b084545275
711 changed files with 3659856 additions and 0 deletions

View File

@@ -0,0 +1,5 @@
from .make_milestone_analysis import *
# Public names exported by ``from <package> import *``.
# Every entry must be the name of a real attribute of this package: a bogus
# entry such as an empty string makes the star-import raise AttributeError.
# The list is intentionally empty until a public API is chosen.
__all__ = []

View File

@@ -0,0 +1,240 @@
# external libraries
import pandas as pd
import numpy as np
import plotly.express as px
# python modules
import argparse
import re
import os
import sys
import json
# custom modules
from milestone_attribution_graph_library_module import make_attribution_pie, make_attribution_grouped_chart #pyright:ignore
from milestone_attribution_dataset_module import tag_documentation_level, sanitize_nbs_data, sanitize_funding_data#pyright:ignore
from pasbdc_data_cleaning import clean_center_name # pyright:ignore
from constants_module import NEOSERRA_COLUMNS, OUT_COLUMNS
from shared_tools_module import csv_url_to_dataframe
import plotly.io as pio
# Pass extra command-line flags to the background browser managed by Plotly.
# The server this script originally ran on was raising errors without them,
# and adding these flags fixed it.
#   --no-sandbox:    handles Ubuntu's strict security permissions
#   --disable-audio: prevents the browser from looking for sound drivers
#   --disable-gpu:   not explained by the original author; presumably disables
#                    GPU acceleration on a headless server -- TODO confirm
# NOTE(review): confirm that the installed Plotly/Kaleido version actually
# reads `pio.defaults.chrome_args`.
pio.defaults.chrome_args = ["--no-sandbox", "--disable-audio", "--disable-gpu"]
def parse_args(argv=None) -> argparse.Namespace:
    """Parse the command-line options of the milestone attribution export.

    Exactly one funding data source (``--fundingcsv`` or
    ``--fundingexportmodule``) and exactly one new business starts (NBS) data
    source (``--nbscsv`` or ``--nbsexportmodule``) must be supplied, together
    with ``--fiscalyear`` and ``--outpath``. All other options are optional
    and have defaults.

    Args:
        argv: Optional list of argument strings to parse. When ``None`` (the
            default) argparse falls back to ``sys.argv[1:]``, which is the
            behaviour callers get from ``parse_args()``.

    Returns:
        The populated ``argparse.Namespace``. Within each data-source group
        the option that was not supplied is ``None``.

    Raises:
        SystemExit: Raised by argparse when a required option is missing or
            both options of a mutually exclusive group are supplied.
    """
    parser = argparse.ArgumentParser()

    # Each dataset can come from exactly one of two sources, so the two
    # options of a dataset live in a required, mutually exclusive group.
    funding_data_group = parser.add_mutually_exclusive_group(required=True)
    nbs_data_group = parser.add_mutually_exclusive_group(required=True)

    # --- Funding milestone data ---
    funding_data_group.add_argument(
        "--fundingcsv",
        type=str,
        help="The path to the exported funding milestone data from Neoserra.",
    )
    funding_data_group.add_argument(
        "--fundingexportmodule",
        type=str,
        help="Alternative to --fundingcsv: a source that is passed to "
             "csv_url_to_dataframe to load the funding milestone data.",
    )
    parser.add_argument(
        "--fundingcsvtag",
        type=str,
        required=False,
        default='cleaned_funding_milestones',
        help='The tag to place in the the cleaned funding milestone data filename',
    )

    # --- New business starts (NBS) milestone data ---
    nbs_data_group.add_argument(
        "--nbscsv",
        type=str,
        help="The path to the exported new business starts milestone data from Neoserra",
    )
    nbs_data_group.add_argument(
        "--nbsexportmodule",
        type=str,
        help="Alternative to --nbscsv: a source that is passed to "
             "csv_url_to_dataframe to load the new business starts milestone data.",
    )
    parser.add_argument(
        "--nbscsvtag",
        type=str,
        required=False,
        default='cleaned_nbs_milestones',
        help='The tag to place in the the cleaned NBS milestone data filename',
    )

    # --- Output location and naming ---
    parser.add_argument(
        "--fiscalyear",
        type=str,
        required=True,
        help="The fiscal year that the souce data came from, used to add a tag to the graph title.",
    )
    parser.add_argument(
        "--outpath",
        type=str,
        required=True,
        help='The path to place the generated graph files into.',
    )
    parser.add_argument(
        '--attributionpietag',
        type=str,
        required=False,
        default="attribution_pie",
        help="The tag to place in the exported pie chart filename.",
    )
    parser.add_argument(
        '--groupedattributiontag',
        type=str,  # explicit for consistency with the other tag options
        required=False,
        default="attribution_grouped",
        help="The tag to place in the exported bar chart filename",
    )

    # --- Column mapping override ---
    parser.add_argument(
        "--mapping",
        type=str,
        required=False,
        default="",
        help="The path to a .json file to override the column name mappings used by this script",
    )
    return parser.parse_args(argv)
if __name__ == "__main__":
    # Script entry point. Steps, in order:
    #   1. parse the CLI options and optionally override the column mappings;
    #   2. load the funding milestone data, keep reportable rows, clean it,
    #      write the cleaned CSV and export per-center charts;
    #   3. do the same for the new business starts (NBS) milestone data;
    #   4. export network-wide charts for both datasets.
    # All output files are written into ``args.outpath``.
    args = parse_args()
    if args.mapping:
        NEOSERRA_COLUMNS.apply_json_mapping(args.mapping)
        OUT_COLUMNS.apply_json_mapping(args.mapping)

    if not os.path.exists(args.outpath):
        # exist_ok avoids a crash if the directory appears between the
        # existence check above and the creation here.
        os.makedirs(args.outpath, exist_ok=True)
        print("Created Output Directory!")

    # Characters that are not safe in filenames; compiled once and reused
    # for every center name in both datasets.
    unsafe_filename_chars = re.compile(r"[/\\?%*:|\"<>\x7F\x00-\x1F]")
    # Value passed as ``date_note`` to every pie chart (hard-coded).
    chart_date_note = "1/27/26"

    # ------------------------------------------------------------------
    # Funding milestones
    # ------------------------------------------------------------------
    print("Loading and cleaning funding data...")
    if args.fundingcsv:
        funding_df = pd.read_csv(args.fundingcsv)
    elif args.fundingexportmodule:
        funding_df = csv_url_to_dataframe(args.fundingexportmodule)
    else:
        raise RuntimeError("No funding datasource configured in this script.")

    # Filter for reportable records only.
    # This will fail with a KeyError if the column is missing, as required.
    funding_df = funding_df[funding_df[NEOSERRA_COLUMNS.reportable] == 1]

    funding_df = sanitize_funding_data(
        funding_df,
        col_neo_attribution_source=NEOSERRA_COLUMNS.milestone_attribution_source,
        col_neo_affirmation=NEOSERRA_COLUMNS.milestone_affirmation,
        col_out_documentation_level=OUT_COLUMNS.milestone_documentation_level,
        col_neo_center=NEOSERRA_COLUMNS.center
    )
    funding_df.to_csv(os.path.join(args.outpath, f'{args.fundingcsvtag}_{args.fiscalyear}.csv'))

    unique_centers = funding_df[NEOSERRA_COLUMNS.center].unique()
    for center in unique_centers:
        # Strip out all of the non-safe characters for filenames
        filename_center = unsafe_filename_chars.sub("", center)
        print(f"Processing center {center}...")
        center_df = funding_df[funding_df[NEOSERRA_COLUMNS.center] == center]

        # Funding Milestones Pie chart
        print("\tGenerating attribution pie chart...")
        pie_fig = make_attribution_pie(
            center_df,
            title=f"{center} Documented vs. Not Documented Funding Milestones {args.fiscalyear}",
            date_note=chart_date_note,
            col_documentation_level=OUT_COLUMNS.milestone_documentation_level
        )
        pie_fig.write_image(os.path.join(args.outpath, f'{filename_center}_funding_{args.attributionpietag}_{args.fiscalyear}.png'))

        # Funding Milestones Bar Graph
        print("\tGenerating attribution source chart...")
        bar_fig = make_attribution_grouped_chart(
            center_df,
            title=f"{center} Attribution Source vs. Documentation Level For Funding Milestones {args.fiscalyear}",
        )
        bar_fig.write_image(os.path.join(args.outpath, f"{filename_center}_funding_{args.groupedattributiontag}_{args.fiscalyear}.png"))

    # ------------------------------------------------------------------
    # New business starts (NBS) milestones
    # ------------------------------------------------------------------
    print("Loading and cleaning new business starts data...")
    if args.nbscsv:
        nbs_df = pd.read_csv(args.nbscsv, parse_dates=[NEOSERRA_COLUMNS.milestone_date, NEOSERRA_COLUMNS.attribution_date])
    elif args.nbsexportmodule:
        # NOTE(review): unlike the CSV branch, no date parsing is requested
        # here -- confirm csv_url_to_dataframe / sanitize_nbs_data handle it.
        nbs_df = csv_url_to_dataframe(args.nbsexportmodule)
    else:
        raise RuntimeError("No NBS datasource configred for this script.")

    # Filter for reportable records only.
    # This will fail with a KeyError if the column is missing, as required.
    nbs_df = nbs_df[nbs_df[NEOSERRA_COLUMNS.reportable] == 1]

    nbs_df = sanitize_nbs_data(
        nbs_df,
        col_neo_center=NEOSERRA_COLUMNS.center,
        col_neo_client_id=NEOSERRA_COLUMNS.client_id,
        col_neo_milestone_date=NEOSERRA_COLUMNS.milestone_date,
        col_neo_attribution_date=NEOSERRA_COLUMNS.attribution_date,
        col_neo_attribution_source=NEOSERRA_COLUMNS.milestone_attribution_source,
        col_neo_affirmation=NEOSERRA_COLUMNS.milestone_affirmation,
        col_neo_milestone_type=NEOSERRA_COLUMNS.milestone_type_name,
        col_out_documentation_level=OUT_COLUMNS.milestone_documentation_level,
        col_neo_reportable=NEOSERRA_COLUMNS.reportable,
        business_start_impact_val=NEOSERRA_COLUMNS.business_start_impact_val,
        business_established_val=NEOSERRA_COLUMNS.business_established_val
    )
    # Normalise missing / blank attribution sources to the literal "Blank".
    nbs_df[NEOSERRA_COLUMNS.milestone_attribution_source] = nbs_df[NEOSERRA_COLUMNS.milestone_attribution_source].fillna("Blank").astype(str).replace(['', ' ', 'nan', 'NaN'], "Blank")

    unique_centers = nbs_df[NEOSERRA_COLUMNS.center].unique()
    # Honour --nbscsvtag (its default reproduces the previous fixed filename),
    # mirroring how the funding CSV uses --fundingcsvtag.
    nbs_df.to_csv(os.path.join(args.outpath, f"{args.nbscsvtag}_{args.fiscalyear}.csv"))

    for center in unique_centers:
        # Strip out all of the non-safe characters for filenames
        filename_center = unsafe_filename_chars.sub("", center)
        print(f"Processing center {center}...")
        center_df = nbs_df[nbs_df[NEOSERRA_COLUMNS.center] == center]

        # NBS Milestones Pie chart
        print("\tGenerating attribution pie chart...")
        pie_fig = make_attribution_pie(
            center_df,
            title=f"{center} Documented vs. Not Documented New Business Start Milestones {args.fiscalyear}",
            date_note=chart_date_note,
            col_documentation_level=OUT_COLUMNS.milestone_documentation_level
        )
        pie_fig.write_image(os.path.join(args.outpath, f'{filename_center}_nbs_{args.attributionpietag}_{args.fiscalyear}.png'))

        # NBS Milestones Bar Graph
        print("\tGenerating attribution source chart...")
        bar_fig = make_attribution_grouped_chart(
            center_df,
            title=f"{center} Attribution Source vs. Documentation Level For New Business Start Milestones {args.fiscalyear}",
        )
        bar_fig.write_image(os.path.join(args.outpath, f"{filename_center}_nbs_{args.groupedattributiontag}_{args.fiscalyear}.png"))

    # ------------------------------------------------------------------
    # Network-wide charts (all centers combined)
    # ------------------------------------------------------------------
    # Make NBS network wide charts
    print("\tCreating network wide charts...")
    pie_fig = make_attribution_pie(
        nbs_df,
        title=f"Network Wide Documented vs. Not Documented New Business Start Milestones {args.fiscalyear}",
        date_note=chart_date_note,
        col_documentation_level=OUT_COLUMNS.milestone_documentation_level
    )
    pie_fig.write_image(os.path.join(args.outpath, f'network_nbs_{args.attributionpietag}_{args.fiscalyear}.png'))

    bar_fig = make_attribution_grouped_chart(
        nbs_df,
        title=f"Network Wide Attribution Source vs. Documentation Level For New Business Start Milestones {args.fiscalyear}",
    )
    bar_fig.write_image(os.path.join(args.outpath, f"network_nbs_{args.groupedattributiontag}_{args.fiscalyear}.png"))

    # Make funding network wide charts
    pie_fig = make_attribution_pie(
        funding_df,
        title=f"Network Wide Documented vs. Not Documented Funding Milestones {args.fiscalyear}",
        date_note=chart_date_note,
        col_documentation_level=OUT_COLUMNS.milestone_documentation_level
    )
    pie_fig.write_image(os.path.join(args.outpath, f'network_funding_{args.attributionpietag}_{args.fiscalyear}.png'))

    # Funding Milestones Bar Graph
    print("\tGenerating attribution source chart...")
    bar_fig = make_attribution_grouped_chart(
        funding_df,
        title=f"Network Wide Attribution Source vs. Documentation Level For Funding Milestones {args.fiscalyear}",
    )
    bar_fig.write_image(os.path.join(args.outpath, f"network_funding_{args.groupedattributiontag}_{args.fiscalyear}.png"))

View File

@@ -0,0 +1,11 @@
[build-system]
requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "milestone_attribution_graph_export_module"
version = "0.1.0"
description = "Internal PASBDC graph making scripts used to generate figures for the milestone attribution analysis."
[tool.setuptools]
packages = ["milestone_attribution_graph_export_module"]