first commit

2026-05-21 08:40:24 -04:00
commit b084545275
711 changed files with 3659856 additions and 0 deletions
--- a/milestone_attribution_graph_export_module/milestone_attribution_graph_export_module/init.py
+++ b/milestone_attribution_graph_export_module/milestone_attribution_graph_export_module/init.py
@@ -0,0 +1,5 @@
+"""Package-level module for the milestone attribution graph export code.
+
+Pulls the public names of ``make_milestone_analysis`` into this namespace.
+"""
+# NOTE(review): this file is committed as ``init.py``. Python only executes a
+# file named ``__init__.py`` as the package initializer -- confirm the name.
+from .make_milestone_analysis import *
+
+# ``__all__`` must only contain names that exist in this namespace. A list
+# holding an empty string makes ``from <package> import *`` raise
+# AttributeError, so an empty list is used to declare "no star exports".
+__all__ = []
--- a/milestone_attribution_graph_export_module/milestone_attribution_graph_export_module/make_milestone_analysis.py
+++ b/milestone_attribution_graph_export_module/milestone_attribution_graph_export_module/make_milestone_analysis.py
@@ -0,0 +1,240 @@
+# external libraries
+import pandas as pd
+import numpy as np
+import plotly.express as px
+
+# python modules
+import argparse
+import re
+import os
+import sys
+import json
+
+# custom modules
+from milestone_attribution_graph_library_module import make_attribution_pie, make_attribution_grouped_chart #pyright:ignore
+from milestone_attribution_dataset_module import tag_documentation_level, sanitize_nbs_data, sanitize_funding_data#pyright:ignore
+from pasbdc_data_cleaning import clean_center_name # pyright:ignore
+from constants_module import NEOSERRA_COLUMNS, OUT_COLUMNS
+from shared_tools_module import csv_url_to_dataframe
+
+import plotly.io as pio
+
+# Pass extra command-line flags to the background browser managed by Plotly.
+# Without them, errors were raised on the server this script was run on;
+# adding the flags fixed it.
+# --no-sandbox: Handles Ubuntu's strict security permissions
+# --disable-audio: Prevents the browser from looking for sound drivers
+# --disable-gpu: NOTE(review): not explained by the original author --
+#                presumably the server has no usable GPU; confirm.
+pio.defaults.chrome_args = ["--no-sandbox", "--disable-audio", "--disable-gpu"]
+
+def parse_args(argv=None):
+    """Parse the command-line arguments of the milestone analysis script.
+
+    Exactly one funding data source (``--fundingcsv`` or
+    ``--fundingexportmodule``) and exactly one new-business-starts data source
+    (``--nbscsv`` or ``--nbsexportmodule``) must be supplied; each pair is a
+    required, mutually exclusive group.
+
+    Args:
+        argv: Optional list of argument strings to parse. When ``None`` (the
+            default) argparse reads ``sys.argv[1:]``, so existing callers that
+            invoke ``parse_args()`` behave exactly as before.
+
+    Returns:
+        argparse.Namespace: the parsed arguments, with attributes
+        ``fundingcsv``, ``fundingexportmodule``, ``fundingcsvtag``, ``nbscsv``,
+        ``nbsexportmodule``, ``nbscsvtag``, ``fiscalyear``, ``outpath``,
+        ``attributionpietag``, ``groupedattributiontag`` and ``mapping``.
+
+    Raises:
+        SystemExit: raised by argparse when required arguments are missing or
+            both members of a mutually exclusive group are given.
+    """
+    parser = argparse.ArgumentParser()
+
+    funding_data_group = parser.add_mutually_exclusive_group(required=True)
+    nbs_data_group = parser.add_mutually_exclusive_group(required=True)
+
+    # --- Funding milestone data source (exactly one of the two) ---
+    funding_data_group.add_argument("--fundingcsv",
+                        type=str,
+                        help="The path to the exported funding milestone data from Neoserra.")
+    funding_data_group.add_argument("--fundingexportmodule",
+                        type=str,
+                        help="A CSV URL to load the funding milestone data from, as an alternative to --fundingcsv.")
+
+    parser.add_argument("--fundingcsvtag",
+                        type=str,
+                        required=False,
+                        default='cleaned_funding_milestones',
+                        help='The tag to place in the cleaned funding milestone data filename')
+
+    # --- New business starts (NBS) milestone data source (exactly one of the two) ---
+    nbs_data_group.add_argument("--nbscsv",
+                        type=str,
+                        help="The path to the exported new business starts milestone data from Neoserra")
+    nbs_data_group.add_argument("--nbsexportmodule",
+                        type=str,
+                        help="A CSV URL to load the new business starts milestone data from, as an alternative to --nbscsv.")
+
+    parser.add_argument("--nbscsvtag",
+                        type=str,
+                        required=False,
+                        default='cleaned_nbs_milestones',
+                        help='The tag to place in the cleaned NBS milestone data filename')
+
+    # --- Output naming and location ---
+    parser.add_argument("--fiscalyear",
+                        type=str,
+                        required=True,
+                        help="The fiscal year that the source data came from, used to add a tag to the graph title.")
+
+    parser.add_argument("--outpath",
+                        type=str,
+                        required=True,
+                        help='The path to place the generated graph files into.')
+
+    parser.add_argument('--attributionpietag',
+                        type=str,
+                        required=False,
+                        default="attribution_pie",
+                        help="The tag to place in the exported pie chart filename.")
+
+    parser.add_argument('--groupedattributiontag',
+                        type=str,
+                        required=False,
+                        default="attribution_grouped",
+                        help="The tag to place in the exported bar chart filename")
+
+    # --- Optional column-name overrides ---
+    parser.add_argument("--mapping",
+                        type=str,
+                        required=False,
+                        default="",
+                        help="The path to a .json file to override the column name mappings used by this script")
+
+    return parser.parse_args(argv)
+
+# Date stamp handed to every pie chart as ``date_note``.
+# NOTE(review): hard-coded by the original author; consider making it a CLI option.
+_DATE_NOTE = "1/27/26"
+
+# Characters removed from a center name before it is used in a filename.
+_UNSAFE_FILENAME_CHARS = re.compile(r"[/\\?%*:|\"<>\x7F\x00-\x1F]")
+
+
+def _export_charts(df, subject, milestone_label, file_prefix, args):
+    """Write the attribution pie chart and grouped bar chart PNGs for one dataframe.
+
+    Args:
+        df: The (already cleaned) milestone dataframe to plot.
+        subject: Leading text of the chart titles (a center name or "Network Wide").
+        milestone_label: Milestone kind used in the titles, e.g. "Funding".
+        file_prefix: Leading part of both output filenames, e.g. "network_nbs".
+        args: Parsed CLI namespace; ``outpath``, ``fiscalyear``,
+            ``attributionpietag`` and ``groupedattributiontag`` are read.
+    """
+    print("\tGenerating attribution pie chart...")
+    pie_fig = make_attribution_pie(
+        df,
+        title=f"{subject} Documented vs. Not Documented {milestone_label} Milestones {args.fiscalyear}",
+        date_note=_DATE_NOTE,
+        col_documentation_level=OUT_COLUMNS.milestone_documentation_level
+    )
+    pie_fig.write_image(os.path.join(args.outpath, f"{file_prefix}_{args.attributionpietag}_{args.fiscalyear}.png"))
+
+    print("\tGenerating attribution source chart...")
+    bar_fig = make_attribution_grouped_chart(
+        df,
+        title=f"{subject} Attribution Source vs. Documentation Level For {milestone_label} Milestones {args.fiscalyear}",
+    )
+    bar_fig.write_image(os.path.join(args.outpath, f"{file_prefix}_{args.groupedattributiontag}_{args.fiscalyear}.png"))
+
+
+def _export_center_charts(df, milestone_label, file_tag, args):
+    """Write the pie and bar charts for every distinct center found in ``df``.
+
+    Args:
+        df: The (already cleaned) milestone dataframe.
+        milestone_label: Milestone kind used in the chart titles.
+        file_tag: Short tag placed after the center name in filenames
+            ("funding" or "nbs").
+        args: Parsed CLI namespace, forwarded to ``_export_charts``.
+    """
+    for center in df[NEOSERRA_COLUMNS.center].unique():
+        # Strip out all of the non-safe characters for filenames
+        filename_center = _UNSAFE_FILENAME_CHARS.sub("", center)
+
+        print(f"Processing center {center}...")
+        center_df = df[df[NEOSERRA_COLUMNS.center] == center]
+        _export_charts(center_df, center, milestone_label, f"{filename_center}_{file_tag}", args)
+
+
+def main():
+    """Run the funding and new-business-starts milestone analyses.
+
+    Loads each dataset (from a CSV path or a CSV URL), keeps only reportable
+    rows, sanitizes it, writes the cleaned CSV into ``--outpath`` and then
+    writes per-center and network-wide pie/bar chart PNGs.
+
+    Raises:
+        RuntimeError: if the selected data source argument is empty.
+        KeyError: if the reportable column is missing from a dataset.
+    """
+    args = parse_args()
+
+    if args.mapping:
+        NEOSERRA_COLUMNS.apply_json_mapping(args.mapping)
+        OUT_COLUMNS.apply_json_mapping(args.mapping)
+
+    if not os.path.exists(args.outpath):
+        # exist_ok avoids a crash if the directory appears between the check and the call.
+        os.makedirs(args.outpath, exist_ok=True)
+        print("Created Output Directory!")
+
+    print("Loading and cleaning funding data...")
+
+    if args.fundingcsv:
+        funding_df = pd.read_csv(args.fundingcsv)
+    elif args.fundingexportmodule:
+        funding_df = csv_url_to_dataframe(args.fundingexportmodule)
+    else:
+        raise RuntimeError("No funding datasource configured in this script.")
+
+    # Filter for reportable records only.
+    # This will fail with a KeyError if the column is missing, as required.
+    funding_df = funding_df[funding_df[NEOSERRA_COLUMNS.reportable] == 1]
+
+    funding_df = sanitize_funding_data(
+        funding_df,
+        col_neo_attribution_source=NEOSERRA_COLUMNS.milestone_attribution_source,
+        col_neo_affirmation=NEOSERRA_COLUMNS.milestone_affirmation,
+        col_out_documentation_level=OUT_COLUMNS.milestone_documentation_level,
+        col_neo_center=NEOSERRA_COLUMNS.center
+    )
+
+    funding_df.to_csv(os.path.join(args.outpath, f'{args.fundingcsvtag}_{args.fiscalyear}.csv'))
+
+    _export_center_charts(funding_df, "Funding", "funding", args)
+
+    # Now we will process the new business starts analysis
+    print("Loading and cleaning new business starts data...")
+
+    if args.nbscsv:
+        nbs_df = pd.read_csv(args.nbscsv, parse_dates=[NEOSERRA_COLUMNS.milestone_date, NEOSERRA_COLUMNS.attribution_date])
+    elif args.nbsexportmodule:
+        # NOTE(review): no parse_dates is passed on this branch; whether
+        # csv_url_to_dataframe parses the date columns is not visible here.
+        nbs_df = csv_url_to_dataframe(args.nbsexportmodule)
+    else:
+        raise RuntimeError("No NBS datasource configured for this script.")
+
+    # Filter for reportable records only.
+    # This will fail with a KeyError if the column is missing, as required.
+    nbs_df = nbs_df[nbs_df[NEOSERRA_COLUMNS.reportable] == 1]
+
+    nbs_df = sanitize_nbs_data(
+        nbs_df,
+        col_neo_center=NEOSERRA_COLUMNS.center,
+        col_neo_client_id=NEOSERRA_COLUMNS.client_id,
+        col_neo_milestone_date=NEOSERRA_COLUMNS.milestone_date,
+        col_neo_attribution_date=NEOSERRA_COLUMNS.attribution_date,
+        col_neo_attribution_source=NEOSERRA_COLUMNS.milestone_attribution_source,
+        col_neo_affirmation=NEOSERRA_COLUMNS.milestone_affirmation,
+        col_neo_milestone_type=NEOSERRA_COLUMNS.milestone_type_name,
+        col_out_documentation_level=OUT_COLUMNS.milestone_documentation_level,
+        col_neo_reportable=NEOSERRA_COLUMNS.reportable,
+        business_start_impact_val=NEOSERRA_COLUMNS.business_start_impact_val,
+        business_established_val=NEOSERRA_COLUMNS.business_established_val
+    )
+
+    # Normalise missing / empty attribution sources to the literal "Blank".
+    nbs_df[NEOSERRA_COLUMNS.milestone_attribution_source] = nbs_df[NEOSERRA_COLUMNS.milestone_attribution_source].fillna("Blank").astype(str).replace(['', ' ', 'nan', 'NaN'], "Blank")
+
+    # Honour --nbscsvtag (its default reproduces the previously hard-coded
+    # "cleaned_nbs_milestones" filename), mirroring the funding export above.
+    nbs_df.to_csv(os.path.join(args.outpath, f"{args.nbscsvtag}_{args.fiscalyear}.csv"))
+
+    _export_center_charts(nbs_df, "New Business Start", "nbs", args)
+
+    # Make NBS network wide charts
+    print("\tCreating network wide charts...")
+    _export_charts(nbs_df, "Network Wide", "New Business Start", "network_nbs", args)
+
+    # Make funding network wide charts
+    _export_charts(funding_df, "Network Wide", "Funding", "network_funding", args)
+
+
+if __name__ == "__main__":
+    main()
--- a/milestone_attribution_graph_export_module/pyproject.toml
+++ b/milestone_attribution_graph_export_module/pyproject.toml
@@ -0,0 +1,11 @@
+# Build configuration: the package is built with setuptools
+# (the setuptools.build_meta backend).
+[build-system]
+requires = ["setuptools", "wheel"]
+build-backend = "setuptools.build_meta"
+
+# Package metadata.
+# NOTE(review): no `dependencies` are declared here, although
+# make_milestone_analysis.py imports pandas, numpy and plotly -- confirm
+# that they are installed by other means.
+[project]
+name = "milestone_attribution_graph_export_module"
+version = "0.1.0"
+description = "Internal PASBDC graph making scripts used to generate figures for the milestone attribution analysis."
+
+# Explicit list of packages to include in the distribution.
+[tool.setuptools]
+packages = ["milestone_attribution_graph_export_module"]