# FILE: nbs_analysis_script.py
# CREATED: 12/26/25
# AUTHOR: Vincent Allen
# PURPOSE: Script to generate New Business Starts (NBS) analysis graphs and
#          datasets from prepared Neoserra data.

# Standard library
import argparse
import json
import os.path
import sys

# Third party libraries
import pandas as pd

# Custom modules
# Importing the functions from the library code provided
from section_1_graph_library_module import (  # pyright:ignore
    make_nbs_attribution_network_wide,
    make_attribution_rate_chart,
    make_theoretical_attribution_rate_chart,
    make_director_confirmed_graph,
)
from milestone_attribution_dataset_module import sanitize_nbs_data
from constants_module import NEOSERRA_COLUMNS, OUT_COLUMNS
from shared_tools_module import csv_url_to_dataframe


def parse_args(argv=None):
    """Parse the command line options for the NBS analysis script.

    Exactly one of ``--inputcsv`` / ``--exportmoduleurl`` must be supplied;
    ``--outpath`` and ``--fiscalyear`` are always required.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads ``sys.argv[1:]``, which is the original behavior.

    Returns:
        The populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser(
        description="Generate New Business Starts (NBS) Analysis Graphs"
    )

    # The dataset comes from a local CSV or from the export module, never both.
    dataset_group = parser.add_mutually_exclusive_group(required=True)
    dataset_group.add_argument(
        "--inputcsv",
        type=str,
        help="The path to the raw NBS analysis CSV dataset.",
    )
    dataset_group.add_argument(
        "--exportmoduleurl",
        type=str,
        help="The url to the configured export module for the NBS milestones data in Neoserra.",
    )

    parser.add_argument(
        "--outpath",
        type=str,
        required=True,
        help="The base directory path to place generated files into.",
    )
    parser.add_argument(
        "--fiscalyear",
        required=True,
        type=str,
        help="The fiscal year tag to place at the end of graph titles.",
    )
    parser.add_argument(
        "--mapping",
        type=str,
        required=False,
        help="Path to a JSON file to override default column names mappings.",
    )

    # NOTE(review): the default titles below hard-code "FY 25" even though
    # --fiscalyear is required; confirm whether they should follow that flag.

    # --- GRAPH 1: Network Wide Stacked Bar ---
    parser.add_argument(
        "--netwidefilename",
        type=str,
        default="nbsattributionnetworkwide",
        help="Filename for the network-wide attribution stacked bar chart.",
    )
    parser.add_argument(
        "--netwidetitle",
        type=str,
        default="New Business Start Attributions Per Center FY 25",
        help="Title for the network-wide attribution graph.",
    )

    # --- GRAPH 2: Attribution Rate Chart ---
    parser.add_argument(
        "--ratefilename",
        type=str,
        default="nbsattributionrate",
        help="Filename for the attribution rate bar chart.",
    )
    parser.add_argument(
        "--ratedatafilename",
        type=str,
        default="nbs_attribution_rate_data.csv",
        help="Filename for the intermediate dataset used for the attribution rate chart.",
    )

    # --- GRAPH 3: Theoretical Rate Chart ---
    parser.add_argument(
        "--theoreticalfilename",
        type=str,
        default="theoreticalnbsattributionrate",
        help="Filename for the theoretical attribution rate bar chart.",
    )
    parser.add_argument(
        "--theoreticaltitle",
        type=str,
        default="Documented Percentage if All NBS Milestones With an Attribution Source had an Affirmation FY 25",
        help="Title for the theoretical attribution rate graph.",
    )
    parser.add_argument(
        "--theoreticaldatafilename",
        type=str,
        default="theoretical_nbs_rate_data.csv",
        help="Filename for the intermediate dataset used for the theoretical rate chart.",
    )

    # --- GRAPH 4: Director Confirmed Chart ---
    parser.add_argument(
        "--directorfilename",
        type=str,
        default="directorconfirmednbs",
        help="Filename for the director confirmed NBS bar chart.",
    )
    parser.add_argument(
        "--directortitle",
        type=str,
        default="Percentage of Director Confirmed NBS Attributions Per Center FY 25",
        help="Title for the director confirmed graph.",
    )
    parser.add_argument(
        "--directordatafilename",
        type=str,
        default="director_confirmed_nbs_data.csv",
        help="Filename for the intermediate dataset used for the director confirmed chart.",
    )

    parser.add_argument(
        "--report",
        type=str,
        required=False,
        default="nbsanalysis",
        help="The prefix used to name report files such that the word generation scripts can find them with the image registry",
    )

    return parser.parse_args(argv)


def _ensure_output_directory(outpath):
    """Create ``outpath`` if needed; exit with status 1 when that fails."""
    try:
        # exist_ok avoids the check-then-create race and also reports the case
        # where outpath already exists but is not a directory.
        os.makedirs(outpath, exist_ok=True)
    except OSError as e:
        print(f"Error creating output directory: {e}")
        sys.exit(1)


def _load_nbs_dataframe(args):
    """Load the raw NBS dataset from the source selected on the command line.

    Exits with status 1 if the selected source cannot be read, so callers
    never continue without a dataframe.

    Raises:
        RuntimeError: If neither input option is set (argparse should make
            this unreachable).
    """
    if args.inputcsv:
        print(f"Loading input data from {args.inputcsv}...\n")
        try:
            return pd.read_csv(args.inputcsv)
        except Exception as e:
            print(f"Failed to read input CSV: {e}")
            sys.exit(1)

    if args.exportmoduleurl:
        # The URL is deliberately not echoed: it may embed access credentials.
        print("Loading input data from the Neoserra export module...\n")
        try:
            return csv_url_to_dataframe(args.exportmoduleurl)
        except Exception as e:
            print("Failed to grab the csv data from the Neoserra export module")
            print(f'Got={e}')
            # Without a dataframe every later step would fail with NameError.
            sys.exit(1)

    raise RuntimeError(
        "No input data source was defined, this should not be possible unless you have changed the code"
    )


def main():
    """Run the full NBS analysis: load, clean, export, and render four graphs."""
    args = parse_args()

    # Handle optional JSON mapping override
    if args.mapping:
        NEOSERRA_COLUMNS.apply_json_mapping(args.mapping)
        OUT_COLUMNS.apply_json_mapping(args.mapping)

    _ensure_output_directory(args.outpath)

    nbs_df = _load_nbs_dataframe(args)

    # Filter for reportable records only.
    # This will fail with a KeyError if the column is missing, as required.
    nbs_df = nbs_df[nbs_df[NEOSERRA_COLUMNS.reportable] == 1]

    # Do the data cleaning on the dataset
    # NOTE(review): a separate tag_documentation_level step used to be sketched
    # here as dead code; presumably sanitize_nbs_data covers it now - confirm.
    nbs_df = sanitize_nbs_data(
        nbs_df,
        col_neo_center=NEOSERRA_COLUMNS.center,
        col_neo_client_id=NEOSERRA_COLUMNS.client_id,
        col_neo_milestone_date=NEOSERRA_COLUMNS.milestone_date,
        col_neo_attribution_date=NEOSERRA_COLUMNS.attribution_date,
        col_neo_attribution_source=NEOSERRA_COLUMNS.milestone_attribution_source,
        col_neo_affirmation=NEOSERRA_COLUMNS.milestone_affirmation,
        col_neo_milestone_type=NEOSERRA_COLUMNS.milestone_type_name,
        col_out_documentation_level=OUT_COLUMNS.milestone_documentation_level,
        col_neo_reportable=NEOSERRA_COLUMNS.reportable,
        business_start_impact_val=NEOSERRA_COLUMNS.business_start_impact_val,
        business_established_val=NEOSERRA_COLUMNS.business_established_val,
    )

    nbs_df.to_csv(
        os.path.join(args.outpath, f"cleaned_nbs_dataset_{args.fiscalyear}.csv")
    )

    # 1. Network Wide Attribution
    print("Generating Network Wide Attribution Graph...\n")
    network_fig = make_nbs_attribution_network_wide(
        nbs_df,
        title=args.netwidetitle,
        col_neo_center=NEOSERRA_COLUMNS.center,
        col_documentation_level=OUT_COLUMNS.milestone_documentation_level,
    )
    network_fig.write_image(
        os.path.join(args.outpath, f"{args.report}_{args.netwidefilename}_.png")
    )

    # 2. Attribution Rate Chart
    print("Generating Attribution Rate Chart and Dataset...\n")
    rate_fig = make_attribution_rate_chart(
        nbs_df,
        fiscalyear=args.fiscalyear,
        source_data_export_path=str(os.path.join(args.outpath, args.ratedatafilename)),
        documented_tag=OUT_COLUMNS.val_documented,
        col_neo_center=NEOSERRA_COLUMNS.center,
        col_documentation_level=OUT_COLUMNS.milestone_documentation_level,
    )
    rate_fig.write_image(
        os.path.join(args.outpath, f"{args.report}_{args.ratefilename}_.png")
    )

    # 3. Theoretical Attribution Rate Chart
    print("Generating Theoretical Attribution Rate Chart and Dataset...\n")
    theoretical_fig = make_theoretical_attribution_rate_chart(
        nbs_df,
        title=args.theoreticaltitle,
        source_data_export_path=str(
            os.path.join(args.outpath, args.theoreticaldatafilename)
        ),
        documented_tag=OUT_COLUMNS.val_documented,
        affirmation_missing_tag=OUT_COLUMNS.val_affirmation_missing,
        col_neo_center=NEOSERRA_COLUMNS.center,
        col_documentation_level=OUT_COLUMNS.milestone_documentation_level,
    )
    theoretical_fig.write_image(
        os.path.join(args.outpath, f"{args.report}_{args.theoreticalfilename}_.png")
    )

    # 4. Director Confirmed Graph
    print("Generating Director Confirmed Graph and Dataset...\n")
    director_fig = make_director_confirmed_graph(
        nbs_df,
        title=args.directortitle,
        source_data_export_path=str(
            os.path.join(args.outpath, args.directordatafilename)
        ),
        col_neo_center=NEOSERRA_COLUMNS.center,
        col_neo_attribution_source=NEOSERRA_COLUMNS.milestone_attribution_source,
    )
    director_fig.write_image(
        os.path.join(args.outpath, f"{args.report}_{args.directorfilename}_.png")
    )

    print("DONE!")


if __name__ == "__main__":
    main()