# FILE: pasbdc_funding_analysis_script.py
# CREATED: 12/26/25
# AUTHOR: Vincent Allen
# PURPOSE: Script to generate capital funding analysis graphs and datasets from prepared Neoserra data.

# Standard library
import argparse
import json
import os.path
import sys

# Third party libraries
import pandas as pd

# Custom modules
from section_1_graph_library_module import ( #pyright:ignore
    make_funding_attribution_network_wide,
    make_funding_attribution_rate_chart,
    make_theoretical_funding_attribution_rate_chart,
    make_funding_director_confirmed_graph,
)
from milestone_attribution_dataset_module import sanitize_funding_data
from constants_module import NEOSERRA_COLUMNS, OUT_COLUMNS
from shared_tools_module import csv_url_to_dataframe


def parse_args() -> argparse.Namespace:
    """Build the command line interface and parse ``sys.argv``.

    Exactly one of ``--inputcsv`` / ``--exportmoduleurl`` must be supplied
    (they form a required, mutually exclusive group). ``--outpath`` and
    ``--fiscalyear`` are required; every other option has a default.

    Returns:
        The parsed arguments namespace.
    """
    parser = argparse.ArgumentParser(description="Generate Capital Funding Analysis Graphs")

    input_data_group = parser.add_mutually_exclusive_group(required=True)
    input_data_group.add_argument(
        "--inputcsv",
        type=str,
        help="The path to the raw capital funding CSV dataset. Either a path to a CSV file is required OR a value must be provided for --exportmoduleurl",
    )
    input_data_group.add_argument(
        "--exportmoduleurl",
        type=str,
        help=(
            "The URL pointing to the configured Neoserra export module for the funding analysis data. "
            "Either the export module url is required, or a value must be provided for --inputcsv."
        ),
    )

    parser.add_argument("--outpath", type=str, required=True, help="The base directory path to place generated files into.")
    parser.add_argument("--fiscalyear", type=str, required=True, help="The fiscal year the input data comes from")
    parser.add_argument("--mapping", type=str, required=False, help="Path to a JSON file to override default column names mappings.")

    # --- GRAPH 1: Network Wide Stacked Bar ---
    parser.add_argument("--netwidefilename", type=str, default="fundingattributionnetworkwide", help="Filename for the network-wide attribution stacked bar chart.")
    parser.add_argument("--netwidetitle", type=str, default="Capital Funding Attributions Per Center", help="Title for the network-wide attribution graph.")

    # --- GRAPH 2: Attribution Rate Chart ---
    parser.add_argument("--ratefilename", type=str, default="fundingattributionrate", help="Filename for the attribution rate bar chart.")
    parser.add_argument("--ratedatafilename", type=str, default="funding_attribution_rate_data.csv", help="Filename for the intermediate dataset used for the attribution rate chart.")

    # --- GRAPH 3: Theoretical Rate Chart ---
    parser.add_argument("--theoreticalfilename", type=str, default="theoreticalfundingattributionrate", help="Filename for the theoretical attribution rate bar chart.")
    parser.add_argument("--theoreticaltitle", type=str, default="Documented Percentage if All Funding Milestones With an Attribution Source had an Affirmation", help="Title for the theoretical attribution rate graph.")
    parser.add_argument("--theoreticaldatafilename", type=str, default="theoretical_funding_rate_data.csv", help="Filename for the intermediate dataset used for the theoretical rate chart.")

    # --- GRAPH 4: Director Confirmed Chart ---
    parser.add_argument("--directorfilename", type=str, default="directorconfirmedfunding", help="Filename for the director confirmed funding bar chart.")
    parser.add_argument("--directortitle", type=str, default="Percentage of Director Confirmed Capital Funding Attributions Per Center", help="Title for the director confirmed graph.")
    parser.add_argument("--directordatafilename", type=str, default="director_confirmed_data.csv", help="Filename for the intermediate dataset used for the director confirmed chart.")

    parser.add_argument("--report", type=str, default="fundinganalysis", help="The report name to use in filenames so that the word generation script can use the image registry to find this report's images.")

    return parser.parse_args()


def _load_funding_dataframe(args: argparse.Namespace) -> pd.DataFrame:
    """Load the raw funding data from the CSV path or the export module URL.

    Prints a diagnostic and exits the process with status 1 when loading fails.
    """
    # Compare against None rather than relying on truthiness: argparse accepts
    # an explicitly empty value (e.g. --inputcsv ""), which would otherwise
    # skip both branches and leave no dataframe to work with.
    if args.inputcsv is not None:
        print(f"Loading input data from {args.inputcsv}...\n")
        try:
            return pd.read_csv(args.inputcsv)
        except Exception as e:
            print(f"Failed to read input CSV: {e}")
            sys.exit(1)

    # The required mutually exclusive group guarantees the URL was supplied
    # whenever the CSV path was not.
    try:
        return csv_url_to_dataframe(args.exportmoduleurl)
    except Exception as e:
        print("Failed to fetch data from the export module.")
        print(f"got={e}")
        # sys.exit, not the bare exit() helper: the latter is injected by the
        # site module and is not guaranteed to exist in every interpreter setup.
        sys.exit(1)


def _image_path(args: argparse.Namespace, filename: str) -> str:
    """Return the output PNG path for a graph, prefixed with the report name."""
    return os.path.join(args.outpath, f"{args.report}_{filename}_.png")


def main() -> None:
    """Run the funding analysis: load, filter, sanitize, then write all outputs.

    Writes the cleaned dataset as ``cleaned_funding_<fiscalyear>.csv`` and one
    PNG per graph into ``--outpath``. The three rate/director chart builders
    are also handed a path for their intermediate dataset.
    """
    args = parse_args()

    # Handle optional JSON mapping override
    if args.mapping:
        NEOSERRA_COLUMNS.apply_json_mapping(args.mapping)
        OUT_COLUMNS.apply_json_mapping(args.mapping)

    # Ensure output directory exists. exist_ok avoids a check-then-create race
    # and still reports an error when the path exists but is not a directory.
    try:
        os.makedirs(args.outpath, exist_ok=True)
    except OSError as e:
        print(f"Error creating output directory: {e}")
        sys.exit(1)

    funding_df = _load_funding_dataframe(args)

    # Filter for reportable records only.
    # This will fail with a KeyError if the column is missing, as required.
    funding_df = funding_df[funding_df[NEOSERRA_COLUMNS.reportable] == 1]

    funding_df = sanitize_funding_data(
        df=funding_df,
        col_neo_center=NEOSERRA_COLUMNS.center,
        col_neo_attribution_source=NEOSERRA_COLUMNS.milestone_attribution_source,
        col_neo_affirmation=NEOSERRA_COLUMNS.milestone_affirmation,
        col_out_documentation_level=OUT_COLUMNS.milestone_documentation_level,
    )
    funding_df.to_csv(os.path.join(args.outpath, f"cleaned_funding_{args.fiscalyear}.csv"), index=False)

    # 1. Network Wide Attribution
    print("Generating Network Wide Attribution Graph...\n")
    network_fig = make_funding_attribution_network_wide(
        funding_df,
        fiscal_year=args.fiscalyear,
        title=args.netwidetitle,
        col_neo_center=NEOSERRA_COLUMNS.center,
        col_documentation_level=OUT_COLUMNS.milestone_documentation_level,
    )
    network_fig.write_image(_image_path(args, args.netwidefilename))

    # 2. Attribution Rate Chart
    print("Generating Attribution Rate Chart and Dataset...\n")
    rate_fig = make_funding_attribution_rate_chart(
        funding_df,
        fiscal_year=args.fiscalyear,
        source_data_export_path=os.path.join(args.outpath, args.ratedatafilename),
        documented_tag=OUT_COLUMNS.val_documented,
        col_neo_center=NEOSERRA_COLUMNS.center,
        col_documentation_level=OUT_COLUMNS.milestone_documentation_level,
    )
    rate_fig.write_image(_image_path(args, args.ratefilename))

    # 3. Theoretical Attribution Rate Chart
    print("Generating Theoretical Attribution Rate Chart and Dataset...\n")
    theoretical_fig = make_theoretical_funding_attribution_rate_chart(
        funding_df,
        title=args.theoreticaltitle,
        fiscal_year=args.fiscalyear,
        source_data_export_path=os.path.join(args.outpath, args.theoreticaldatafilename),
        documented_tag=OUT_COLUMNS.val_documented,
        affirmation_missing_tag=OUT_COLUMNS.val_affirmation_missing,
        col_neo_center=NEOSERRA_COLUMNS.center,
        col_documentation_level=OUT_COLUMNS.milestone_documentation_level,
    )
    theoretical_fig.write_image(_image_path(args, args.theoreticalfilename))

    # 4. Director Confirmed Graph
    print("Generating Director Confirmed Graph and Dataset...\n")
    director_fig = make_funding_director_confirmed_graph(
        funding_df,
        fiscal_year=args.fiscalyear,
        title=args.directortitle,
        source_data_export_path=os.path.join(args.outpath, args.directordatafilename),
        col_neo_center=NEOSERRA_COLUMNS.center,
        col_neo_attribution_source=NEOSERRA_COLUMNS.milestone_attribution_source,
    )
    director_fig.write_image(_image_path(args, args.directorfilename))

    print("DONE!")


if __name__ == "__main__":
    main()