testing123/section_1_graph_export_module/funding_analysis_script.py

# FILE: pasbdc_funding_analysis_script.py
# CREATED: 12/26/25
# AUTHOR: Vincent Allen
# PURPOSE: Script to generate capital funding analysis graphs and datasets from prepared Neoserra data.

# Standard library and third-party libraries
import pandas as pd
import sys
import os.path
import argparse
import json

# Custom modules
from section_1_graph_library_module import ( #pyright:ignore
    make_funding_attribution_network_wide,
    make_funding_attribution_rate_chart,
    make_theoretical_funding_attribution_rate_chart,
    make_funding_director_confirmed_graph,
)

from milestone_attribution_dataset_module import sanitize_funding_data
from constants_module import NEOSERRA_COLUMNS, OUT_COLUMNS
from shared_tools_module import csv_url_to_dataframe

def parse_args():
    """Build the command-line parser for this script and parse ``sys.argv``.

    Exactly one of ``--inputcsv`` / ``--exportmoduleurl`` must be supplied
    (they are registered in a required, mutually exclusive group).
    ``--outpath`` and ``--fiscalyear`` are required, ``--mapping`` is optional,
    and every remaining option carries a default value.

    Returns:
        argparse.Namespace: The parsed option values, one attribute per flag.
    """
    cli = argparse.ArgumentParser(description="Generate Capital Funding Analysis Graphs")

    # Data source: a CSV on disk OR the export module URL, never both.
    source_group = cli.add_mutually_exclusive_group(required=True)
    source_group.add_argument(
        "--inputcsv",
        type=str,
        help="The path to the raw capital funding CSV dataset. Either a path to a CSV file is required OR a value must be provided for --exportmoduleurl",
    )
    source_group.add_argument(
        "--exportmoduleurl",
        type=str,
        help="The URL pointing to the configured Neoserra export module for the funding analysis data. Either the export module url is required, or a value must be provided for --inputcsv.",
    )

    # Options that must always be given: (flag, help text).
    mandatory_options = (
        ("--outpath", "The base directory path to place generated files into."),
        ("--fiscalyear", "The fiscal year the input data comes from"),
    )
    for flag, help_text in mandatory_options:
        cli.add_argument(flag, type=str, required=True, help=help_text)

    cli.add_argument(
        "--mapping",
        type=str,
        required=False,
        help="Path to a JSON file to override default column names mappings.",
    )

    # Options with defaults: (flag, default value, help text).
    # Registration order is kept so the generated --help listing is unchanged.
    defaulted_options = (
        # --- GRAPH 1: Network Wide Stacked Bar ---
        ("--netwidefilename",
         "fundingattributionnetworkwide",
         "Filename for the network-wide attribution stacked bar chart."),
        ("--netwidetitle",
         "Capital Funding Attributions Per Center",
         "Title for the network-wide attribution graph."),
        # --- GRAPH 2: Attribution Rate Chart ---
        ("--ratefilename",
         "fundingattributionrate",
         "Filename for the attribution rate bar chart."),
        ("--ratedatafilename",
         "funding_attribution_rate_data.csv",
         "Filename for the intermediate dataset used for the attribution rate chart."),
        # --- GRAPH 3: Theoretical Rate Chart ---
        ("--theoreticalfilename",
         "theoreticalfundingattributionrate",
         "Filename for the theoretical attribution rate bar chart."),
        ("--theoreticaltitle",
         "Documented Percentage if All Funding Milestones With an Attribution Source had an Affirmation",
         "Title for the theoretical attribution rate graph."),
        ("--theoreticaldatafilename",
         "theoretical_funding_rate_data.csv",
         "Filename for the intermediate dataset used for the theoretical rate chart."),
        # --- GRAPH 4: Director Confirmed Chart ---
        ("--directorfilename",
         "directorconfirmedfunding",
         "Filename for the director confirmed funding bar chart."),
        ("--directortitle",
         "Percentage of Director Confirmed Capital Funding Attributions Per Center",
         "Title for the director confirmed graph."),
        ("--directordatafilename",
         "director_confirmed_data.csv",
         "Filename for the intermediate dataset used for the director confirmed chart."),
        # --- Report name embedded in generated image filenames ---
        ("--report",
         "fundinganalysis",
         "The report name to use in filenames so that the word generation script can use the image registry to find this report's images."),
    )
    for flag, default_value, help_text in defaulted_options:
        cli.add_argument(flag, type=str, default=default_value, help=help_text)

    return cli.parse_args()

def _load_funding_data(args):
    """Load the raw funding dataset from the source chosen on the command line.

    Reads the CSV at ``args.inputcsv`` when that option was supplied; otherwise
    fetches the data through ``csv_url_to_dataframe(args.exportmoduleurl)``.
    On any load failure an error message is printed and the process exits with
    status 1.

    Args:
        args: The parsed command-line namespace (uses ``inputcsv`` and
            ``exportmoduleurl``).

    Returns:
        The loaded dataframe.
    """
    # Test against None instead of truthiness: an explicitly passed empty
    # string satisfies argparse, and must still reach a loader (and its error
    # handling) rather than leaving the dataframe undefined.
    if args.inputcsv is not None:
        print(f"Loading input data from {args.inputcsv}...\n")
        try:
            return pd.read_csv(args.inputcsv)
        except Exception as e:
            print(f"Failed to read input CSV: {e}")
            sys.exit(1)

    try:
        return csv_url_to_dataframe(args.exportmoduleurl)
    except Exception as e:
        print("Failed to fetch data from the export module.")
        print(f"got={e}")
        # sys.exit rather than the site-provided exit(), which is not
        # available in every interpreter configuration.
        sys.exit(1)


def main():
    """Run the funding analysis: load data, clean it, and write all outputs.

    Steps, in order:
      1. Parse command-line options and apply the optional JSON column mapping.
      2. Make sure the output directory exists.
      3. Load the dataset, keep rows whose reportable column equals 1, and pass
         the result through ``sanitize_funding_data``.
      4. Write the cleaned dataset as ``cleaned_funding_<fiscalyear>.csv``.
      5. Build the four figures and write each as a PNG in the output directory.

    Exits with status 1 if the output directory cannot be created or the input
    data cannot be loaded.
    """
    args = parse_args()

    # Handle optional JSON mapping override
    if args.mapping:
        NEOSERRA_COLUMNS.apply_json_mapping(args.mapping)
        OUT_COLUMNS.apply_json_mapping(args.mapping)

    # Ensure output directory exists. exist_ok avoids a check-then-create race
    # and reports a non-directory at this path immediately instead of at the
    # first file write.
    try:
        os.makedirs(args.outpath, exist_ok=True)
    except OSError as e:
        print(f"Error creating output directory: {e}")
        sys.exit(1)

    funding_df = _load_funding_data(args)

    # Filter for reportable records only.
    # This will fail with a KeyError if the column is missing, as required.
    funding_df = funding_df[funding_df[NEOSERRA_COLUMNS.reportable] == 1]

    funding_df = sanitize_funding_data(
        df=funding_df,
        col_neo_center=NEOSERRA_COLUMNS.center,
        col_neo_attribution_source=NEOSERRA_COLUMNS.milestone_attribution_source,
        col_neo_affirmation=NEOSERRA_COLUMNS.milestone_affirmation,
        col_out_documentation_level=OUT_COLUMNS.milestone_documentation_level
    )

    funding_df.to_csv(os.path.join(args.outpath, f"cleaned_funding_{args.fiscalyear}.csv"), index=False)

    # NOTE(review): every image filename below ends in "_.png"; the trailing
    # underscore is kept as-is because the --report help text says another
    # script locates images by filename — confirm before changing it.

    # 1. Network Wide Attribution
    print("Generating Network Wide Attribution Graph...\n")
    network_fig = make_funding_attribution_network_wide(
        funding_df,
        fiscal_year=args.fiscalyear,
        title=args.netwidetitle,
        col_neo_center=NEOSERRA_COLUMNS.center,
        col_documentation_level=OUT_COLUMNS.milestone_documentation_level
    )
    network_fig.write_image(os.path.join(args.outpath, f"{args.report}_{args.netwidefilename}_.png"))

    # 2. Attribution Rate Chart
    print("Generating Attribution Rate Chart and Dataset...\n")
    rate_fig = make_funding_attribution_rate_chart(
        funding_df,
        fiscal_year=args.fiscalyear,
        source_data_export_path=str(os.path.join(args.outpath, args.ratedatafilename)),
        documented_tag=OUT_COLUMNS.val_documented,
        col_neo_center=NEOSERRA_COLUMNS.center,
        col_documentation_level=OUT_COLUMNS.milestone_documentation_level
    )
    rate_fig.write_image(os.path.join(args.outpath, f"{args.report}_{args.ratefilename}_.png"))

    # 3. Theoretical Attribution Rate Chart
    print("Generating Theoretical Attribution Rate Chart and Dataset...\n")
    theoretical_fig = make_theoretical_funding_attribution_rate_chart(
        funding_df,
        title=args.theoreticaltitle,
        fiscal_year=args.fiscalyear,
        source_data_export_path=str(os.path.join(args.outpath, args.theoreticaldatafilename)),
        documented_tag=OUT_COLUMNS.val_documented,
        affirmation_missing_tag=OUT_COLUMNS.val_affirmation_missing,
        col_neo_center=NEOSERRA_COLUMNS.center,
        col_documentation_level=OUT_COLUMNS.milestone_documentation_level
    )
    theoretical_fig.write_image(os.path.join(args.outpath, f"{args.report}_{args.theoreticalfilename}_.png"))

    # 4. Director Confirmed Graph
    print("Generating Director Confirmed Graph and Dataset...\n")
    director_fig = make_funding_director_confirmed_graph(
        funding_df,
        fiscal_year=args.fiscalyear,
        title=args.directortitle,
        source_data_export_path=str(os.path.join(args.outpath, args.directordatafilename)),
        col_neo_center=NEOSERRA_COLUMNS.center,
        col_neo_attribution_source=NEOSERRA_COLUMNS.milestone_attribution_source
    )
    director_fig.write_image(os.path.join(args.outpath, f"{args.report}_{args.directorfilename}_.png"))

    print("DONE!")


if __name__ == "__main__":
    main()