first commit
This commit is contained in:
225
section_1_graph_export_module/nbs_analysis_script.py
Normal file
225
section_1_graph_export_module/nbs_analysis_script.py
Normal file
@@ -0,0 +1,225 @@
|
||||
# FILE: nbs_analysis_script.py
|
||||
# CREATED: 12/26/25
|
||||
# AUTHOR: Vincent Allen
|
||||
# PURPOSE: Script to generate New Business Starts (NBS) analysis graphs and datasets from prepared Neoserra data.
|
||||
|
||||
# Standard library and third party imports
|
||||
import pandas as pd
|
||||
import sys
|
||||
import os.path
|
||||
import argparse
|
||||
import json
|
||||
|
||||
# Custom modules
|
||||
# Importing the functions from the library code provided
|
||||
from section_1_graph_library_module import ( # pyright:ignore
|
||||
make_nbs_attribution_network_wide,
|
||||
make_attribution_rate_chart,
|
||||
make_theoretical_attribution_rate_chart,
|
||||
make_director_confirmed_graph
|
||||
)
|
||||
|
||||
from milestone_attribution_dataset_module import sanitize_nbs_data
|
||||
from constants_module import NEOSERRA_COLUMNS, OUT_COLUMNS
|
||||
|
||||
from shared_tools_module import csv_url_to_dataframe
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Define the script's command-line options and parse them from ``sys.argv``.

    Exactly one of ``--inputcsv`` / ``--exportmoduleurl`` must be given (they
    are mutually exclusive), and ``--outpath`` and ``--fiscalyear`` are
    required. Every other option is an optional string; all but ``--mapping``
    carry a default file name, title, or report prefix.

    Returns:
        The populated ``argparse.Namespace``.
    """
    cli = argparse.ArgumentParser(description="Generate New Business Starts (NBS) Analysis Graphs")

    # Input data source: one, and only one, of these two must be supplied.
    source = cli.add_mutually_exclusive_group(required=True)
    source.add_argument(
        "--inputcsv",
        type=str,
        help="The path to the raw NBS analysis CSV dataset.",
    )
    source.add_argument(
        "--exportmoduleurl",
        type=str,
        help="The url to the configured export module for the NBS milestones data in Neoserra.",
    )

    # Required run settings.
    cli.add_argument(
        "--outpath",
        type=str,
        required=True,
        help="The base directory path to place generated files into.",
    )
    cli.add_argument(
        "--fiscalyear",
        required=True,
        type=str,
        help="The fiscal year tag to place at the end of graph titles.",
    )

    # Optional column-name override file (no default: absent means "use built-ins").
    cli.add_argument(
        "--mapping",
        type=str,
        required=False,
        help="Path to a JSON file to override default column names mappings.",
    )

    # All remaining options are optional strings with a default value.
    # Kept in a table so each graph's options sit together; the order here is
    # the order they are registered (and therefore shown in --help).
    defaulted_options = (
        # --- GRAPH 1: Network Wide Stacked Bar ---
        ("--netwidefilename",
         "nbsattributionnetworkwide",
         "Filename for the network-wide attribution stacked bar chart."),
        ("--netwidetitle",
         "New Business Start Attributions Per Center FY 25",
         "Title for the network-wide attribution graph."),
        # --- GRAPH 2: Attribution Rate Chart ---
        ("--ratefilename",
         "nbsattributionrate",
         "Filename for the attribution rate bar chart."),
        ("--ratedatafilename",
         "nbs_attribution_rate_data.csv",
         "Filename for the intermediate dataset used for the attribution rate chart."),
        # --- GRAPH 3: Theoretical Rate Chart ---
        ("--theoreticalfilename",
         "theoreticalnbsattributionrate",
         "Filename for the theoretical attribution rate bar chart."),
        ("--theoreticaltitle",
         "Documented Percentage if All NBS Milestones With an Attribution Source had an Affirmation FY 25",
         "Title for the theoretical attribution rate graph."),
        ("--theoreticaldatafilename",
         "theoretical_nbs_rate_data.csv",
         "Filename for the intermediate dataset used for the theoretical rate chart."),
        # --- GRAPH 4: Director Confirmed Chart ---
        ("--directorfilename",
         "directorconfirmednbs",
         "Filename for the director confirmed NBS bar chart."),
        ("--directortitle",
         "Percentage of Director Confirmed NBS Attributions Per Center FY 25",
         "Title for the director confirmed graph."),
        ("--directordatafilename",
         "director_confirmed_nbs_data.csv",
         "Filename for the intermediate dataset used for the director confirmed chart."),
        # --- Report prefix shared by every generated image ---
        ("--report",
         "nbsanalysis",
         "The prefix used to name report files such that the word generation scripts can find them with the image registry"),
    )
    for flag, default_value, help_text in defaulted_options:
        cli.add_argument(flag, type=str, default=default_value, help=help_text)

    return cli.parse_args()
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: parse the CLI, load the NBS milestone data from a CSV
    # file or the Neoserra export module, keep only reportable rows, clean the
    # data, write the cleaned dataset, then render the four analysis graphs
    # (and their intermediate datasets) into --outpath.
    args = parse_args()

    # Handle optional JSON mapping override
    if args.mapping:
        NEOSERRA_COLUMNS.apply_json_mapping(args.mapping)
        OUT_COLUMNS.apply_json_mapping(args.mapping)

    # Ensure output directory exists. exist_ok=True removes the
    # check-then-create race and still raises (an OSError subclass) when the
    # path exists but is not a directory, so that case is reported here
    # instead of failing later on the first file write.
    try:
        os.makedirs(args.outpath, exist_ok=True)
    except OSError as e:
        print(f"Error creating output directory: {e}")
        sys.exit(1)

    # Load the raw dataset from whichever source was selected on the CLI.
    if args.inputcsv:
        print(f"Loading input data from {args.inputcsv}...\n")
        try:
            nbs_df = pd.read_csv(args.inputcsv)
        except Exception as e:
            print(f"Failed to read input CSV: {e}")
            sys.exit(1)
    elif args.exportmoduleurl:
        # The URL itself is deliberately not echoed to the console.
        print("Loading input data from the Neoserra export module...\n")
        try:
            nbs_df = csv_url_to_dataframe(args.exportmoduleurl)
        except Exception as e:
            print("Failed to grab the csv data from the Neoserra export module")
            print(f'Got={e}')
            # Without a dataframe nothing below can run; exit like the CSV branch
            # rather than falling through to a NameError on nbs_df.
            sys.exit(1)
    else:
        raise RuntimeError("No input data source was defined, this should not be possible unless you have changed the code")

    # Filter for reportable records only.
    # This will fail with a KeyError if the column is missing, as required.
    nbs_df = nbs_df[nbs_df[NEOSERRA_COLUMNS.reportable] == 1]

    # Do the data cleaning on the dataset
    nbs_df = sanitize_nbs_data(
        nbs_df,
        col_neo_center=NEOSERRA_COLUMNS.center,
        col_neo_client_id=NEOSERRA_COLUMNS.client_id,
        col_neo_milestone_date=NEOSERRA_COLUMNS.milestone_date,
        col_neo_attribution_date=NEOSERRA_COLUMNS.attribution_date,
        col_neo_attribution_source=NEOSERRA_COLUMNS.milestone_attribution_source,
        col_neo_affirmation=NEOSERRA_COLUMNS.milestone_affirmation,
        col_neo_milestone_type=NEOSERRA_COLUMNS.milestone_type_name,
        col_out_documentation_level=OUT_COLUMNS.milestone_documentation_level,
        col_neo_reportable=NEOSERRA_COLUMNS.reportable,
        business_start_impact_val=NEOSERRA_COLUMNS.business_start_impact_val,
        business_established_val=NEOSERRA_COLUMNS.business_established_val,
    )

    # Persist the cleaned dataset alongside the graphs.
    nbs_df.to_csv(os.path.join(args.outpath, f"cleaned_nbs_dataset_{args.fiscalyear}.csv"))

    # 1. Network Wide Attribution
    print("Generating Network Wide Attribution Graph...\n")
    network_fig = make_nbs_attribution_network_wide(
        nbs_df,
        title=args.netwidetitle,
        col_neo_center=NEOSERRA_COLUMNS.center,
        col_documentation_level=OUT_COLUMNS.milestone_documentation_level,
    )
    network_fig.write_image(os.path.join(args.outpath, f"{args.report}_{args.netwidefilename}_.png"))

    # 2. Attribution Rate Chart
    print("Generating Attribution Rate Chart and Dataset...\n")
    rate_fig = make_attribution_rate_chart(
        nbs_df,
        fiscalyear=args.fiscalyear,
        source_data_export_path=str(os.path.join(args.outpath, args.ratedatafilename)),
        documented_tag=OUT_COLUMNS.val_documented,
        col_neo_center=NEOSERRA_COLUMNS.center,
        col_documentation_level=OUT_COLUMNS.milestone_documentation_level,
    )
    rate_fig.write_image(os.path.join(args.outpath, f"{args.report}_{args.ratefilename}_.png"))

    # 3. Theoretical Attribution Rate Chart
    print("Generating Theoretical Attribution Rate Chart and Dataset...\n")
    theoretical_fig = make_theoretical_attribution_rate_chart(
        nbs_df,
        title=args.theoreticaltitle,
        source_data_export_path=str(os.path.join(args.outpath, args.theoreticaldatafilename)),
        documented_tag=OUT_COLUMNS.val_documented,
        affirmation_missing_tag=OUT_COLUMNS.val_affirmation_missing,
        col_neo_center=NEOSERRA_COLUMNS.center,
        col_documentation_level=OUT_COLUMNS.milestone_documentation_level,
    )
    theoretical_fig.write_image(os.path.join(args.outpath, f"{args.report}_{args.theoreticalfilename}_.png"))

    # 4. Director Confirmed Graph
    print("Generating Director Confirmed Graph and Dataset...\n")
    director_fig = make_director_confirmed_graph(
        nbs_df,
        title=args.directortitle,
        source_data_export_path=str(os.path.join(args.outpath, args.directordatafilename)),
        col_neo_center=NEOSERRA_COLUMNS.center,
        col_neo_attribution_source=NEOSERRA_COLUMNS.milestone_attribution_source,
    )
    director_fig.write_image(os.path.join(args.outpath, f"{args.report}_{args.directorfilename}_.png"))

    print("DONE!")
|
||||
Reference in New Issue
Block a user