first commit
This commit is contained in:
@@ -0,0 +1,5 @@
|
||||
from .make_milestone_analysis import *
|
||||
|
||||
# Names re-exported by ``from <package> import *``.
# Every entry must name a real attribute of this package: the previous
# placeholder entry '' made any star-import of the package raise
# AttributeError. Nothing is star-exported until names are listed here;
# attributes stay reachable via normal attribute access on the package.
__all__ = []
|
||||
@@ -0,0 +1,240 @@
|
||||
# external libraries
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import plotly.express as px
|
||||
|
||||
# python modules
|
||||
import argparse
|
||||
import re
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
|
||||
# custom modules
|
||||
from milestone_attribution_graph_library_module import make_attribution_pie, make_attribution_grouped_chart #pyright:ignore
|
||||
from milestone_attribution_dataset_module import tag_documentation_level, sanitize_nbs_data, sanitize_funding_data#pyright:ignore
|
||||
from pasbdc_data_cleaning import clean_center_name # pyright:ignore
|
||||
from constants_module import NEOSERRA_COLUMNS, OUT_COLUMNS
|
||||
from shared_tools_module import csv_url_to_dataframe
|
||||
|
||||
import plotly.io as pio
|
||||
|
||||
# Pass extra command-line flags to the background browser managed by Plotly
# (the one it launches when figures are exported to image files). Without
# these flags, errors were raised on the Ubuntu server this script runs on.
# --no-sandbox: handles Ubuntu's strict security permissions
# --disable-audio: prevents the browser from looking for sound drivers
# --disable-gpu: turns off GPU hardware acceleration in the browser
|
||||
pio.defaults.chrome_args = ["--no-sandbox", "--disable-audio", "--disable-gpu"]
|
||||
|
||||
def parse_args(argv=None):
    """Parse the command-line options for the milestone attribution export.

    Exactly one funding data source (``--fundingcsv`` or
    ``--fundingexportmodule``) and exactly one new business starts data source
    (``--nbscsv`` or ``--nbsexportmodule``) must be given, together with
    ``--fiscalyear`` and ``--outpath``. All other options have defaults.

    Args:
        argv: Optional list of argument strings to parse. When ``None`` (the
            default) argparse reads ``sys.argv[1:]``, which is what the script
            entry point relies on.

    Returns:
        argparse.Namespace: The parsed options.

    Raises:
        SystemExit: Raised by argparse when required options are missing or
            mutually exclusive options are combined.
    """
    parser = argparse.ArgumentParser()

    # Each dataset can come from a local CSV file or from an export module
    # reference, but never from both at once.
    funding_data_group = parser.add_mutually_exclusive_group(required=True)
    nbs_data_group = parser.add_mutually_exclusive_group(required=True)

    funding_data_group.add_argument(
        "--fundingcsv",
        type=str,
        help="The path to the exported funding milestone data from Neoserra.",
    )
    funding_data_group.add_argument(
        "--fundingexportmodule",
        type=str,
        help="The export source handed to csv_url_to_dataframe to load the "
             "funding milestone data instead of reading a local CSV file.",
    )

    parser.add_argument(
        "--fundingcsvtag",
        type=str,
        required=False,
        default="cleaned_funding_milestones",
        help="The tag to place in the cleaned funding milestone data filename",
    )

    nbs_data_group.add_argument(
        "--nbscsv",
        type=str,
        help="The path to the exported new business starts milestone data from Neoserra",
    )
    nbs_data_group.add_argument(
        "--nbsexportmodule",
        type=str,
        help="The export source handed to csv_url_to_dataframe to load the "
             "new business starts milestone data instead of reading a local CSV file.",
    )

    parser.add_argument(
        "--nbscsvtag",
        type=str,
        required=False,
        default="cleaned_nbs_milestones",
        help="The tag to place in the cleaned NBS milestone data filename",
    )

    parser.add_argument(
        "--fiscalyear",
        type=str,
        required=True,
        help="The fiscal year that the source data came from, used to add a tag to the graph title.",
    )

    parser.add_argument(
        "--outpath",
        type=str,
        required=True,
        help="The path to place the generated graph files into.",
    )

    parser.add_argument(
        "--attributionpietag",
        type=str,
        required=False,
        default="attribution_pie",
        help="The tag to place in the exported pie chart filename.",
    )

    parser.add_argument(
        "--groupedattributiontag",
        type=str,
        required=False,
        default="attribution_grouped",
        help="The tag to place in the exported bar chart filename",
    )

    parser.add_argument(
        "--mapping",
        type=str,
        required=False,
        default="",
        help="The path to a .json file to override the column name mappings used by this script",
    )

    return parser.parse_args(argv)
|
||||
|
||||
# Characters removed from a center name before it is used in a filename.
_UNSAFE_FILENAME_CHARS = re.compile(r"[/\\?%*:|\"<>\x7F\x00-\x1F]")

# Date note handed to every pie chart.
# NOTE(review): this value is hard-coded; confirm whether it should follow the
# export date of the source data instead.
_PIE_DATE_NOTE = "1/27/26"

# Progress messages printed while charts are generated.
_PIE_MESSAGE = "\tGenerating attribution pie chart..."
_BAR_MESSAGE = "\tGenerating attribution source chart..."


def _load_reportable_milestones(csv_path, export_module, missing_message, **read_csv_kwargs):
    """Load a milestone dataset and keep only the reportable records.

    Args:
        csv_path: Path to a local CSV export; used when truthy.
        export_module: Value passed to ``csv_url_to_dataframe`` when no CSV
            path was given.
        missing_message: Message for the error raised when neither source is set.
        **read_csv_kwargs: Extra keyword arguments for ``pd.read_csv``; they
            only apply to the local CSV path.

    Returns:
        The rows whose reportable column equals 1.

    Raises:
        RuntimeError: If neither ``csv_path`` nor ``export_module`` is set.
        KeyError: If the reportable column is missing, as required.
    """
    if csv_path:
        df = pd.read_csv(csv_path, **read_csv_kwargs)
    elif export_module:
        # NOTE(review): read_csv_kwargs (e.g. parse_dates) are not applied on
        # this path; confirm csv_url_to_dataframe yields the expected dtypes.
        df = csv_url_to_dataframe(export_module)
    else:
        raise RuntimeError(missing_message)

    # Filter for reportable records only.
    return df[df[NEOSERRA_COLUMNS.reportable] == 1]


def _write_chart_pair(df, subject, milestone_label, file_stem, args,
                      pie_message=None, bar_message=None):
    """Write the pie chart and the grouped bar chart for one slice of data.

    Args:
        df: The milestone rows to chart.
        subject: Leading part of the chart titles (a center name or "Network Wide").
        milestone_label: Milestone kind used in the titles ("Funding" or
            "New Business Start").
        file_stem: Leading part of both output filenames.
        args: Parsed command-line options (fiscal year, output path, tags).
        pie_message: Optional progress message printed before the pie chart.
        bar_message: Optional progress message printed before the bar chart.
    """
    if pie_message:
        print(pie_message)
    pie_fig = make_attribution_pie(
        df,
        title=f"{subject} Documented vs. Not Documented {milestone_label} Milestones {args.fiscalyear}",
        date_note=_PIE_DATE_NOTE,
        col_documentation_level=OUT_COLUMNS.milestone_documentation_level,
    )
    pie_fig.write_image(
        os.path.join(args.outpath, f"{file_stem}_{args.attributionpietag}_{args.fiscalyear}.png")
    )

    if bar_message:
        print(bar_message)
    bar_fig = make_attribution_grouped_chart(
        df,
        title=f"{subject} Attribution Source vs. Documentation Level For {milestone_label} Milestones {args.fiscalyear}",
    )
    bar_fig.write_image(
        os.path.join(args.outpath, f"{file_stem}_{args.groupedattributiontag}_{args.fiscalyear}.png")
    )


def _write_center_charts(df, milestone_label, file_kind, args):
    """Write one pie chart and one bar chart for every center found in ``df``.

    Args:
        df: The cleaned milestone rows for all centers.
        milestone_label: Milestone kind used in the chart titles.
        file_kind: Short dataset name placed in the filenames ("funding" or "nbs").
        args: Parsed command-line options.
    """
    center_column = NEOSERRA_COLUMNS.center
    for center in df[center_column].unique():
        # Strip out all of the non-safe characters for filenames
        filename_center = _UNSAFE_FILENAME_CHARS.sub("", center)

        print(f"Processing center {center}...")
        _write_chart_pair(
            df[df[center_column] == center],
            center,
            milestone_label,
            f"{filename_center}_{file_kind}",
            args,
            pie_message=_PIE_MESSAGE,
            bar_message=_BAR_MESSAGE,
        )


def main():
    """Run the export: clean both datasets, write them as CSV, and render charts.

    Per-center charts are written for the funding data and then the new
    business starts data, followed by network-wide charts for both.
    """
    args = parse_args()

    if args.mapping:
        NEOSERRA_COLUMNS.apply_json_mapping(args.mapping)
        OUT_COLUMNS.apply_json_mapping(args.mapping)

    if not os.path.exists(args.outpath):
        # exist_ok guards against the directory appearing between the check
        # above and the creation below.
        os.makedirs(args.outpath, exist_ok=True)
        print("Created Output Directory!")

    print("Loading and cleaning funding data...")
    funding_df = _load_reportable_milestones(
        args.fundingcsv,
        args.fundingexportmodule,
        "No funding datasource configured in this script.",
    )
    funding_df = sanitize_funding_data(
        funding_df,
        col_neo_attribution_source=NEOSERRA_COLUMNS.milestone_attribution_source,
        col_neo_affirmation=NEOSERRA_COLUMNS.milestone_affirmation,
        col_out_documentation_level=OUT_COLUMNS.milestone_documentation_level,
        col_neo_center=NEOSERRA_COLUMNS.center,
    )
    funding_df.to_csv(os.path.join(args.outpath, f"{args.fundingcsvtag}_{args.fiscalyear}.csv"))

    _write_center_charts(funding_df, "Funding", "funding", args)

    # Now we will process the new business starts analysis
    print("Loading and cleaning new business starts data...")
    nbs_df = _load_reportable_milestones(
        args.nbscsv,
        args.nbsexportmodule,
        "No NBS datasource configured for this script.",
        parse_dates=[NEOSERRA_COLUMNS.milestone_date, NEOSERRA_COLUMNS.attribution_date],
    )
    nbs_df = sanitize_nbs_data(
        nbs_df,
        col_neo_center=NEOSERRA_COLUMNS.center,
        col_neo_client_id=NEOSERRA_COLUMNS.client_id,
        col_neo_milestone_date=NEOSERRA_COLUMNS.milestone_date,
        col_neo_attribution_date=NEOSERRA_COLUMNS.attribution_date,
        col_neo_attribution_source=NEOSERRA_COLUMNS.milestone_attribution_source,
        col_neo_affirmation=NEOSERRA_COLUMNS.milestone_affirmation,
        col_neo_milestone_type=NEOSERRA_COLUMNS.milestone_type_name,
        col_out_documentation_level=OUT_COLUMNS.milestone_documentation_level,
        col_neo_reportable=NEOSERRA_COLUMNS.reportable,
        business_start_impact_val=NEOSERRA_COLUMNS.business_start_impact_val,
        business_established_val=NEOSERRA_COLUMNS.business_established_val,
    )

    # Missing or blank attribution sources are all labelled "Blank".
    source_column = NEOSERRA_COLUMNS.milestone_attribution_source
    nbs_df[source_column] = (
        nbs_df[source_column]
        .fillna("Blank")
        .astype(str)
        .replace(['', ' ', 'nan', 'NaN'], "Blank")
    )

    # Honour --nbscsvtag (its default keeps the previous filename).
    nbs_df.to_csv(os.path.join(args.outpath, f"{args.nbscsvtag}_{args.fiscalyear}.csv"))

    _write_center_charts(nbs_df, "New Business Start", "nbs", args)

    # Make NBS network wide charts
    print("\tCreating network wide charts...")
    _write_chart_pair(nbs_df, "Network Wide", "New Business Start", "network_nbs", args)

    # Make funding network wide charts
    _write_chart_pair(
        funding_df,
        "Network Wide",
        "Funding",
        "network_funding",
        args,
        bar_message=_BAR_MESSAGE,
    )


if __name__ == "__main__":
    main()
|
||||
11
milestone_attribution_graph_export_module/pyproject.toml
Normal file
11
milestone_attribution_graph_export_module/pyproject.toml
Normal file
@@ -0,0 +1,11 @@
|
||||
[build-system]
|
||||
requires = ["setuptools", "wheel"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "milestone_attribution_graph_export_module"
|
||||
version = "0.1.0"
|
||||
description = "Internal PASBDC graph making scripts used to generate figures for the milestone attribution analysis."
|
||||
|
||||
[tool.setuptools]
|
||||
packages = ["milestone_attribution_graph_export_module"]
|
||||
Reference in New Issue
Block a user