first commit

This commit is contained in:
2026-05-21 08:40:24 -04:00
commit b084545275
711 changed files with 3659856 additions and 0 deletions

39
reproduce_issue.py Normal file
View File

@@ -0,0 +1,39 @@
import pandas as pd
from milestone_attribution_dataset_module import sanitize_nbs_data
from constants_module import NEOSERRA_COLUMNS, OUT_COLUMNS
def test_client(file_path, client_id):
    """Print one client's rows from a CSV export before and after sanitization.

    The CSV at ``file_path`` is loaded with pandas, the rows whose
    ``NEOSERRA_COLUMNS.client_id`` column equals ``client_id`` are printed,
    the full frame is passed through ``sanitize_nbs_data``, and the matching
    rows of the returned frame are printed for comparison.

    Args:
        file_path: Path of the CSV file to load.
        client_id: Value matched against the client-id column.
    """
    print(f"\nTesting {file_path} for client {client_id}")
    frame = pd.read_csv(file_path)
    id_column = NEOSERRA_COLUMNS.client_id

    # Snapshot of the client's rows exactly as they appear in the file.
    rows_before = frame[frame[id_column] == client_id]
    print("Raw client data:")
    print(rows_before)

    # Column-name / value mapping handed to the sanitizer as keyword arguments.
    sanitize_kwargs = {
        "col_neo_center": NEOSERRA_COLUMNS.center,
        "col_neo_client_id": id_column,
        "col_neo_milestone_date": NEOSERRA_COLUMNS.milestone_date,
        "col_neo_attribution_date": NEOSERRA_COLUMNS.attribution_date,
        "col_neo_attribution_source": NEOSERRA_COLUMNS.milestone_attribution_source,
        "col_neo_affirmation": NEOSERRA_COLUMNS.milestone_affirmation,
        "col_neo_milestone_type": NEOSERRA_COLUMNS.milestone_type_name,
        "col_out_documentation_level": OUT_COLUMNS.milestone_documentation_level,
        "col_neo_reportable": NEOSERRA_COLUMNS.reportable,
        "business_start_impact_val": NEOSERRA_COLUMNS.business_start_impact_val,
        "business_established_val": NEOSERRA_COLUMNS.business_established_val,
    }
    sanitized = sanitize_nbs_data(frame, **sanitize_kwargs)

    # Same client filter, applied to the sanitizer's output.
    rows_after = sanitized[sanitized[id_column] == client_id]
    print("Cleaned client data:")
    print(rows_after)
if __name__ == "__main__":
    # Run the same client through both exports so the two outputs can be
    # compared side by side.
    target_client = "KUP270610"
    for export_file in (
        "export_module_no_bus_estab.csv",
        "export_module_with_bus_estab.csv",
    ):
        test_client(export_file, target_client)