"""Manual check: print one client's rows before and after sanitize_nbs_data."""

import pandas as pd

from milestone_attribution_dataset_module import sanitize_nbs_data
from constants_module import NEOSERRA_COLUMNS, OUT_COLUMNS


def test_client(file_path, client_id):
    """Print a client's rows from a CSV before and after sanitization.

    Reads ``file_path`` with pandas, prints the rows whose client-id column
    equals ``client_id``, passes the full frame through
    ``sanitize_nbs_data``, and prints the matching rows of the result.

    Args:
        file_path: Path of the CSV file to load.
        client_id: Value compared against the ``NEOSERRA_COLUMNS.client_id``
            column to select rows.

    Returns:
        None. All output goes to stdout.
    """
    print(f"\nTesting {file_path} for client {client_id}")
    frame = pd.read_csv(file_path)

    # Show the untouched rows for this client first, for comparison.
    client_col = NEOSERRA_COLUMNS.client_id
    before = frame[frame[client_col] == client_id]
    print("Raw client data:")
    print(before)

    # Column names and values handed to the sanitizer, taken from the
    # project's constants.
    sanitize_kwargs = {
        "col_neo_center": NEOSERRA_COLUMNS.center,
        "col_neo_client_id": client_col,
        "col_neo_milestone_date": NEOSERRA_COLUMNS.milestone_date,
        "col_neo_attribution_date": NEOSERRA_COLUMNS.attribution_date,
        "col_neo_attribution_source": NEOSERRA_COLUMNS.milestone_attribution_source,
        "col_neo_affirmation": NEOSERRA_COLUMNS.milestone_affirmation,
        "col_neo_milestone_type": NEOSERRA_COLUMNS.milestone_type_name,
        "col_out_documentation_level": OUT_COLUMNS.milestone_documentation_level,
        "col_neo_reportable": NEOSERRA_COLUMNS.reportable,
        "business_start_impact_val": NEOSERRA_COLUMNS.business_start_impact_val,
        "business_established_val": NEOSERRA_COLUMNS.business_established_val,
    }
    sanitized = sanitize_nbs_data(frame, **sanitize_kwargs)

    # Same client filter, now applied to the sanitizer's output.
    after = sanitized[sanitized[client_col] == client_id]
    print("Cleaned client data:")
    print(after)


if __name__ == "__main__":
    client_id = "KUP270610"
    # Run the same client against both exports, in this order.
    for csv_name in (
        "export_module_no_bus_estab.csv",
        "export_module_with_bus_estab.csv",
    ):
        test_client(csv_name, client_id)