40 lines
1.6 KiB
Python
40 lines
1.6 KiB
Python
|
|
import pandas as pd
|
|
from milestone_attribution_dataset_module import sanitize_nbs_data
|
|
from constants_module import NEOSERRA_COLUMNS, OUT_COLUMNS
|
|
|
|
def test_client(file_path, client_id):
    """Print one client's rows from a CSV before and after sanitization.

    Loads ``file_path`` with pandas, prints the rows whose client-id column
    equals ``client_id``, passes the whole frame through
    ``sanitize_nbs_data``, and prints the matching rows of the result.

    Args:
        file_path: Path of the CSV export to load.
        client_id: Value compared against the ``NEOSERRA_COLUMNS.client_id``
            column to pick out the rows of interest.

    Returns:
        None. Everything is reported through ``print``.
    """
    print(f"\nTesting {file_path} for client {client_id}")
    frame = pd.read_csv(file_path)

    # Column that identifies the client in both the raw and cleaned frames.
    id_column = NEOSERRA_COLUMNS.client_id

    # Show the untouched rows first so they can be compared with the output.
    raw_mask = frame[id_column] == client_id
    raw_rows = frame[raw_mask]
    print("Raw client data:")
    print(raw_rows)

    # Column names and marker values handed to the sanitizer, gathered in one
    # place. NOTE(review): what each keyword means is defined by
    # sanitize_nbs_data, which is not visible here.
    sanitize_kwargs = {
        "col_neo_center": NEOSERRA_COLUMNS.center,
        "col_neo_client_id": id_column,
        "col_neo_milestone_date": NEOSERRA_COLUMNS.milestone_date,
        "col_neo_attribution_date": NEOSERRA_COLUMNS.attribution_date,
        "col_neo_attribution_source": NEOSERRA_COLUMNS.milestone_attribution_source,
        "col_neo_affirmation": NEOSERRA_COLUMNS.milestone_affirmation,
        "col_neo_milestone_type": NEOSERRA_COLUMNS.milestone_type_name,
        "col_out_documentation_level": OUT_COLUMNS.milestone_documentation_level,
        "col_neo_reportable": NEOSERRA_COLUMNS.reportable,
        "business_start_impact_val": NEOSERRA_COLUMNS.business_start_impact_val,
        "business_established_val": NEOSERRA_COLUMNS.business_established_val,
    }
    sanitized = sanitize_nbs_data(frame, **sanitize_kwargs)

    # Same client filter, applied to the sanitized frame.
    sanitized_mask = sanitized[id_column] == client_id
    sanitized_rows = sanitized[sanitized_mask]
    print("Cleaned client data:")
    print(sanitized_rows)
|
|
|
|
if __name__ == "__main__":
    # Run the same client through both exports so the two outputs can be
    # compared side by side in the console.
    client_id = "KUP270610"
    for csv_path in (
        "export_module_no_bus_estab.csv",
        "export_module_with_bus_estab.csv",
    ):
        test_client(csv_path, client_id)
|