Files
testing123/notebooks/section1_2/.ipynb_checkpoints/section1_2-checkpoint.ipynb
2026-05-21 08:40:24 -04:00

594 lines
20 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "036980a4-7309-4ea4-8dac-e8901c4525cf",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"id": "ba89b925-5fc4-41bc-8b5e-81e53cfe1e4e",
"metadata": {},
"source": [
"# Getting the client counseling sessions data\n",
"---\n",
"The data can be obtained from this filter. You will need to break it up into smaller chunks and export them into the folder client_counseling\n",
"https://pasbdc.neoserra.com/activity/list/10?__formid=10&remove=&savename=&sort=DATE&sortdir=DESC&expr=&field_1=DATE&opt_auto_1=pfy&field_2=CLI_TYPE2&opt_2=&opt_2=AC&opt_2=IC&field_3=F_CENTER_ID&opt_3=2805&opt_3=2790&opt_3=2782&opt_3=2784&opt_3=2806&opt_3=2789&opt_3=4491&opt_3=2783&opt_3=2807&opt_3=2809&opt_3=2788&opt_3=2780&opt_3=2808&opt_3=2786&opt_3=2785&opt_3=2787&opt_3=2791&opt_3=2781&field_4=&sortdir=DESC\n",
"\n",
"Then combine the csvs into one big file"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "7c3bf1e4-57bc-419b-909d-fca0043c1df7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 828614 entries, 0 to 828613\n",
"Data columns (total 9 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Session Date 828614 non-null object \n",
" 1 Client 828614 non-null object \n",
" 2 Client ID 828614 non-null object \n",
" 3 Counselor 823892 non-null object \n",
" 4 Session Type 828611 non-null object \n",
" 5 Contact Type 828614 non-null object \n",
" 6 Center 828614 non-null object \n",
" 7 Prep+Contact 828614 non-null float64\n",
" 8 Total Hours 828614 non-null float64\n",
"dtypes: float64(2), object(7)\n",
"memory usage: 56.9+ MB\n"
]
}
],
"source": [
"sessions_df = pd.read_csv('counselling_sessions_fy2225.csv')\n",
"sessions_df.info()"
]
},
{
"cell_type": "markdown",
"id": "acb44859-ef4f-4dcb-82a9-80e5c30ce778",
"metadata": {},
"source": [
"# Get the unique clients list\n",
"---\n",
"Download and load the unique clients list with these columns\n",
"https://pasbdc.neoserra.com/clients?__formid=3&remove=&savename=&sort=CLIENT_ID&sortdir=ASC&expr=&field_1=REVIEWID&opt_1=13213656&field_2=&sortdir=ASC"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "4563ea40-4d94-41c5-86f4-6d620a45c1de",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Unnamed: 0</th>\n",
" <th>Client ID</th>\n",
" <th>Client</th>\n",
" <th>Last Counseling</th>\n",
" <th>Center</th>\n",
" <th>Physical Address County</th>\n",
" <th>NAICs</th>\n",
" <th>Primary NAICS</th>\n",
" <th>NAICS_2</th>\n",
" <th>PA NAICs Code Percentage</th>\n",
" <th>PASBDC NAICs Code Percentage</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>WD04170</td>\n",
" <td>\\tProinnov@ LLC (WD04170)</td>\n",
" <td>9/9/2025 12:00 AM</td>\n",
" <td>WD - WIDENER SBDC</td>\n",
" <td>Philadelphia</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>13.809955</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>WD02759</td>\n",
" <td>\"C.J.A.\"/ Crawley Jones and Allen real estate...</td>\n",
" <td>10/20/2025 12:00 AM</td>\n",
" <td>WD - WIDENER SBDC</td>\n",
" <td>Delaware</td>\n",
" <td>531390-OtherActivitiesRelatedtoRealEstate\\r\\r\\...</td>\n",
" <td>531390 - Other Activities Related to Real Esta...</td>\n",
" <td>53.0</td>\n",
" <td>2.510127</td>\n",
" <td>2.723982</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>PS018402</td>\n",
" <td>Anjie's Cleaning Bees (PS018402)</td>\n",
" <td>10/14/2024 12:00 AM</td>\n",
" <td>Penn State SBDC</td>\n",
" <td>Lycoming</td>\n",
" <td>561720-JanitorialServices\\r\\r\\n\\r\\r\\n</td>\n",
" <td>561720 - Janitorial Services \\r\\r\\n</td>\n",
" <td>56.0</td>\n",
" <td>3.605647</td>\n",
" <td>4.398190</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>C8538</td>\n",
" <td>BRENIMAN PROPERTIES, LLC (C8538)</td>\n",
" <td>10/17/2025 12:00 AM</td>\n",
" <td>PennWest University Clarion SBDC</td>\n",
" <td>Clarion</td>\n",
" <td>531120-LessorsofNonresidentialBuildings(except...</td>\n",
" <td>531120 - Lessors of Nonresidential Buildings (...</td>\n",
" <td>53.0</td>\n",
" <td>2.510127</td>\n",
" <td>2.723982</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>BU016079</td>\n",
" <td>Civil War Cider Co., Inc. (BU016079)</td>\n",
" <td>10/21/2024 12:00 AM</td>\n",
" <td>Bucknell SBDC</td>\n",
" <td>Union</td>\n",
" <td>312130-Wineries\\r\\r\\n\\r\\r\\n</td>\n",
" <td>312130 - Wineries \\r\\r\\n</td>\n",
" <td>31.0</td>\n",
" <td>2.876304</td>\n",
" <td>4.995475</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Unnamed: 0 Client ID Client \\\n",
"0 0 WD04170 \\tProinnov@ LLC (WD04170) \n",
"1 1 WD02759 \"C.J.A.\"/ Crawley Jones and Allen real estate... \n",
"2 2 PS018402 Anjie's Cleaning Bees (PS018402) \n",
"3 3 C8538 BRENIMAN PROPERTIES, LLC (C8538) \n",
"4 4 BU016079 Civil War Cider Co., Inc. (BU016079) \n",
"\n",
" Last Counseling Center \\\n",
"0 9/9/2025 12:00 AM WD - WIDENER SBDC \n",
"1 10/20/2025 12:00 AM WD - WIDENER SBDC \n",
"2 10/14/2024 12:00 AM Penn State SBDC \n",
"3 10/17/2025 12:00 AM PennWest University Clarion SBDC \n",
"4 10/21/2024 12:00 AM Bucknell SBDC \n",
"\n",
" Physical Address County NAICs \\\n",
"0 Philadelphia NaN \n",
"1 Delaware 531390-OtherActivitiesRelatedtoRealEstate\\r\\r\\... \n",
"2 Lycoming 561720-JanitorialServices\\r\\r\\n\\r\\r\\n \n",
"3 Clarion 531120-LessorsofNonresidentialBuildings(except... \n",
"4 Union 312130-Wineries\\r\\r\\n\\r\\r\\n \n",
"\n",
" Primary NAICS NAICS_2 \\\n",
"0 NaN 0.0 \n",
"1 531390 - Other Activities Related to Real Esta... 53.0 \n",
"2 561720 - Janitorial Services \\r\\r\\n 56.0 \n",
"3 531120 - Lessors of Nonresidential Buildings (... 53.0 \n",
"4 312130 - Wineries \\r\\r\\n 31.0 \n",
"\n",
" PA NAICs Code Percentage PASBDC NAICs Code Percentage \n",
"0 0.000000 13.809955 \n",
"1 2.510127 2.723982 \n",
"2 3.605647 4.398190 \n",
"3 2.510127 2.723982 \n",
"4 2.876304 4.995475 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"client_df = pd.read_csv('naics_client_list_tagged.csv')\n",
"client_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "9307bc7a-8ba1-4c7f-b7eb-e2b5f6d4c8d7",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Session Type</th>\n",
" <th>count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Administrative</td>\n",
" <td>476099</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Follow-up</td>\n",
" <td>316915</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Initial/New</td>\n",
" <td>17024</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Close-out</td>\n",
" <td>14902</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Impact</td>\n",
" <td>3671</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Session Type count\n",
"0 Administrative 476099\n",
"1 Follow-up 316915\n",
"2 Initial/New 17024\n",
"3 Close-out 14902\n",
"4 Impact 3671"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"session_type_value_counts = sessions_df['Session Type'].value_counts().reset_index()\n",
"session_type_value_counts"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "97465a49-4e13-4e23-88ba-6bd7d72c6520",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Session Type</th>\n",
" <th>Client ID</th>\n",
" <th>Center</th>\n",
" <th>Administrative</th>\n",
" <th>Close-out</th>\n",
" <th>Follow-up</th>\n",
" <th>Impact</th>\n",
" <th>Initial/New</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>D 14632</td>\n",
" <td>Duquesne University SBDC</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>S015040</td>\n",
" <td>The University of Scranton SBDC</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>00000043</td>\n",
" <td>TE - TEMPLE SBDC</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>00000052</td>\n",
" <td>WD - WIDENER SBDC</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>8</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>000002</td>\n",
" <td>Kutztown University SBDC</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Session Type Client ID Center Administrative \\\n",
"0 D 14632 Duquesne University SBDC 0 \n",
"1 S015040 The University of Scranton SBDC 0 \n",
"2 00000043 TE - TEMPLE SBDC 1 \n",
"3 00000052 WD - WIDENER SBDC 3 \n",
"4 000002 Kutztown University SBDC 2 \n",
"\n",
"Session Type Close-out Follow-up Impact Initial/New \n",
"0 0 2 0 1 \n",
"1 1 7 0 0 \n",
"2 0 0 0 0 \n",
"3 0 8 0 0 \n",
"4 0 0 0 0 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"session_counts = sessions_df.groupby('Client ID')['Session Type'].value_counts()\n",
"client_centers = sessions_df.groupby('Client ID')['Center'].first()\n",
"\n",
"\n",
"unified_counts_df = session_counts.unstack(fill_value=0)\n",
"unified_counts_df['Center'] = client_centers\n",
"\n",
"\n",
"\n",
"all_cols = list(unified_counts_df.columns)\n",
"all_cols.remove('Center')\n",
"all_cols.insert(0, 'Center')\n",
"\n",
"unified_counts_df = unified_counts_df[all_cols].reset_index()\n",
"unified_counts_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "0eb344e3-a1e2-4ae8-b2ad-a40cc775eec9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Center\n",
"University of Pittsburgh SBDC 11263\n",
"TE - TEMPLE SBDC 8851\n",
"Kutztown University SBDC 8454\n",
"WD - WIDENER SBDC 4452\n",
"The University of Scranton SBDC 3798\n",
"K - Kutztown SBDC 2608\n",
"PennWest University Clarion SBDC 2574\n",
"WI - WILKES SBDC 2387\n",
"LE - LEHIGH UNIVERSITY SBDC 2211\n",
"G - GANNON SBDC 1794\n",
"Penn State SBDC 1782\n",
"SH - SHIPPENSBURG SBDC 1751\n",
"Duquesne University SBDC 1604\n",
"Bucknell SBDC 1149\n",
"SF - ST. FRANCIS UNIVERSITY SBDC 1033\n",
"SV - ST. VINCENT COLLEGE SBDC 738\n",
"G - Meadville 186\n",
"SV - Fayette Outreach 176\n",
"G - Mercer 131\n",
"Indiana County 125\n",
"Clarion CARES Act 20\n",
"G - Warren 1\n",
"SC - Monroe Outreach 1\n",
"LE - Bucks County/Lehigh SBDC 1\n",
"Name: count, dtype: int64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"unified_counts_df['Center'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "e38f57b8-f362-4bba-ae20-b858f2be4504",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Center\n",
"Pittsburgh 11263\n",
"Temple 8851\n",
"Kutztown University SBDC 8454\n",
"Widner 4452\n",
"Scranton 3798\n",
"Kutztown 2608\n",
"Clarion 2574\n",
"Wilkes 2387\n",
"Lehigh 2212\n",
"Gannon 1794\n",
"Penn State 1782\n",
"Shippensburg 1751\n",
"Duquesne 1604\n",
"Bucknell 1149\n",
"St. Francis 1033\n",
"St. Vincent 738\n",
"G - Meadville 186\n",
"SV - Fayette Outreach 176\n",
"G - Mercer 131\n",
"Indiana County 125\n",
"Clarion CARES Act 20\n",
"G - Warren 1\n",
"SC - Monroe Outreach 1\n",
"Name: count, dtype: int64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"center_mapping = {\n",
" \"University of Pittsburgh SBDC\":\"Pittsburgh\",\n",
" \"TE - TEMPLE SBDC\":\"Temple\",\n",
" \"Kutztown University SBDC\": \"Kutztown\",\n",
" \"K - Kutztown SBDC\":\"Kutztown\",\n",
" \"WD - WIDENER SBDC\": \"Widner\",\n",
" \"The University of Scranton SBDC\": \"Scranton\",\n",
" \"PennWest University Clarion SBDC\":\"Clarion\",\n",
" \"WI - WILKES SBDC\":\"Wilkes\",\n",
" \"LE - LEHIGH UNIVERSITY SBDC\":\"Lehigh\",\n",
" \"G - GANNON SBDC\":\"Gannon\",\n",
" \"Penn State SBDC\":\"Penn State\",\n",
" \"SH - SHIPPENSBURG SBDC\":\"Shippensburg\",\n",
" \"Duquesne University SBDC\":\"Duquesne\",\n",
" \"Bucknell SBDC\":\"Bucknell\",\n",
" \"SF - ST. FRANCIS UNIVERSITY SBDC\": \"St. Francis\",\n",
" \"SV - ST. VINCENT COLLEGE SBDC\":\"St. Vincent\",\n",
" \"LE - Bucks County/Lehigh SBDC\":\"Lehigh\",\n",
"}\n",
"\n",
"unified_counts_df['Center'] = unified_counts_df['Center'].replace(center_mapping)\n",
"unified_counts_df['Center'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "39772f4d-0c9b-40cb-ab7a-0d3e152bd7a3",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "369dddf9-4b1a-47ea-aa08-cc0404c0f60a",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}