408 lines
15 KiB
Plaintext
408 lines
15 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "4201132e-811d-4c88-86cb-d24bb9e55549",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "67e3b5f9-b662-4ba4-85fc-debc0ad33d12",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "b8d125cf-c22a-4a1b-8c8a-6eb5e9939969",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Clients: 11049\n",
|
|
"Sessions: 828614\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Load the NAICS-tagged client list and the counseling-session log from CSV.\n",
"# NOTE(review): the recorded preview of clients_df shows an 'Unnamed: 0' column,\n",
"# which looks like a saved index -- consider index_col=0 once confirmed.\n",
"clients_df = pd.read_csv('naics_tagged_client_list.csv')\n",
"counseling_df = pd.read_csv('client_counsoling_sessions.csv')\n",
"\n",
"# Row counts as a quick sanity check that both files loaded.\n",
"print(f\"Clients: {len(clients_df)}\")\n",
"print(f\"Sessions: {len(counseling_df)}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "f8bb3f3e-7d23-45b7-b6d0-6193631590ab",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>Session Date</th>\n",
|
|
" <th>Client</th>\n",
|
|
" <th>Client ID</th>\n",
|
|
" <th>Counselor</th>\n",
|
|
" <th>Session Type</th>\n",
|
|
" <th>Contact Type</th>\n",
|
|
" <th>Center</th>\n",
|
|
" <th>Prep+Contact</th>\n",
|
|
" <th>Total Hours</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>9/30/2022 12:00 AM</td>\n",
|
|
" <td>Butler Technologies (PI704874)</td>\n",
|
|
" <td>PI704874</td>\n",
|
|
" <td>Towers, Kate</td>\n",
|
|
" <td>Follow-up</td>\n",
|
|
" <td>Online (email or chat)</td>\n",
|
|
" <td>University of Pittsburgh SBDC</td>\n",
|
|
" <td>4.5</td>\n",
|
|
" <td>4.5</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>9/30/2022 12:00 AM</td>\n",
|
|
" <td>Louri Bean Creative (PI706063)</td>\n",
|
|
" <td>PI706063</td>\n",
|
|
" <td>Wholihan, Michael</td>\n",
|
|
" <td>Initial/New</td>\n",
|
|
" <td>Outreach (face-to-face)</td>\n",
|
|
" <td>University of Pittsburgh SBDC</td>\n",
|
|
" <td>3.0</td>\n",
|
|
" <td>3.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>9/30/2022 12:00 AM</td>\n",
|
|
" <td>Leverage Cleaning (PI705380)</td>\n",
|
|
" <td>PI705380</td>\n",
|
|
" <td>Wholihan, Michael</td>\n",
|
|
" <td>Follow-up</td>\n",
|
|
" <td>Phone</td>\n",
|
|
" <td>University of Pittsburgh SBDC</td>\n",
|
|
" <td>2.0</td>\n",
|
|
" <td>2.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>9/30/2022 12:00 AM</td>\n",
|
|
" <td>Scratch & co (PI704915)</td>\n",
|
|
" <td>PI704915</td>\n",
|
|
" <td>Wholihan, Michael</td>\n",
|
|
" <td>Follow-up</td>\n",
|
|
" <td>Outreach (face-to-face)</td>\n",
|
|
" <td>University of Pittsburgh SBDC</td>\n",
|
|
" <td>3.0</td>\n",
|
|
" <td>3.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>9/30/2022 12:00 AM</td>\n",
|
|
" <td>Diverse Industrial Solutions, LLC (IN001291)</td>\n",
|
|
" <td>IN001291</td>\n",
|
|
" <td>Wholihan, Michael</td>\n",
|
|
" <td>Initial/New</td>\n",
|
|
" <td>Phone</td>\n",
|
|
" <td>University of Pittsburgh SBDC</td>\n",
|
|
" <td>2.0</td>\n",
|
|
" <td>2.0</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Session Date Client Client ID \\\n",
|
|
"0 9/30/2022 12:00 AM Butler Technologies (PI704874) PI704874 \n",
|
|
"1 9/30/2022 12:00 AM Louri Bean Creative (PI706063) PI706063 \n",
|
|
"2 9/30/2022 12:00 AM Leverage Cleaning (PI705380) PI705380 \n",
|
|
"3 9/30/2022 12:00 AM Scratch & co (PI704915) PI704915 \n",
|
|
"4 9/30/2022 12:00 AM Diverse Industrial Solutions, LLC (IN001291) IN001291 \n",
|
|
"\n",
|
|
" Counselor Session Type Contact Type \\\n",
|
|
"0 Towers, Kate Follow-up Online (email or chat) \n",
|
|
"1 Wholihan, Michael Initial/New Outreach (face-to-face) \n",
|
|
"2 Wholihan, Michael Follow-up Phone \n",
|
|
"3 Wholihan, Michael Follow-up Outreach (face-to-face) \n",
|
|
"4 Wholihan, Michael Initial/New Phone \n",
|
|
"\n",
|
|
" Center Prep+Contact Total Hours \n",
|
|
"0 University of Pittsburgh SBDC 4.5 4.5 \n",
|
|
"1 University of Pittsburgh SBDC 3.0 3.0 \n",
|
|
"2 University of Pittsburgh SBDC 2.0 2.0 \n",
|
|
"3 University of Pittsburgh SBDC 3.0 3.0 \n",
|
|
"4 University of Pittsburgh SBDC 2.0 2.0 "
|
|
]
|
|
},
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Preview the first five rows of the counseling-session log.\n",
"counseling_df.head(5)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "6cd5bca1-8895-4133-a71e-1a9e9cdd9f71",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>Unnamed: 0</th>\n",
|
|
" <th>Client</th>\n",
|
|
" <th>Client ID</th>\n",
|
|
" <th>County Out of State</th>\n",
|
|
" <th>Email</th>\n",
|
|
" <th>Last Counseling</th>\n",
|
|
" <th>NAICs</th>\n",
|
|
" <th>Phone</th>\n",
|
|
" <th>Physical Address</th>\n",
|
|
" <th>Physical Address County</th>\n",
|
|
" <th>Physical Address State</th>\n",
|
|
" <th>Primary Contact</th>\n",
|
|
" <th>Primary NAICS</th>\n",
|
|
" <th>NAICS_2</th>\n",
|
|
" <th>PA NAICs Code Percentage</th>\n",
|
|
" <th>PASBDC NAICs Code Percentage</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>0</td>\n",
|
|
" <td>\\tProinnov@ LLC (WD04170)</td>\n",
|
|
" <td>WD04170</td>\n",
|
|
" <td>False</td>\n",
|
|
" <td>JardensonC@ICLOUD.com</td>\n",
|
|
" <td>9/9/2025 12:00 AM</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>(267) 748-4465</td>\n",
|
|
" <td>6752 Oakland St.</td>\n",
|
|
" <td>Philadelphia</td>\n",
|
|
" <td>Pennsylvania</td>\n",
|
|
" <td>Jardenson Castro</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>0.0</td>\n",
|
|
" <td>0.000000</td>\n",
|
|
" <td>14.915377</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>1</td>\n",
|
|
" <td>\"C.J.A.\"/ Crawley Jones and Allen real estate...</td>\n",
|
|
" <td>WD02759</td>\n",
|
|
" <td>False</td>\n",
|
|
" <td>mrkcrawley@gmail.com</td>\n",
|
|
" <td>10/20/2025 12:00 AM</td>\n",
|
|
" <td>531390-OtherActivitiesRelatedtoRealEstate\\r\\r\\...</td>\n",
|
|
" <td>(215) 290-9828</td>\n",
|
|
" <td>673 Rively ave</td>\n",
|
|
" <td>Delaware</td>\n",
|
|
" <td>Pennsylvania</td>\n",
|
|
" <td>mark crawley</td>\n",
|
|
" <td>531390 - Other Activities Related to Real Esta...</td>\n",
|
|
" <td>53.0</td>\n",
|
|
" <td>2.510127</td>\n",
|
|
" <td>2.688026</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>2</td>\n",
|
|
" <td>Anjie's Cleaning Bees (PS018402)</td>\n",
|
|
" <td>PS018402</td>\n",
|
|
" <td>False</td>\n",
|
|
" <td>anjelicagonzalez2001@gmail.com</td>\n",
|
|
" <td>10/14/2024 12:00 AM</td>\n",
|
|
" <td>561720-JanitorialServices\\r\\r\\n\\r\\r\\n</td>\n",
|
|
" <td>(717) 521-3625</td>\n",
|
|
" <td>1129 High St</td>\n",
|
|
" <td>Lycoming</td>\n",
|
|
" <td>Pennsylvania</td>\n",
|
|
" <td>Anjelica Gonzez</td>\n",
|
|
" <td>561720 - Janitorial Services \\r\\r\\n</td>\n",
|
|
" <td>56.0</td>\n",
|
|
" <td>3.605647</td>\n",
|
|
" <td>4.344285</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>3</td>\n",
|
|
" <td>BRENIMAN PROPERTIES, LLC (C8538)</td>\n",
|
|
" <td>C8538</td>\n",
|
|
" <td>False</td>\n",
|
|
" <td>r_breniman@yahoo.com</td>\n",
|
|
" <td>10/17/2025 12:00 AM</td>\n",
|
|
" <td>531120-LessorsofNonresidentialBuildings(except...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>147 Heeter Rd</td>\n",
|
|
" <td>Clarion</td>\n",
|
|
" <td>Pennsylvania</td>\n",
|
|
" <td>RYAN BRENIMAN</td>\n",
|
|
" <td>531120 - Lessors of Nonresidential Buildings (...</td>\n",
|
|
" <td>53.0</td>\n",
|
|
" <td>2.510127</td>\n",
|
|
" <td>2.688026</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>4</td>\n",
|
|
" <td>Civil War Cider Co., Inc. (BU016079)</td>\n",
|
|
" <td>BU016079</td>\n",
|
|
" <td>False</td>\n",
|
|
" <td>rob@civilwarcider.com</td>\n",
|
|
" <td>10/21/2024 12:00 AM</td>\n",
|
|
" <td>312130-Wineries\\r\\r\\n\\r\\r\\n</td>\n",
|
|
" <td>(570) 523-3414</td>\n",
|
|
" <td>606 Market St.</td>\n",
|
|
" <td>Union</td>\n",
|
|
" <td>Pennsylvania</td>\n",
|
|
" <td>Robert Antanitis, II</td>\n",
|
|
" <td>312130 - Wineries \\r\\r\\n</td>\n",
|
|
" <td>31.0</td>\n",
|
|
" <td>2.876304</td>\n",
|
|
" <td>4.923522</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Unnamed: 0 Client Client ID \\\n",
|
|
"0 0 \\tProinnov@ LLC (WD04170) WD04170 \n",
|
|
"1 1 \"C.J.A.\"/ Crawley Jones and Allen real estate... WD02759 \n",
|
|
"2 2 Anjie's Cleaning Bees (PS018402) PS018402 \n",
|
|
"3 3 BRENIMAN PROPERTIES, LLC (C8538) C8538 \n",
|
|
"4 4 Civil War Cider Co., Inc. (BU016079) BU016079 \n",
|
|
"\n",
|
|
" County Out of State Email Last Counseling \\\n",
|
|
"0 False JardensonC@ICLOUD.com 9/9/2025 12:00 AM \n",
|
|
"1 False mrkcrawley@gmail.com 10/20/2025 12:00 AM \n",
|
|
"2 False anjelicagonzalez2001@gmail.com 10/14/2024 12:00 AM \n",
|
|
"3 False r_breniman@yahoo.com 10/17/2025 12:00 AM \n",
|
|
"4 False rob@civilwarcider.com 10/21/2024 12:00 AM \n",
|
|
"\n",
|
|
" NAICs Phone \\\n",
|
|
"0 NaN (267) 748-4465 \n",
|
|
"1 531390-OtherActivitiesRelatedtoRealEstate\\r\\r\\... (215) 290-9828 \n",
|
|
"2 561720-JanitorialServices\\r\\r\\n\\r\\r\\n (717) 521-3625 \n",
|
|
"3 531120-LessorsofNonresidentialBuildings(except... NaN \n",
|
|
"4 312130-Wineries\\r\\r\\n\\r\\r\\n (570) 523-3414 \n",
|
|
"\n",
|
|
" Physical Address Physical Address County Physical Address State \\\n",
|
|
"0 6752 Oakland St. Philadelphia Pennsylvania \n",
|
|
"1 673 Rively ave Delaware Pennsylvania \n",
|
|
"2 1129 High St Lycoming Pennsylvania \n",
|
|
"3 147 Heeter Rd Clarion Pennsylvania \n",
|
|
"4 606 Market St. Union Pennsylvania \n",
|
|
"\n",
|
|
" Primary Contact Primary NAICS \\\n",
|
|
"0 Jardenson Castro NaN \n",
|
|
"1 mark crawley 531390 - Other Activities Related to Real Esta... \n",
|
|
"2 Anjelica Gonzez 561720 - Janitorial Services \\r\\r\\n \n",
|
|
"3 RYAN BRENIMAN 531120 - Lessors of Nonresidential Buildings (... \n",
|
|
"4 Robert Antanitis, II 312130 - Wineries \\r\\r\\n \n",
|
|
"\n",
|
|
" NAICS_2 PA NAICs Code Percentage PASBDC NAICs Code Percentage \n",
|
|
"0 0.0 0.000000 14.915377 \n",
|
|
"1 53.0 2.510127 2.688026 \n",
|
|
"2 56.0 3.605647 4.344285 \n",
|
|
"3 53.0 2.510127 2.688026 \n",
|
|
"4 31.0 2.876304 4.923522 "
|
|
]
|
|
},
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Preview the first five rows of the client list.\n",
"# NOTE(review): the recorded output of this cell shows contact details\n",
"# (email, phone, address, contact name) -- clear outputs before sharing.\n",
"clients_df.head(5)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "694bb828-bbdc-4f6d-a002-ec9d8f603ed5",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.13.7"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|