first commit

2026-05-21 08:40:24 -04:00
commit b084545275
711 changed files with 3659856 additions and 0 deletions
--- a/libs/pasbdc_data_cleaning/README.md
+++ b/libs/pasbdc_data_cleaning/README.md
@@ -0,0 +1,6 @@
+# Common Cleaning Functions Library
+---
+This Python library provides common utility functions for working with data exported from Neoserra.
+
+## Functions:
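+- `clean_center_name` - Converts the messy center names from Neoserra into the standard center labels we use in graphs.
+- `tag_county_out_of_state` - Adds a `County Out of State` flag for records whose `Physical Address County` is not a Pennsylvania county.
+- `remove_duplicate_client_records` - Drops rows assigned to the "Duplicate Client Records" center.
+- `remove_api_testing_clients` - Drops rows assigned to the "API Testing Sandbox" center.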
--- a/libs/pasbdc_data_cleaning/pasbdc_data_cleaning.egg-info/PKG-INFO
+++ b/libs/pasbdc_data_cleaning/pasbdc_data_cleaning.egg-info/PKG-INFO
@@ -0,0 +1,4 @@
+Metadata-Version: 2.4
+Name: pasbdc_data_cleaning
+Version: 0.1.0
+Summary: Internal data cleaning library for the PASBDC written by Vincent Allen
--- a/libs/pasbdc_data_cleaning/pasbdc_data_cleaning.egg-info/SOURCES.txt
+++ b/libs/pasbdc_data_cleaning/pasbdc_data_cleaning.egg-info/SOURCES.txt
@@ -0,0 +1,8 @@
+README.md
+pyproject.toml
+pasbdc_data_cleaning/__init__.py
+pasbdc_data_cleaning/sbdclibrary.py
+pasbdc_data_cleaning.egg-info/PKG-INFO
+pasbdc_data_cleaning.egg-info/SOURCES.txt
+pasbdc_data_cleaning.egg-info/dependency_links.txt
+pasbdc_data_cleaning.egg-info/top_level.txt
--- a/libs/pasbdc_data_cleaning/pasbdc_data_cleaning.egg-info/dependency_links.txt
+++ b/libs/pasbdc_data_cleaning/pasbdc_data_cleaning.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
--- a/libs/pasbdc_data_cleaning/pasbdc_data_cleaning.egg-info/top_level.txt
+++ b/libs/pasbdc_data_cleaning/pasbdc_data_cleaning.egg-info/top_level.txt
@@ -0,0 +1 @@
+pasbdc_data_cleaning
--- a/libs/pasbdc_data_cleaning/pasbdc_data_cleaning/__init__.py
+++ b/libs/pasbdc_data_cleaning/pasbdc_data_cleaning/__init__.py
@@ -0,0 +1,3 @@
+# libs/pasbdc_data_cleaning/pasbdc_data_cleaning/__init__.py
+# Re-export the cleaning helpers so callers can import them from the package root.
+from .sbdclibrary import clean_center_name, tag_county_out_of_state, remove_duplicate_client_records, remove_api_testing_clients
+__all__ = ['clean_center_name', 'tag_county_out_of_state', 'remove_duplicate_client_records', 'remove_api_testing_clients']
--- a/libs/pasbdc_data_cleaning/pasbdc_data_cleaning/__pycache__/__init__.cpython-312.pyc
+++ b/libs/pasbdc_data_cleaning/pasbdc_data_cleaning/__pycache__/__init__.cpython-312.pyc
--- a/libs/pasbdc_data_cleaning/pasbdc_data_cleaning/__pycache__/__init__.cpython-313.pyc
+++ b/libs/pasbdc_data_cleaning/pasbdc_data_cleaning/__pycache__/__init__.cpython-313.pyc
--- a/libs/pasbdc_data_cleaning/pasbdc_data_cleaning/__pycache__/sbdclibrary.cpython-312.pyc
+++ b/libs/pasbdc_data_cleaning/pasbdc_data_cleaning/__pycache__/sbdclibrary.cpython-312.pyc
--- a/libs/pasbdc_data_cleaning/pasbdc_data_cleaning/__pycache__/sbdclibrary.cpython-313.pyc
+++ b/libs/pasbdc_data_cleaning/pasbdc_data_cleaning/__pycache__/sbdclibrary.cpython-313.pyc
--- a/libs/pasbdc_data_cleaning/pasbdc_data_cleaning/sbdclibrary.py
+++ b/libs/pasbdc_data_cleaning/pasbdc_data_cleaning/sbdclibrary.py
@@ -0,0 +1,169 @@
+import pandas as pd
+
+from constants_module import NEOSERRA_COLUMNS
+
+CENTER_NAME_MAPPING = {
+    "PI - Washington County":"Pittsburgh",
+    "University of Pittsburgh SBDC":"Pittsburgh",
+    "Indiana County":"Pittsburgh",
+    "PI - Greene County":"Pittsburgh",
+    "TE - TEMPLE SBDC":"Temple",
+    "TE - WCU Procurement Assistance Center":"Temple",
+    "Kutztown University SBDC": "Kutztown",
+    "K - Kutztown SBDC":"Kutztown",
+    "WD - WIDENER SBDC": "Widener",
+    "WD - Norristown Outreach":"Widener",
+    "The University of Scranton SBDC": "Scranton",
+    "SC - Monroe Outreach":"Scranton",
+    "SC - Wyoming Outreach":"Scranton",
+    "SC - Susquehanna Outreach":"Scranton",
+    "SC - Scranton Outreach":"Scranton",
+    "SC - Pike Outreach":"Scranton",
+    "SC - Wayne Outreach":"Scranton",
+    "SC - Lackawanna Outreach":"Scranton",
+    "PennWest University Clarion SBDC":"Clarion",
+    "Clarion CARES Act":"Clarion",
+    "WI - WILKES SBDC":"Wilkes",
+    "WI - WIlkes":"Wilkes",
+    "WI - Bloomsburg":"Wilkes",
+    "LE - LEHIGH UNIVERSITY SBDC":"Lehigh",
+    "G - GANNON SBDC":"Gannon",
+    "G - Meadville":"Gannon",
+    "G - Mercer":"Gannon",
+    "G - Warren":"Gannon",
+    "Penn State SBDC":"Penn State",
+    "SH - SHIPPENSBURG SBDC":"Shippensburg",
+    "Duquesne University SBDC":"Duquesne",
+    "Bucknell SBDC":"Bucknell",
+    "SF - ST. FRANCIS UNIVERSITY SBDC": "St. Francis",
+    "SF - Somerset Outreach":"St. Francis",
+    "SF - Bedford Outreach":"St. Francis",
+    "SF - Blair Outreach":"St. Francis",
+    "SF - St Francis University SBDC":"St. Francis",
+    "SF - St Francis Universty SBDC":"St. Francis",
+    "SF - St Francis Univeristy SBDC":"St. Francis",
+    "SV - ST. VINCENT COLLEGE SBDC":"St. Vincent",
+    "SV - AIHP Outreach":"St. Vincent",
+    "SV - WEDC Outreach":"St. Vincent",
+    "SV - Fayette Outreach":"St. Vincent",
+    "SV - Jeannette Outreach":"St. Vincent",
+    "LE - Bucks County/Lehigh SBDC":"Lehigh",
+    "Southeast Pennsylvania APEX Accelerator":"Z-Lead Office",
+    "EMAP":"Z-Lead Office",
+    "Pennsylvania SBDC Lead Office":"Z-Lead Office",
+    "State Small Business Credit Initiative (SSBCI)":"Z-Lead Office",
+    "D - Beaver":"Duquesne",
+    "AA - Stakeholders/Partners":"Z-Lead Office",
+    "AA - Temple Stakeholders/Partners":"Z-Lead Office",
+    "Wharton":"Z-Lead Office",
+    "Wharton SBDC":"Z-Lead Office",
+    "Lock Haven SBDC":"Penn State"
+
+    # "Kutztown University SBDC ": "Kutztown",
+    #" Kutztown University SBDC": "Kutztown",
+    #" Pennsylvania SBDC Lead Office":"Lead Office",
+}
+
+VALID_PA_COUNTIES = [
+    "juniata", 
+    "montour", 
+    "northumberland", 
+    "perry", 
+    "snyder", 
+    "union", 
+    "allegheny", 
+    "beaver", 
+    "butler", 
+    "lawrence", 
+    "washington", 
+    "crawford", 
+    "erie", 
+    "mercer", 
+    "warren", 
+    "dauphin", 
+    "lancaster", 
+    "lebanon", 
+    "berks", 
+    "chester", 
+    "bucks", 
+    "lehigh", 
+    "northampton", 
+    "centre", 
+    "clinton", 
+    "lycoming", 
+    "mifflin", 
+    "armstrong", 
+    "cameron", 
+    "clarion", 
+    "clearfield", 
+    "elk", 
+    "forest", 
+    "jefferson", 
+    "mckean", 
+    "potter", 
+    "venango", 
+    "bedford", 
+    "blair", 
+    "cambria", 
+    "fulton", 
+    "huntingdon", 
+    "somerset", 
+    "fayette", 
+    "westmoreland", 
+    "adams", 
+    "cumberland", 
+    "franklin", 
+    "york", 
+    "montgomery", 
+    "philadelphia", 
+    "greene", 
+    "indiana", 
+    "bradford", 
+    "lackawanna", 
+    "monroe", 
+    "pike", 
+    "susquehanna", 
+    "tioga", 
+    "wayne", 
+    "wyoming", 
+    "delaware", 
+    "carbon", 
+    "columbia", 
+    "luzerne", 
+    "schuylkill", 
+    "sullivan"
+]
+def tag_county_out_of_state(df):
+        if str(df['Physical Address County']).lower() not in VALID_PA_COUNTIES:
+            df['County Out of State'] = True
+        else:
+            df['County Out of State'] = False
+
+        return df
+
+def remove_duplicate_client_records(df:pd.DataFrame) -> pd.DataFrame:
+    new_df = df.copy()
+    new_df = new_df[new_df[NEOSERRA_COLUMNS.center] != "Duplicate Client Records"]
+    return new_df
+
+def remove_api_testing_clients(df:pd.DataFrame) -> pd.DataFrame:
+    new_df = df.copy()
+    new_df = new_df[new_df[NEOSERRA_COLUMNS.center] != "API Testing Sandbox"]
+    return new_df
+
+def clean_center_name(df: pd.DataFrame, center_name_column:str="Center"):
+    '''
+    Cleans up the Center column of Neoserra data export using the conversion mapping defined in the library code which can be accessed
+    via the constant CENTER_NAME_MAPPING
+
+    Parameters:
+    df - the dataframe to clean up
+    center_name_column - the column of the datafram containing the center names
+
+    Returns:
+    None
+    '''
+    df[center_name_column] = df[center_name_column].astype(str).str.strip()
+    df[center_name_column] = df[center_name_column].replace(CENTER_NAME_MAPPING)
+
+
--- a/libs/pasbdc_data_cleaning/pyproject.toml
+++ b/libs/pasbdc_data_cleaning/pyproject.toml
@@ -0,0 +1,13 @@
+# libs/pasbdc_data_cleaning/pyproject.toml
+[build-system]
+requires = ["setuptools", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "pasbdc_data_cleaning"
+version = "0.1.0"
+description = "Internal data cleaning library for the PASBDC written by Vincent Allen"
+
+# setuptools-specific configuration
+[tool.setuptools]
+packages = ["pasbdc_data_cleaning"]
--- a/libs/word_library/README.md
+++ b/libs/word_library/README.md
@@ -0,0 +1,64 @@
+## Easy Word Docs in Python
+---
+This library implements a Word document builder class: you pass it a list of page functions and it generates a Word document, calling each function in turn to produce one page.
+
+This has the benefit of providing modularity in scripts that make word documents as their outputs as a script can be configured to use any combination
+of page functions to build a word doc.
+
+### Tips:
+---
+The position of the page function currently being run (starting at 1) is available as the `current_section` member variable of the document builder.
+
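+The current figure number and table number are available as the member variables `figure_number` and `table_number` (both start at 0; the builder does not change them, so page functions update them as they add figures and tables). This allows you to have dynamically
+labeled graphs and tables in your document pages.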
+
+The PageConfig class allows you to customize the page breaking behavior to ensure the correct number of pages are present in the final document
+
+## Usage:
+---
+```
+from docx import Document
+from docx.shared import Inches, Pt
+from docx.enum.text import WD_ALIGN_PARAGRAPH
+from typing import Callable, List, Any
+
+# Example page function
+def title_page(builder: WordDocumentBuilder, title: str = "Document Title", author: str = "Author", **kwargs):
+    """Create a title page."""
+    title_para = builder.doc.add_paragraph()
+    title_run = title_para.add_run(title)
+    title_run.font.size = Pt(28)
+    title_run.bold = True
+    title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
+    
+    builder.doc.add_paragraph()  # Spacing
+    
+    author_para = builder.doc.add_paragraph(author)
+    author_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
+
+# Create document builder
+builder = WordDocumentBuilder()
+
+# Define pages, these are functions
+pages = [
+    PageConfig(title_page, add_page_break=False)
+]
+
+# Create document with custom parameters. Only pages and the output path are required; every additional keyword argument is forwarded to
+# each page function, which uses the ones it declares and ignores the rest through **kwargs. Think of it as a lightweight form of dependency injection.
+doc = builder.create_document(
+    pages,
+    "output/my_document.docx",
+    title="My Custom Report",
+    author="John Doe",
+    heading="Introduction",
+    content="This is the introduction section with detailed information.",
+    table_data=[
+        ["Product", "Price", "Quantity"],
+        ["Widget", "$10", "100"],
+        ["Gadget", "$25", "50"]
+    ],
+    table_title="Sales Data"
+)
+```
+
--- a/libs/word_library/pasbdc_word_library.egg-info/PKG-INFO
+++ b/libs/word_library/pasbdc_word_library.egg-info/PKG-INFO
@@ -0,0 +1,4 @@
+Metadata-Version: 2.4
+Name: pasbdc_word_library
+Version: 0.1.0
+Summary: A tool used to make the generation of word documents in python a little easier.
--- a/libs/word_library/pasbdc_word_library.egg-info/SOURCES.txt
+++ b/libs/word_library/pasbdc_word_library.egg-info/SOURCES.txt
@@ -0,0 +1,9 @@
+README.md
+pyproject.toml
+pasbdc_word_library/__init__.py
+pasbdc_word_library/doclibrary.py
+pasbdc_word_library/theme_helpers.py
+pasbdc_word_library.egg-info/PKG-INFO
+pasbdc_word_library.egg-info/SOURCES.txt
+pasbdc_word_library.egg-info/dependency_links.txt
+pasbdc_word_library.egg-info/top_level.txt
--- a/libs/word_library/pasbdc_word_library.egg-info/dependency_links.txt
+++ b/libs/word_library/pasbdc_word_library.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
--- a/libs/word_library/pasbdc_word_library.egg-info/top_level.txt
+++ b/libs/word_library/pasbdc_word_library.egg-info/top_level.txt
@@ -0,0 +1 @@
+pasbdc_word_library
--- a/libs/word_library/pasbdc_word_library/__init__.py
+++ b/libs/word_library/pasbdc_word_library/__init__.py
@@ -0,0 +1,4 @@
+# libs/word_library/pasbdc_word_library/__init__.py
+# Re-export the builder, example page functions and theme helpers from the package root.
+from .doclibrary import WordDocumentBuilder, PageConfig, title_page, content_page, table_page, image_page
+from .theme_helpers import theme_paragraph, theme_title
+__all__ = ['WordDocumentBuilder', 'PageConfig', 'title_page', 'content_page', 'table_page', 'image_page', 'theme_paragraph', 'theme_title']
--- a/libs/word_library/pasbdc_word_library/__pycache__/__init__.cpython-312.pyc
+++ b/libs/word_library/pasbdc_word_library/__pycache__/__init__.cpython-312.pyc
--- a/libs/word_library/pasbdc_word_library/__pycache__/__init__.cpython-313.pyc
+++ b/libs/word_library/pasbdc_word_library/__pycache__/__init__.cpython-313.pyc
--- a/libs/word_library/pasbdc_word_library/__pycache__/doclibrary.cpython-312.pyc
+++ b/libs/word_library/pasbdc_word_library/__pycache__/doclibrary.cpython-312.pyc
--- a/libs/word_library/pasbdc_word_library/__pycache__/doclibrary.cpython-313.pyc
+++ b/libs/word_library/pasbdc_word_library/__pycache__/doclibrary.cpython-313.pyc
--- a/libs/word_library/pasbdc_word_library/__pycache__/theme_helpers.cpython-312.pyc
+++ b/libs/word_library/pasbdc_word_library/__pycache__/theme_helpers.cpython-312.pyc
--- a/libs/word_library/pasbdc_word_library/doclibrary.py
+++ b/libs/word_library/pasbdc_word_library/doclibrary.py
@@ -0,0 +1,127 @@
+from docx import Document
+from docx.shared import Inches, Pt
+from docx.enum.text import WD_ALIGN_PARAGRAPH
+from typing import Callable, List, Any
+import os
+from dataclasses import dataclass
+
+@dataclass
+class PageConfig:
+    """Pairs a page function with the page-break behavior WordDocumentBuilder applies after it."""
+    # Called by the builder as page_function(builder, **kwargs) to add one page's content.
+    page_function: Callable
+    # When True, a page break follows this page unless it is the last entry in the list.
+    add_page_break: bool = True
+
+class WordDocumentBuilder:
+    """Build Word documents using configurable page functions."""
+    
+    def __init__(self):
+        self.doc = Document()
+        self.current_section = 1
+        self.figure_number = 0;
+        self.table_number = 0;
+    
+    def add_page_break(self):
+        """Add a page break to the document."""
+        self.doc.add_page_break()
+    
+    def create_document(self, page_functions: List[PageConfig], output_path: str, **kwargs):
+        """
+        Create a Word document by executing page functions.
+        
+        Args:
+            page_functions: List of functions that add content to the document
+            output_path: Path where the document will be saved
+            **kwargs: Additional parameters passed to all page functions
+        
+        Returns:
+            The created Document object
+        """
+        for i, page_conf in enumerate(page_functions):
+            # Execute page function with builder and any additional kwargs
+            page_conf.page_function(self, **kwargs)
+            
+            # Add page break after each page except the last
+            if i < len(page_functions) - 1 and page_conf.add_page_break:
+                self.add_page_break()
+            
+            # Increment the section state
+            self.current_section += 1
+        
+        # Ensure directory exists
+        os.makedirs(os.path.dirname(output_path) if os.path.dirname(output_path) else '.', exist_ok=True)
+        
+        # Save document
+        self.doc.save(output_path)
+        return self.doc
+
+
+# Example page functions
+def title_page(builder: WordDocumentBuilder, title: str = "Document Title", author: str = "Author", **kwargs):
+    """Create a title page."""
+    title_para = builder.doc.add_paragraph()
+    title_run = title_para.add_run(title)
+    title_run.font.size = Pt(28)
+    title_run.bold = True
+    title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
+    
+    builder.doc.add_paragraph()  # Spacing
+    
+    author_para = builder.doc.add_paragraph(author)
+    author_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
+
+
+def content_page(builder: WordDocumentBuilder, heading: str = "Section", content: str = "", **kwargs):
+    """Create a content page with heading and body text."""
+    builder.doc.add_heading(heading, level=1)
+    builder.doc.add_paragraph(content)
+
+
+def table_page(builder: WordDocumentBuilder, table_data: List[List[str]] = [], table_title: str = "Data Table", **kwargs):
+    """Create a page with a table."""
+    if table_data is None:
+        table_data = [["Header 1", "Header 2"], ["Data 1", "Data 2"]]
+    
+    builder.doc.add_heading(table_title, level=1)
+    
+    table = builder.doc.add_table(rows=len(table_data), cols=len(table_data[0]))
+    table.style = 'Light Grid Accent 1'
+    
+    for i, row in enumerate(table_data):
+        for j, cell_value in enumerate(row):
+            table.rows[i].cells[j].text = str(cell_value)
+
+
+def image_page(builder: WordDocumentBuilder, image_path: str = "", caption: str = "", **kwargs):
+    """Create a page with an image."""
+    if image_path and os.path.exists(image_path):
+        builder.doc.add_paragraph(caption)
+        builder.doc.add_picture(image_path, width=Inches(5))
+
+# Usage example
+if __name__ == "__main__":
+    # Create document builder
+    builder = WordDocumentBuilder()
+    
+    # Define pages
+    pages = [
+        PageConfig(title_page, add_page_break=True),
+        PageConfig(content_page, add_page_break=True),
+        PageConfig(table_page, add_page_break=False),
+    ]
+    
+    # Create document with custom parameters
+    doc = builder.create_document(
+        pages,
+        "output/my_document.docx",
+        title="My Custom Report",
+        author="John Doe",
+        heading="Introduction",
+        content="This is the introduction section with detailed information.",
+        table_data=[
+            ["Product", "Price", "Quantity"],
+            ["Widget", "$10", "100"],
+            ["Gadget", "$25", "50"]
+        ],
+        table_title="Sales Data"
+    )
+    
+    print("Document created successfully!")
--- a/libs/word_library/pasbdc_word_library/theme_helpers.py
+++ b/libs/word_library/pasbdc_word_library/theme_helpers.py
@@ -0,0 +1,22 @@
+import docx
+from docx.shared import RGBColor, Pt
+from docx.shared import Pt
+from docx.enum.text import WD_ALIGN_PARAGRAPH
+
+def theme_paragraph(
+        paragraph,
+        font_size_pt:int=9,
+        font_name:str="Futera",
+        color:RGBColor=RGBColor(15,27,38)
+):
+    for run in paragraph.runs:
+        run.font.name = font_name
+        run.font.size = Pt(font_size_pt)
+        run.font.color.rgb = color
+
+def theme_title(title_run):
+    title_run.bold = True
+    title_run.font.name = 'Futera'
+    title_run.font.size = Pt(12)
+    title_run.font.color.rgb = RGBColor(113, 191, 68)
+    title_run.alignment = WD_ALIGN_PARAGRAPH.LEFT
--- a/libs/word_library/pyproject.toml
+++ b/libs/word_library/pyproject.toml
@@ -0,0 +1,12 @@
+[build-system]
+requires = ["setuptools", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "pasbdc_word_library"
+version = "0.1.0"
+description = "A tool used to make the generation of word documents in python a little easier."
+
+[tool.setuptools]
+packages = ["pasbdc_word_library"]
+