first commit

This commit is contained in:
2026-05-21 08:40:24 -04:00
commit b084545275
711 changed files with 3659856 additions and 0 deletions

View File

@@ -0,0 +1,14 @@
# scripts/dataset/pyproject.toml
[build-system]
requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "shared_tools_module"
version = "0.1.0"
description = "Internal word doc generation library "
# MOVED: Configuration specific to setuptools goes here
[tool.setuptools]
packages = ["shared_tools_module"]

View File

@@ -0,0 +1,4 @@
Metadata-Version: 2.4
Name: shared_tools_module
Version: 0.1.0
Summary: Internal word doc generation library

View File

@@ -0,0 +1,9 @@
pyproject.toml
shared_tools_module/__init__.py
shared_tools_module/chart_variants.py
shared_tools_module/export_module.py
shared_tools_module/image_registry.py
shared_tools_module.egg-info/PKG-INFO
shared_tools_module.egg-info/SOURCES.txt
shared_tools_module.egg-info/dependency_links.txt
shared_tools_module.egg-info/top_level.txt

View File

@@ -0,0 +1 @@
shared_tools_module

View File

@@ -0,0 +1,15 @@
from .image_registry import (
ImageRegistry,
)
from .export_module import (
csv_url_to_dataframe
)
from .chart_variants import (
StatChartVariants,
save_variant_charts,
VARIANT_SUFFIX_MAP
)
__all__ = ['ImageRegistry', 'StatChartVariants', 'save_variant_charts', 'VARIANT_SUFFIX_MAP', 'csv_url_to_dataframe']

View File

@@ -0,0 +1,99 @@
# FILE: chart_variants.py
# CREATED: 1/12/25
# AUTHOR: Vincent Allen
# CONTACT: vincent@vtallen.com valle276@live.kutztown.edu
# PURPOSE:
# This file implements a mapping between StatChartVariant enums and plain text descriptions that can be used as the suffixes for graph filenames
# This also implements a function that takes an input dictionary between StatChartVariants enums and actual graph objects and systematically writes them
# out to the disk. This is to enable the easy parsing of the filenames to identify each chart without explicit file paths.
import os
from enum import Enum
class StatChartVariants(Enum):
    """
    Identifies one variant (filter / unit combination) of a statistics chart.

    Members are used as dictionary keys mapping a variant to its chart object
    and, through VARIANT_SUFFIX_MAP, to the filename suffix that chart is
    saved under.

    Two members are historically misspelled ("PREPLANNNG"). They remain the
    canonical names so existing callers and ``.name`` values are unchanged;
    correctly spelled aliases are declared at the bottom of the class.
    """
    TOTAL_COUNT = 1  # No filtering applied
    TOTAL_PERCENT = 2  # No filtering applied (but uses percentages instead of counts)
    NO_FIRST_STEPS_COUNT = 3  # Excludes first steps trainings
    NO_FIRST_STEPS_PERCENT = 4  # Excludes first steps trainings (but uses percentages instead of counts)
    NO_FIRST_NO_PREPLANNNG_COUNT = 5  # Excludes first steps and business pre planning trainings
    NO_FIRST_NO_PREPLANNNG_PERCENT = 6  # Excludes first steps and business pre planning trainings (but uses percentages instead of counts)
    ON_DEMAND_COUNT = 7  # Only all on demand training events
    ON_DEMAND_PERCENT = 8  # Only all on demand training events (but uses percentages instead of counts)
    ON_DEMAND_NO_FIRST_STEPS_COUNT = 9  # On demand trainings excluding first steps
    ON_DEMAND_NO_FIRST_STEPS_PERCENT = 10  # On demand trainings excluding first steps (but uses percentages instead of counts)
    ON_DEMAND_NO_FIRST_STEPS_NO_PREPLANNING_COUNT = 11  # On demand trainings excluding first steps and business pre planning trainings
    ON_DEMAND_NO_FIRST_STEPS_NO_PREPLANNING_PERCENT = 12  # On demand trainings excluding first steps and business pre planning trainings (but uses percentages instead of counts)
    TOTAL_ATTENDED = 13  # Only includes trainings that had 1 or more attendees
    PERCENT_ATTENDED = 14  # Only includes trainings that had 1 or more attendees (but uses percentages instead of counts)
    NO_FIRST_STEPS_ATTENDED_COUNT = 15  # Only includes trainings that had 1 or more attendees excluding first steps trainings
    NO_FIRST_STEPS_ATTENDED_PERCENT = 16  # Only includes trainings that had 1 or more attendees excluding first steps trainings (but uses percentages instead of counts)
    SMALL_BARS_TRAININGS = 17  # Only includes primary training topics that had 5 or less training events associated
    SMALL_BARS_TRAININGS_PERCENT = 18  # Only includes primary training topics that had 5 or less training events associated but as a percentage chart
    FIRST_AND_PREPLANNING_ONLY = 19
    FIRST_AND_PREPLANNING_ONLY_PERCENT = 20
    FIRST_ONLY = 21
    FIRST_ONLY_PERCENT = 22

    # Correctly spelled aliases for the misspelled members above. Reusing a
    # value makes these Enum aliases: they resolve to the very same member
    # object, are skipped during iteration, and do not change ``.name``.
    NO_FIRST_NO_PREPLANNING_COUNT = 5
    NO_FIRST_NO_PREPLANNING_PERCENT = 6
# Helper to map Enum to filename suffix
VARIANT_SUFFIX_MAP = {
    # --- Unfiltered totals ---
    StatChartVariants.TOTAL_COUNT: "total-count",
    StatChartVariants.TOTAL_PERCENT: "total-percent",

    # --- Totals with training categories excluded ---
    StatChartVariants.NO_FIRST_STEPS_COUNT: "no-first-steps-count",
    StatChartVariants.NO_FIRST_STEPS_PERCENT: "no-first-steps-percent",
    StatChartVariants.NO_FIRST_NO_PREPLANNNG_COUNT: "no-first-no-pre-count",
    StatChartVariants.NO_FIRST_NO_PREPLANNNG_PERCENT: "no-first-no-pre-percent",

    # --- Attended-only charts ---
    StatChartVariants.TOTAL_ATTENDED: "total-attended-count",
    StatChartVariants.PERCENT_ATTENDED: "total-attended-percent",
    StatChartVariants.NO_FIRST_STEPS_ATTENDED_COUNT: "no-first-attended-count",
    StatChartVariants.NO_FIRST_STEPS_ATTENDED_PERCENT: "no-first-attended-percent",

    # --- On demand charts ---
    StatChartVariants.ON_DEMAND_COUNT: "ondemand-count",
    StatChartVariants.ON_DEMAND_PERCENT: "ondemand-percent",
    StatChartVariants.ON_DEMAND_NO_FIRST_STEPS_COUNT: "ondemand-no-first-count",
    StatChartVariants.ON_DEMAND_NO_FIRST_STEPS_PERCENT: "ondemand-no-first-percent",
    StatChartVariants.ON_DEMAND_NO_FIRST_STEPS_NO_PREPLANNING_COUNT: "ondemand-no-first-no-pre-count",
    StatChartVariants.ON_DEMAND_NO_FIRST_STEPS_NO_PREPLANNING_PERCENT: "ondemand-no-first-no-pre-percent",

    # --- Small-bar charts ---
    StatChartVariants.SMALL_BARS_TRAININGS: "small-bars",
    StatChartVariants.SMALL_BARS_TRAININGS_PERCENT: "small-bars-percent",

    # --- First steps / pre planning only charts ---
    StatChartVariants.FIRST_AND_PREPLANNING_ONLY: "first-steps-preplanning-only",
    StatChartVariants.FIRST_AND_PREPLANNING_ONLY_PERCENT: "first-steps-preplanning-only-percent",
    StatChartVariants.FIRST_ONLY: "first-steps-only",
    StatChartVariants.FIRST_ONLY_PERCENT: "first-steps-only-percent",
}
def save_variant_charts(chart_dict, base_path, report, chart_type):
    """
    Save every chart in ``chart_dict`` as a PNG image inside ``base_path``.

    :param chart_dict: The dictionary mapping StatChartVariants to plotly figure objects
        (each value must provide a ``write_image(path)`` method)
    :param base_path: The directory to save the charts into; it is created if it does not exist yet
    :param report: The name of the report these charts belong to (so they can be retrieved later
        by the word doc code which parses out the filenames)
    :param chart_type: The type of chart being saved (also used as part of the filename to identify the chart)
    :return: None
    :raises Exception: if a key of ``chart_dict`` has no entry in VARIANT_SUFFIX_MAP. Charts that
        come earlier in the dictionary have already been written at that point.

    description:
        Iterates through the provided dictionary saving all the figure objects to image files on
        the disk. VARIANT_SUFFIX_MAP provides the filename suffix that corresponds to each
        StatChartVariants member. Files are named:
            <report>_<chart_type>_<value in VARIANT_SUFFIX_MAP corresponding to the mapped enum>.png
        A failure while writing an individual image is reported with print() and does not stop
        the remaining charts from being saved.
    """
    # Make sure the target directory exists, otherwise every write below would
    # fail and only be reported via print(). An empty base_path means "current
    # directory" for os.path.join, and os.makedirs("") would raise, so skip it.
    if base_path:
        os.makedirs(base_path, exist_ok=True)

    for variant, fig in chart_dict.items():
        chart_variant_name = VARIANT_SUFFIX_MAP.get(variant)
        if chart_variant_name is None:
            raise Exception(f"Variant {variant} not mapped in VARIANT_SUFFIX_MAP")

        filename = f"{report}_{chart_type}_{chart_variant_name}.png"
        full_path = os.path.join(base_path, filename)
        try:
            fig.write_image(full_path)
        except Exception as e:
            # Deliberate best effort: one failed export must not abort the rest.
            print(f"Error saving {filename}: {e}")

View File

@@ -0,0 +1,23 @@
import requests
from io import StringIO
import pandas as pd
def csv_url_to_dataframe(csv_url, timeout=None):
    """
    Download a CSV export from ``csv_url`` and parse it into a pandas DataFrame.

    :param csv_url: The URL of the CSV export to download
    :param timeout: Optional timeout forwarded to ``requests.get``. The default of None
        keeps the previous behaviour of waiting indefinitely.
    :return: A DataFrame built from the UTF-8 decoded response body
    :raises Exception: if the server answers 403 (reported as the public IP not being
        whitelisted in the Neoserra export module) or with any other non-200 status code
    """
    # The context manager guarantees the streamed connection is released even
    # when one of the error branches raises before the body has been read.
    with requests.get(csv_url, stream=True, timeout=timeout) as export_csv_stream:
        status_code = export_csv_stream.status_code

        if status_code == 403:
            print("Could not get export content, public IP not whitelisted.")
            print(f'error: {export_csv_stream.text}')
            print(f'CSV URL: {csv_url}\n\n')
            raise Exception(
                f"Public ip not whitelisted in Neoserra export module. Find your public IP address and update your Neoserra preferences accordingly. {export_csv_stream.text}")

        if status_code != 200:
            # Report the actual status code; previously "got=" showed only the body.
            raise Exception(
                f"Got a status code other than 200 when trying to download export module csv. got={status_code} response={export_csv_stream.text}")

        decoded_csv = export_csv_stream.content.decode('utf-8')

    return pd.read_csv(StringIO(decoded_csv))

View File

@@ -0,0 +1,55 @@
# FILE: image_registry.py
# CREATED: 1/12/25
# AUTHOR: Vincent Allen
# CONTACT: valle276@live.kutztown.edu vincent@vtallen.com
# PURPOSE:
# This file implements a helper class called the ImageRegistry. This class is given an image folder and a report name.
# The class then ingests all .png files in the target folder. It only grabs the images that start with the defined report_name.
#
# Then a mapping is created using dictionaries in the manner of
# report name -> graph type -> graph variant
#
# Therefore any particular graph can be accessed using the .get function using a graph's type and variant. Blank variants are allowed, however the filename must have a trailing _ with nothing after it for this to work.
from pathlib import Path
from collections import defaultdict
class ImageRegistry:
    """
    Index of the .png chart images in a folder that belong to one report.

    Filenames are expected to look like ``<report>_<category>_<variant>.png``.
    All three parts are lower-cased before being stored or compared.
    """

    def __init__(self, image_folder: str, report_name: str):
        """
        Scan ``image_folder`` and index the images of ``report_name``.
        Resulting layout: {category: {variant: path}}
        """
        self.folder_path = Path(image_folder)
        self.report_name = report_name.lower()
        self._registry = defaultdict(dict)
        self._load_images()

    def _load_images(self):
        """Fill the registry from every .png file found directly in the folder."""
        for png_file in self.folder_path.glob("*.png"):
            stem = png_file.stem
            pieces = stem.split("_")

            # Anything that is not exactly report_category_variant is reported and ignored
            if len(pieces) != 3:
                print(f"Skipping: {stem} (Requires 3 parts: report_cat_var)")
                continue

            owner, chart_category, chart_variant = (piece.lower() for piece in pieces)

            # Images of other reports are ignored silently
            if owner != self.report_name:
                continue

            self._registry[chart_category][chart_variant] = str(png_file)

    def get(self, category: str, variant: str) -> str:
        """Return the stored path for ``category``/``variant`` or raise FileNotFoundError."""
        variants_for_category = self._registry.get(category.lower(), {})
        found = variants_for_category.get(variant.lower())
        if found:
            return found
        raise FileNotFoundError(
            f"Missing Image for Report '{self.report_name}': "
            f"Category '{category}' -> Variant '{variant}'"
        )