Skip to content

Commit 3b2b7b0

Browse files
authored
Merge pull request #85 from anfredette/refactor
refactor: Reorganize data/ directory into benchmarks, configuration, and archive
2 parents d48d257 + a9e746b commit 3b2b7b0

File tree

51 files changed

+68
-70
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

51 files changed

+68
-70
lines changed

.gitignore

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -68,18 +68,13 @@ logs/
6868
# Data
6969
*.sql
7070
!scripts/schema.sql
71-
data/benchmarks_redhat_performance.json
72-
data/benchmarks_GuideLLM.json
71+
data/benchmarks/performance/benchmarks_GuideLLM.json
7372

7473
# Presentation images (large files, regeneratable)
75-
data/benchmarks/models/presentation/
74+
data/archive/presentation/
7675

7776
# Old branding (replaced by neuralnav-*)
7877
docs/ai_assistant_*
7978

8079
# macOS system files (all directories)
8180
**/.DS_Store
82-
83-
# Redundant/unused data files (identified in data audit)
84-
data/slo_ranges_from_benchmarks.json
85-
data/research/benchmark_slo_ranges.json

CLAUDE.md

Lines changed: 18 additions & 16 deletions

Makefile

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,8 @@ SIMULATOR_FULL_IMAGE := $(REGISTRY)/$(REGISTRY_ORG)/$(SIMULATOR_IMAGE):$(SIMULAT
4848
OLLAMA_MODEL ?= qwen2.5:7b
4949
KIND_CLUSTER_NAME ?= neuralnav
5050

51-
PGDUMP_INPUT ?= data/integ-oct-29.sql
52-
PGDUMP_OUTPUT ?= data/benchmarks_GuideLLM.json
51+
PGDUMP_INPUT ?= data/benchmarks/performance/integ-oct-29.sql
52+
PGDUMP_OUTPUT ?= data/benchmarks/performance/benchmarks_GuideLLM.json
5353

5454
BACKEND_DIR := backend
5555
UI_DIR := ui
@@ -419,32 +419,32 @@ db-init: db-start ## Initialize PostgreSQL schema
419419

420420
db-load-synthetic: db-start ## Load synthetic benchmark data (appends)
421421
@printf "$(BLUE)Loading synthetic benchmark data...$(NC)\n"
422-
@uv run python scripts/load_benchmarks.py data/benchmarks_synthetic.json
422+
@uv run python scripts/load_benchmarks.py data/benchmarks/performance/benchmarks_synthetic.json
423423
@printf "$(GREEN)✓ Synthetic data loaded$(NC)\n"
424424

425425
db-load-blis: db-start ## Load BLIS benchmark data (appends)
426426
@printf "$(BLUE)Loading BLIS benchmark data...$(NC)\n"
427-
@uv run python scripts/load_benchmarks.py data/benchmarks_BLIS.json
427+
@uv run python scripts/load_benchmarks.py data/benchmarks/performance/benchmarks_BLIS.json
428428
@printf "$(GREEN)✓ BLIS data loaded$(NC)\n"
429429

430430
db-load-estimated: db-start ## Load estimated performance benchmarks (appends)
431431
@printf "$(BLUE)Loading estimated performance data...$(NC)\n"
432-
@uv run python scripts/load_benchmarks.py data/benchmarks_estimated_performance.json
432+
@uv run python scripts/load_benchmarks.py data/benchmarks/performance/benchmarks_estimated_performance.json
433433
@printf "$(GREEN)✓ Estimated data loaded$(NC)\n"
434434

435435
db-load-interpolated: db-start ## Load interpolated benchmark data (appends)
436436
@printf "$(BLUE)Loading interpolated benchmark data...$(NC)\n"
437-
@uv run python scripts/load_benchmarks.py data/benchmarks_interpolated_v2.json
437+
@uv run python scripts/load_benchmarks.py data/benchmarks/performance/benchmarks_interpolated_v2.json
438438
@printf "$(GREEN)✓ Interpolated data loaded$(NC)\n"
439439

440440
db-load-guidellm: db-start ## Load GuideLLM benchmark data (appends)
441441
@printf "$(BLUE)Loading GuideLLM benchmark data...$(NC)\n"
442-
@if [ ! -f data/benchmarks_GuideLLM.json ]; then \
443-
printf "$(RED)✗ data/benchmarks_GuideLLM.json not found$(NC)\n"; \
442+
@if [ ! -f data/benchmarks/performance/benchmarks_GuideLLM.json ]; then \
443+
printf "$(RED)✗ data/benchmarks/performance/benchmarks_GuideLLM.json not found$(NC)\n"; \
444444
printf "$(YELLOW)Run 'make db-convert-pgdump' first to create it from a pg_dump file$(NC)\n"; \
445445
exit 1; \
446446
fi
447-
@uv run python scripts/load_benchmarks.py data/benchmarks_GuideLLM.json
447+
@uv run python scripts/load_benchmarks.py data/benchmarks/performance/benchmarks_GuideLLM.json
448448
@printf "$(GREEN)✓ GuideLLM data loaded$(NC)\n"
449449

450450
db-convert-pgdump: db-start ## Convert PostgreSQL dump to JSON format

README.md

Lines changed: 1 addition & 1 deletion

backend/src/api/routes/reference_data.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ async def list_use_cases():
6262
async def get_benchmarks():
6363
"""Get all 206 models benchmark data from opensource_all_benchmarks.csv."""
6464
try:
65-
csv_path = _get_data_path() / "benchmarks" / "models" / "opensource_all_benchmarks.csv"
65+
csv_path = _get_data_path() / "benchmarks" / "accuracy" / "opensource_all_benchmarks.csv"
6666

6767
if not csv_path.exists():
6868
logger.error(f"Benchmark CSV not found at: {csv_path}")
@@ -95,7 +95,7 @@ async def get_priority_weights():
9595
when setting initial weights based on priority dropdowns.
9696
"""
9797
try:
98-
json_path = _get_data_path() / "priority_weights.json"
98+
json_path = _get_data_path() / "configuration" / "priority_weights.json"
9999

100100
if not json_path.exists():
101101
logger.error(f"Priority weights config not found at: {json_path}")
@@ -137,7 +137,7 @@ async def get_weighted_scores(use_case: str):
137137
detail=f"Invalid use case: {use_case}. Valid options: {list(use_case_to_file.keys())}",
138138
)
139139

140-
csv_path = _get_data_path() / "business_context" / "use_case" / "weighted_scores" / filename
140+
csv_path = _get_data_path() / "benchmarks" / "accuracy" / "weighted_scores" / filename
141141

142142
if not csv_path.exists():
143143
logger.error(f"Weighted scores CSV not found at: {csv_path}")

backend/src/api/routes/specification.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def _calculate_percentile_value(min_val: int, max_val: int, percentile: float =
2424

2525
def _get_slo_workload_path() -> Path:
2626
"""Get path to the SLO workload config file."""
27-
return Path(__file__).parent.parent.parent.parent.parent / "data" / "usecase_slo_workload.json"
27+
return Path(__file__).parent.parent.parent.parent.parent / "data" / "configuration" / "usecase_slo_workload.json"
2828

2929

3030
@router.get("/slo-defaults/{use_case}")

backend/src/knowledge_base/model_catalog.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ def __init__(self, data_path: Path | None = None):
112112
data_path: Path to model_catalog.json
113113
"""
114114
if data_path is None:
115-
data_path = Path(__file__).parent.parent.parent.parent / "data" / "model_catalog.json"
115+
data_path = Path(__file__).parent.parent.parent.parent / "data" / "configuration" / "model_catalog.json"
116116

117117
self.data_path = data_path
118118
self._models: dict[str, ModelInfo] = {}

backend/src/knowledge_base/slo_templates.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ def __init__(self, data_path: Path | None = None):
7373
data_path: Path to slo_templates.json
7474
"""
7575
if data_path is None:
76-
data_path = Path(__file__).parent.parent.parent.parent / "data" / "slo_templates.json"
76+
data_path = Path(__file__).parent.parent.parent.parent / "data" / "configuration" / "slo_templates.json"
7777

7878
self.data_path = data_path
7979
self._templates: dict[str, SLOTemplate] = {}

backend/src/recommendation/quality/usecase_scorer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
# Base path for weighted scores CSVs
2020
# Path: quality/usecase_scorer.py -> recommendation -> src -> backend -> project root -> data
2121
DATA_DIR = os.path.join(os.path.dirname(__file__), "..", "..", "..", "..", "data")
22-
WEIGHTED_SCORES_DIR = os.path.join(DATA_DIR, "business_context", "use_case", "weighted_scores")
22+
WEIGHTED_SCORES_DIR = os.path.join(DATA_DIR, "benchmarks", "accuracy", "weighted_scores")
2323

2424

2525
class UseCaseQualityScorer:

backend/src/recommendation/scorer.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ def _load_slo_ranges(self) -> dict:
9090
config_path = (
9191
Path(__file__).parent.parent.parent.parent
9292
/ "data"
93+
/ "configuration"
9394
/ "usecase_slo_workload.json"
9495
)
9596
try:

0 commit comments

Comments
 (0)