Skip to content

Commit 9efe060

Browse files
authored
Merge pull request llm-d-incubation#114 from amito/feature/model-catalog-benchmarks-1
Integrate Model Catalog as external data source
2 parents 5d9f022 + 8e0c448 commit 9efe060

30 files changed

+1900
-23
lines changed

Dockerfile

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,12 @@ COPY data ./data
2929
# Copy scripts (schema init, benchmark loading — used by db-init Job)
3030
COPY scripts ./scripts
3131

32-
# Create directories for generated files
33-
RUN mkdir -p /app/generated_configs /app/logs/prompts && \
34-
chmod -R 770 /app/generated_configs /app/logs
32+
# Create non-root user and directories for generated files
33+
RUN groupadd --gid 1001 appuser && \
34+
useradd --uid 1001 --gid 0 --no-create-home appuser && \
35+
mkdir -p /app/generated_configs /app/logs/prompts && \
36+
chown -R appuser:0 /app && \
37+
chmod -R g=u /app/generated_configs /app/logs
3538

3639
# Set environment variables
3740
ENV PYTHONPATH=/app/src
@@ -41,6 +44,9 @@ ENV PATH="/app/.venv/bin:$PATH"
4144

4245
ARG MODEL_CATALOG_URL
4346

47+
# Switch to non-root user
48+
USER appuser
49+
4450
# Expose backend API port
4551
EXPOSE 8000
4652

deploy/kubernetes/backend.yaml

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,23 @@ spec:
1717
app.kubernetes.io/name: backend
1818
app.kubernetes.io/part-of: neuralnav
1919
spec:
20+
securityContext:
21+
runAsNonRoot: true
22+
seccompProfile:
23+
type: RuntimeDefault
2024
containers:
2125
- name: backend
2226
image: quay.io/neuralnav/neuralnav-backend:latest
27+
securityContext:
28+
allowPrivilegeEscalation: false
29+
capabilities:
30+
drop:
31+
- ALL
2332
ports:
2433
- containerPort: 8000
34+
envFrom:
35+
- configMapRef:
36+
name: neuralnav-config
2537
env:
2638
- name: POSTGRES_PASSWORD
2739
valueFrom:
@@ -30,8 +42,12 @@ spec:
3042
key: postgres-password
3143
- name: DATABASE_URL
3244
value: postgresql://neuralnav:$(POSTGRES_PASSWORD)@postgres:5432/neuralnav
33-
- name: MODEL_CATALOG_URL
34-
value: http://model-registry.odh-model-registries.svc:8080
45+
- name: MODEL_CATALOG_TOKEN
46+
valueFrom:
47+
secretKeyRef:
48+
name: neuralnav-secrets
49+
key: model-catalog-token
50+
optional: true
3551
- name: OLLAMA_HOST
3652
value: http://ollama:11434
3753
- name: OLLAMA_MODEL
@@ -42,6 +58,10 @@ spec:
4258
value: "8000"
4359
- name: CORS_ORIGINS
4460
value: "*"
61+
volumeMounts:
62+
- name: service-ca
63+
mountPath: /etc/pki/service-ca
64+
readOnly: true
4565
readinessProbe:
4666
httpGet:
4767
path: /health
@@ -63,6 +83,11 @@ spec:
6383
limits:
6484
cpu: "2"
6585
memory: 2Gi
86+
volumes:
87+
- name: service-ca
88+
configMap:
89+
name: neuralnav-service-ca
90+
optional: true
6691
---
6792
apiVersion: v1
6893
kind: Service

deploy/kubernetes/configmap.yaml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
apiVersion: v1
2+
kind: ConfigMap
3+
metadata:
4+
name: neuralnav-config
5+
namespace: neuralnav
6+
labels:
7+
app.kubernetes.io/part-of: neuralnav
8+
data:
9+
# Benchmark data source: "postgresql" (default) or "model_catalog" (RHOAI)
10+
NEURALNAV_BENCHMARK_SOURCE: postgresql
11+
12+
# Model Catalog connection (only used when NEURALNAV_BENCHMARK_SOURCE=model_catalog)
13+
MODEL_CATALOG_URL: https://model-catalog.rhoai-model-registries.svc:8443
14+
MODEL_CATALOG_SOURCE_ID: redhat_ai_validated_models
15+
MODEL_CATALOG_VERIFY_SSL: "true"
16+
# Path to the OpenShift service-serving CA bundle (mounted via service-ca ConfigMap)
17+
MODEL_CATALOG_CA_BUNDLE: /etc/pki/service-ca/service-ca.crt

deploy/kubernetes/deploy-all.sh

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,47 @@ set -e
33

44
echo "Deploying NeuralNav..."
55

6+
# Apply base infrastructure (everything except backend, which needs
7+
# service-ca and NetworkPolicy to be ready first)
68
oc apply -f deploy/kubernetes/namespace.yaml \
79
-f deploy/kubernetes/secrets.yaml \
10+
-f deploy/kubernetes/configmap.yaml \
11+
-f deploy/kubernetes/service-ca-configmap.yaml \
812
-f deploy/kubernetes/postgres.yaml \
913
-f deploy/kubernetes/ollama.yaml \
10-
-f deploy/kubernetes/backend.yaml \
1114
-f deploy/kubernetes/ui.yaml \
1215
-f deploy/kubernetes/route.yaml
1316

17+
# Cross-namespace NetworkPolicy (allows neuralnav backend -> Model Catalog)
18+
BENCHMARK_SOURCE=$(oc get configmap neuralnav-config -n neuralnav -o jsonpath='{.data.NEURALNAV_BENCHMARK_SOURCE}') || {
19+
echo "Warning: Failed to read neuralnav-config configmap, skipping Model Catalog network policy"
20+
BENCHMARK_SOURCE=""
21+
}
22+
if [ "$BENCHMARK_SOURCE" = "model_catalog" ]; then
23+
echo "Applying Model Catalog network policy..."
24+
oc apply -f deploy/kubernetes/networkpolicy-model-catalog.yaml
25+
26+
echo "Waiting for service-ca certificate injection..."
27+
for i in $(seq 1 30); do
28+
if oc get configmap neuralnav-service-ca -n neuralnav -o jsonpath='{.data.service-ca\.crt}' 2>/dev/null | grep -q "BEGIN CERTIFICATE"; then
29+
echo "Service CA certificate is ready."
30+
break
31+
fi
32+
if [ "$i" -eq 30 ]; then
33+
echo "Error: Timed out waiting for service-ca certificate injection" >&2
34+
exit 1
35+
fi
36+
sleep 2
37+
done
38+
else
39+
echo "Skipping Model Catalog network policy (benchmark source: ${BENCHMARK_SOURCE:-postgresql})"
40+
oc delete -f deploy/kubernetes/networkpolicy-model-catalog.yaml --ignore-not-found
41+
fi
42+
43+
# Apply backend after prerequisites are ready
44+
echo "Deploying backend..."
45+
oc apply -f deploy/kubernetes/backend.yaml
46+
1447
echo "Waiting for PostgreSQL to be ready..."
1548
oc wait --for=condition=ready pod -l app.kubernetes.io/name=postgres -n neuralnav --timeout=120s
1649

deploy/kubernetes/networkpolicy-model-catalog.yaml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Allow backend pods in the neuralnav namespace to reach the Model Catalog API
2+
# in rhoai-model-registries. Applied separately because the policy is created
3+
# in that target namespace, which is outside of neuralnav.
4+
apiVersion: networking.k8s.io/v1
5+
kind: NetworkPolicy
6+
metadata:
7+
name: allow-neuralnav-to-model-catalog
8+
namespace: rhoai-model-registries
9+
labels:
10+
app.kubernetes.io/part-of: neuralnav
11+
spec:
12+
# Match the Model Catalog pods. Use a single label to avoid
13+
# fragility if the upstream deployment labels change.
14+
podSelector:
15+
matchLabels:
16+
app.kubernetes.io/name: model-catalog
17+
ingress:
18+
- from:
19+
- namespaceSelector:
20+
matchLabels:
21+
kubernetes.io/metadata.name: neuralnav
22+
podSelector:
23+
matchLabels:
24+
app.kubernetes.io/name: backend
25+
ports:
26+
- port: 8443
27+
protocol: TCP
28+
policyTypes:
29+
- Ingress

deploy/kubernetes/secrets.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,6 @@ metadata:
88
type: Opaque
99
stringData:
1010
postgres-password: changeme
11+
# Model Catalog bearer token (only needed when NEURALNAV_BENCHMARK_SOURCE=model_catalog)
12+
# On OpenShift, the ServiceAccount token can be used instead
13+
model-catalog-token: ""
deploy/kubernetes/service-ca-configmap.yaml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# OpenShift injects the service-serving CA bundle into this ConfigMap.
2+
# The annotation triggers automatic injection of the cluster's service CA.
3+
apiVersion: v1
4+
kind: ConfigMap
5+
metadata:
6+
name: neuralnav-service-ca
7+
namespace: neuralnav
8+
labels:
9+
app.kubernetes.io/part-of: neuralnav
10+
annotations:
11+
service.beta.openshift.io/inject-cabundle: "true"

docker-compose.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,16 @@ services:
6060
API_HOST: 0.0.0.0
6161
API_PORT: 8000
6262

63+
# Benchmark data source: "postgresql" (default) or "model_catalog" (RHOAI)
64+
NEURALNAV_BENCHMARK_SOURCE: ${NEURALNAV_BENCHMARK_SOURCE:-postgresql}
65+
66+
# Model Catalog connection (only used when NEURALNAV_BENCHMARK_SOURCE=model_catalog)
67+
MODEL_CATALOG_URL: ${MODEL_CATALOG_URL:-}
68+
MODEL_CATALOG_TOKEN: ${MODEL_CATALOG_TOKEN:-}
69+
MODEL_CATALOG_SOURCE_ID: ${MODEL_CATALOG_SOURCE_ID:-redhat_ai_validated_models}
70+
MODEL_CATALOG_VERIFY_SSL: ${MODEL_CATALOG_VERIFY_SSL:-true}
71+
MODEL_CATALOG_CA_BUNDLE: ${MODEL_CATALOG_CA_BUNDLE:-}
72+
6373
# Enable CORS for local development
6474
CORS_ORIGINS: http://localhost:8501,http://ui:8501
6575
volumes:

scripts/schema.sql

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,18 @@ CREATE TABLE IF NOT EXISTS exported_summaries (
5151
profiler_type text,
5252
profiler_image text,
5353
profiler_tag text,
54+
source text NOT NULL DEFAULT 'local',
55+
model_uri text,
5456
CONSTRAINT exported_summaries_pkey PRIMARY KEY (id)
5557
);
5658

59+
-- Idempotent migrations for existing databases
60+
ALTER TABLE exported_summaries ADD COLUMN IF NOT EXISTS source text NOT NULL DEFAULT 'local';
61+
ALTER TABLE exported_summaries ADD COLUMN IF NOT EXISTS model_uri text;
62+
63+
-- Unique constraint on config_id (required for ON CONFLICT in upsert queries)
64+
CREATE UNIQUE INDEX IF NOT EXISTS idx_config_id_unique ON exported_summaries (config_id);
65+
5766
-- Create indexes for efficient lookups
5867
CREATE INDEX IF NOT EXISTS idx_benchmark_lookup
5968
ON exported_summaries(model_hf_repo, hardware, hardware_count, prompt_tokens, output_tokens);

src/neuralnav/api/dependencies.py

Lines changed: 90 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
import asyncio
99
import logging
1010
import os
11-
from typing import cast
11+
import threading
12+
from typing import Any, cast
1213

1314
from fastapi import FastAPI, HTTPException, Request, status
1415
from starlette.concurrency import run_in_threadpool
@@ -29,6 +30,63 @@
2930
)
3031
logger = logging.getLogger(__name__)
3132

33+
_VALID_BENCHMARK_SOURCES = {"postgresql", "model_catalog"}


def _get_benchmark_source_type() -> str:
    """Return the configured benchmark source type.

    Reads ``NEURALNAV_BENCHMARK_SOURCE`` and normalizes it by stripping
    surrounding whitespace and lowercasing.

    Returns:
        A member of ``_VALID_BENCHMARK_SOURCES``. ``"postgresql"`` is returned
        when the variable is unset, blank, or holds an unrecognized value
        (the latter also logs a warning).
    """
    source = os.getenv("NEURALNAV_BENCHMARK_SOURCE", "").strip().lower()
    if not source:
        # A blank value (e.g. an empty template substitution) means "not
        # configured"; fall back quietly instead of warning about ''.
        return "postgresql"
    if source not in _VALID_BENCHMARK_SOURCES:
        logger.warning(
            "Unknown NEURALNAV_BENCHMARK_SOURCE='%s'; defaulting to 'postgresql'",
            source,
        )
        return "postgresql"
    return source
46+
47+
48+
def _sync_model_catalog_async(
    client: Any,
    database_url: str,
    model_catalog: ModelCatalog,
    quality_scorer: Any,
) -> threading.Thread:
    """Start a daemon thread that syncs Model Catalog data into PostgreSQL.

    The thread is started before this function returns, so the caller can
    carry on with startup (serving health probes, etc.) while the sync runs.

    Args:
        client: Model Catalog client handed to ``sync_model_catalog``.
        database_url: Connection string passed to ``psycopg2.connect``.
        model_catalog: Catalog instance handed to ``sync_model_catalog``.
        quality_scorer: Scorer instance handed to ``sync_model_catalog``.

    Returns:
        The already-started background thread.
    """

    def _worker() -> None:
        # Any failure (including a missing import) is logged rather than
        # raised: nothing is waiting on this thread to receive an exception.
        try:
            import psycopg2

            from neuralnav.knowledge_base.model_catalog_sync import sync_model_catalog

            logger.info("Background sync: loading Model Catalog data into PostgreSQL...")
            connection = psycopg2.connect(database_url)
            try:
                outcome = sync_model_catalog(
                    client=client,
                    conn=connection,
                    model_catalog=model_catalog,
                    quality_scorer=quality_scorer,
                )
                if not outcome.errors:
                    logger.info("Background sync: Model Catalog data ready")
                else:
                    logger.warning(
                        "Model Catalog sync completed with %d errors", len(outcome.errors)
                    )
            finally:
                # Release the connection whether or not the sync succeeded.
                connection.close()
        except Exception:
            logger.exception("Background Model Catalog sync failed")

    worker = threading.Thread(target=_worker, name="model-catalog-sync", daemon=True)
    worker.start()
    return worker
89+
3290

3391
# ---------------------------------------------------------------------------
3492
# Lifespan: initialize all singletons on app.state
@@ -37,12 +95,42 @@
3795

3896
def init_app_state(app: FastAPI) -> None:
    """Populate ``app.state`` with the application's singletons at lifespan startup.

    The core components are created for every benchmark source. When the
    source is ``model_catalog``, a Model Catalog client is also created, the
    workflow is built around a ``ConfigFinder`` that shares the catalog and
    quality scorer, and a background sync thread is started. Otherwise the
    client and thread slots are set to ``None`` and a default workflow is used.
    """
    source_type = _get_benchmark_source_type()

    state = app.state

    # Components common to every benchmark source.
    state.model_catalog = ModelCatalog()
    state.slo_repo = SLOTemplateRepository()
    state.deployment_generator = DeploymentGenerator(simulator_mode=False)
    state.yaml_validator = YAMLValidator()
    state.cluster_managers = {}  # dict[str, KubernetesClusterManager]

    if source_type != "model_catalog":
        state.model_catalog_client = None
        state.model_catalog_sync_thread = None
        state.workflow = RecommendationWorkflow()
        logger.info("Using PostgreSQL as benchmark source")
        return

    # Imported here so these modules are only loaded when the Model Catalog
    # source is actually selected.
    from neuralnav.knowledge_base.model_catalog_client import ModelCatalogClient
    from neuralnav.recommendation.config_finder import ConfigFinder
    from neuralnav.recommendation.quality.usecase_scorer import UseCaseQualityScorer

    client = ModelCatalogClient()
    state.model_catalog_client = client
    quality_scorer = UseCaseQualityScorer()

    # The same catalog and scorer objects are handed to both the workflow and
    # the sync thread, so sync updates propagate to recommendations.
    config_finder = ConfigFinder(catalog=state.model_catalog, quality_scorer=quality_scorer)
    state.workflow = RecommendationWorkflow(config_finder=config_finder)

    database_url = os.getenv(
        "DATABASE_URL",
        "postgresql://postgres:neuralnav@localhost:5432/neuralnav",
    )

    logger.info("Using Model Catalog as benchmark source (syncing to PostgreSQL)")
    state.model_catalog_sync_thread = _sync_model_catalog_async(
        client, database_url, state.model_catalog, quality_scorer
    )
46134

47135

48136
# ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)