Skip to content

Commit a9ae525

Browse files
Merge branch 'main' into fix/registry-name-field-validation
2 parents 52cf7ed + 126a220 commit a9ae525

46 files changed

Lines changed: 13062 additions & 1016 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

packages/contract-tests/scripts/run-go-bff-consumer.sh

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -198,9 +198,12 @@ fi
198198

199199
log_info "Starting Mock BFF server on port $PORT..."
200200

201-
log_info "Starting Mock BFF server with go run"
201+
BFF_BINARY="$(mktemp -d)/bff-test"
202+
log_info "Building Mock BFF binary..."
203+
go build -o "$BFF_BINARY" ./cmd
202204

203-
go run ./cmd $BFF_MOCK_FLAGS --port "$PORT" --allowed-origins="*" > "$BFF_LOG_FILE" 2>&1 &
205+
log_info "Starting Mock BFF server"
206+
"$BFF_BINARY" $BFF_MOCK_FLAGS --port "$PORT" --allowed-origins="*" > "$BFF_LOG_FILE" 2>&1 &
204207

205208
BFF_PID=$!
206209
echo "$BFF_PID" > "$RESULTS_DIR/bff.pid"
@@ -215,6 +218,7 @@ cleanup() {
215218
sleep 2
216219
kill -9 "$BFF_PID" 2>/dev/null || true
217220
fi
221+
rm -f "$BFF_BINARY"
218222
}
219223
trap cleanup EXIT INT TERM
220224

packages/gen-ai/bff/.gitignore

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,7 @@ __debug*
66

77
# Local MLflow data (created by mock mode)
88
.mlflow/
9-
.mlflow-*/
9+
.mlflow-*/
10+
11+
# Local Llama Stack runtime data (created by mock/test mode)
12+
testdata/llamastack/.data/

packages/gen-ai/bff/Makefile

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,27 @@ MLFLOW_VERSION ?= 3.9.0
2222
MLFLOW_PORT ?= 5001
2323
MLFLOW_DATA ?= $(shell pwd)/.mlflow
2424

25+
# Llama Stack test server configuration (used by mock/test mode only)
26+
TEST_LLAMA_STACK_VERSION ?= 0.6.0
27+
TEST_LLAMA_STACK_PORT ?= 18321
28+
TEST_LLAMA_STACK_DIR ?= testdata/llamastack
29+
TEST_LLAMA_STACK_DATA ?= $(shell pwd)/$(TEST_LLAMA_STACK_DIR)/.data
30+
TEST_LLAMA_STACK_RECORDINGS ?= $(shell pwd)/$(TEST_LLAMA_STACK_DIR)
31+
TEST_LLAMA_STACK_CONFIG ?= $(TEST_LLAMA_STACK_DIR)/config.yaml
32+
TEST_LLAMA_STACK_TEST_ID ?= bff/$(TEST_LLAMA_STACK_DIR)/test.py::record
33+
34+
# Provider and bare model IDs (source of truth, used by config yaml)
35+
TEST_LLAMA_STACK_PROVIDER ?= gemini
36+
TEST_LLAMA_STACK_MODEL_ID ?= models/gemini-2.5-flash
37+
TEST_LLAMA_STACK_EMBEDDING_MODEL_ID ?= models/gemini-embedding-001
38+
TEST_LLAMA_STACK_EMBEDDING_DIMENSION ?= 128
39+
TEST_LLAMA_STACK_SHIELD_MODEL_ID ?= models/gemini-2.5-flash
40+
41+
# Prefixed model refs (derived from above, used by Go tests and shield)
42+
TEST_LLAMA_STACK_MODEL ?= $(TEST_LLAMA_STACK_PROVIDER)/$(TEST_LLAMA_STACK_MODEL_ID)
43+
TEST_LLAMA_STACK_EMBEDDING_MODEL ?= $(TEST_LLAMA_STACK_PROVIDER)/$(TEST_LLAMA_STACK_EMBEDDING_MODEL_ID)
44+
TEST_LLAMA_STACK_SHIELD_MODEL ?= $(TEST_LLAMA_STACK_PROVIDER)/$(TEST_LLAMA_STACK_SHIELD_MODEL_ID)
45+
2546
.PHONY: all
2647
all: build
2748

@@ -53,6 +74,15 @@ vet: . ## Runs static analysis tools on source files and reports suspicious con
5374
test: fmt vet envtest uv ## Runs the full test suite.
5475
ENVTEST_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" \
5576
MLFLOW_PORT=$(MLFLOW_PORT) MLFLOW_VERSION=$(MLFLOW_VERSION) \
77+
LLAMA_STACK_TEST_ID=$(TEST_LLAMA_STACK_TEST_ID) \
78+
TEST_LLAMA_STACK_PORT=$(TEST_LLAMA_STACK_PORT) TEST_LLAMA_STACK_VERSION=$(TEST_LLAMA_STACK_VERSION) \
79+
TEST_LLAMA_STACK_PROVIDER=$(TEST_LLAMA_STACK_PROVIDER) \
80+
TEST_LLAMA_STACK_MODEL=$(TEST_LLAMA_STACK_MODEL) \
81+
TEST_LLAMA_STACK_EMBEDDING_MODEL=$(TEST_LLAMA_STACK_EMBEDDING_MODEL) \
82+
TEST_LLAMA_STACK_MODEL_ID=$(TEST_LLAMA_STACK_MODEL_ID) \
83+
TEST_LLAMA_STACK_EMBEDDING_MODEL_ID=$(TEST_LLAMA_STACK_EMBEDDING_MODEL_ID) \
84+
TEST_LLAMA_STACK_EMBEDDING_DIMENSION=$(TEST_LLAMA_STACK_EMBEDDING_DIMENSION) \
85+
TEST_LLAMA_STACK_SHIELD_MODEL=$(TEST_LLAMA_STACK_SHIELD_MODEL) \
5686
go test ./...
5787

5888
.PHONY: build
@@ -96,6 +126,49 @@ mlflow-clean: mlflow-down ## Remove local MLflow data.
96126
rm -rf $(MLFLOW_DATA) $(MLFLOW_DATA)-test
97127
@echo "MLflow data cleaned"
98128

129+
##@ Llama Stack
130+
131+
.PHONY: llamastack-up
132+
llamastack-up: uv ## Start local Llama Stack server in replay mode (Ctrl+C to stop).
133+
@rm -rf $(TEST_LLAMA_STACK_DATA)
134+
@mkdir -p $(TEST_LLAMA_STACK_DATA)
135+
@echo "Starting Llama Stack in REPLAY mode on port $(TEST_LLAMA_STACK_PORT)..."
136+
SQLITE_STORE_DIR=$(TEST_LLAMA_STACK_DATA) \
137+
GEMINI_API_KEY=dummy-key-for-replay \
138+
LLAMA_STACK_TEST_INFERENCE_MODE=replay \
139+
LLAMA_STACK_TEST_RECORDING_DIR=$(TEST_LLAMA_STACK_RECORDINGS) \
140+
LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server \
141+
LLAMA_STACK_TEST_ID=$(TEST_LLAMA_STACK_TEST_ID) \
142+
TEST_LLAMA_STACK_PROVIDER=$(TEST_LLAMA_STACK_PROVIDER) \
143+
TEST_LLAMA_STACK_MODEL_ID=$(TEST_LLAMA_STACK_MODEL_ID) \
144+
TEST_LLAMA_STACK_EMBEDDING_MODEL_ID=$(TEST_LLAMA_STACK_EMBEDDING_MODEL_ID) \
145+
TEST_LLAMA_STACK_EMBEDDING_DIMENSION=$(TEST_LLAMA_STACK_EMBEDDING_DIMENSION) \
146+
TEST_LLAMA_STACK_EMBEDDING_MODEL=$(TEST_LLAMA_STACK_EMBEDDING_MODEL) \
147+
TEST_LLAMA_STACK_SHIELD_MODEL=$(TEST_LLAMA_STACK_SHIELD_MODEL) \
148+
$(UV) run --with llama-stack==$(TEST_LLAMA_STACK_VERSION) --with-requirements $(TEST_LLAMA_STACK_DIR)/requirements.txt \
149+
llama stack run $(TEST_LLAMA_STACK_CONFIG) --port $(TEST_LLAMA_STACK_PORT)
150+
151+
.PHONY: llamastack-down
152+
llamastack-down: ## Stop local Llama Stack server and remove ephemeral data.
153+
@-lsof -t -i :$(TEST_LLAMA_STACK_PORT) | xargs kill 2>/dev/null || true
154+
@sleep 2
155+
@-lsof -t -i :$(TEST_LLAMA_STACK_PORT) | xargs kill -9 2>/dev/null || true
156+
@rm -rf $(TEST_LLAMA_STACK_DATA)
157+
@echo "Llama Stack server stopped (runtime data cleaned)"
158+
159+
.PHONY: llamastack-record
160+
llamastack-record: ## Record Llama Stack fixtures by running Go tests against real Gemini (needs GEMINI_API_KEY).
161+
@if [ -z "$${GEMINI_API_KEY:-}" ]; then \
162+
echo "ERROR: GEMINI_API_KEY must be set for recording."; \
163+
echo "Usage: GEMINI_API_KEY=<key> make llamastack-record"; \
164+
exit 1; \
165+
fi
166+
@rm -rf $(TEST_LLAMA_STACK_RECORDINGS)/recordings
167+
@echo "Cleared previous recordings"
168+
LLAMA_STACK_TEST_INFERENCE_MODE=record $(MAKE) test
169+
@COUNT=$$(find $(TEST_LLAMA_STACK_RECORDINGS)/recordings -name "*.json" 2>/dev/null | wc -l | tr -d ' '); \
170+
echo " Recording complete! $${COUNT} JSON files";
171+
99172
##@ Dependencies
100173

101174
## Location to install dependencies to

packages/gen-ai/bff/cmd/main_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,9 @@ func TestGeneralBffConfiguration(t *testing.T) {
5151
envVar: "LLAMA_STACK_URL",
5252
varType: "string",
5353
defaultValue: "",
54-
testValue: testutil.TestLlamaStackURL,
54+
testValue: testutil.GetTestLlamaStackURL(),
5555
expectedDefault: "",
56-
expectedSet: testutil.TestLlamaStackURL,
56+
expectedSet: testutil.GetTestLlamaStackURL(),
5757
},
5858
{
5959
name: "LOG_LEVEL environment variable",

packages/gen-ai/bff/internal/api/api_suite_test.go

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,9 @@ import (
3131

3232
"github.com/opendatahub-io/gen-ai/internal/config"
3333
"github.com/opendatahub-io/gen-ai/internal/integrations/kubernetes/k8smocks"
34+
"github.com/opendatahub-io/gen-ai/internal/integrations/llamastack/lsmocks"
3435
"github.com/opendatahub-io/gen-ai/internal/integrations/mlflow/mlflowmocks"
36+
"github.com/opendatahub-io/gen-ai/internal/testutil"
3537
)
3638

3739
// Package-level test infrastructure - initialized once, shared by all tests.
@@ -93,18 +95,21 @@ func TestAPIHandlers(t *testing.T) {
9395

9496
// SharedTestContext holds common test infrastructure for HTTP tests
9597
type SharedTestContext struct {
96-
App *App
97-
Server *httptest.Server
98-
HTTPClient *http.Client
99-
BaseURL string
100-
Logger *slog.Logger
101-
mlflowState *mlflowmocks.MLflowState
98+
App *App
99+
Server *httptest.Server
100+
HTTPClient *http.Client
101+
BaseURL string
102+
Logger *slog.Logger
103+
mlflowState *mlflowmocks.MLflowState
104+
llamaStackState *lsmocks.LlamaStackState
102105
}
103106

104107
var testCtx *SharedTestContext
105108

106109
// BeforeSuite sets up test infrastructure (envtest and HTTP server) for all Ginkgo tests.
107110
var _ = BeforeSuite(func() {
111+
testutil.ConfigureProductionEnvFromTest()
112+
108113
By("Setting up envtest environment")
109114

110115
logf.SetLogger(zap.New(zap.UseDevMode(true)))
@@ -213,6 +218,21 @@ var _ = BeforeSuite(func() {
213218
Timeout: 30 * time.Second,
214219
}
215220

221+
// Start Llama Stack as a child process (SetupLlamaStack also seeds test data)
222+
By("Starting LlamaStack")
223+
lsState, lsErr := lsmocks.SetupLlamaStack(logger)
224+
Expect(lsErr).NotTo(HaveOccurred())
225+
Expect(lsState).NotTo(BeNil())
226+
Expect(lsState.Seed).NotTo(BeNil(), "SeedData must return a SeedResult")
227+
DeferCleanup(func() {
228+
By("stopping LlamaStack server")
229+
lsmocks.CleanupLlamaStackState(
230+
lsState,
231+
func(format string, args ...any) { GinkgoWriter.Printf("ERROR: "+format+"\n", args...) },
232+
func(format string, args ...any) { GinkgoWriter.Printf(format+"\n", args...) },
233+
)
234+
})
235+
216236
// Start MLflow as a child process (SetupMLflow also seeds sample prompts)
217237
By("Starting MLflow")
218238
mlflowState, mlflowErr := mlflowmocks.SetupMLflow(logger)
@@ -229,12 +249,13 @@ var _ = BeforeSuite(func() {
229249
})
230250

231251
testCtx = &SharedTestContext{
232-
App: app,
233-
Server: server,
234-
HTTPClient: httpClient,
235-
BaseURL: server.URL,
236-
Logger: logger,
237-
mlflowState: mlflowState,
252+
App: app,
253+
Server: server,
254+
HTTPClient: httpClient,
255+
BaseURL: server.URL,
256+
Logger: logger,
257+
mlflowState: mlflowState,
258+
llamaStackState: lsState,
238259
}
239260

240261
By("HTTP test environment setup complete")

packages/gen-ai/bff/internal/api/app.go

Lines changed: 25 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,8 @@ type App struct {
4848
rootCAs *x509.CertPool
4949
clusterDomain string
5050
fileUploadJobTracker *services.FileUploadJobTracker
51-
// Used only when MockK8sClient is enabled
52-
testEnvState *k8smocks.TestEnvState
53-
// Used only when MockMLflowClient is enabled and MLflow is started as a child process
54-
mlflowState *mlflowmocks.MLflowState
51+
// cleanupFuncs holds shutdown callbacks for mock processes (envtest, MLflow, LlamaStack)
52+
cleanupFuncs []func()
5553
}
5654

5755
func NewApp(cfg config.EnvConfig, logger *slog.Logger) (*App, error) {
@@ -100,6 +98,9 @@ func NewApp(cfg config.EnvConfig, logger *slog.Logger) (*App, error) {
10098
}
10199
logger.Info("Detected dashboard namespace", "namespace", dashboardNamespace)
102100

101+
// Track cleanup functions for mock processes
102+
var cleanupFuncs []func()
103+
103104
// Initialize LlamaStack client factory - clients will be created per request
104105
var llamaStackClientFactory llamastack.LlamaStackClientFactory
105106
if cfg.MockLSClient {
@@ -127,12 +128,11 @@ func NewApp(cfg config.EnvConfig, logger *slog.Logger) (*App, error) {
127128
}
128129

129130
var k8sFactory k8s.KubernetesClientFactory
130-
var testEnvState *k8smocks.TestEnvState
131131
if cfg.MockK8sClient {
132132
logger.Info("Using mocked Kubernetes client")
133133
var ctrlClient client.Client
134134
ctx, cancel := context.WithCancel(context.Background())
135-
testEnvState, ctrlClient, err = k8smocks.SetupEnvTest(k8smocks.TestEnvInput{
135+
testEnvState, ctrlClient, err := k8smocks.SetupEnvTest(k8smocks.TestEnvInput{
136136
Users: k8smocks.DefaultTestUsers,
137137
Logger: logger,
138138
Ctx: ctx,
@@ -145,6 +145,13 @@ func NewApp(cfg config.EnvConfig, logger *slog.Logger) (*App, error) {
145145
cancel()
146146
return nil, fmt.Errorf("failed to setup envtest: %w", err)
147147
}
148+
cleanupFuncs = append(cleanupFuncs, func() {
149+
logger.Info("stopping test environment...")
150+
k8smocks.CleanupTestEnvState(testEnvState,
151+
func(format string, args ...any) { logger.Error(fmt.Sprintf(format, args...)) },
152+
func(format string, args ...any) { logger.Info(fmt.Sprintf(format, args...)) },
153+
)
154+
})
148155
k8sFactory, err = k8smocks.NewMockedKubernetesClientFactory(ctrlClient, testEnvState, cfg, logger)
149156
if err != nil {
150157
// Clean up partially initialized test environment
@@ -176,11 +183,18 @@ func NewApp(cfg config.EnvConfig, logger *slog.Logger) (*App, error) {
176183

177184
// Initialize MLflow client factory
178185
var mlflowFactory mlflowpkg.MLflowClientFactory
179-
var mlflowState *mlflowmocks.MLflowState
180186
if cfg.MockMLflowClient {
181-
mlflowState, err = mlflowmocks.SetupMLflow(logger)
187+
mlflowState, err := mlflowmocks.SetupMLflow(logger)
182188
if err != nil {
183189
logger.Warn("MLflow mock server not available, MLflow endpoints will fail on request", "error", err)
190+
} else {
191+
cleanupFuncs = append(cleanupFuncs, func() {
192+
logger.Info("stopping MLflow server...")
193+
mlflowmocks.CleanupMLflowState(mlflowState,
194+
func(format string, args ...any) { logger.Error(fmt.Sprintf(format, args...)) },
195+
func(format string, args ...any) { logger.Info(fmt.Sprintf(format, args...)) },
196+
)
197+
})
184198
}
185199
mlflowFactory = mlflowmocks.NewMockClientFactory()
186200
} else {
@@ -228,8 +242,7 @@ func NewApp(cfg config.EnvConfig, logger *slog.Logger) (*App, error) {
228242
rootCAs: rootCAs,
229243
clusterDomain: clusterDomain,
230244
fileUploadJobTracker: fileUploadJobTracker,
231-
testEnvState: testEnvState,
232-
mlflowState: mlflowState,
245+
cleanupFuncs: cleanupFuncs,
233246
}
234247
return app, nil
235248
}
@@ -255,33 +268,9 @@ func resolveMLflowURL(cfg config.EnvConfig, logger *slog.Logger) string {
255268

256269
func (app *App) Shutdown() error {
257270
app.logger.Info("shutting down app...")
258-
259-
if app.testEnvState != nil {
260-
app.logger.Info("stopping test environment...")
261-
k8smocks.CleanupTestEnvState(
262-
app.testEnvState,
263-
func(format string, args ...interface{}) {
264-
app.logger.Error(fmt.Sprintf(format, args...))
265-
},
266-
func(format string, args ...interface{}) {
267-
app.logger.Info(fmt.Sprintf(format, args...))
268-
},
269-
)
271+
for i := len(app.cleanupFuncs) - 1; i >= 0; i-- {
272+
app.cleanupFuncs[i]()
270273
}
271-
272-
if app.mlflowState != nil {
273-
app.logger.Info("stopping MLflow server...")
274-
mlflowmocks.CleanupMLflowState(
275-
app.mlflowState,
276-
func(format string, args ...any) {
277-
app.logger.Error(fmt.Sprintf(format, args...))
278-
},
279-
func(format string, args ...any) {
280-
app.logger.Info(fmt.Sprintf(format, args...))
281-
},
282-
)
283-
}
284-
285274
return nil
286275
}
287276

packages/gen-ai/bff/internal/api/async_moderation.go

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,8 @@ import (
99
"sync"
1010
"time"
1111

12-
"github.com/openai/openai-go/v2/packages/ssestream"
13-
"github.com/openai/openai-go/v2/responses"
1412
"github.com/opendatahub-io/gen-ai/internal/constants"
1513
"github.com/opendatahub-io/gen-ai/internal/integrations/llamastack"
16-
"github.com/opendatahub-io/gen-ai/internal/integrations/llamastack/lsmocks"
1714
)
1815

1916
// ModerationChunk represents a chunk of text awaiting or completed moderation
@@ -287,15 +284,6 @@ func (app *App) handleStreamingResponseAsync(w http.ResponseWriter, r *http.Requ
287284
// Create streaming response
288285
stream, err := app.repositories.Responses.CreateResponseStream(ctx, params)
289286
if err != nil {
290-
// Check if this is a mock streaming error - delegate to mock client
291-
if _, ok := err.(*lsmocks.MockStreamError); ok {
292-
if client, clientErr := app.repositories.Responses.GetClient(r.Context()); clientErr == nil {
293-
if mockClient, ok := client.(*lsmocks.MockLlamaStackClient); ok {
294-
mockClient.HandleMockStreaming(ctx, w, flusher, params)
295-
return
296-
}
297-
}
298-
}
299287
app.handleLlamaStackClientError(w, r, err)
300288
return
301289
}
@@ -566,7 +554,7 @@ func (app *App) handleStreamingResponseAsync(w http.ResponseWriter, r *http.Requ
566554
}
567555

568556
// streamWithoutModeration handles streaming when moderation is disabled
569-
func (app *App) streamWithoutModeration(w http.ResponseWriter, flusher http.Flusher, stream *ssestream.Stream[responses.ResponseStreamEventUnion], ctx context.Context) {
557+
func (app *App) streamWithoutModeration(w http.ResponseWriter, flusher http.Flusher, stream llamastack.ResponseStreamIterator, ctx context.Context) {
570558
sendEvent := func(eventData []byte) error {
571559
_, err := fmt.Fprintf(w, "data: %s\n\n", eventData)
572560
if err != nil {

0 commit comments

Comments (0)