Skip to content

Commit 26b2d46

Browse files
authored
feat(store): add openai_response_store HttpFilter (praxis-proxy#582)
Signed-off-by: Sébastien Han <seb@redhat.com>
1 parent 9c4961c commit 26b2d46

25 files changed

Lines changed: 1744 additions & 11 deletions

File tree

Cargo.lock

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ praxis-filter = { version = "0.3.1", path = "filter", package = "praxis-proxy-fi
7171
praxis-protocol = { version = "0.3.1", path = "protocol", package = "praxis-proxy-protocol" }
7272
praxis-tls = { version = "0.3.1", path = "tls", package = "praxis-proxy-tls" }
7373
praxis-test-utils = { path = "tests/utils" }
74+
secrecy = { version = "0.10.3", features = ["serde"] }
7475
serde = { version = "1.0.228", features = ["derive", "rc"] }
7576
serde_json = "1.0.150"
7677
rcgen = "0.14.8"

core/src/config/validate/listener/timeouts.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,12 @@
55
66
use tracing::debug;
77

8+
use super::super::cluster::MAX_TIMEOUT_MS;
89
use crate::{
910
config::{Listener, ProtocolKind},
1011
errors::ProxyError,
1112
};
1213

13-
use super::super::cluster::MAX_TIMEOUT_MS;
14-
1514
// -----------------------------------------------------------------------------
1615
// Timeout Constants
1716
// -----------------------------------------------------------------------------

examples/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ page.
133133
| [format-routing.yaml](configs/ai/openai/responses/format-routing.yaml) | Route by AI API format (Responses vs Chat Completions) |
134134
| [responses-routing.yaml](configs/ai/openai/responses/responses-routing.yaml) | Route Responses API by mode (stateless vs stateful) |
135135
| [request-validate.yaml](configs/ai/openai/responses/request-validate.yaml) | Validate Responses API requests and reject invalid parameter combinations |
136+
| [response-store.yaml](configs/ai/openai/responses/response-store.yaml) | Persist non-streaming Responses API responses to SQLite |
136137

137138
### Branching
138139

examples/configs/ai/openai/responses/full-flow.yaml

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -66,11 +66,16 @@ filter_chains:
6666
mode: x-praxis-responses-mode
6767

6868
- filter: openai_responses_validate
69-
# Future #354 orchestration filters would follow here
70-
# for stateful requests (gated by filter conditions on the
71-
# x-praxis-responses-mode header or responses.* metadata)
72-
# before all valid requests route to the inference backend:
73-
# - filter: response_store
69+
70+
- filter: openai_response_store
71+
backend: sqlite
72+
# In-memory:
73+
# database_url: "sqlite::memory:"
74+
# File-backed:
75+
database_url: "sqlite://responses.db?mode=rwc"
76+
responses_table: openai_responses
77+
conversations_table: openai_conversations
78+
# Future #354 orchestration filters would follow here:
7479
# - filter: rehydrate
7580
# - filter: compact
7681
# - filter: tool_parse
examples/configs/ai/openai/responses/response-store.yaml

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Response Store
2+
#
3+
# Persists non-streaming Responses API responses to a SQLite
4+
# database. The `openai_responses_format` filter must run first to
5+
# classify the request body — `openai_response_store` reads its
6+
# metadata to decide whether to persist.
7+
#
8+
# Streaming responses (`stream: true`) are skipped; streaming
9+
# persistence will be handled by a separate filter. Non-2xx
10+
# responses and non-JSON content types are also skipped.
11+
#
12+
# The store is lazily initialized on the first qualifying
13+
# request. If initialization fails (bad URL, permissions),
14+
# the failure is permanent and the filter becomes a no-op.
15+
16+
listeners:
17+
- name: ai-gateway
18+
address: "127.0.0.1:8080"
19+
filter_chains: [responses-pipeline]
20+
21+
filter_chains:
22+
- name: responses-pipeline
23+
filters:
24+
- filter: openai_responses_format
25+
26+
- filter: openai_response_store
27+
backend: sqlite
28+
# In-memory:
29+
# database_url: "sqlite::memory:"
30+
# File-backed:
31+
database_url: "sqlite://responses.db?mode=rwc"
32+
responses_table: openai_responses
33+
conversations_table: openai_conversations
34+
35+
- filter: router
36+
routes:
37+
- path: "/v1/responses"
38+
cluster: "inference-backend"
39+
40+
- filter: load_balancer
41+
clusters:
42+
- name: "inference-backend"
43+
endpoints:
44+
- "127.0.0.1:8000"

filter/Cargo.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ name = "praxis_filter"
1515

1616
[features]
1717
default = ["ai-inference"]
18-
ai-inference = ["dep:sqlx"]
18+
ai-inference = ["dep:secrecy", "dep:sqlx", "dep:tokio"]
1919
ext-proc = ["dep:praxis-proto", "dep:tonic", "dep:prost-wkt-types"]
2020

2121
[lints]
@@ -32,11 +32,13 @@ praxis-proto = { workspace = true, optional = true }
3232
rand = { workspace = true }
3333
prost-wkt-types = { workspace = true, optional = true }
3434
regex = { workspace = true }
35+
secrecy = { workspace = true, optional = true }
3536
serde = { workspace = true }
3637
serde_json = { workspace = true }
3738
serde_yaml = { workspace = true }
3839
sqlx = { workspace = true, optional = true }
3940
thiserror = { workspace = true }
41+
tokio = { workspace = true, optional = true }
4042
tonic = { workspace = true, optional = true }
4143
tracing = { workspace = true }
4244
zeroize = { workspace = true }

filter/src/builtins/http/ai/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ mod prompt_enrich;
1414
#[cfg(feature = "ai-inference")]
1515
#[allow(
1616
dead_code,
17-
reason = "store module is the foundation for upcoming response store filter"
17+
reason = "store module provides foundation for response store filter and upcoming CRUD endpoints"
1818
)]
1919
pub(crate) mod store;
2020
#[cfg(feature = "ai-inference")]
@@ -26,6 +26,8 @@ pub use inference::ModelToHeaderFilter;
2626
#[cfg(feature = "ai-inference")]
2727
pub use openai::OpenaiResponsesValidateFilter;
2828
#[cfg(feature = "ai-inference")]
29+
pub use openai::ResponseStoreFilter;
30+
#[cfg(feature = "ai-inference")]
2931
pub use openai::ResponsesFormatFilter;
3032
#[cfg(feature = "ai-inference")]
3133
pub use prompt_enrich::PromptEnrichFilter;

filter/src/builtins/http/ai/openai/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@ pub(crate) mod responses;
77

88
#[cfg(feature = "ai-inference")]
99
pub use responses::OpenaiResponsesValidateFilter;
10-
pub use responses::ResponsesFormatFilter;
10+
pub use responses::{ResponseStoreFilter, ResponsesFormatFilter};

filter/src/builtins/http/ai/openai/responses/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ mod config;
2525
)]
2626
pub(crate) mod store;
2727

28+
pub use store::ResponseStoreFilter;
29+
2830
#[cfg(test)]
2931
#[allow(
3032
clippy::unwrap_used,

0 commit comments

Comments
 (0)