Skip to content

Commit 4420800

Browse files
authored
Justin/free limit stop logging (#5487)
1 parent 49f316d commit 4420800

File tree

26 files changed

+560
-64
lines changed

26 files changed

+560
-64
lines changed

.claude/settings.local.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@
105105
"WebFetch(domain:ai.google.dev)",
106106
"Bash(npx tsoa:*)",
107107
"Bash(python3:*)",
108+
"Bash(lsof:*)",
108109
"Bash(git mv:*)",
109110
"Bash(npm run test:rate-limit:*)",
110111
"Bash(npx eslint:*)",

bifrost/lib/clients/jawnTypes/private.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2580,6 +2580,7 @@ Json: JsonObject;
25802580
/** @enum {string} */
25812581
BodyMappingType: "OPENAI" | "NO_MAPPING" | "RESPONSES";
25822582
HeliconeMeta: {
2583+
freeLimitExceeded?: boolean;
25832584
aiGatewayBodyMapping?: components["schemas"]["BodyMappingType"];
25842585
providerModelId?: string;
25852586
gatewayModel?: string;
@@ -2646,6 +2647,21 @@ Json: JsonObject;
26462647
};
26472648
Log: {
26482649
response: {
2650+
model?: string;
2651+
/** Format: double */
2652+
reasoningTokens?: number;
2653+
/** Format: double */
2654+
completionAudioTokens?: number;
2655+
/** Format: double */
2656+
promptAudioTokens?: number;
2657+
/** Format: double */
2658+
promptCacheWriteTokens?: number;
2659+
/** Format: double */
2660+
promptCacheReadTokens?: number;
2661+
/** Format: double */
2662+
completionTokens?: number;
2663+
/** Format: double */
2664+
promptTokens?: number;
26492665
/** Format: double */
26502666
cost?: number;
26512667
/** Format: double */

packages/llm-mapper/types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,7 @@ type HeliconeMetadata = {
271271
promptVersion?: string | null;
272272
targetUrl?: string | null;
273273
requestReferrer?: string | null;
274+
storageLocation?: string | null;
274275
};
275276

276277
// UNORGANZIED

packages/llm-mapper/utils/getMappedContent.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ const metaDataFromHeliconeRequest = (
104104
promptVersion: heliconeRequest.prompt_version ?? null,
105105
targetUrl: heliconeRequest.target_url ?? null,
106106
requestReferrer: heliconeRequest.request_referrer ?? null,
107+
storageLocation: heliconeRequest.storage_location ?? null,
107108
};
108109
};
109110

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
-- Add free_limit_exceeded column to track which month the free tier limit was exceeded
2+
-- Stores month in YYYY-MM format (e.g., "2026-01" = exceeded in January 2026)
3+
-- NULL = not exceeded or under limit
4+
-- When set, request/response bodies are not stored for non-PTB requests
5+
6+
ALTER TABLE organization
7+
ADD COLUMN IF NOT EXISTS free_limit_exceeded TEXT DEFAULT NULL;

valhalla/jawn/src/lib/handlers/HandlerContext.ts

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ export class HandlerContext extends SetOnce {
1616
public legacyUsage: Usage;
1717
public usage?: ModelUsage;
1818
public costBreakdown?: CostBreakdown;
19-
public storageLocation?: "s3" | "clickhouse";
19+
public storageLocation?: "s3" | "clickhouse" | "not_stored_exceeded_free";
2020
public sizeBytes?: number;
2121
public rawLog: RawLog;
2222
public processedLog: ProcessedLog;
@@ -70,6 +70,16 @@ export type Log = {
7070
delayMs: number;
7171
cachedLatency?: number;
7272
cost?: number;
73+
// Token usage (from Worker when body isn't stored)
74+
promptTokens?: number;
75+
completionTokens?: number;
76+
promptCacheReadTokens?: number;
77+
promptCacheWriteTokens?: number;
78+
promptAudioTokens?: number;
79+
completionAudioTokens?: number;
80+
reasoningTokens?: number;
81+
// Model (from Worker when body isn't stored)
82+
model?: string;
7383
};
7484
};
7585

@@ -80,6 +90,7 @@ export type Usage = {
8090
promptAudioTokens?: number;
8191
completionTokens?: number;
8292
completionAudioTokens?: number;
93+
reasoningTokens?: number;
8394

8495
// anthropic cache control
8596
promptCacheWrite5m?: number;
@@ -139,6 +150,9 @@ export type HeliconeMeta = {
139150
gatewayModel?: string; // registry format
140151
providerModelId?: string; // provider format
141152
aiGatewayBodyMapping?: BodyMappingType; // body mapping type
153+
154+
// Free tier limit
155+
freeLimitExceeded?: boolean;
142156
};
143157

144158
export type KafkaMessageContents = {

valhalla/jawn/src/lib/handlers/LoggingHandler.ts

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ type RequestRecord = {
4343
organizationId: string;
4444
requestBody: string;
4545
responseBody: string;
46-
location: "s3" | "clickhouse";
46+
location: "s3" | "clickhouse" | "not_stored_exceeded_free";
4747
};
4848

4949
// Legacy type definitions for deleted tables
@@ -171,9 +171,16 @@ export class LoggingHandler extends AbstractLogHandler {
171171
});
172172

173173
context.sizeBytes = size ?? 0;
174-
// if we know size is def less than 10mb use clickhouse otherwise just stick to s3
175-
context.storageLocation =
176-
size && size <= S3_MIN_SIZE_THRESHOLD ? "clickhouse" : "s3";
174+
// Determine storage location:
175+
// 1. If free tier limit exceeded, bodies were not stored
176+
// 2. If size is small enough, use clickhouse
177+
// 3. Otherwise use s3
178+
if (context.message.heliconeMeta.freeLimitExceeded) {
179+
context.storageLocation = "not_stored_exceeded_free";
180+
} else {
181+
context.storageLocation =
182+
size && size <= S3_MIN_SIZE_THRESHOLD ? "clickhouse" : "s3";
183+
}
177184

178185
const requestMapped = this.mapRequest(context);
179186
const responseMapped = this.mapResponse(context);
@@ -634,6 +641,13 @@ export class LoggingHandler extends AbstractLogHandler {
634641
responseText: string;
635642
} {
636643
try {
644+
// If free tier limit exceeded, bodies were not stored
645+
if (context.storageLocation === "not_stored_exceeded_free") {
646+
return {
647+
requestText: "",
648+
responseText: "",
649+
};
650+
}
637651
if (context.storageLocation === "clickhouse") {
638652
return {
639653
requestText: JSON.stringify(context.processedLog.request.body),

valhalla/jawn/src/lib/handlers/RateLimitHandler.ts

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ import {
88
import { RateLimitStore } from "../stores/RateLimitStore";
99
import { AbstractLogHandler } from "./AbstractLogHandler";
1010
import { HandlerContext } from "./HandlerContext";
11+
import { dbQueryClickhouse, dbExecute } from "../shared/db/dbExecute";
12+
13+
const FREE_TIER_LIMIT = 10_000;
14+
const FREE_TIER_CHECK_PROBABILITY = 0.01; // 1% of requests
1115

1216
export class RateLimitHandler extends AbstractLogHandler {
1317
private rateLimitStore: RateLimitStore;
@@ -30,6 +34,17 @@ export class RateLimitHandler extends AbstractLogHandler {
3034
}
3135

3236
try {
37+
// Probabilistic free tier limit check (1% of requests)
38+
// - If freeLimitExceeded is null: check with 1% probability
39+
// - If freeLimitExceeded is current month: skip check (already exceeded this month)
40+
// - If freeLimitExceeded is old month: check with 1% probability (may have reset)
41+
if (
42+
context.orgParams.tier === "free" &&
43+
this.shouldCheckFreeTierLimit(context.orgParams.freeLimitExceeded)
44+
) {
45+
await this.checkAndUpdateFreeTierLimit(context.orgParams.id);
46+
}
47+
3348
const { data: isRateLimited, error: rateLimitErr } = this.rateLimitEntry(
3449
context.orgParams.id,
3550
context.orgParams.percentLog
@@ -58,6 +73,79 @@ export class RateLimitHandler extends AbstractLogHandler {
5873
}
5974
}
6075

76+
private getCurrentMonth(): string {
77+
return new Date().toISOString().slice(0, 7); // "YYYY-MM"
78+
}
79+
80+
private shouldCheckFreeTierLimit(freeLimitExceeded: string | null): boolean {
81+
if (freeLimitExceeded === null) {
82+
// Not exceeded - check with 1% probability
83+
return Math.random() < FREE_TIER_CHECK_PROBABILITY;
84+
}
85+
86+
if (freeLimitExceeded === this.getCurrentMonth()) {
87+
// Already exceeded this month - skip check entirely
88+
return false;
89+
}
90+
91+
// Old month - check with 1% probability to see if still over limit
92+
return Math.random() < FREE_TIER_CHECK_PROBABILITY;
93+
}
94+
95+
private async checkAndUpdateFreeTierLimit(orgId: string): Promise<void> {
96+
try {
97+
const count = await this.get30DayRequestCount(orgId);
98+
if (count >= FREE_TIER_LIMIT) {
99+
// Over limit - set to current month
100+
await this.setFreeLimitExceeded(orgId, true);
101+
console.log(
102+
`[FreeTierLimit] Limit exceeded for org ${orgId}: ${count} requests in last 30 days`
103+
);
104+
} else {
105+
// Under limit - clear the flag
106+
await this.setFreeLimitExceeded(orgId, false);
107+
console.log(
108+
`[FreeTierLimit] Limit cleared for org ${orgId}: ${count} requests in last 30 days`
109+
);
110+
}
111+
} catch (error) {
112+
// Don't fail the request if the check fails
113+
console.error(`Error checking free tier limit for org ${orgId}:`, error);
114+
}
115+
}
116+
117+
private async get30DayRequestCount(orgId: string): Promise<number> {
118+
const { data, error } = await dbQueryClickhouse<{ count: number }>(
119+
`SELECT COUNT(*) as count FROM request_response_rmt
120+
WHERE organization_id = {val_0:String}
121+
AND request_created_at >= now() - INTERVAL 30 DAY`,
122+
[orgId]
123+
);
124+
125+
if (error || !data || data.length === 0) {
126+
console.error(`Error getting 30-day request count for org ${orgId}:`, error);
127+
return 0;
128+
}
129+
130+
return data[0].count ?? 0;
131+
}
132+
133+
private async setFreeLimitExceeded(
134+
orgId: string,
135+
exceeded: boolean
136+
): Promise<void> {
137+
// Store as month string (e.g., "2026-01") or null
138+
const value = exceeded ? this.getCurrentMonth() : null;
139+
const { error } = await dbExecute(
140+
`UPDATE organization SET free_limit_exceeded = $1 WHERE id = $2`,
141+
[value, orgId]
142+
);
143+
144+
if (error) {
145+
console.error(`Error setting free_limit_exceeded for org ${orgId}:`, error);
146+
}
147+
}
148+
61149
public rateLimitEntry(
62150
orgId: string,
63151
percentLog: number

valhalla/jawn/src/lib/handlers/ResponseBodyHandler.ts

Lines changed: 37 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,10 @@ export class ResponseBodyHandler extends AbstractLogHandler {
7979
context.message.log.response.status =
8080
processedResponseBody.data.statusOverride;
8181
}
82-
context.processedLog.response.model = getModelFromResponse(
83-
processedResponseBody.data?.processedBody
84-
);
82+
// Get model from response body, or fall back to Worker-provided model when body isn't stored
83+
context.processedLog.response.model =
84+
getModelFromResponse(processedResponseBody.data?.processedBody) ||
85+
context.message.log.response.model;
8586

8687
const definedModel =
8788
calculateModel(
@@ -113,37 +114,55 @@ export class ResponseBodyHandler extends AbstractLogHandler {
113114
}
114115

115116
// Set legacy usage values captured from body processors
117+
// Fall back to Worker-provided tokens when body isn't stored (free tier limit exceeded)
116118
const legacyUsage =
117119
processedResponseBody.data?.usage ??
118120
processedResponseBody.data?.processedBody?.usage ??
119121
{};
120-
context.legacyUsage.completionTokens = legacyUsage.completionTokens;
121-
context.legacyUsage.promptTokens = legacyUsage.promptTokens;
122-
context.legacyUsage.totalTokens = legacyUsage.totalTokens;
122+
context.legacyUsage.completionTokens =
123+
legacyUsage.completionTokens ??
124+
context.message.log.response.completionTokens;
125+
context.legacyUsage.promptTokens =
126+
legacyUsage.promptTokens ?? context.message.log.response.promptTokens;
127+
context.legacyUsage.totalTokens =
128+
legacyUsage.totalTokens ??
129+
((context.legacyUsage.promptTokens ?? 0) +
130+
(context.legacyUsage.completionTokens ?? 0) ||
131+
undefined);
123132
context.legacyUsage.heliconeCalculated = legacyUsage.heliconeCalculated;
133+
// Fall back to Worker-provided cache/audio tokens when body isn't stored
124134
context.legacyUsage.promptCacheWriteTokens =
125-
legacyUsage.promptCacheWriteTokens;
135+
legacyUsage.promptCacheWriteTokens ??
136+
context.message.log.response.promptCacheWriteTokens;
126137
context.legacyUsage.promptCacheReadTokens =
127-
legacyUsage.promptCacheReadTokens;
128-
context.legacyUsage.promptAudioTokens = legacyUsage.promptAudioTokens;
138+
legacyUsage.promptCacheReadTokens ??
139+
context.message.log.response.promptCacheReadTokens;
140+
context.legacyUsage.promptAudioTokens =
141+
legacyUsage.promptAudioTokens ??
142+
context.message.log.response.promptAudioTokens;
129143
context.legacyUsage.completionAudioTokens =
130-
legacyUsage.completionAudioTokens;
144+
legacyUsage.completionAudioTokens ??
145+
context.message.log.response.completionAudioTokens;
146+
context.legacyUsage.reasoningTokens =
147+
legacyUsage.reasoningTokens ??
148+
context.message.log.response.reasoningTokens;
131149
context.legacyUsage.promptCacheWrite5m = legacyUsage.promptCacheWrite5m;
132150
context.legacyUsage.promptCacheWrite1h = legacyUsage.promptCacheWrite1h;
133151
if (typeof legacyUsage.cost === "number" && legacyUsage.cost) {
134152
context.legacyUsage.cost = legacyUsage.cost;
135153
} else {
154+
// Use context.legacyUsage which has Worker-provided tokens as fallback
136155
const cost = modelCost({
137156
model: context.processedLog.model ?? "",
138157
provider: context.message.log.request.provider ?? "",
139-
sum_prompt_tokens: legacyUsage.promptTokens ?? 0,
140-
prompt_cache_write_tokens: legacyUsage.promptCacheWriteTokens ?? 0,
141-
prompt_cache_read_tokens: legacyUsage.promptCacheReadTokens ?? 0,
142-
prompt_audio_tokens: legacyUsage.promptAudioTokens ?? 0,
143-
sum_completion_tokens: legacyUsage.completionTokens ?? 0,
144-
completion_audio_tokens: legacyUsage.completionAudioTokens ?? 0,
145-
prompt_cache_write_5m: legacyUsage.promptCacheWrite5m ?? 0,
146-
prompt_cache_write_1h: legacyUsage.promptCacheWrite1h ?? 0,
158+
sum_prompt_tokens: context.legacyUsage.promptTokens ?? 0,
159+
prompt_cache_write_tokens: context.legacyUsage.promptCacheWriteTokens ?? 0,
160+
prompt_cache_read_tokens: context.legacyUsage.promptCacheReadTokens ?? 0,
161+
prompt_audio_tokens: context.legacyUsage.promptAudioTokens ?? 0,
162+
sum_completion_tokens: context.legacyUsage.completionTokens ?? 0,
163+
completion_audio_tokens: context.legacyUsage.completionAudioTokens ?? 0,
164+
prompt_cache_write_5m: context.legacyUsage.promptCacheWrite5m ?? 0,
165+
prompt_cache_write_1h: context.legacyUsage.promptCacheWrite1h ?? 0,
147166
});
148167

149168
context.legacyUsage.cost = cost;

valhalla/jawn/src/lib/handlers/S3ReaderHandler.ts

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,16 @@ export class S3ReaderHandler extends AbstractLogHandler {
4545

4646
if (content.error || !content.data) {
4747
if (content.error?.notFoundErr) {
48-
// Not found is unrecoverable, we will have no request/response to log
49-
// Do not process further, do not send to DLQ
50-
return ok(`Content not found in S3: ${signedUrl.data}`);
48+
// Content not found in S3 - this can happen when:
49+
// 1. Free tier limit exceeded (bodies not stored)
50+
// 2. Omit headers set (bodies not stored)
51+
// Continue processing with empty bodies - metadata will still be logged
52+
console.log(
53+
`S3 content not found for request ${context.message.log.request.id}, continuing with empty bodies`
54+
);
55+
context.rawLog.rawRequestBody = undefined;
56+
context.rawLog.rawResponseBody = undefined;
57+
return await super.handle(context);
5158
}
5259
return err(
5360
`Error fetching content from S3: ${JSON.stringify(content.error)}`

0 commit comments

Comments
 (0)