-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Expand file tree
/
Copy pathmigrations.ts
More file actions
2231 lines (2103 loc) · 126 KB
/
Copy pathmigrations.ts
File metadata and controls
2231 lines (2103 loc) · 126 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import crypto from 'crypto';
import Database from 'better-sqlite3';
import { initEncryptionKey } from '../lib/crypto.js';
import { applyModelPricing } from './model-pricing.js';
/**
 * Runs the complete schema/data migration pipeline against `db`.
 *
 * Every step below is invoked exactly once with `db`, strictly in the listed
 * order, each time this function is called.
 *
 * @param db - Open better-sqlite3 handle handed to every step.
 */
export function migrateDbSchema(db: Database.Database) {
  const steps = [
    createTables,
    initEncryptionKey,
    seedModels,
    migrateModels,
    migrateModelsV2,
    migrateModelsV3Ranks,
    migrateModelsV4,
    migrateModelsV5,
    migrateModelsV6,
    migrateModelsV7,
    migrateModelsV8,
    migrateModelsV9,
    migrateModelsV10,
    migrateModelsV11,
    migrateModelsV12,
    migrateModelsV13,
    migrateModelsV14,
    migrateModelsV15,
    migrateModelsV16Vision,
    migrateModelsV17IntelligenceTiers,
    migrateModelsV18OpenCodeZen,
    migrateModelsV19Gemma4,
    migrateModelsV20KiloFree,
    migrateModelsV21PruneDead,
    migrateModelsV22Tools,
    migrateModelsV23FreeTierAudit,
    migrateModelsV24ZenRefresh,
    migrateModelsV25ZenDeadPromos,
    // V25 is the LAST model-data migration. Since the Premium live catalog
    // shipped (June 2026), model/limit DATA is maintained in the published
    // catalog (served signed by the catalog service) and reaches installs via
    // catalog-sync — premium on the live tier within ~12h, free at the monthly
    // promote. Shipping model data as a migration would hand it to free users
    // on their next binary update, bypassing the tier gate. Migrations from
    // here on are baseline/code-level only (schema, family rules, provider
    // plumbing, quirk-seed corrections).

    // After all model migrations: add/refresh paid-equivalent pricing
    // (drives the realistic "Est. savings" analytics stat).
    applyModelPricing,
    migrateEmbeddingsV1,
    migrateQuirksV1,
    ensureUnifiedKey,
    migrateProfilesInit,
  ];

  for (const runStep of steps) {
    runStep(db);
  }
}
/**
 * Creates every base table and index that does not exist yet, then applies the
 * additive column upgrades for databases created by older versions.
 *
 * All DDL uses `IF NOT EXISTS`, so existing tables and indexes are left as-is.
 *
 * @param db - Open better-sqlite3 handle.
 */
function createTables(db: Database.Database) {
  const baseSchemaSql = `
CREATE TABLE IF NOT EXISTS models (
id INTEGER PRIMARY KEY AUTOINCREMENT,
platform TEXT NOT NULL,
model_id TEXT NOT NULL,
display_name TEXT NOT NULL,
intelligence_rank INTEGER NOT NULL,
speed_rank INTEGER NOT NULL,
size_label TEXT NOT NULL DEFAULT '',
rpm_limit INTEGER,
rpd_limit INTEGER,
tpm_limit INTEGER,
tpd_limit INTEGER,
monthly_token_budget TEXT NOT NULL DEFAULT '',
context_window INTEGER,
enabled INTEGER NOT NULL DEFAULT 1,
supports_vision INTEGER NOT NULL DEFAULT 0,
UNIQUE(platform, model_id)
);
CREATE TABLE IF NOT EXISTS api_keys (
id INTEGER PRIMARY KEY AUTOINCREMENT,
platform TEXT NOT NULL,
label TEXT NOT NULL DEFAULT '',
encrypted_key TEXT NOT NULL,
iv TEXT NOT NULL,
auth_tag TEXT NOT NULL,
status TEXT NOT NULL DEFAULT 'unknown',
enabled INTEGER NOT NULL DEFAULT 1,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
last_checked_at TEXT
);
CREATE TABLE IF NOT EXISTS requests (
id INTEGER PRIMARY KEY AUTOINCREMENT,
platform TEXT NOT NULL,
model_id TEXT NOT NULL,
key_id INTEGER,
status TEXT NOT NULL,
input_tokens INTEGER NOT NULL DEFAULT 0,
output_tokens INTEGER NOT NULL DEFAULT 0,
latency_ms INTEGER NOT NULL DEFAULT 0,
error TEXT,
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE TABLE IF NOT EXISTS rate_limit_usage (
id INTEGER PRIMARY KEY AUTOINCREMENT,
platform TEXT NOT NULL,
model_id TEXT NOT NULL,
key_id INTEGER NOT NULL,
kind TEXT NOT NULL CHECK (kind IN ('request', 'tokens')),
tokens INTEGER NOT NULL DEFAULT 0,
created_at_ms INTEGER NOT NULL,
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE TABLE IF NOT EXISTS rate_limit_cooldowns (
platform TEXT NOT NULL,
model_id TEXT NOT NULL,
key_id INTEGER NOT NULL,
expires_at_ms INTEGER NOT NULL,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
PRIMARY KEY (platform, model_id, key_id)
);
CREATE TABLE IF NOT EXISTS fallback_config (
id INTEGER PRIMARY KEY AUTOINCREMENT,
model_db_id INTEGER NOT NULL REFERENCES models(id),
priority INTEGER NOT NULL,
enabled INTEGER NOT NULL DEFAULT 1,
UNIQUE(model_db_id)
);
CREATE TABLE IF NOT EXISTS profiles (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
emoji TEXT NOT NULL DEFAULT '',
color TEXT NOT NULL DEFAULT '#6366f1',
type TEXT NOT NULL DEFAULT 'custom',
is_favorite INTEGER NOT NULL DEFAULT 0,
sort_order INTEGER NOT NULL DEFAULT 0,
auto_sort TEXT,
layout_config TEXT,
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE TABLE IF NOT EXISTS profile_models (
id INTEGER PRIMARY KEY AUTOINCREMENT,
profile_id INTEGER NOT NULL REFERENCES profiles(id) ON DELETE CASCADE,
model_db_id INTEGER NOT NULL REFERENCES models(id) ON DELETE CASCADE,
priority INTEGER NOT NULL,
enabled INTEGER NOT NULL DEFAULT 1,
UNIQUE(profile_id, model_db_id)
);
CREATE TABLE IF NOT EXISTS settings (
key TEXT PRIMARY KEY,
value TEXT NOT NULL
);
-- Dashboard accounts (email + password) gating the /api/* admin surface (#35).
CREATE TABLE IF NOT EXISTS users (
id INTEGER PRIMARY KEY AUTOINCREMENT,
email TEXT NOT NULL UNIQUE,
password_hash TEXT NOT NULL,
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE TABLE IF NOT EXISTS sessions (
token_hash TEXT PRIMARY KEY,
user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
expires_at_ms INTEGER NOT NULL,
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE INDEX IF NOT EXISTS idx_sessions_user ON sessions(user_id);
CREATE INDEX IF NOT EXISTS idx_requests_created_at ON requests(created_at);
CREATE INDEX IF NOT EXISTS idx_requests_platform ON requests(platform);
CREATE INDEX IF NOT EXISTS idx_rate_limit_usage_lookup ON rate_limit_usage(platform, model_id, key_id, kind, created_at_ms);
CREATE INDEX IF NOT EXISTS idx_rate_limit_cooldowns_expires ON rate_limit_cooldowns(expires_at_ms);
CREATE INDEX IF NOT EXISTS idx_api_keys_platform ON api_keys(platform);
`;
  db.exec(baseSchemaSql);

  // Columns added after the original CREATE TABLE statements shipped; each
  // helper checks PRAGMA table_info and only ALTERs when the column is missing.
  const columnUpgrades = [
    ensureRequestKeyIdColumn,
    ensureApiKeysBaseUrlColumn,
    ensureModelsKeyIdColumn,
    ensureRequestGroupIdAndAttempt,
    ensureRequestTtfbColumn,
    ensureRequestRequestedModelColumn,
  ];
  for (const upgrade of columnUpgrades) {
    upgrade(db);
  }
}
/**
 * Adds the nullable `requested_model` TEXT column to `requests` if it is missing.
 *
 * `requested_model` is the model id the CLIENT pinned in the request body.
 * NULL when the request was auto-routed ('auto' or omitted model field).
 * requested_model = model_id means the pin was honored; a different model_id
 * means rate limits or failures forced a failover to another model.
 *
 * @param db - Open better-sqlite3 handle.
 */
function ensureRequestRequestedModelColumn(db: Database.Database) {
  const tableInfo = db.prepare('PRAGMA table_info(requests)').all() as { name: string }[];
  const alreadyPresent = tableInfo.find(({ name }) => name === 'requested_model') !== undefined;
  if (alreadyPresent) {
    return;
  }
  db.prepare('ALTER TABLE requests ADD COLUMN requested_model TEXT').run();
}
/**
 * Adds the `request_group_id` (TEXT) and `attempt_number` (INTEGER) columns to
 * `requests`, each only when it is not already present.
 *
 * Request tracing uses one group id per inbound request and the current retry
 * loop index for each logged attempt. Both are nullable so older rows remain
 * valid, but new rows can carry chronological trace metadata.
 *
 * @param db - Open better-sqlite3 handle.
 */
function ensureRequestGroupIdAndAttempt(db: Database.Database) {
  const tableInfo = db.prepare('PRAGMA table_info(requests)').all() as { name: string }[];
  // Snapshot taken once, before any ALTER, so both checks see the same state.
  const existing = new Set(tableInfo.map(row => row.name));

  const wanted: Array<[string, string]> = [
    ['request_group_id', 'ALTER TABLE requests ADD COLUMN request_group_id TEXT'],
    ['attempt_number', 'ALTER TABLE requests ADD COLUMN attempt_number INTEGER'],
  ];
  for (const [column, ddl] of wanted) {
    if (!existing.has(column)) {
      db.prepare(ddl).run();
    }
  }
}
/**
 * Adds the nullable `ttfb_ms` INTEGER column to `requests` if it is missing.
 *
 * `ttfb_ms` is the time-to-first-byte for streaming responses (ms from dispatch
 * to the first chunk). NULL for non-streaming or pre-existing rows. Feeds the
 * bandit router's latency axis (server/src/services/scoring.ts).
 *
 * @param db - Open better-sqlite3 handle.
 */
function ensureRequestTtfbColumn(db: Database.Database) {
  const tableInfo = db.prepare('PRAGMA table_info(requests)').all() as { name: string }[];
  const columnNames = tableInfo.map(row => row.name);
  if (columnNames.includes('ttfb_ms')) {
    return;
  }
  db.prepare('ALTER TABLE requests ADD COLUMN ttfb_ms INTEGER').run();
}
/**
 * Adds the nullable `key_id` INTEGER column to `requests` when it is missing,
 * and always (re)issues `CREATE INDEX IF NOT EXISTS idx_requests_key_id`.
 *
 * @param db - Open better-sqlite3 handle.
 */
function ensureRequestKeyIdColumn(db: Database.Database) {
  const tableInfo = db.prepare('PRAGMA table_info(requests)').all() as { name: string }[];
  const hasKeyId = tableInfo.map(row => row.name).includes('key_id');
  if (!hasKeyId) {
    db.prepare('ALTER TABLE requests ADD COLUMN key_id INTEGER').run();
  }
  // The index statement runs on every call; IF NOT EXISTS makes repeats a no-op.
  const createIndex = db.prepare('CREATE INDEX IF NOT EXISTS idx_requests_key_id ON requests(key_id)');
  createIndex.run();
}
/**
 * Adds the nullable `base_url` TEXT column to `api_keys` if it is missing.
 *
 * `base_url` is the upstream endpoint for the user-configured 'custom' provider
 * (#117). NULL for every built-in platform — they use their hardcoded base URL.
 *
 * @param db - Open better-sqlite3 handle.
 */
function ensureApiKeysBaseUrlColumn(db: Database.Database) {
  const tableInfo = db.prepare('PRAGMA table_info(api_keys)').all() as { name: string }[];
  for (const { name } of tableInfo) {
    if (name === 'base_url') {
      return; // column already exists — nothing to do
    }
  }
  db.prepare('ALTER TABLE api_keys ADD COLUMN base_url TEXT').run();
}
/**
 * Adds the nullable `key_id` INTEGER column to `models` if it is missing and,
 * only in that case, backfills it for pre-existing 'custom' models.
 *
 * `key_id` binds a custom model to the api_keys row that carries ITS endpoint,
 * so several custom providers can coexist (#212). NULL for built-in platforms
 * (any key of the platform serves any of its models).
 *
 * @param db - Open better-sqlite3 handle.
 */
function ensureModelsKeyIdColumn(db: Database.Database) {
  const tableInfo = db.prepare('PRAGMA table_info(models)').all() as { name: string }[];
  const hasKeyId = tableInfo.map(row => row.name).includes('key_id');
  if (hasKeyId) {
    return;
  }

  db.prepare('ALTER TABLE models ADD COLUMN key_id INTEGER').run();

  // Backfill: bind pre-existing custom models to the (single) legacy custom
  // endpoint key (lowest api_keys.id with platform 'custom') so they keep
  // routing to the URL they were created for.
  const backfill = db.prepare(`
UPDATE models
SET key_id = (SELECT id FROM api_keys WHERE platform = 'custom' ORDER BY id LIMIT 1)
WHERE platform = 'custom' AND key_id IS NULL
`);
  backfill.run();
}
/**
 * Seeds the built-in model catalogue into an empty `models` table and creates a
 * `fallback_config` row for every model, prioritised by ascending
 * `intelligence_rank` (priority starts at 1).
 *
 * Returns immediately, touching nothing, when `models` already contains rows.
 * The model inserts and the fallback inserts each run in their own transaction.
 *
 * @param db - Open better-sqlite3 handle.
 */
function seedModels(db: Database.Database) {
  const { cnt: existingRows } = db.prepare('SELECT COUNT(*) as cnt FROM models').get() as { cnt: number };
  if (existingRows > 0) {
    return;
  }

  const insertModel = db.prepare(`
INSERT INTO models (platform, model_id, display_name, intelligence_rank, speed_rank, size_label, rpm_limit, rpd_limit, tpm_limit, tpd_limit, monthly_token_budget, context_window)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`);

  // NOTE: Limits current as of April 2026. See migrateModels() for in-place updates.
  // Column order matches the INSERT above.
  const seedRows = [
    // Google — gemini-2.5-flash free quotas were cut Dec 2025 (now ~20 RPD, budget much lower than before)
    ['google', 'gemini-2.5-pro', 'Gemini 2.5 Pro', 1, 8, 'Frontier', 5, 100, 250000, null, '~12M', 1048576],
    ['google', 'gemini-2.5-flash', 'Gemini 2.5 Flash', 4, 5, 'Large', 10, 20, 250000, null, '~3M', 1048576],
    ['google', 'gemini-2.5-flash-lite', 'Gemini 2.5 Flash-Lite', 8, 3, 'Medium', 15, 1000, 250000, null, '~120M', 1048576],
    // OpenRouter — upgraded DeepSeek R1 -> V3.1 (stronger reasoning); default RPD ~200
    ['openrouter', 'deepseek/deepseek-v3.1:free', 'DeepSeek V3.1 (free)', 2, 10, 'Frontier', 20, 200, null, null, '~6M', 131072],
    ['openrouter', 'moonshotai/kimi-k2:free', 'Kimi K2 (free)', 2, 9, 'Frontier', 20, 200, null, null, '~6M', 131072],
    ['openrouter', 'qwen/qwen3-coder:free', 'Qwen3 Coder (free)', 3, 9, 'Frontier', 20, 200, null, null, '~6M', 262144],
    ['openrouter', 'z-ai/glm-4.5-air:free', 'GLM-4.5 Air (free)', 4, 9, 'Large', 20, 200, null, null, '~6M', 131072],
    // Cerebras — same 30 RPM / 1M TPD free pool; adding frontier coder, Llama 4 Maverick, GPT-OSS
    ['cerebras', 'qwen-3-coder-480b', 'Qwen3-Coder 480B', 2, 1, 'Frontier', 30, null, 60000, 1000000, '~30M', 131072],
    ['cerebras', 'llama-4-maverick-17b-128e-instruct', 'Llama 4 Maverick', 3, 1, 'Frontier', 30, null, 60000, 1000000, '~30M', 131072],
    ['cerebras', 'qwen3-235b', 'Qwen3 235B', 3, 1, 'Large', 30, null, 60000, 1000000, '~30M', 8192],
    ['cerebras', 'gpt-oss-120b', 'GPT-OSS 120B', 3, 1, 'Large', 30, null, 60000, 1000000, '~30M', 131072],
    // GitHub Models — GPT-4o replaced with GPT-5 (same free tier key)
    ['github', 'openai/gpt-5', 'GPT-5 (GitHub)', 1, 7, 'Frontier', 10, 50, null, null, '~18M', 128000],
    // SambaNova — 70B RPM bumped to 20
    ['sambanova', 'Meta-Llama-3.3-70B-Instruct', 'Llama 3.3 70B', 6, 9, 'Large', 20, null, null, 200000, '~6M', 8192],
    // Mistral — Experiment pool ~1B tokens/mo shared across all models
    ['mistral', 'mistral-large-latest', 'Mistral Large 3', 7, 8, 'Large', 2, null, 500000, null, '~50-100M', 131072],
    ['mistral', 'magistral-medium-latest', 'Magistral Medium', 4, 8, 'Large', 2, null, 500000, null, '~50-100M', 40000],
    ['mistral', 'codestral-latest', 'Codestral', 6, 6, 'Medium', 2, null, 500000, null, '~50-100M', 32000],
    // Groq — scout TPM corrected to 6k (not 30k)
    ['groq', 'llama-3.3-70b-versatile', 'Llama 3.3 70B', 9, 2, 'Medium', 30, 1000, 6000, 500000, '~15M', 131072],
    ['groq', 'llama-4-scout-17b-16e-instruct', 'Llama 4 Scout', 10, 2, 'Medium', 30, 1000, 6000, 1000000, '~30M', 131072],
    // NVIDIA NIM — moved to credit-based model in 2025; no longer truly recurring monthly. Disabled by default.
    ['nvidia', 'meta/llama-3.1-70b-instruct', 'Llama 3.1 70B (NV)', 11, 6, 'Large', 40, null, null, null, 'credits-based', 131072],
    // Cohere — trial tier is 1000 calls/mo total → realistic budget 1-2M
    ['cohere', 'command-r-plus-08-2024', 'Command R+ (08-2024)', 12, 11, 'Large', 20, 33, null, null, '~1-2M', 131072],
    ['cloudflare', '@cf/meta/llama-3.1-70b-instruct', 'Llama 3.1 70B (CF)', 13, 11, 'Medium', null, null, null, null, '~18-45M', 131072],
    // Hugging Face — free Inference credits are ~$0.10/mo → budget closer to 1-3M on a 70B model
    ['huggingface', 'accounts/fireworks/models/llama-v3p3-70b-instruct', 'Llama 3.3 70B (HF)', 14, 11, 'Medium', null, null, null, null, '~1-3M', 131072],
    // New providers — recurring monthly free tiers, no card required
    ['zhipu', 'glm-4.5-flash', 'GLM-4.5 Flash', 5, 4, 'Large', null, null, null, 1000000, '~30M', 131072],
    ['moonshot', 'kimi-latest', 'Kimi Latest', 4, 8, 'Large', 60, null, null, 500000, '~15M', 200000],
    ['minimax', 'MiniMax-M1', 'MiniMax M1', 5, 8, 'Large', 20, null, 1000000, null, '~30M', 200000],
  ];

  // First transaction: insert the whole catalogue.
  db.transaction(() => {
    seedRows.forEach(row => {
      insertModel.run(...row);
    });
  })();

  // Seed default fallback config from models: best intelligence rank first.
  const rankedModels = db
    .prepare('SELECT id, intelligence_rank FROM models ORDER BY intelligence_rank ASC')
    .all() as { id: number; intelligence_rank: number }[];
  const insertFallbackRow = db.prepare('INSERT INTO fallback_config (model_db_id, priority, enabled) VALUES (?, ?, 1)');

  // Second transaction: one fallback row per model, priority = 1-based position.
  db.transaction(() => {
    rankedModels.forEach((model, position) => {
      insertFallbackRow.run(model.id, position + 1);
    });
  })();

  console.log(`Seeded ${seedRows.length} models and fallback config`);
}
/**
 * Idempotent migration to bring existing DBs up to the April 2026 pool.
 * Covers: replaces outdated models (DeepSeek R1 → V3.1, GPT-4o → GPT-5),
 * corrects stale rate-limits / monthly budgets, adds new smarter models
 * and three new providers (Zhipu, Moonshot, MiniMax).
 *
 * Steps 1 and 2 are plain UPDATEs keyed on (platform, model_id), so they touch
 * nothing when the targeted row is absent. Step 3 uses INSERT OR IGNORE and then
 * back-fills a `fallback_config` row for every model that lacks one.
 *
 * @param db - Open better-sqlite3 handle.
 */
function migrateModels(db: Database.Database) {
  // 1) Replace outdated models in-place (preserves fallback_config & any references).
  // Bind order: newModelId, newDisplayName, intelligenceRank, monthlyBudget,
  // rpdLimit, contextWindow, sizeLabel, platform, oldModelId.
  // A NULL rpdLimit / contextWindow / sizeLabel keeps the row's current value (COALESCE).
  const renameStmt = db.prepare(`
UPDATE models
SET model_id = ?, display_name = ?, intelligence_rank = ?,
monthly_token_budget = ?, rpd_limit = COALESCE(?, rpd_limit),
context_window = COALESCE(?, context_window),
size_label = COALESCE(?, size_label)
WHERE platform = ? AND model_id = ?
`);
  // DeepSeek R1 (free) -> DeepSeek V3.1 (free)
  renameStmt.run('deepseek/deepseek-v3.1:free', 'DeepSeek V3.1 (free)', 2, '~6M', 200, 131072, 'Frontier', 'openrouter', 'deepseek/deepseek-r1:free');
  // GitHub GPT-4o -> GPT-5
  renameStmt.run('openai/gpt-5', 'GPT-5 (GitHub)', 1, '~18M', null, 128000, 'Frontier', 'github', 'gpt-4o');

  // 2) Correct stale limits / budgets on existing rows
  db.prepare(`UPDATE models SET rpd_limit = 20, monthly_token_budget = '~3M' WHERE platform = 'google' AND model_id = 'gemini-2.5-flash'`).run();
  db.prepare(`UPDATE models SET rpm_limit = 20 WHERE platform = 'sambanova' AND model_id = 'Meta-Llama-3.3-70B-Instruct'`).run();
  db.prepare(`UPDATE models SET tpm_limit = 6000 WHERE platform = 'groq' AND model_id = 'llama-4-scout-17b-16e-instruct'`).run();
  db.prepare(`UPDATE models SET monthly_token_budget = '~1-2M' WHERE platform = 'cohere' AND model_id = 'command-r-plus-08-2024'`).run();
  db.prepare(`UPDATE models SET monthly_token_budget = '~1-3M' WHERE platform = 'huggingface' AND model_id = 'accounts/fireworks/models/llama-v3p3-70b-instruct'`).run();
  // NVIDIA moved to credit model — disable and label accordingly
  db.prepare(`UPDATE models SET monthly_token_budget = 'credits-based', enabled = 0 WHERE platform = 'nvidia' AND model_id = 'meta/llama-3.1-70b-instruct'`).run();

  // 3) Insert new models (UNIQUE(platform, model_id) makes this idempotent)
  const insert = db.prepare(`
INSERT OR IGNORE INTO models (platform, model_id, display_name, intelligence_rank, speed_rank, size_label, rpm_limit, rpd_limit, tpm_limit, tpd_limit, monthly_token_budget, context_window)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`);
  const newModels: Array<[string, string, string, number, number, string, number | null, number | null, number | null, number | null, string, number | null]> = [
    // Cerebras — same free pool as qwen3-235b
    ['cerebras', 'qwen-3-coder-480b', 'Qwen3-Coder 480B', 2, 1, 'Frontier', 30, null, 60000, 1000000, '~30M', 131072],
    ['cerebras', 'llama-4-maverick-17b-128e-instruct', 'Llama 4 Maverick', 3, 1, 'Frontier', 30, null, 60000, 1000000, '~30M', 131072],
    ['cerebras', 'gpt-oss-120b', 'GPT-OSS 120B', 3, 1, 'Large', 30, null, 60000, 1000000, '~30M', 131072],
    // OpenRouter free tier
    ['openrouter', 'deepseek/deepseek-v3.1:free', 'DeepSeek V3.1 (free)', 2, 10, 'Frontier', 20, 200, null, null, '~6M', 131072],
    ['openrouter', 'moonshotai/kimi-k2:free', 'Kimi K2 (free)', 2, 9, 'Frontier', 20, 200, null, null, '~6M', 131072],
    ['openrouter', 'qwen/qwen3-coder:free', 'Qwen3 Coder (free)', 3, 9, 'Frontier', 20, 200, null, null, '~6M', 262144],
    ['openrouter', 'z-ai/glm-4.5-air:free', 'GLM-4.5 Air (free)', 4, 9, 'Large', 20, 200, null, null, '~6M', 131072],
    // Mistral Experiment pool — shared ~1B/mo across models
    ['mistral', 'magistral-medium-latest', 'Magistral Medium', 4, 8, 'Large', 2, null, 500000, null, '~50-100M', 40000],
    ['mistral', 'codestral-latest', 'Codestral', 6, 6, 'Medium', 2, null, 500000, null, '~50-100M', 32000],
    // New providers
    ['zhipu', 'glm-4.5-flash', 'GLM-4.5 Flash', 5, 4, 'Large', null, null, null, 1000000, '~30M', 131072],
    ['moonshot', 'kimi-latest', 'Kimi Latest', 4, 8, 'Large', 60, null, null, 500000, '~15M', 200000],
    ['minimax', 'MiniMax-M1', 'MiniMax M1', 5, 8, 'Large', 20, null, 1000000, null, '~30M', 200000],
  ];

  const apply = db.transaction(() => {
    for (const m of newModels) insert.run(...m);

    // Ensure every model has a fallback_config row (new inserts + any orphans)
    const missing = db.prepare(`
SELECT m.id FROM models m
LEFT JOIN fallback_config f ON m.id = f.model_db_id
WHERE f.id IS NULL
ORDER BY m.intelligence_rank ASC
`).all() as { id: number }[];
    if (missing.length > 0) {
      // Append after the current highest priority so existing ordering is untouched.
      const maxPriority = (db.prepare('SELECT COALESCE(MAX(priority), 0) AS mx FROM fallback_config').get() as { mx: number }).mx;
      const addFallback = db.prepare('INSERT INTO fallback_config (model_db_id, priority, enabled) VALUES (?, ?, 1)');
      for (let i = 0; i < missing.length; i++) {
        addFallback.run(missing[i].id, maxPriority + i + 1);
      }
    }
  });
  apply();
}
/**
 * Second-pass migration after live-testing every model against its provider.
 * Corrects model IDs verified wrong, removes models not actually available on
 * the current free tier, and adds real :free OpenRouter models found in the
 * live catalog (April 2026).
 *
 * Sequence: (1) delete the listed models together with their fallback rows in
 * one transaction, (2) rename GitHub gpt-5 back to gpt-4o and fix the Groq scout
 * id, (3) INSERT OR IGNORE the new OpenRouter rows and back-fill any missing
 * `fallback_config` rows in a second transaction.
 *
 * @param db - Open better-sqlite3 handle.
 */
function migrateModelsV2(db: Database.Database) {
  type ModelRow = [string, string, string, number, number, string, number | null, number | null, number | null, number | null, string, number | null];

  // The fallback row must go before the model row (FK is RESTRICT-by-default).
  const removeModelRow = db.prepare(`DELETE FROM models WHERE platform = ? AND model_id = ?`);
  const removeFallbackRow = db.prepare(`
DELETE FROM fallback_config WHERE model_db_id IN (
SELECT id FROM models WHERE platform = ? AND model_id = ?
)
`);

  const obsolete: Array<[string, string]> = [
    // GitHub free tier does NOT include GPT-5 (only catalog-listed). Revert handled below.
    // Cerebras: qwen-3-coder-480b and llama-4-maverick not on free tier; gpt-oss-120b is listed
    // but requires special access — our key gets 404. Remove all three.
    ['cerebras', 'qwen-3-coder-480b'],
    ['cerebras', 'llama-4-maverick-17b-128e-instruct'],
    ['cerebras', 'gpt-oss-120b'],
    // These OpenRouter :free variants do not exist in the live catalog (April 2026)
    ['openrouter', 'deepseek/deepseek-v3.1:free'],
    ['openrouter', 'moonshotai/kimi-k2:free'],
  ];
  db.transaction(() => {
    obsolete.forEach(([platform, modelId]) => {
      removeFallbackRow.run(platform, modelId);
      removeModelRow.run(platform, modelId);
    });
  })();

  // GitHub: gpt-5 is in the model catalog but returns "unavailable_model" on free tier
  // inference. Revert to gpt-4o which works. This only runs if the gpt-5 row exists.
  const revertGithub = db.prepare(`
UPDATE models
SET model_id = 'gpt-4o', display_name = 'GPT-4o', intelligence_rank = 5,
size_label = 'Large', context_window = 8000, monthly_token_budget = '~18M'
WHERE platform = 'github' AND model_id = 'openai/gpt-5'
`);
  revertGithub.run();

  // Groq: scout requires the meta-llama/ publisher prefix
  const prefixGroqScout = db.prepare(`
UPDATE models SET model_id = 'meta-llama/llama-4-scout-17b-16e-instruct'
WHERE platform = 'groq' AND model_id = 'llama-4-scout-17b-16e-instruct'
`);
  prefixGroqScout.run();

  // Add real OpenRouter :free models that exist in the live catalog
  const insertModel = db.prepare(`
INSERT OR IGNORE INTO models (platform, model_id, display_name, intelligence_rank, speed_rank, size_label, rpm_limit, rpd_limit, tpm_limit, tpd_limit, monthly_token_budget, context_window)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`);
  const verifiedFree: ModelRow[] = [
    // Frontier-tier free models verified in OR catalog 2026-04
    ['openrouter', 'nvidia/nemotron-3-super-120b-a12b:free', 'Nemotron 3 Super 120B (free)', 2, 9, 'Frontier', 20, 200, null, null, '~6M', 262144],
    ['openrouter', 'qwen/qwen3-next-80b-a3b-instruct:free', 'Qwen3-Next 80B (free)', 3, 9, 'Large', 20, 200, null, null, '~6M', 262144],
    ['openrouter', 'minimax/minimax-m2.5:free', 'MiniMax M2.5 (free)', 3, 9, 'Large', 20, 200, null, null, '~6M', 196608],
    ['openrouter', 'google/gemma-4-31b-it:free', 'Gemma 4 31B (free)', 5, 9, 'Medium', 20, 200, null, null, '~6M', 262144],
  ];

  db.transaction(() => {
    verifiedFree.forEach(row => {
      insertModel.run(...row);
    });

    // Fallback entries for new models (and any other model lacking one)
    const withoutFallback = db.prepare(`
SELECT m.id FROM models m
LEFT JOIN fallback_config f ON m.id = f.model_db_id
WHERE f.id IS NULL ORDER BY m.intelligence_rank ASC
`).all() as { id: number }[];
    if (withoutFallback.length === 0) {
      return;
    }
    // Append after the current highest priority.
    const { mx: highest } = db.prepare('SELECT COALESCE(MAX(priority), 0) AS mx FROM fallback_config').get() as { mx: number };
    const insertFallbackRow = db.prepare('INSERT INTO fallback_config (model_db_id, priority, enabled) VALUES (?, ?, 1)');
    withoutFallback.forEach((model, offset) => {
      insertFallbackRow.run(model.id, highest + offset + 1);
    });
  })();
}
/**
 * Re-rank intelligence based on April 2026 coding + agentic tool-use benchmarks:
 * SWE-bench Verified, Terminal-Bench 2, TAU-Bench, Aider Polyglot.
 * Higher rank = weaker. Ties are allowed (same weights across providers).
 *
 * Every entry issues one UPDATE keyed on (platform, model_id); all updates run
 * inside a single transaction, in table order.
 *
 * @param db - Open better-sqlite3 handle.
 */
function migrateModelsV3Ranks(db: Database.Database) {
  const updateRank = db.prepare(`UPDATE models SET intelligence_rank = ? WHERE platform = ? AND model_id = ?`);

  // [rank, platform, model_id]
  const rankTable: Array<[number, string, string]> = [
    // #1-10 frontier coders / agents
    [1, 'openrouter', 'minimax/minimax-m2.5:free'], // SWE-V ~80%, TB2 ~57%
    [2, 'openrouter', 'qwen/qwen3-coder:free'], // SWE-V ~70%
    [3, 'openrouter', 'qwen/qwen3-next-80b-a3b-instruct:free'], // SWE-V ~70.6%
    [4, 'moonshot', 'kimi-latest'], // K2: SWE-V ~71%
    [5, 'cerebras', 'qwen-3-235b-a22b-instruct-2507'], // SWE-V ~65-72%
    [6, 'google', 'gemini-2.5-pro'], // SWE-V 63.8%, Aider 83%
    [7, 'openrouter', 'z-ai/glm-4.5-air:free'], // ~58% SWE-V (distill of 4.5)
    [8, 'openrouter', 'openai/gpt-oss-120b:free'], // SWE-V 62.4%
    [9, 'openrouter', 'nvidia/nemotron-3-super-120b-a12b:free'], // SWE-V 53.7%
    [10, 'minimax', 'MiniMax-M1'], // M1 predecessor, ~45-55%
    // #11-15 mid-tier specialists
    [11, 'mistral', 'codestral-latest'], // HumanEval 86.6%
    [12, 'mistral', 'mistral-large-latest'],
    [13, 'mistral', 'magistral-medium-latest'], // reasoning, not code-tuned
    [14, 'google', 'gemini-2.5-flash'],
    [15, 'zhipu', 'glm-4.5-flash'],
    // #16 Llama 3.3 70B — identical weights across providers (tie)
    [16, 'groq', 'llama-3.3-70b-versatile'],
    [16, 'sambanova', 'Meta-Llama-3.3-70B-Instruct'],
    [16, 'openrouter', 'meta-llama/llama-3.3-70b-instruct:free'],
    [16, 'huggingface', 'accounts/fireworks/models/llama-v3p3-70b-instruct'],
    // #17-23 weaker
    [17, 'openrouter', 'nousresearch/hermes-3-llama-3.1-405b:free'], // L3.1 base with tool-use tune
    [18, 'groq', 'meta-llama/llama-4-scout-17b-16e-instruct'], // multimodal focus
    [19, 'openrouter', 'google/gemma-4-31b-it:free'],
    [20, 'google', 'gemini-2.5-flash-lite'],
    [21, 'github', 'gpt-4o'], // Aug 2024, SWE-V ~33%
    [22, 'nvidia', 'meta/llama-3.1-70b-instruct'], // older Llama 3.1 tune
    [22, 'cloudflare', '@cf/meta/llama-3.1-70b-instruct'], // same base weights
    [23, 'cohere', 'command-r-plus-08-2024'], // RAG-focused, weakest on code
  ];

  db.transaction(() => {
    rankTable.forEach(entry => {
      const [newRank, platform, modelId] = entry;
      updateRank.run(newRank, platform, modelId);
    });
  })();
}
/**
* V4: Agentic-tool-use focus. Live-probed every candidate against real free-tier
* keys (April 2026) with a weather-tool function-calling test. Keeps only models
* that return a structured tool_calls response and are reachable on the free tier.
*
* Adds SambaNova DeepSeek/Llama-4/gpt-oss, Groq gpt-oss & qwen3-32b, OpenRouter
* ling-2.6-flash + nemotron-nano + gpt-oss + trinity, Mistral devstral/medium,
* GitHub gpt-4.1, Cohere command-a, Cloudflare llama-4/gpt-oss/glm-4.7. Removes
* moonshot/kimi (paid-only now), minimax/M1 (superseded), HF/Fireworks route
* (no structured tools), OR/gemma-4 (weak at tools). Renames CF llama-3.1 → 3.3
* fp8-fast. Corrects stale limits.
*/
function migrateModelsV4(db: Database.Database) {
// 1) Remove entries that are unavailable or fail agentic tool use
const deleteModel = db.prepare(`DELETE FROM models WHERE platform = ? AND model_id = ?`);
const deleteFallback = db.prepare(`
DELETE FROM fallback_config WHERE model_db_id IN (
SELECT id FROM models WHERE platform = ? AND model_id = ?
)
`);
const removals: Array<[string, string]> = [
['moonshot', 'kimi-latest'], // paid-only now ($1 min deposit)
['minimax', 'MiniMax-M1'], // superseded; use OR minimax-m2.5:free
['openrouter', 'google/gemma-4-31b-it:free'], // weak at tool use
['huggingface', 'accounts/fireworks/models/llama-v3p3-70b-instruct'], // emits tool call as text content, not structured
];
const applyRemovals = db.transaction(() => {
for (const [p, m] of removals) {
deleteFallback.run(p, m);
deleteModel.run(p, m);
}
});
applyRemovals();
// 2) Cloudflare: replace Llama 3.1 70B with the current-gen 3.3 70B fp8-fast
db.prepare(`
UPDATE models
SET model_id = '@cf/meta/llama-3.3-70b-instruct-fp8-fast',
display_name = 'Llama 3.3 70B fp8-fast (CF)',
context_window = 131072
WHERE platform = 'cloudflare' AND model_id = '@cf/meta/llama-3.1-70b-instruct'
`).run();
// 3) Field corrections verified via primary sources + live probe
db.prepare(`UPDATE models SET tpm_limit = 12000 WHERE platform = 'groq' AND model_id = 'llama-3.3-70b-versatile'`).run();
db.prepare(`UPDATE models SET rpd_limit = 20 WHERE platform = 'sambanova' AND model_id = 'Meta-Llama-3.3-70B-Instruct'`).run();
db.prepare(`UPDATE models SET rpd_limit = 14400 WHERE platform = 'cerebras' AND model_id = 'qwen-3-235b-a22b-instruct-2507'`).run();
db.prepare(`UPDATE models SET rpd_limit = 250, monthly_token_budget = '~25M' WHERE platform = 'google' AND model_id = 'gemini-2.5-flash'`).run();
// gemini-2.5-pro is at-risk: April 2026 Google moved Pro-class off free tier in practice.
// Our live probe hit "quota exceeded" immediately. Cut rpd in half to reduce 429 blast radius.
db.prepare(`UPDATE models SET rpd_limit = 50, monthly_token_budget = '~6M' WHERE platform = 'google' AND model_id = 'gemini-2.5-pro'`).run();
// 4) Add live-probed, tool-capable models
const insert = db.prepare(`
INSERT OR IGNORE INTO models (platform, model_id, display_name, intelligence_rank, speed_rank, size_label, rpm_limit, rpd_limit, tpm_limit, tpd_limit, monthly_token_budget, context_window)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`);
const additions: Array<[string, string, string, number, number, string, number | null, number | null, number | null, number | null, string, number | null]> = [
// OpenRouter :free — shared 20 RPM / 200 RPD / ~6M tokens across :free pool
['openrouter', 'inclusionai/ling-2.6-flash:free', 'Ling 2.6 Flash (free)', 7, 9, 'Large', 20, 200, null, null, '~6M', 262144],
['openrouter', 'arcee-ai/trinity-large-preview:free', 'Trinity Large Preview (free)', 13, 9, 'Frontier', 20, 200, null, null, '~6M', 131072],
['openrouter', 'nvidia/nemotron-3-nano-30b-a3b:free', 'Nemotron 3 Nano 30B (free)', 22, 9, 'Medium', 20, 200, null, null, '~6M', 262144],
['openrouter', 'openai/gpt-oss-120b:free', 'GPT-OSS 120B (free)', 6, 9, 'Large', 20, 200, null, null, '~6M', 131072],
['openrouter', 'openai/gpt-oss-20b:free', 'GPT-OSS 20B (free)', 18, 9, 'Medium', 20, 200, null, null, '~6M', 131072],
['openrouter', 'meta-llama/llama-3.3-70b-instruct:free', 'Llama 3.3 70B (free)', 17, 9, 'Medium', 20, 200, null, null, '~6M', 131072],
// SambaNova — 20 RPM / 20 RPD / 200K TPD shared free Developer tier
['sambanova', 'DeepSeek-V3.1', 'DeepSeek V3.1', 5, 9, 'Frontier', 20, 20, null, 200000, '~3M', 131072],
['sambanova', 'DeepSeek-V3.2', 'DeepSeek V3.2', 4, 9, 'Frontier', 20, 20, null, 200000, '~3M', 131072],
['sambanova', 'Llama-4-Maverick-17B-128E-Instruct', 'Llama 4 Maverick', 11, 9, 'Large', 20, 20, null, 200000, '~3M', 8192],
['sambanova', 'gpt-oss-120b', 'GPT-OSS 120B (SambaNova)', 6, 9, 'Large', 20, 20, null, 200000, '~3M', 131072],
// Groq — very fast; 30 RPM per model, 1000 RPD on most, 14.4k on the 8B
['groq', 'openai/gpt-oss-120b', 'GPT-OSS 120B (Groq)', 6, 2, 'Large', 30, 1000, 8000, 200000, '~6M', 131072],
['groq', 'openai/gpt-oss-20b', 'GPT-OSS 20B (Groq)', 18, 2, 'Medium', 30, 1000, 8000, 200000, '~6M', 131072],
['groq', 'qwen/qwen3-32b', 'Qwen3 32B (Groq)', 19, 2, 'Medium', 60, 1000, 6000, 500000, '~15M', 131072],
['groq', 'llama-3.1-8b-instant', 'Llama 3.1 8B Instant', 28, 2, 'Small', 30, 14400, 6000, 500000, '~15M', 131072],
// Mistral Experiment tier — shared 2 RPM / 500k TPM / 1B tokens/mo across all models
['mistral', 'devstral-latest', 'Devstral', 16, 8, 'Medium', 2, null, 500000, null, '~50-100M', 131072],
['mistral', 'mistral-medium-latest', 'Mistral Medium 3.5', 14, 8, 'Large', 2, null, 500000, null, '~50-100M', 131072],
// GitHub Models — Low-tier category (15 RPM / 150 RPD, 8K in / 4K out per call)
['github', 'openai/gpt-4.1', 'GPT-4.1 (GitHub)', 20, 7, 'Large', 10, 50, null, null, '~9M', 128000],
// Cohere — shared 1000 calls/mo trial pool, 20 RPM Chat
['cohere', 'command-a-03-2025', 'Command-A (03-2025)', 27, 11, 'Large', 20, 33, null, null, '~1-2M', 131072],
// Cloudflare Workers AI — shared 10K Neurons/day free pool across all @cf/* models
['cloudflare', '@cf/openai/gpt-oss-120b', 'GPT-OSS 120B (CF)', 6, 11, 'Large', null, null, null, null, '~18-45M', 131072],
['cloudflare', '@cf/zai-org/glm-4.7-flash', 'GLM-4.7 Flash (CF)', 10, 11, 'Large', null, null, null, null, '~18-45M', 131072],
['cloudflare', '@cf/meta/llama-4-scout-17b-16e-instruct', 'Llama 4 Scout (CF)', 12, 11, 'Large', null, null, null, null, '~18-45M', 131072],
];
const apply = db.transaction(() => {
for (const a of additions) insert.run(...a);
const missing = db.prepare(`
SELECT m.id FROM models m
LEFT JOIN fallback_config f ON m.id = f.model_db_id
WHERE f.id IS NULL ORDER BY m.intelligence_rank ASC
`).all() as { id: number }[];
if (missing.length > 0) {
const maxPriority = (db.prepare('SELECT COALESCE(MAX(priority), 0) AS mx FROM fallback_config').get() as { mx: number }).mx;
const addFb = db.prepare('INSERT INTO fallback_config (model_db_id, priority, enabled) VALUES (?, ?, 1)');
for (let i = 0; i < missing.length; i++) addFb.run(missing[i].id, maxPriority + i + 1);
}
});
apply();
// 5) Re-rank the live catalog by agentic tool-use capability (lower = smarter).
// Grounded in April 2026 SWE-Bench Verified + BFCL v3 + Tau-Bench numbers.
const setRank = db.prepare(`UPDATE models SET intelligence_rank = ? WHERE platform = ? AND model_id = ?`);
const ranks: Array<[number, string, string]> = [
[1, 'openrouter', 'minimax/minimax-m2.5:free'],
[2, 'openrouter', 'qwen/qwen3-coder:free'],
[3, 'openrouter', 'qwen/qwen3-next-80b-a3b-instruct:free'],
[4, 'sambanova', 'DeepSeek-V3.2'],
[5, 'sambanova', 'DeepSeek-V3.1'],
[6, 'cerebras', 'qwen-3-235b-a22b-instruct-2507'],
[6, 'openrouter', 'openai/gpt-oss-120b:free'],
[6, 'groq', 'openai/gpt-oss-120b'],
[6, 'sambanova', 'gpt-oss-120b'],
[6, 'cloudflare', '@cf/openai/gpt-oss-120b'],
[7, 'openrouter', 'inclusionai/ling-2.6-flash:free'],
[8, 'openrouter', 'z-ai/glm-4.5-air:free'],
[10, 'cloudflare', '@cf/zai-org/glm-4.7-flash'],
[11, 'sambanova', 'Llama-4-Maverick-17B-128E-Instruct'],
[12, 'groq', 'meta-llama/llama-4-scout-17b-16e-instruct'],
[12, 'cloudflare', '@cf/meta/llama-4-scout-17b-16e-instruct'],
[13, 'openrouter', 'arcee-ai/trinity-large-preview:free'],
[14, 'google', 'gemini-2.5-pro'],
[14, 'mistral', 'mistral-large-latest'],
[14, 'mistral', 'mistral-medium-latest'],
[16, 'mistral', 'devstral-latest'],
[16, 'mistral', 'codestral-latest'],
[17, 'groq', 'llama-3.3-70b-versatile'],
[17, 'sambanova', 'Meta-Llama-3.3-70B-Instruct'],
[17, 'cloudflare', '@cf/meta/llama-3.3-70b-instruct-fp8-fast'],
[17, 'openrouter', 'meta-llama/llama-3.3-70b-instruct:free'],
[17, 'nvidia', 'meta/llama-3.1-70b-instruct'],
[18, 'openrouter', 'openai/gpt-oss-20b:free'],
[18, 'groq', 'openai/gpt-oss-20b'],
[19, 'groq', 'qwen/qwen3-32b'],
[20, 'google', 'gemini-2.5-flash'],
[20, 'github', 'openai/gpt-4.1'],
[21, 'mistral', 'magistral-medium-latest'],
[22, 'openrouter', 'nvidia/nemotron-3-super-120b-a12b:free'],
[23, 'openrouter', 'nvidia/nemotron-3-nano-30b-a3b:free'],
[24, 'zhipu', 'glm-4.5-flash'],
[25, 'github', 'gpt-4o'],
[26, 'google', 'gemini-2.5-flash-lite'],
[27, 'cohere', 'command-a-03-2025'],
[27, 'cohere', 'command-r-plus-08-2024'],
[28, 'groq', 'llama-3.1-8b-instant'],
];
const applyRanks = db.transaction(() => {
for (const [r, p, m] of ranks) setRank.run(r, p, m);
});
applyRanks();
}
/**
 * V5: Google moved all Pro-tier Gemini off the free tier on 2026-04-01 — disable
 * gemini-2.5-pro. Add Cerebras `zai-glm-4.7` (355B z.ai GLM preview, newly on
 * free tier but throttled to 10 RPM / 100 RPD due to high demand; context capped
 * at 8192 on free tier).
 *
 * The disable, the insert, and the fallback backfill all run inside one
 * transaction, so an error in a later step does not leave gemini-2.5-pro
 * disabled on its own.
 *
 * @param db Database handle whose `models` and `fallback_config` tables are modified.
 */
function migrateModelsV5(db: Database.Database) {
  const disableGeminiPro = db.prepare(`UPDATE models SET enabled = 0 WHERE platform = 'google' AND model_id = 'gemini-2.5-pro'`);
  const insert = db.prepare(`
INSERT OR IGNORE INTO models (platform, model_id, display_name, intelligence_rank, speed_rank, size_label, rpm_limit, rpd_limit, tpm_limit, tpd_limit, monthly_token_budget, context_window)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`);

  const apply = db.transaction(() => {
    disableGeminiPro.run();
    insert.run('cerebras', 'zai-glm-4.7', 'GLM-4.7 (Cerebras)', 7, 1, 'Frontier', 10, 100, null, null, '~3M', 8192);

    // Backfill: every model that has no fallback_config row (not only the one
    // inserted above) gets an enabled entry, smartest (lowest rank) first.
    const missing = db.prepare(`
SELECT m.id FROM models m
LEFT JOIN fallback_config f ON m.id = f.model_db_id
WHERE f.id IS NULL ORDER BY m.intelligence_rank ASC
`).all() as { id: number }[];
    if (missing.length === 0) return;

    // New entries are appended after the current highest priority.
    const maxPriority = (db.prepare('SELECT COALESCE(MAX(priority), 0) AS mx FROM fallback_config').get() as { mx: number }).mx;
    const addFb = db.prepare('INSERT INTO fallback_config (model_db_id, priority, enabled) VALUES (?, ?, 1)');
    for (const [offset, { id }] of missing.entries()) {
      addFb.run(id, maxPriority + offset + 1);
    }
  });
  apply();
}
/**
 * V6: Live-probed against real free-tier keys on 2026-04-25.
 *
 * Corrections (Google free-tier RPD): the documented "250" / "1000" RPD numbers
 * for gemini-2.5-flash and gemini-2.5-flash-lite are stale — both share a 20
 * RPD per-model-per-project free pool now. Confirmed by the
 * `generate_content_free_tier_requests` quota error, limit 20.
 *
 * Removals: arcee-ai/trinity-large-preview:free returns 404 "No endpoints found"
 * — pulled from OpenRouter's free pool. (Other previously-suspected dead OR :free
 * IDs are still live in /api/v1/models, so they stay.)
 *
 * Additions (all probe-verified to return 200 with content on the user's keys):
 * - 3 Cloudflare Workers AI reasoning models
 * - 3 Google preview models, including Pro (which returned a free-tier 429
 *   against the same 20 RPD pool, confirming free-tier eligibility)
 * - 2 OpenRouter :free models with no expiration_date
 *
 * The removal, the quota corrections, the inserts, and the fallback backfill
 * run inside a single transaction so they are applied together or not at all.
 *
 * @param db Database handle whose `models` and `fallback_config` tables are modified.
 */
function migrateModelsV6(db: Database.Database) {
  // Column order matches the INSERT statement below.
  type ModelRow = [string, string, string, number, number, string, number | null, number | null, number | null, number | null, string, number | null];

  // 1) Remove confirmed-dead OR route
  const deleteFallback = db.prepare(`
DELETE FROM fallback_config WHERE model_db_id IN (
SELECT id FROM models WHERE platform = ? AND model_id = ?
)
`);
  const deleteModel = db.prepare(`DELETE FROM models WHERE platform = ? AND model_id = ?`);
  const removals: Array<[string, string]> = [
    ['openrouter', 'arcee-ai/trinity-large-preview:free'],
  ];

  // 2) Correct stale Google free-tier RPD numbers
  const setGoogleFreeTierQuota = db.prepare(`
UPDATE models SET rpd_limit = 20, monthly_token_budget = '~3M'
WHERE platform = 'google' AND model_id = ?
`);
  const staleGoogleModels = ['gemini-2.5-flash', 'gemini-2.5-flash-lite'];

  // 3) Add live-probed models
  const insert = db.prepare(`
INSERT OR IGNORE INTO models (platform, model_id, display_name, intelligence_rank, speed_rank, size_label, rpm_limit, rpd_limit, tpm_limit, tpd_limit, monthly_token_budget, context_window)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`);
  const additions: ModelRow[] = [
    // Cloudflare Workers AI — 10K Neurons/day shared free pool. Reasoning traces
    // burn output tokens fast, so per-call effective budget is small. Estimates
    // assume 1K-in/500-out typical: kimi-k2.5 ≈ 50/day, qwen3-30b ≈ 200/day,
    // r1-distill ≈ 5/day on the reasoning-heavy path.
    ['cloudflare', '@cf/moonshotai/kimi-k2.5', 'Kimi K2.5 (CF)', 3, 11, 'Frontier', null, null, null, null, '~10-20M', 262144],
    ['cloudflare', '@cf/qwen/qwen3-30b-a3b-fp8', 'Qwen3 30B-A3B fp8 (CF)', 7, 11, 'Large', null, null, null, null, '~18-45M', 131072],
    ['cloudflare', '@cf/deepseek-ai/deepseek-r1-distill-qwen-32b', 'DeepSeek R1 Distill Qwen 32B (CF)', 9, 11, 'Large', null, null, null, null, '~3-5M', 131072],
    // Google preview tier — shares the 20 RPD per-model free pool. Pro confirmed
    // free-tier-eligible by the `free_tier_requests` quota metric in 429 errors.
    ['google', 'gemini-3.1-flash-lite-preview', 'Gemini 3.1 Flash-Lite Preview', 18, 3, 'Medium', 15, 20, 250000, null, '~3M', 1048576],
    ['google', 'gemini-3-flash-preview', 'Gemini 3 Flash Preview', 11, 5, 'Large', 10, 20, 250000, null, '~3M', 1048576],
    ['google', 'gemini-3.1-pro-preview', 'Gemini 3.1 Pro Preview', 1, 8, 'Frontier', 5, 20, 250000, null, '~3M', 1048576],
    // OpenRouter :free pool — 20 RPM / 50 RPD (1000 once $10 credits bought).
    // The rows record rpd_limit = 200 rather than 50; the V7 migration's comment
    // describes 200 as the catalog convention for :free rows.
    // NOTE(review): an earlier migration in this file deleted
    // google/gemma-4-31b-it:free as "weak at tool use"; this re-adds it —
    // confirm that is intentional.
    ['openrouter', 'google/gemma-4-31b-it:free', 'Gemma 4 31B (free)', 19, 9, 'Medium', 20, 200, null, null, '~6M', 262144],
    ['openrouter', 'liquid/lfm-2.5-1.2b-instruct:free', 'Liquid LFM 2.5 1.2B (free)', 30, 10, 'Small', 20, 200, null, null, '~6M', 32768],
  ];

  const apply = db.transaction(() => {
    for (const [platform, modelId] of removals) {
      // Fallback rows go first: the subquery resolves the id through the
      // models row, which the second statement deletes.
      deleteFallback.run(platform, modelId);
      deleteModel.run(platform, modelId);
    }

    for (const modelId of staleGoogleModels) setGoogleFreeTierQuota.run(modelId);

    for (const row of additions) insert.run(...row);

    // Backfill: every model without a fallback_config row gets an enabled
    // entry, smartest (lowest rank) first, after the current highest priority.
    const missing = db.prepare(`
SELECT m.id FROM models m
LEFT JOIN fallback_config f ON m.id = f.model_db_id
WHERE f.id IS NULL ORDER BY m.intelligence_rank ASC
`).all() as { id: number }[];
    if (missing.length === 0) return;

    const maxPriority = (db.prepare('SELECT COALESCE(MAX(priority), 0) AS mx FROM fallback_config').get() as { mx: number }).mx;
    const addFb = db.prepare('INSERT INTO fallback_config (model_db_id, priority, enabled) VALUES (?, ?, 1)');
    for (const [offset, { id }] of missing.entries()) {
      addFb.run(id, maxPriority + offset + 1);
    }
  });
  apply();
}
/**
 * V7 (April 2026): live-probed delta against OpenRouter's free pool + Z.ai.
 * - Removes inclusionai/ling-2.6-flash:free (transitioned to paid, 404 on chat).
 * - Adds 8 new :free routes confirmed via /v1/models + chat-completion probe.
 * - Adds zhipu/glm-4.7-flash (probe: 429 "overloaded" — free-pool throttle, not
 *   "insufficient balance" which paid models return). Same baseUrl works for both
 *   api.z.ai and open.bigmodel.cn keys.
 * HF and NVIDIA left as-is: HF still serves chat with current key; NVIDIA already disabled.
 *
 * The removal, the inserts, and the fallback backfill run inside a single
 * transaction so they are applied together or not at all.
 *
 * @param db Database handle whose `models` and `fallback_config` tables are modified.
 */
function migrateModelsV7(db: Database.Database) {
  // Column order matches the INSERT statement below.
  type ModelRow = [string, string, string, number, number, string, number | null, number | null, number | null, number | null, string, number | null];

  const deleteFallback = db.prepare(`
DELETE FROM fallback_config WHERE model_db_id IN (
SELECT id FROM models WHERE platform = ? AND model_id = ?
)
`);
  const deleteModel = db.prepare(`DELETE FROM models WHERE platform = ? AND model_id = ?`);
  const removals: Array<[string, string]> = [
    ['openrouter', 'inclusionai/ling-2.6-flash:free'],
  ];

  const insert = db.prepare(`
INSERT OR IGNORE INTO models (platform, model_id, display_name, intelligence_rank, speed_rank, size_label, rpm_limit, rpd_limit, tpm_limit, tpd_limit, monthly_token_budget, context_window)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`);
  // OpenRouter :free quotas: 20 RPM / 50 RPD without credits, 1000 RPD with $10 lifetime topup.
  // Catalog convention is rpd=200 (matches existing rows).
  const additions: ModelRow[] = [
    ['openrouter', 'inclusionai/ling-2.6-1t:free', 'Ling 2.6 1T (free)', 4, 9, 'Frontier', 20, 200, null, null, '~6M', 262144],
    ['openrouter', 'tencent/hy3-preview:free', 'Tencent HY3 Preview (free)', 7, 9, 'Frontier', 20, 200, null, null, '~6M', 262144],
    ['openrouter', 'poolside/laguna-m.1:free', 'Poolside Laguna M.1 (free)', 13, 9, 'Large', 20, 200, null, null, '~6M', 131072],
    ['openrouter', 'google/gemma-4-26b-a4b-it:free', 'Gemma 4 26B-A4B (free)', 22, 9, 'Medium', 20, 200, null, null, '~6M', 262144],
    ['openrouter', 'nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free', 'Nemotron 3 Nano 30B Reasoning (free)', 23, 9, 'Medium', 20, 200, null, null, '~6M', 262144],
    ['openrouter', 'poolside/laguna-xs.2:free', 'Poolside Laguna XS.2 (free)', 26, 10, 'Medium', 20, 200, null, null, '~6M', 131072],
    ['openrouter', 'nvidia/nemotron-nano-9b-v2:free', 'Nemotron Nano 9B v2 (free)', 28, 10, 'Medium', 20, 200, null, null, '~6M', 128000],
    ['openrouter', 'liquid/lfm-2.5-1.2b-thinking:free', 'Liquid LFM 2.5 1.2B Thinking (free)', 30, 10, 'Small', 20, 200, null, null, '~6M', 32768],
    // Zhipu (Z.ai) — free pool. glm-4.7-flash quotas unpublished; mirror glm-4.5-flash row shape.
    ['zhipu', 'glm-4.7-flash', 'GLM-4.7 Flash', 18, 4, 'Large', null, null, null, 1000000, '~30M', 131072],
  ];

  const apply = db.transaction(() => {
    for (const [platform, modelId] of removals) {
      // Fallback rows go first: the subquery resolves the id through the
      // models row, which the second statement deletes.
      deleteFallback.run(platform, modelId);
      deleteModel.run(platform, modelId);
    }

    for (const row of additions) insert.run(...row);

    // Backfill: every model without a fallback_config row gets an enabled
    // entry, smartest (lowest rank) first, after the current highest priority.
    const missing = db.prepare(`
SELECT m.id FROM models m
LEFT JOIN fallback_config f ON m.id = f.model_db_id
WHERE f.id IS NULL ORDER BY m.intelligence_rank ASC
`).all() as { id: number }[];
    if (missing.length === 0) return;

    const maxPriority = (db.prepare('SELECT COALESCE(MAX(priority), 0) AS mx FROM fallback_config').get() as { mx: number }).mx;
    const addFb = db.prepare('INSERT INTO fallback_config (model_db_id, priority, enabled) VALUES (?, ?, 1)');
    for (const [offset, { id }] of missing.entries()) {
      addFb.run(id, maxPriority + offset + 1);
    }
  });
  apply();
}
/**
 * V8 (May 2026): catalog delta after three days.
 *
 * - SambaNova's /v1/models gained two free-tier models.
 * - Cloudflare's @cf catalog gained two new text models; all @cf/* models
 *   share the 10K Neurons/day free pool.
 *
 * All four were probe-verified (HTTP 200) with the user's keys. SambaNova's
 * paid-only MiniMax-M2.5 explicitly answers 422 "Couldn't find valid service
 * tier", so a 200 on the rows below confirms free-tier access.
 *
 * @param db Database handle whose `models` and `fallback_config` tables are modified.
 */
function migrateModelsV8(db: Database.Database) {
  // Column order matches the INSERT statement below.
  type CatalogRow = [string, string, string, number, number, string, number | null, number | null, number | null, number | null, string, number | null];

  const insertModel = db.prepare(`
INSERT OR IGNORE INTO models (platform, model_id, display_name, intelligence_rank, speed_rank, size_label, rpm_limit, rpd_limit, tpm_limit, tpd_limit, monthly_token_budget, context_window)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`);

  const newRows: CatalogRow[] = [
    // SambaNova free pool: 20 RPM / 20 RPD / 200K TPD, shared by every free model.
    ['sambanova', 'DeepSeek-V3.1-cb', 'DeepSeek V3.1 (CB)', 5, 9, 'Frontier', 20, 20, null, 200000, '~3M', 131072],
    ['sambanova', 'gemma-3-12b-it', 'Gemma 3 12B (SambaNova)', 22, 9, 'Medium', 20, 20, null, 200000, '~3M', 131072],
    // Cloudflare @cf: 10K Neurons/day shared pool.
    ['cloudflare', '@cf/moonshotai/kimi-k2.6', 'Kimi K2.6 (CF)', 2, 11, 'Frontier', null, null, null, null, '~10-20M', 262144],
    ['cloudflare', '@cf/ibm-granite/granite-4.0-h-micro', 'Granite 4.0 H Micro (CF)', 29, 11, 'Small', null, null, null, null, '~5-10M', 131072],
  ];

  const runMigration = db.transaction(() => {
    newRows.forEach((row) => {
      insertModel.run(...row);
    });

    // Models that have no fallback_config row yet, smartest (lowest rank) first.
    const orphans = db.prepare(`
SELECT m.id FROM models m
LEFT JOIN fallback_config f ON m.id = f.model_db_id
WHERE f.id IS NULL ORDER BY m.intelligence_rank ASC
`).all() as { id: number }[];
    if (orphans.length === 0) return;

    // Append the orphans after whatever priority is currently the highest.
    const { mx: topPriority } = db.prepare('SELECT COALESCE(MAX(priority), 0) AS mx FROM fallback_config').get() as { mx: number };
    const appendFallback = db.prepare('INSERT INTO fallback_config (model_db_id, priority, enabled) VALUES (?, ?, 1)');
    orphans.forEach(({ id }, offset) => {
      appendFallback.run(id, topPriority + offset + 1);
    });
  });

  runMigration();
}
/**
 * V9 (May 2026): turn off cerebras/zai-glm-4.7.
 *
 * Cerebras still lists the model in /v1/models, but for free-tier keys the
 * chat-completions endpoint answers 404 "Model does not exist or you do not
 * have access". That matches the note in their docs about temporarily
 * reducing free-tier access to zai-glm-4.7 because of high demand.
 *
 * The row is disabled rather than deleted so it can be switched back on later
 * without losing its fallback_config history.
 *
 * @param db Database handle whose `models` table is modified.
 */
function migrateModelsV9(db: Database.Database) {
  const disableCerebrasGlm = db.prepare(
    "UPDATE models SET enabled = 0 WHERE platform = 'cerebras' AND model_id = 'zai-glm-4.7'"
  );
  disableCerebrasGlm.run();
}
/**
 * V10 (May 2026): Ollama Cloud, the first new platform since Z.ai/Zhipu in V7.
 *
 * Free plan: quota is GPU-time-based (not per-token), 1 concurrent model,
 * 5h session caps, no card required. /v1/models lists 39 SKUs but only 28
 * respond on the Free tier; paid models return 403 with an explicit
 * "this model requires a subscription" message.
 *
 * The list is curated to ~10 representative free models that either
 * (a) aren't reachable elsewhere in the catalog or (b) offer a useful
 * alternate route through Ollama's independent rate-limit pool.
 * Probe-verified May 2 2026.
 *
 * Quota shape: GPU-time, not tokens. monthly_token_budget reflects rough
 * Free-tier "session" capacity rather than a hard token cap.
 *
 * @param db Database handle whose `models` and `fallback_config` tables are modified.
 */
function migrateModelsV10(db: Database.Database) {
  // Column order matches the INSERT statement below.
  type OllamaRow = [string, string, string, number, number, string, number | null, number | null, number | null, number | null, string, number | null];

  const insertModel = db.prepare(`
INSERT OR IGNORE INTO models (platform, model_id, display_name, intelligence_rank, speed_rank, size_label, rpm_limit, rpd_limit, tpm_limit, tpd_limit, monthly_token_budget, context_window)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`);

  // The budget strings are estimates, because Ollama publishes no token cap
  // (quota is GPU-time + 7-day rolling). Frontier ~5-10M, Large ~10-20M and
  // Medium ~20-30M reflect that heavier models burn quota faster. The numeric
  // limits stay null: real provider throttling is the source of truth, not
  // these display strings.
  const ollamaRows: OllamaRow[] = [
    ['ollama', 'qwen3-coder:480b', 'Qwen3-Coder 480B (Ollama)', 2, 9, 'Frontier', null, null, null, null, '~5-10M', 262144],
    ['ollama', 'mistral-large-3:675b', 'Mistral Large 3 675B (Ollama)', 3, 9, 'Frontier', null, null, null, null, '~5-10M', 131072],
    ['ollama', 'deepseek-v3.2', 'DeepSeek V3.2 (Ollama)', 4, 9, 'Frontier', null, null, null, null, '~5-10M', 131072],
    ['ollama', 'cogito-2.1:671b', 'Cogito 2.1 671B (Ollama)', 4, 9, 'Frontier', null, null, null, null, '~5-10M', 131072],
    ['ollama', 'kimi-k2-thinking', 'Kimi K2 Thinking (Ollama)', 5, 9, 'Frontier', null, null, null, null, '~5-10M', 131072],
    ['ollama', 'glm-4.7', 'GLM-4.7 (Ollama)', 6, 9, 'Frontier', null, null, null, null, '~5-10M', 131072],
    ['ollama', 'gpt-oss:120b', 'GPT-OSS 120B (Ollama)', 6, 9, 'Large', null, null, null, null, '~10-20M', 131072],
    ['ollama', 'devstral-2:123b', 'Devstral 2 123B (Ollama)', 8, 10, 'Large', null, null, null, null, '~10-20M', 131072],
    ['ollama', 'gpt-oss:20b', 'GPT-OSS 20B (Ollama)', 18, 10, 'Medium', null, null, null, null, '~20-30M', 131072],
    ['ollama', 'gemma4:31b', 'Gemma 4 31B (Ollama)', 22, 10, 'Medium', null, null, null, null, '~20-30M', 131072],
  ];

  const runMigration = db.transaction(() => {
    for (const row of ollamaRows) {
      insertModel.run(...row);
    }

    // Models that have no fallback_config row yet, smartest (lowest rank) first.
    const unrouted = db.prepare(`
SELECT m.id FROM models m
LEFT JOIN fallback_config f ON m.id = f.model_db_id
WHERE f.id IS NULL ORDER BY m.intelligence_rank ASC
`).all() as { id: number }[];
    if (unrouted.length === 0) {
      return;
    }

    // Queue them behind the current highest priority, one slot each.
    const priorityRow = db.prepare('SELECT COALESCE(MAX(priority), 0) AS mx FROM fallback_config').get() as { mx: number };
    const basePriority = priorityRow.mx;
    const appendFallback = db.prepare('INSERT INTO fallback_config (model_db_id, priority, enabled) VALUES (?, ?, 1)');
    for (const [offset, { id }] of unrouted.entries()) {
      appendFallback.run(id, basePriority + offset + 1);
    }
  });

  runMigration();
}
/**
* V11 (May 2026):
* 1. Fix long-standing bug: Cerebras `qwen3-235b` was inserted with the
* wrong model_id in the original seed (real id is
* `qwen-3-235b-a22b-instruct-2507`). Subsequent rank/limit updates that
* target the correct id have been silent no-ops since V0 on fresh deploys.
* 2. Re-enable NVIDIA NIM — `meta/llama-3.1-70b-instruct` was disabled in V2
* when NIM moved to credits. Per May 2026 audit it's free again (~1,000
* starter credits never expire, 40 RPM/model).
* 3. Add three new aggregator/anon-friendly platforms confirmed live May 2026:
* Kilo Gateway, Pollinations, LLM7.io — all three accept anonymous
* requests on at least one model.
* - The user still needs a placeholder key entry (any non-empty string
* works) because the router filters on `keys.length === 0` to decide
* whether a platform is routable.
* Chutes was evaluated and dropped: probe with a free-tier key returned
* 402 on every model — "Quota exceeded and account balance is $0.0,
* please pay with fiat or send tao". The "free" tier requires a paid
* balance, which conflicts with the no-card criterion.
*/
function migrateModelsV11(db: Database.Database) {
// 1) Rename cerebras qwen3-235b → qwen-3-235b-a22b-instruct-2507 if the
// old id still exists on this DB. Safe to re-run because of the WHERE.
db.prepare(`
UPDATE models SET model_id = 'qwen-3-235b-a22b-instruct-2507'
WHERE platform = 'cerebras' AND model_id = 'qwen3-235b'
`).run();
// 2) Re-enable NVIDIA NIM (still has 1,000+ starter credits free-tier).
db.prepare(`
UPDATE models SET enabled = 1, monthly_token_budget = '~3M (1k credits)'
WHERE platform = 'nvidia' AND model_id = 'meta/llama-3.1-70b-instruct'
`).run();
// 3) Add catalog rows for the four new platforms. Numeric limits are
// conservative — provider docs publish best-effort bounds that fluctuate.