-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathworkerv85x.js
More file actions
6976 lines (6765 loc) · 315 KB
/
Copy pathworkerv85x.js
File metadata and controls
6976 lines (6765 loc) · 315 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import { connect as cfConnect } from "cloudflare:sockets";
// workerv85x.js — 8 verified fixes: endpoint success path, retry backoff, parseIPs cache,
// getDynamicProxyIP cache, base64 normalization, sha224 loop alloc,
// spurious async removal, SWR comment accuracy
// Use-case: single user, used as VPN + upstream proxy, priority = zero disconnects,
// maximum throughput, instant DNS, multi-hour sessions, subway reconnect.
//
// Changes vs v84x (8 changes: 3 correctness, 2 performance, 2 reliability, 1 cleanup):
//
// RELIABILITY:
// R1. ENDPOINT_HEALTH: add markEndpointSuccess to clear false-positive blacklisting.
// markEndpointFailure correctly penalizes a proxy after 2 failures within cooldown.
// There was no success path. If a proxy recovered 5 seconds into a 90-second cooldown,
// every connection for the remaining 85 seconds avoided it and piled onto the surviving
// pool. On a 2-proxy setup, one transient blip meant 100% of load on one proxy for up
// to 90 seconds. Fix: add markEndpointSuccess(host) that halves rec.failures (floor 0)
// on every successful Phase 1 TCP open. Because each success halves the count (rounding down),
// two clean connects bring a proxy with up to 7 recorded failures back below the fail threshold;
// heavier penalties need more successes. Called from handleTCPOutBound right
// after the "connected to" log line, only when proxyMode === "proxyip" (same guard as
// markEndpointFailure so direct/prefix destinations are never touched).
//
// R2. handleTCPOutBound: replace flat RETRY_DELAY_MS=25ms with attempt-indexed backoff.
// 25ms (one event-loop tick) was chosen as "no thundering herd risk for single user."
// That reasoning applies to load-balancing, not to reconnect scenarios. On a subway
// reconnect the physical interface comes back but routing tables are still converging
// (100–500ms). With 25ms delay, all 3 attempts (3 × 3s timeout) fired within ~100ms
// total and all failed for the same reason — the delay provided zero additional signal.
// Fix: delay = Math.min(300 * i, 600) ms where i is the 0-based attempt index (i = 1 on the first retry).
// First retry: 300ms. Second retry: 600ms. Total max: 3s + 300ms + 3s + 600ms + 3s
// = 9.9s, well within the 3-attempt budget. Gives the network stack time to re-route
// before the next attempt. RETRY_DELAY_MS constant removed; delay computed inline.
//
// PERFORMANCE:
// P1. parseIPs: cache parsed results at module level via _parsedIPsCache.
// parseIPs(envProxyIPs) and parseIPs(envPrefixes) were called on every single
// handleTCPOutBound invocation (every new TCP connection the VPN opens). envProxyIPs
// and envPrefixes are Cloudflare env vars set at Worker deployment — they never change
// within an isolate's lifetime. Splitting and filtering the same comma-separated string
// on every connection is pure waste. Fix: module-level Map<string, string[]> keyed by
// the raw string value. First call parses and stores; all subsequent calls are a Map
// lookup. Zero behaviour change. On a Windows Update session (20+ parallel connections)
// this eliminates 20+ redundant string splits per second.
//
// P2. getDynamicProxyIP: cache NAT64 result per (address|prefix) key within an isolate.
// In prefix mode, getDynamicProxyIP(addressRemote, prefix, dohURL) was called on every
// handleTCPOutBound invocation including retries. The function does a DoH A-record
// lookup for the remote hostname and applies a NAT64 prefix transform. For a sustained
// session connecting to the same CDN edge (e.g. Instagram, always the same IP for
// 5+ minutes), this fired redundant DoH requests on every connection attempt. Fix:
// module-level Map keyed by "address|prefix". On a hit the DoH lookup is skipped
// entirely. Bounded to 200 entries to prevent unbounded growth on pathological traffic.
// Entries are intentionally not TTL-evicted here: the underlying DNS TTL is already
// managed by DNS_CACHE; this is purely a transform-result cache.
//
// CORRECTNESS:
// C1. base64ToUint8Array: normalize URL-safe alphabet and missing padding before atob().
// atob() throws a DOMException if the input uses the URL-safe alphabet (- instead of
// +, _ instead of /) or is missing the = padding characters. base64ToArrayBuffer
// (used for WS early-data) already handles this correctly. base64ToUint8Array —
// used via base64DecodeUtf8 for vmess chain proxy parsing (line ~1595) and SS/SOCKS
// userinfo parsing — did not normalize. A vmess:// URI whose inner payload is
// base64url-encoded without padding would throw a fatal DOMException, silently
// breaking chain proxy config with no error logged in the settings-save path.
// Fix: apply .trim().replace(/-/g,"+").replace(/_/g,"/") and pad to 4-char boundary
// before atob(). Consistent with base64ToArrayBuffer's existing normalization.
//
// C2. sha224: hoist w = new Uint32Array(64) outside the 64-byte chunk loop.
// The Uint32Array was allocated fresh on every 64-byte block of the SHA-224 input.
// For a short Trojan password this is 1 allocation (1 block); for longer strings
// it scales with length. sha224 runs once per isolate lifetime (getCachedTrojan-
// PasswordHash caches the result after first call), so the impact is bounded to
// cold-start. Fix: declare w once before the outer loop; zero it with w.fill(0) at
// the start of each iteration. Semantically identical, no per-block allocation.
//
// C3. handleUDPOutBound / handleTrojanUDPOutBound: remove spurious async declaration.
// Both functions were declared async but contain zero await expressions. They return
// a synchronous { write(chunk) {} } object immediately. async wraps that object in
// Promise.resolve({write}) — every caller then awaits that Promise to unwrap the
// plain object, adding one unnecessary microtask tick per DNS connection setup.
// Removing async makes the function signature match its actual behaviour.
//
// CLEANUP:
// CL1. resolveDNS: correct the misleading SWR comment.
// The comment said "let the normal fetch path below refresh in the background."
// This was inaccurate: the SWR branch returns immediately and starts NO background
// fetch. The refresh only happens when the 60-second grace window expires and the
// next incoming request triggers a cold-miss. This is correct intentional design
// for a single-user low-traffic isolate (avoids a background DoH fetch on every
// TTL expiry), but the comment implied a background process was already running.
// Updated to accurately describe the grace-window-then-next-caller behavior.
//
// Inherited from v84x (all verified correct, unchanged):
// C1–C2 (v84x), CL1 (v84x)
// C1–C2, CL1, O1, P1, S1 (v83x)
// R1, C1, CL1, O1, P1, S1 (v82x)
// CL1–CL2, P1–P5, S2–S3 (v81x)
// H1–H2, C1, CL1 (v80x), H1–H2, C1, CL1 (v79x)
// P1, R1, C1, S1 (v78x), BF1–BF3, R1 (v77x), BF1 (v76x)
// BF1–BF2, P1–P2 (v73x), BF1–BF4, P1–P2, S1 (v71x)
// BF3, P1–P2, H1–H5 (v70x), BF1–BF3, P1, H1, K1–K3 (v69x)
// BF1–BF5, P1–P4, R1–R3, H3, H6, H7, C5, L4, M3, N-new (v68x)
// BF1–BF2 (v64x), BF1–BF2 (v63x), M1c, H8c
// C1–C5, H1, H3–H7, M2–M3, L4, N1–N3, L1–L3, L5 (from v61x)
//
// Changes vs v83x (3 changes: 2 correctness, 1 cleanup):
//
// CORRECTNESS:
// C1. warmUpDNS: use correct onlyIPv4 value to match real cache keys.
// warmUpDNS always called resolveDNS(domain, false, dohURL) — onlyIPv4 hardcoded
// to false. The DNS cache key is "dohURL|domain|4" when onlyIPv4=true and
// "dohURL|domain|46" when false. getConfigAddresses calls
// resolveDNS(hostName, !enableIPv6, ...) and the fallback handler calls
// resolveDNS(fallbackDomain, !enableIPv6, ...). When the user has enableIPv6=false,
// every warm-up entry lands under the "46" key while the real callers look up the
// "4" key — a complete cache miss. The entire P1 (v83x) feature was a no-op for
// IPv6-disabled users, firing 12 background DoH requests that nobody ever reads.
// Fix: pass !ctx.settings.enableIPv6 instead of false.
// Also added: isDomain(fallbackDomain) guard so an IP-type FALLBACK env var
// (e.g. FALLBACK="1.2.3.4") doesn't trigger a pointless DoH A-record lookup
// for a literal IP address.
//
// C2. warmUpDNS: resolve all domains in parallel instead of sequentially.
// The warm-up loop was for (domain of domains) { await resolveDNS(...) } — strictly
// sequential. Each domain starts only after the previous one fully resolves.
// Worst-case: 12 domains × 500ms DNS_TIMEOUT = 6s of chained waiting. Realistic:
// 12 × ~150ms ≈ 1.8s. During that window real VPN traffic fires its own resolveDNS
// calls for different domains and receives zero benefit from the warm-up because
// DNS_IN_FLIGHT coalescing only helps for the exact domain already in-flight.
// Fix: replace the sequential loop with Promise.allSettled() so all warm-up domains
// fire simultaneously. All 12 DoH requests are in-flight at once; real traffic for
// any of those domains joins the in-flight promise via DNS_IN_FLIGHT coalescing
// and gets the result as soon as it resolves — instead of waiting for all previous
// domains to finish first. Total warm-up time = slowest single DoH call, not the sum.
//
// CLEANUP:
// CL1. Add block-scope braces to case "cache-clear" and case "cache-stats".
// Both cases declared const variables directly in case clauses without braces:
// const authClear / const authStats / const now / const proxyHealth / const stats.
// In JS, lexical declarations (const/let) in case clauses are scoped to the entire
// switch block, not just that case. No runtime crash today because each name is
// unique across all cases, but it is a no-case-declarations lint violation and a
// latent hazard: a future engineer adding a new case that reuses 'now' or 'stats'
// gets "SyntaxError: Identifier already declared" with no obvious cause.
// Fix: wrap both case bodies in { } blocks for correct lexical scoping.
// This is a pre-existing issue from v78x; first opportunity to clean it up.
//
// Inherited from v83x (all verified correct, unchanged):
// C1 (v83x), CL1 (v83x), O1 (v83x), P1 (v83x), S1 (v83x)
//
// Changes vs v82x (5 changes: 1 correctness, 1 cleanup, 1 observability, 1 performance, 1 stability):
//
// CORRECTNESS:
// C1. handleTCPOutBound: fix stale "cross-session health map" comment at the
// markEndpointFailure call site. v82x C1 updated the ENDPOINT_HEALTH declaration
// block but missed this second reference in handleTCPOutBound (line ~4414).
// A future engineer reading the call site in isolation would still see the
// inaccurate "cross-session" claim and build incorrect mental models.
// Fix: change to "per-isolate health map" to match the corrected declaration.
//
// CLEANUP:
// CL1. getRandomString: remove redundant `| 0` after Math.floor().
// Introduced in v81x P3 alongside the RANDOM_STRING_CHARSET_LEN cache.
// Math.floor() on a value in range [0, charset_length) always returns an integer
// in the safe-integer range — `| 0` adds no precision or safety and misleads
// readers into thinking the cast is necessary. Removed for clarity.
//
// OBSERVABILITY:
// O1. cache-stats: expose ENDPOINT_HEALTH state in the /cache-stats response.
// Previously the health map existed in memory with no external visibility —
// you could only learn which proxy IPs were in cooldown by reading logs.
// Now /cache-stats includes a proxyHealth array with each tracked host, its
// failure count, seconds since last failure, and whether it is currently healthy.
// Entries that have fully decayed are excluded from the output.
//
// PERFORMANCE:
// P1. Cold-start DNS pre-warming on first WebSocket connection.
// CF Workers isolates on the free tier are evicted after ~10 minutes of idle.
// On a cold start, DNS_CACHE is empty — every domain needs a fresh DoH round-trip.
// For a VPN session this means the first few seconds after an isolate restart
// hit full DNS latency for every tab and connection simultaneously.
// Fix: on the very first WebSocket connection of a fresh isolate (_isolateWarmedUp
// is false), fire background DoH lookups via ctx.context.waitUntil() for the
// predictable always-needed domains: the worker's own hostname, fallbackDomain,
// and up to 10 domain-type cleanIPs from settings. These are the domains that
// every session needs and that are cheap to pre-resolve. The warm-up is entirely
// non-blocking — it runs in the background and never delays the connection.
//
// STABILITY:
// S1. Structured WS close code descriptions in disconnect logs.
// S3 (v81x) added close code logging, but reading "code=1006" requires the
// engineer to recall the RFC meaning from memory. Added a module-level lookup
// table (WS_CLOSE_CODE_NAMES) that maps the most important WebSocket close codes
// to human-readable labels. The close event now logs:
// "WS closed: code=1006 [NAT/network kill] reason=none wasClean=false"
// making the log immediately actionable without consulting external references.
//
// Inherited from v82x (all verified correct, unchanged):
//
// REVERT:
// R1. safeWriteToOutbound: remove `await writer.ready` introduced in v81x S1.
// The reasoning in v81x S1 was theoretically sound — writer.ready ensures the
// underlying TCP sink signals readiness before accepting the next chunk — but it
// introduced a stop-and-wait pattern that is redundant and harmful in this code's
// actual execution model:
//
// safeWriteToOutbound is always called with `await` at its call sites. This means
// only ONE chunk is ever in-flight at a time. writer.write() in a sequential
// (non-concurrent) write pattern already stalls naturally when the stream's internal
// high-water mark is reached — the Promise it returns won't resolve until the
// queue has capacity. Adding writer.ready AFTER a completed write() then forces
// a second async checkpoint that waits for the NEXT write to be acceptable before
// returning, effectively halving the pipeline depth to zero and adding a
// microtask-tick overhead on every single chunk.
//
// Symptom: sustained downloads (Windows Update, Steam) were throttled because
// each chunk required two sequential async resolutions instead of one.
// The correct backpressure mechanism is already in place: writer.write() stalls
// when the queue is full. No additional ready check is needed or beneficial here.
//
// CORRECTNESS:
// C1. ENDPOINT_HEALTH: correct the scope comment from "cross-session" to "per-isolate".
// v81x S2's comment stated the map "shares failure knowledge across all sessions."
// This is only true when all sessions happen to land on the SAME isolate — which
// Cloudflare does not guarantee. CF Workers can and does spin up multiple isolate
// instances per Worker deployment; each isolate has its own module-level memory.
// The map is still valuable as a best-effort intra-isolate health check (concurrent
// connections within one isolate DO share it), but the comment overstated the
// guarantee. Updated to accurately describe the per-isolate scope so future
// engineers don't build logic on a false assumption.
// NOTE: Backing this with KV to achieve true cross-isolate sharing was evaluated
// and rejected — KV writes would exhaust the free-tier 1,000/day quota in minutes
// during an active Windows Update session.
//
// Inherited from v81x (all verified correct, unchanged):
// CL1 (v81x), CL2 (v81x)
// P1–P5 (v81x), S2 (v81x), S3 (v81x)
// [S1 (v81x) reverted above — see R1]
// H1 (v80x), C1 (v80x), CL1 (v80x)
// H1-H2 (v79x), C1 (v79x), CL1 (v79x)
// P1 (v78x), R1 (v78x), C1 (v78x), S1 (v78x)
// BF1–BF3 (v77x), R1 (v77x)
// BF1 (v76x)
// BF1–BF2 (v73x), P1–P2 (v73x)
// BF1–BF4 (v71x), P1–P2 (v71x), S1 (v71x)
// BF3 (v70x), P1–P2 (v70x), H1–H5 (v70x)
// BF1–BF3 (v69x), P1 (v69x), H1, K1–K3 (v69x)
// BF1–BF5 (v68x), P1–P4 (v68x), R1–R3 (v68x)
// H3, H6, H7, C5, L4, M3, N-new (v68x)
// BF1–BF2 (v64x), BF1–BF2 (v63x), M1c, H8c
// C1–C5, H1, H3–H7, M2–M3, L4, N1–N3
// L1, L2, L3, L5 (from v61x)
//
// Changes vs v79x (3 changes: 1 hardening, 1 correctness, 1 cleanup):
//
// HARDENING:
// H1. Authenticate: strengthen _cachedSecretKey guard from === null to falsy.
// The cache sentinel was `if (_cachedSecretKey === null)`. If env.kv.get("secretKey")
// ever returns "" (an empty string — valid KV response for a key written with an
// empty value, e.g. via the CF dashboard or API), "" is stored into _cachedSecretKey.
// On the next call, _cachedSecretKey !== null is true, KV read is skipped, secretKey
// is assigned "", the `if (!secretKey)` guard fires, and authentication permanently
// returns false for the entire isolate lifetime with no recovery path except a cold
// start. Changing to `if (!_cachedSecretKey)` guards against both null and "".
// Note: the null sentinel still correctly forces a re-read after generateJWTToken
// creates a new key (null cached → next Authenticate re-reads KV → gets new key).
// No change to generateJWTToken is needed.
//
// CORRECTNESS:
// C1. resolveDNS: return dnsData (without ttlMs) on cold-miss to match cache-hit shape.
// After v79x C1 stripped ttlMs from cached.data, the cold-miss path still returned
// `result` (which includes ttlMs), while the cache-hit path returns `cached.data`
// (which does not). All callers destructure only { ipv4, ipv6 } so there is zero
// behavioral impact today, but the inconsistency is a latent trap for any future
// caller that reads ttlMs from the return value and gets it only on cold misses.
// Fix: return dnsData instead of result on the cold-miss path.
//
// CLEANUP:
// CL1. Remove two dead constants: OVERALL_CONNECTION_TIMEOUT and
// MAX_OUTBOUND_WRITE_QUEUE_BYTES. Both were declared at module level but no code
// reads them. OVERALL_CONNECTION_TIMEOUT (= 0) was left when createOverallConnection-
// Timeout was removed (documented at line ~313). MAX_OUTBOUND_WRITE_QUEUE_BYTES
// (= 8MB) was left when the queuedOutboundBytes backpressure counter was deleted in
// v71x BF1. Both were confirmed by grep: zero references outside the declaration line.
//
// Inherited from v79x (all verified correct, unchanged):
// HARDENING:
// H1. VlOverWSHandler / TrOverWSHandler: explicit TCP socket close in pipePromise.finally.
// On the normal path (no error), safeCloseTcpSocket was implicitly handled by
// remoteSocketToWS's own finally block. This is correct today, but creates an
// invisible coupling: if remoteSocketToWS's internal finally is ever refactored
// (e.g., to support half-open without explicit close), the TCP socket silently
// leaks on clean disconnects with no log, no error, no indication.
// Fix: add safeCloseTcpSocket(remoteSocketWapper.value) explicitly in
// pipePromise's finally block AFTER awaiting toWsPromise in both VlOverWSHandler
// and TrOverWSHandler. safeCloseTcpSocket is idempotent (suppresses errors) so
// the redundant call on the error path is always safe. Cleanup is now
// self-contained in the outermost finally regardless of remoteSocketToWS internals.
//
// H2. remoteSocketToWS: replace manual vlHeader merge with concatUint8Arrays.
// The VLResponseHeader prepend (first TCP→WS chunk only) used a manual
// new Uint8Array + two .set() calls and sent .buffer instead of the Uint8Array
// directly. webSocket.send(merged.buffer) works today because merged is a fresh
// full-buffer allocation, but would silently send extra bytes if the code were
// ever changed to use a subarray view. All other webSocket.send() calls in the
// file send Uint8Array directly. Fix: use concatUint8Arrays(vlHeader, chunk)
// (consistent with the UDP handlers) and send(merged) without .buffer.
//
// CORRECTNESS:
// C1. setDNSCacheEntry: strip ttlMs from cached.data to avoid duplicate storage.
// tryProvider returns { ipv4, ipv6, ttlMs }. setDNSCacheEntry was storing
// { data: result, ttlMs: result.ttlMs, ... } — so ttlMs was present at TWO
// levels: cached.ttlMs (used correctly by the TTL expiry logic) and
// cached.data.ttlMs (invisible noise, ignored by all callers that destructure
// { ipv4, ipv6 }). Fix: destructure ttlMs out of result before storing so
// cached.data contains only { ipv4, ipv6 } — clean, unambiguous, no surprises
// for future readers of the cache object shape.
//
// PERFORMANCE:
// P1. Authenticate: cache secretKey at module level to avoid repeated KV reads.
// env.kv.get("secretKey") was called on every panel request that calls
// Authenticate (11 routes). hmacKey() already caches the derived CryptoKey
// (module-level _cachedHmacKey / _cachedHmacSecret), but the raw string it
// derives from was re-fetched from KV each time. CF's in-isolate KV read cache
// mitigates this in practice, but is not guaranteed. Fix: add module-level
// _cachedSecretKey = null, populated on first call, identical pattern to
// _cachedHmacKey. Zero cold KV reads for secretKey after first panel action.
//
// CLEANUP:
// CL1. Stats endpoint: use Response.json() instead of manual JSON.stringify.
// The /stats debug endpoint used new Response(JSON.stringify(stats), { headers: {
// 'Content-Type': 'application/json', ... }}). Response.json(stats, init) is the
// correct CF Workers API for pure JSON responses with no Content-Type override.
// NOTE: respond() helper was NOT changed — three call sites (login, password reset,
// logout) pass customHeaders with "Content-Type": "text/plain" to deliberately
// override the default. Whether Response.json()'s internal Content-Type can be
// overridden by init.headers is implementation-dependent; the explicit spread in
// respond() is safer and preserved unchanged.
//
// Inherited from v78x (all verified correct, unchanged):
// P1 (v78x), R1 (v78x), C1 (v78x), S1 (v78x)
// BF1–BF3 (v77x), R1 (v77x)
// BF1 (v76x)
// BF1–BF2 (v73x), P1–P2 (v73x)
// BF1–BF4 (v71x), P1–P2 (v71x), S1 (v71x)
// BF3 (v70x), P1–P2 (v70x), H1–H5 (v70x)
// BF1–BF3 (v69x), P1 (v69x), H1, K1–K3 (v69x)
// BF1–BF5 (v68x), P1–P4 (v68x), R1–R3 (v68x)
// H3, H6, H7, C5, L4, M3, N-new (v68x)
// BF1–BF2 (v64x), BF1–BF2 (v63x), M1c, H8c
// C1–C5, H1, H3–H7, M2–M3, L4, N1–N3
// L1, L2, L3, L5 (from v61x)
// Shared text codec instances, created once at module load so helpers can reuse them
// instead of constructing a new TextEncoder/TextDecoder per call (toBase64Utf8 reads `encoder`).
// TextEncoder always produces UTF-8; TextDecoder with no arguments decodes UTF-8.
var encoder = new TextEncoder();
var decoder = new TextDecoder();
/**
 * Encode raw bytes as a standard (padded, "+/" alphabet) base64 string.
 *
 * @param {Uint8Array} input - Bytes to encode.
 * @returns {string} Base64 text; the empty string for empty input.
 */
function encodeBase64(input) {
  // Use the runtime's native encoder whenever it is available.
  if (Uint8Array.prototype.toBase64) {
    return input.toBase64();
  }
  // Fallback: build a binary string slice by slice, then hand it to btoa().
  // Slicing keeps the argument count passed to String.fromCharCode bounded.
  const SLICE_BYTES = 32768;
  let binary = "";
  let offset = 0;
  while (offset < input.length) {
    const slice = input.subarray(offset, offset + SLICE_BYTES);
    binary += String.fromCharCode.apply(null, slice);
    offset += SLICE_BYTES;
  }
  return btoa(binary);
}
/**
 * Base64-encode the UTF-8 bytes of a string.
 *
 * @param {string} value - Text to encode.
 * @returns {string} Base64 representation of the UTF-8 encoding of `value`.
 */
function toBase64Utf8(value) {
  // #5: reuse the module-level encoder rather than allocating a TextEncoder per call.
  const utf8Bytes = encoder.encode(value);
  return encodeBase64(utf8Bytes);
}
// DNS resolution state and protocol size limits. All of these are module-level values.
// NOTE(review): the changelog above describes DNS_CACHE keys as "dohURL|domain|4" / "dohURL|domain|46"
// and entries as { data, ttlMs, ... }; the code that reads and writes the cache should be checked to confirm.
const DNS_CACHE = new Map();
const DNS_IN_FLIGHT = new Map(); // For request coalescing
// Milliseconds.
const DNS_CACHE_TTL = 2700000; // 45 minutes — BF2 (v74x): 20min was causing ~600-1800 background DoH refreshes per 2h session; 45min cuts refresh churn ~55% with no user-visible impact (CDN IPs almost never change intra-session)
// Entry count, not bytes.
const DNS_CACHE_MAX_SIZE = 5000; // P1 (v74x): raised from 3000; a 2–3h browsing session easily visits 3000+ unique domains; 5000 entries ≈ ~1MB well within 128MB isolate limit
// Milliseconds.
const NEGATIVE_DNS_CACHE_TTL = 5000; // 5s — fastest retry after subway reconnect / transient DNS failure
// Bytes.
const MAX_WEBSOCKET_MESSAGE_SIZE = 1024 * 1024; // 1MB
// Bytes.
const MAX_DNS_RESPONSE_SIZE = 65536; // 64KB
// S2 (v81x) / C1 (v82x): Proxy IP health map — per-isolate scope, best-effort.
// IMPORTANT: This Map lives in module-level memory, which is scoped to a single CF
// isolate instance. Cloudflare may spin up multiple isolate instances for the same
// Worker deployment — there is NO guarantee that concurrent connections share an isolate.
// What this map DOES provide: within one isolate, concurrent WebSocket sessions (e.g.
// Windows Update opening 20+ parallel connections that land on the same isolate) share
// failure knowledge and won't pile onto a known-bad proxy IP.
// What this map does NOT provide: globally shared health state across all CF instances.
// KV-backed sharing was evaluated and rejected — KV writes would exhaust the free-tier
// 1,000/day quota in minutes during an active Windows Update session.
// Entries decay automatically after 5 minutes. Map is bounded to 50 entries to prevent
// OOM on pathological proxy pools; oldest entry evicted when full (insertion-order FIFO).
const ENDPOINT_HEALTH = new Map(); // host → { failures, lastFailureTs }
const ENDPOINT_HEALTH_MAX_SIZE = 50; // maximum number of tracked hosts
const ENDPOINT_HEALTH_COOLDOWN_MS = 90000; // 90s — a host at/over the threshold is reported unhealthy until this long after its MOST RECENT failure
const ENDPOINT_HEALTH_DECAY_MS = 300000; // 5 min — auto-remove stale entries
// A host is unhealthy once failures >= 2. The counter is not windowed by the cooldown:
// it keeps accumulating for as long as the entry exists (until it decays after 5 min without
// a failure, is evicted, or is halved by markEndpointSuccess), so the two failures do not
// have to fall inside a single 90s window.
const ENDPOINT_HEALTH_FAIL_THRESHOLD = 2;
/**
 * Record one connection failure against a proxy host in ENDPOINT_HEALTH.
 *
 * Increments the host's failure counter and stamps the failure time. Before doing
 * so it purges every entry whose last failure is older than ENDPOINT_HEALTH_DECAY_MS.
 *
 * @param {string} host - Proxy host to penalize; falsy values are ignored.
 * @returns {void}
 */
function markEndpointFailure(host) {
  if (!host) return;
  const now = Date.now();
  // Evict stale entries passively on every failure mark (bounded cost: the map is capped).
  // Deleting from a Map while iterating it is well-defined in JavaScript.
  for (const [trackedHost, entry] of ENDPOINT_HEALTH) {
    if (now - entry.lastFailureTs > ENDPOINT_HEALTH_DECAY_MS) ENDPOINT_HEALTH.delete(trackedHost);
  }
  let rec = ENDPOINT_HEALTH.get(host);
  if (!rec) {
    // Make room only when a NEW host is about to be inserted. Evicting while the host is
    // already tracked would needlessly drop another host's record — or, when this host is
    // the oldest entry, drop its own record and reset its failure count.
    if (ENDPOINT_HEALTH.size >= ENDPOINT_HEALTH_MAX_SIZE) {
      ENDPOINT_HEALTH.delete(ENDPOINT_HEALTH.keys().next().value); // oldest by insertion order
    }
    rec = { failures: 0, lastFailureTs: 0 };
    ENDPOINT_HEALTH.set(host, rec);
  }
  rec.failures++;
  rec.lastFailureTs = now;
}
/**
 * Report whether a proxy host may currently be used.
 *
 * A host is unhealthy only while it has at least ENDPOINT_HEALTH_FAIL_THRESHOLD recorded
 * failures AND its most recent failure is less than ENDPOINT_HEALTH_COOLDOWN_MS old.
 * Side effect: an entry older than ENDPOINT_HEALTH_DECAY_MS is removed from the map.
 *
 * @param {string} host - Proxy host to check; falsy or untracked hosts count as healthy.
 * @returns {boolean} true when the host is usable.
 */
function isEndpointHealthy(host) {
  const entry = host ? ENDPOINT_HEALTH.get(host) : undefined;
  if (!entry) return true;

  const sinceLastFailure = Date.now() - entry.lastFailureTs;
  if (sinceLastFailure > ENDPOINT_HEALTH_DECAY_MS) {
    // Fully decayed: forget the host entirely.
    ENDPOINT_HEALTH.delete(host);
    return true;
  }

  const overThreshold = entry.failures >= ENDPOINT_HEALTH_FAIL_THRESHOLD;
  const stillCoolingDown = sinceLastFailure < ENDPOINT_HEALTH_COOLDOWN_MS;
  return !(overThreshold && stillCoolingDown);
}
// R1 (v85x): Success path for endpoint health — halves failure count on clean Phase 1 connect.
// Without this, a proxy that fails twice (threshold) and recovers immediately stays blacklisted
// for the full 90-second cooldown with no way to re-enter the pool early. On a 2-proxy setup
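// one transient blip meant 100% of load on one proxy for up to 90s. Each success halves the
// failure count (rounding down): one success brings a proxy at 2–3 failures back below the
// threshold, two successes cover up to 7 failures, and heavier penalties need more.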
/**
 * Record a successful connection to a proxy host by halving its failure count.
 *
 * The count is halved with rounding down and never goes below 0. When it reaches 0
 * the host's entry is removed from ENDPOINT_HEALTH; otherwise the entry is kept with
 * the reduced count (lastFailureTs is left untouched).
 *
 * @param {string} host - Proxy host that connected; falsy or untracked hosts are ignored.
 * @returns {void}
 */
function markEndpointSuccess(host) {
  const entry = host ? ENDPOINT_HEALTH.get(host) : undefined;
  if (!entry) return; // nothing recorded for this host, so nothing to forgive

  const halved = Math.floor(entry.failures / 2);
  entry.failures = Math.max(0, halved);

  if (entry.failures === 0) {
    ENDPOINT_HEALTH.delete(host); // clean slate: drop the record entirely
    return;
  }
  ENDPOINT_HEALTH.set(host, entry);
}
// CL1 (v80x): OVERALL_CONNECTION_TIMEOUT and MAX_OUTBOUND_WRITE_QUEUE_BYTES removed.
// Both were confirmed dead by grep — zero references outside the declaration line.
// OVERALL_CONNECTION_TIMEOUT (= 0): createOverallConnectionTimeout was removed earlier
// (documented below at the comment on that removal).
// MAX_OUTBOUND_WRITE_QUEUE_BYTES (= 8MB): the queuedOutboundBytes backpressure counter
// that read it was deleted in v71x BF1; TCP backpressure is now handled natively.
// BF1 (v71x): BACKPRESSURE_WAIT_MS removed — was used by the now-deleted queuedOutboundBytes
// while-loop. TCP backpressure is handled natively by writer.write() stalling.
// Splits "host", "host:port", "[ipv6]" or "[ipv6]:port" into named groups:
//   ipv6 — the text between the square brackets (brackets excluded)
//   host — a non-bracketed host containing no ":" characters
//   port — optional, digits only (no range check is done by the pattern)
const REGEX_HOST_PORT = /^(?:\[(?<ipv6>.+?)\]|(?<host>[^:]+))(:(?<port>\d+))?$/;
// Case-insensitive match for a canonical 8-4-4-4-12 hex UUID whose version nibble is 4
// and whose variant nibble is 8, 9, a or b.
const REGEX_UUID_V4 = /^[0-9a-f]{8}-[0-9a-f]{4}-[4][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i;
// #6: Hoisted from isDomain/isIPv4/isIPv6 — were compiled on every call, now compiled once at module load
// Domain name: 1–253 characters overall, must not begin with "-", one or more dot-terminated
// labels of 1–63 characters from [A-Za-z0-9-], ending in a purely alphabetic label of 2–63 letters.
// Consequently a final label containing digits or hyphens (e.g. a punycode "xn--…" TLD) does not match.
const REGEX_DOMAIN = /^(?=.{1,253}$)(?!-)(?:[A-Za-z0-9-]{1,63}\.)+[A-Za-z]{2,63}$/;
// Dotted-quad IPv4 with each octet in 0–255, optionally followed by a "/0"–"/32" prefix length
// (captured as group 1).
const REGEX_IPV4 = /^(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(?:\/([0-9]|[1-2][0-9]|3[0-2]))?$/;
// Bracketed IPv6 literal ("[…]") made of hexadecimal groups, optionally followed by a
// "/0"–"/128" prefix length (captured as group 1). The brackets are required.
// The alternative for addresses that START with "::" is `:(?:(?::hex){1,7}|:)`, i.e. either
// "::" alone or "::" followed by one to seven ":"-separated groups. The earlier form
// `::(?:hex:){0,7}` expected every trailing group to END with ":", so it rejected valid
// literals such as "[::1]" and accepted malformed ones such as "[::1:]".
const REGEX_IPV6 = /^\[(?:(?:[a-fA-F0-9]{1,4}:){7}[a-fA-F0-9]{1,4}|(?:[a-fA-F0-9]{1,4}:){1,7}:|:(?:(?::[a-fA-F0-9]{1,4}){1,7}|:)|(?:[a-fA-F0-9]{1,4}:){1,6}:[a-fA-F0-9]{1,4}|(?:[a-fA-F0-9]{1,4}:){1,5}(?::[a-fA-F0-9]{1,4}){1,2}|(?:[a-fA-F0-9]{1,4}:){1,4}(?::[a-fA-F0-9]{1,4}){1,3}|(?:[a-fA-F0-9]{1,4}:){1,3}(?::[a-fA-F0-9]{1,4}){1,4}|(?:[a-fA-F0-9]{1,4}:){1,2}(?::[a-fA-F0-9]{1,4}){1,5}|[a-fA-F0-9]{1,4}:(?::[a-fA-F0-9]{1,4}){1,6})\](?:\/(1[0-1][0-9]|12[0-8]|[0-9]?[0-9]))?$/;
// NOTE(review): presumably the lower/upper bounds accepted for a fragment-length setting;
// the unit and the validating code are not shown next to these constants — confirm at the use sites.
const MIN_FRAGMENT_LENGTH = 10;
const MAX_FRAGMENT_LENGTH = 2048;
// #9: Module-level constant — VLVersion[0] is always 0 in VLESS; was allocated per connection
// Two zero bytes. The instance is shared, so code that uses it must not mutate it.
const VLESS_RESPONSE_HEADER = new Uint8Array([0, 0]);
// BF1 (v76x): Module-level constant reused across all Trojan UDP responses — avoids
// allocating a new Uint8Array([0x0d, 0x0a]) on every DNS response send.
// The bytes are CR LF; the instance is shared, so it must not be mutated either.
const TROJAN_UDP_CRLF = new Uint8Array([0x0d, 0x0a]);
// H1: Module-level sleep avoids allocating a new Promise + arrow function closure on every
// backpressure tick. Reuses one function reference across all connections.
const _sleep = ms => new Promise(r => setTimeout(r, ms));
// ── Per-isolate mutable state ───────────────────────────────────────────────────────────
// Everything below lives for the lifetime of the isolate and is shared by all requests
// it serves. The code that writes these values is outside this section.
// H8 (fixed v63x): env is not in scope at module level — use a mutable flag initialised
// on first request. safeWriteToOutbound reads this instead of env?.DEBUG.
let _debugMode = false;
let _validatedUUID = null; // Cached UUID validation: set to UUID string once validated
// P1: hmacKey cache — crypto.subtle.importKey is expensive C++ crypto; the secret never
// changes within an isolate. Cache the CryptoKey after first import.
// NOTE(review): _cachedHmacSecret presumably records which secret the cached key was
// imported from so a changed secret can be detected — confirm at the import site.
let _cachedHmacKey = null;
let _cachedHmacSecret = null;
// P1 (v79x): secretKey string cache — mirrors _cachedHmacKey pattern.
// env.kv.get("secretKey") was called on every Authenticate() invocation (11 panel routes).
// CF in-isolate KV read cache helps but is not guaranteed. Cache the string explicitly
// so only the very first panel action in an isolate hits KV; all subsequent reads are free.
let _cachedSecretKey = null;
// P2: decompressHtml cache — static gzip blobs never change within an isolate.
// Key: first 32 chars of content string + asString flag.
// NOTE(review): this fingerprint is only safe while no two blobs share their first 32
// base64 characters. The original author states they always differ; re-verify whenever a
// blob is added, because a collision would silently return the wrong page.
const _HTML_DECOMP_CACHE = new Map();
// S1 (v83x): WebSocket close code lookup — maps RFC 6455 codes to human-readable labels.
// Used by the close event listener in VlOverWSHandler / TrOverWSHandler to make
// disconnect logs immediately actionable without consulting external references.
// Only the codes that meaningfully occur in a CF Workers proxy context are listed;
// callers are expected to fall back to "unknown" for codes missing from this Map
// (the Map itself returns undefined for them).
const WS_CLOSE_CODE_NAMES = new Map([
[1000, "clean close"],
[1001, "client navigated away"],
[1002, "protocol error"],
[1003, "unsupported data"],
[1005, "no status received"],
[1006, "NAT/network kill"], // abnormal closure — no close frame was received (e.g. ISP NAT timeout, CF edge reset)
[1007, "invalid payload"],
[1008, "policy violation"],
[1009, "message too large"],
[1011, "server error"],
[1012, "service restart"],
[1013, "try again later"],
[1015, "TLS handshake failure"],
]);
// P1 (v83x): Cold-start DNS pre-warm sentinel.
// Set to true after the first WebSocket connection of this isolate instance.
// Prevents repeated pre-warm attempts across connections within the same isolate.
let _isolateWarmedUp = false;
// Fix 5: Module-level constant — was allocated fresh on every DoH forward request.
// Names of the only incoming request headers that buildDoHForwardHeaders copies through.
const DOH_FORWARD_HEADERS = ["accept", "content-type"];
/**
 * Builds the header set sent upstream when a DoH request is forwarded.
 *
 * Only the headers named in DOH_FORWARD_HEADERS are copied from the incoming request.
 * A default `accept` is supplied when the client sent none, and for POST requests the
 * `content-type` is forced to "application/dns-message" unless the client already
 * declared one of the two supported DNS media types.
 *
 * @param {{ headers: Headers, method: string }} request - Incoming request.
 * @returns {Headers} A new Headers object for the upstream fetch.
 */
function buildDoHForwardHeaders(request) {
  const source = request.headers;
  const forwarded = new Headers();

  // Copy the allow-listed headers; empty or missing values are skipped.
  DOH_FORWARD_HEADERS.forEach((name) => {
    const incomingValue = source.get(name);
    if (incomingValue) {
      forwarded.set(name, incomingValue);
    }
  });

  if (!forwarded.has("accept")) {
    forwarded.set("accept", "application/dns-message, application/dns-json");
  }

  // Only POST carries a body whose media type matters.
  if (request.method !== "POST") {
    return forwarded;
  }

  // Substring match on the lower-cased value, so parameters such as "; charset=…" are tolerated.
  const declaredType = (forwarded.get("content-type") || "").toLowerCase();
  const isWireFormat = declaredType.includes("application/dns-message");
  const isJsonFormat = declaredType.includes("application/dns-json");
  if (!isWireFormat && !isJsonFormat) {
    forwarded.set("content-type", "application/dns-message");
  }
  return forwarded;
}
/**
 * Normalises a user-supplied "N" or "MIN-MAX" string into a clamped, ordered range string.
 *
 * @param {*} value - Raw input; anything that is not a non-blank string yields `fallback`.
 * @param {*} fallback - Returned unchanged when `value` cannot be parsed.
 * @param {number} minAllowed - Lower clamp bound applied to every number.
 * @param {number} maxAllowed - Upper clamp bound applied to every number.
 * @returns {*} "N" for a single value or a collapsed range, "MIN-MAX" otherwise, or `fallback`.
 */
function sanitizeRangeString(value, fallback, minAllowed, maxAllowed) {
  if (typeof value !== "string") return fallback;
  const text = value.trim();
  if (text === "") return fallback;

  const clamp = (n) => Math.max(minAllowed, Math.min(maxAllowed, n));

  // Every "-"-separated piece must parse as an integer. A leading "-" therefore produces
  // an empty first piece and is rejected (negative numbers are not supported).
  const numbers = [];
  for (const piece of text.split("-")) {
    const parsed = Number.parseInt(piece, 10);
    if (!Number.isFinite(parsed)) return fallback;
    numbers.push(parsed);
  }

  if (numbers.length === 1) {
    return String(clamp(numbers[0]));
  }

  // Only the first two numbers are used; any further pieces are validated but ignored.
  const first = clamp(numbers[0]);
  const second = clamp(numbers[1]);
  const [low, high] = first > second ? [second, first] : [first, second];
  if (low === high) {
    return String(low);
  }
  return `${low}-${high}`;
}
// PERFORMANCE FIX: module-level KV cache slots, consumed by getCachedData().
// Slot fields:
//   data               cached value, or null when nothing is cached
//   timestamp          Date.now() of the last successful store (0 = never)
//   ttl                maximum age in ms before the entry counts as stale
//   loading            promise of the in-flight fetch/refresh, or null
//   version            CACHE_VERSION the entry was stored under
//   lastRefreshFailure Date.now() of the last failed background refresh (0 = never)
const SETTINGS_CACHE = {
  data: null,
  timestamp: 0,
  ttl: 5 * 60 * 1000, // 5 minutes
  loading: null,
  version: 0,
  lastRefreshFailure: 0,
};
const WARP_CACHE = {
  data: null,
  timestamp: 0,
  ttl: 60 * 60 * 1000, // 1 hour
  loading: null,
  version: 0,
  lastRefreshFailure: 0,
};
// Bumped by invalidateCache(); a slot whose `version` differs is treated as invalid.
let CACHE_VERSION = 0;
// After a failed background refresh, wait this long (30s) before trying another one.
const REFRESH_FAILURE_COOLDOWN = 30 * 1000;
// Sub response cache — keyed by pathName|client, invalidated when CACHE_VERSION changes
const SUB_CACHE = new Map();
// Single user: ~5-10 unique path|client combos; a tight cap leaves more memory for VPN traffic.
const SUB_CACHE_MAX_SIZE = 20;
let SUB_CACHE_VERSION = -1;
// Monotonic request counter for traceId — cheaper than Math.random() per request
let _reqId = 0;
/**
 * Invalidates both KV cache slots (settings and WARP).
 *
 * Bumps CACHE_VERSION, then drops each slot's data, zeroes its timestamp and stamps it
 * with the new version. `loading` and `lastRefreshFailure` are deliberately left alone,
 * so an in-flight fetch keeps running.
 */
function invalidateCache() {
  CACHE_VERSION += 1;
  for (const slot of [SETTINGS_CACHE, WARP_CACHE]) {
    slot.data = null;
    slot.timestamp = 0;
    slot.version = CACHE_VERSION;
  }
  console.log(`[CACHE] Invalidated - new version: ${CACHE_VERSION}`);
}
/**
 * Returns the value held in a module-level cache slot, loading or refreshing it on demand.
 *
 * Behaviour, in order:
 *  1. Fresh hit (data present, younger than `ttl`, version matches CACHE_VERSION): the cached
 *     value is returned immediately. If it is older than half its TTL, no load is in flight
 *     and the failure cooldown has elapsed, a background refresh is started as well.
 *  2. Another load is already in flight: wait for it and return the data it produced.
 *  3. Otherwise start a blocking load and return its result. If that load fails and the slot
 *     still holds data, the old data is returned instead of throwing.
 *
 * @param {{data: *, timestamp: number, ttl: number, loading: (Promise|null), version: number, lastRefreshFailure: number}} cacheObj
 *   Cache slot; mutated in place.
 * @param {function(): *} fetchFn - Produces the fresh value; may return a promise.
 * @param {string} cacheName - Label used in log messages only.
 * @param {{waitUntil?: function(Promise): void}} [context] - When it has `waitUntil`, the
 *   background refresh promise is handed to it.
 * @returns {Promise<*>} The cached or freshly fetched value.
 * @throws Re-throws the load error when a blocking or coalesced load fails and no data is cached.
 */
async function getCachedData(cacheObj, fetchFn, cacheName, context) {
  // Thrown values are not guaranteed to be Error instances; reading `.message` on null or
  // undefined would itself throw inside the catch handlers below.
  const describeError = (error) => (error instanceof Error ? error.message : String(error));

  // Publishes `promise` as the slot's in-flight load and clears it once settled.
  // The cleanup is registered AFTER the assignment and is identity-guarded:
  //  - if fetchFn throws synchronously the async body has already finished before the
  //    assignment, so an in-body `finally` would run too early and leave `loading` pinned
  //    to a settled promise forever;
  //  - a load that finishes late must never clear a newer load that replaced it.
  // The cleanup is the first reaction on `promise`, so it runs before any waiter resumes.
  const trackLoad = (promise) => {
    cacheObj.loading = promise;
    const release = () => {
      if (cacheObj.loading === promise) cacheObj.loading = null;
    };
    promise.then(release, release);
    return promise;
  };

  const now = Date.now();
  const age = now - cacheObj.timestamp;
  const isStale = age > cacheObj.ttl;
  const needsRefresh = age > cacheObj.ttl * 0.5;

  if (cacheObj.data && !isStale && cacheObj.version === CACHE_VERSION) {
    const refreshCooledDown = now - cacheObj.lastRefreshFailure > REFRESH_FAILURE_COOLDOWN;
    if (needsRefresh && !cacheObj.loading && refreshCooledDown) {
      const refreshPromise = trackLoad((async () => {
        // BF3 (v71x): Snapshot version BEFORE the async KV fetch.
        // If invalidateCache() fires mid-fetch, CACHE_VERSION changes. Without the snapshot,
        // the refresher would stamp stale data with the new version, making the next request
        // believe it has fresh data when it does not. Fix: only commit if version is unchanged.
        const versionAtStart = CACHE_VERSION;
        try {
          const fresh = await fetchFn();
          if (CACHE_VERSION === versionAtStart) {
            cacheObj.data = fresh;
            cacheObj.timestamp = Date.now();
            cacheObj.version = CACHE_VERSION;
          } else {
            console.warn(`[CACHE] Version changed during background refresh for ${cacheName} (${versionAtStart}→${CACHE_VERSION}), discarding stale result`);
          }
        } catch (error) {
          // Background refresh never rejects: record the failure so the cooldown applies.
          cacheObj.lastRefreshFailure = Date.now();
          console.error(`[CACHE] Refresh failed for ${cacheName}:`, describeError(error));
        }
      })());
      if (context?.waitUntil) {
        context.waitUntil(refreshPromise);
      }
    }
    return cacheObj.data;
  }

  // Coalesce with a load that is already running instead of issuing a second fetch.
  if (cacheObj.loading) {
    try {
      await cacheObj.loading;
      if (cacheObj.data) return cacheObj.data;
    } catch (error) {
      console.error(`[CACHE] Coalesced load failed for ${cacheName}`);
      throw error;
    }
  }

  return trackLoad((async () => {
    // BF3 (v71x): Same versionAtStart guard as background-refresh path.
    // If invalidateCache() fires mid-fetch, stamp versionAtStart-1 (never === CACHE_VERSION)
    // so this entry is immediately stale and re-fetched on the next request.
    // We still return the data to the current caller — it's fresh enough for this request.
    const versionAtStart = CACHE_VERSION;
    try {
      const fresh = await fetchFn();
      cacheObj.data = fresh;
      cacheObj.timestamp = Date.now();
      cacheObj.version = CACHE_VERSION === versionAtStart ? CACHE_VERSION : versionAtStart - 1;
      return fresh;
    } catch (error) {
      console.error(`[CACHE] Fetch failed for ${cacheName}:`, describeError(error));
      // Serve whatever is still cached rather than failing the request.
      if (cacheObj.data) return cacheObj.data;
      throw error;
    }
  })());
}
/**
 * Races `promise` against a timer so the caller never waits longer than `timeoutMs`.
 * The timer is always cleared once the race settles, so no timer is left pending.
 *
 * @param {Promise<*>} promise - The operation to bound.
 * @param {number} timeoutMs - Deadline in milliseconds.
 * @param {string} errorMessage - Message of the Error used to reject on timeout.
 * @returns {Promise<*>} Settles like `promise`, or rejects with Error(errorMessage) on timeout.
 */
function withTimeout(promise, timeoutMs, errorMessage) {
  let timerHandle;
  const deadline = new Promise((_resolve, reject) => {
    timerHandle = setTimeout(() => {
      reject(new Error(errorMessage));
    }, timeoutMs);
  });
  const cancelTimer = () => clearTimeout(timerHandle);
  return Promise.race([promise, deadline]).finally(cancelTimer);
}
// Item 13: createOverallConnectionTimeout and clearTimer removed — OVERALL_CONNECTION_TIMEOUT is
// permanently 0 (disabled), so the timer was never created. All overallTimeoutId references and
// clearTimer() calls in VlOverWSHandler/TrOverWSHandler were dead code (always null/no-op).
// CL1 (v81x): Dead esbuild CommonJS shims removed (__create, __defProp, __getOwnPropDesc,
// __getOwnPropNames, __getProtoOf, __hasOwnProp, __require, __commonJS, __copyProps, __toESM).
// These were bundler artifacts from a CommonJS dependency that no longer exists in the bundle.
// Confirmed zero call sites by grep — every cross-reference was self-referential within the
// declaration block itself. They parsed on every cold start, adding latency for no reason.
// src/protocols/warp.ts
/**
 * Registers two fresh WARP accounts and persists them under the "warpAccounts" dataset.
 *
 * Two independent key pairs are generated and both registrations run in parallel.
 * A failed KV save is logged but not treated as an error: the accounts are still returned.
 *
 * @param {*} env - Worker environment, passed through to saveDataset.
 * @returns {Promise<Array<{privateKey: string, warpIPv6: string, reserved: *, publicKey: string}>>}
 * @throws {Error} "Failed to get warp configs: …" when a registration request fails.
 */
async function fetchWarpAccounts(env) {
  const apiBaseUrl = "https://api.cloudflareclient.com/v0a4005/reg";

  // Registers a single key pair and returns the parsed JSON registration response.
  const registerKey = async (key) => {
    try {
      const payload = JSON.stringify({
        install_id: "",
        fcm_token: "",
        tos: new Date().toISOString(),
        type: "Android",
        model: "PC",
        locale: "en_US",
        warp_enabled: true,
        key: key.publicKey
      });
      const response = await fetch(apiBaseUrl, {
        method: "POST",
        headers: {
          "User-Agent": "insomnia/8.6.1",
          "Content-Type": "application/json"
        },
        body: payload
      });
      // Error responses (e.g. a 429) carry an HTML page, not JSON — bail out before .json().
      if (!response.ok) {
        throw new Error(`WARP registration failed: HTTP ${response.status} ${response.statusText}`);
      }
      return await response.json();
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Failed to get warp configs: ${reason}`);
    }
  };

  // Independent key pairs, so the two accounts are never identical.
  const warpKeys = await Promise.all([generateKeyPair(), generateKeyPair()]);
  // Both registrations run concurrently; the first failure rejects the whole call.
  const registrations = await Promise.all(warpKeys.map((key) => registerKey(key)));

  const accounts = warpKeys.map((key, index) => {
    const { config } = registrations[index];
    return {
      privateKey: key.privateKey,
      warpIPv6: `${config.interface.addresses.v6}/128`,
      reserved: config.client_id,
      publicKey: config.peers[0].public_key
    };
  });

  // Persist for cold-start reuse; a KV failure is tolerated because the accounts are in memory.
  const persisted = await saveDataset(env, "warpAccounts", accounts);
  if (!persisted) {
    console.error('WARP KV save failed — serving in-memory accounts for this session');
  }
  return accounts;
}
/**
 * Generates an extractable X25519 key pair and returns both halves base64-encoded.
 *
 * The private key is taken as the trailing 32 bytes of the PKCS#8 export; the public key
 * is the "raw" export.
 *
 * @returns {Promise<{publicKey: string, privateKey: string}>}
 */
async function generateKeyPair() {
  const { privateKey, publicKey } = await crypto.subtle.generateKey(
    { name: "X25519", namedCurve: "X25519" },
    true,
    ["deriveBits"]
  );
  const [pkcs8Der, rawPublic] = await Promise.all([
    crypto.subtle.exportKey("pkcs8", privateKey),
    crypto.subtle.exportKey("raw", publicKey)
  ]);
  // Copy out only the last 32 bytes of the PKCS#8 structure — the raw private key.
  const privateKeyBytes = new Uint8Array(pkcs8Der.slice(-32));
  const publicKeyBytes = new Uint8Array(rawPublic);
  // P1 (v70x): uses the module-level encodeBase64 rather than a per-call local helper.
  return {
    publicKey: encodeBase64(publicKeyBytes),
    privateKey: encodeBase64(privateKeyBytes)
  };
}
// src/cores/utils.ts
/**
 * Tells whether `address` is a syntactically valid domain name according to REGEX_DOMAIN.
 *
 * @param {*} address - Candidate host; falsy values are rejected without running the regex.
 * @returns {boolean}
 */
function isDomain(address) {
  return Boolean(address) && REGEX_DOMAIN.test(address);
}
/**
 * Starts a periodic idle check for one connection.
 *
 * Traffic is reported through `touch()`, which only flips a boolean; the interval callback
 * converts that flag into a timestamp, so Date.now() is called once per interval rather than
 * once per chunk. When more than `idleMs` pass without traffic the watchdog logs, stops
 * itself and invokes `onTimeout` exactly once.
 *
 * @param {function(string): void} log - Receives diagnostic messages.
 * @param {function(): void} onTimeout - Called when the connection is judged idle; errors it
 *   throws are caught and logged.
 * @param {*} webSocket - Accepted for interface compatibility; not used by this function
 *   (WebSocket.ping() does not exist in the CF Workers API — the runtime handles ping/pong).
 * @param {number} [idleMs=300000] - Idle threshold in milliseconds.
 * @param {number} [checkMs=25000] - Interval between checks in milliseconds.
 * @returns {{touch: function(): void, stop: function(): void}}
 */
function createIdleWatchdog(log, onTimeout, webSocket, idleMs = 300000, checkMs = 25000) {
  let lastSeenAt = Date.now();
  let sawTraffic = false; // set by touch(), consumed and reset by the next tick
  let halted = false;
  let intervalHandle = null;

  // Runs the caller's timeout handler without letting its errors escape the timer callback.
  const fireTimeout = () => {
    try {
      onTimeout();
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      log(`Watchdog timeout handler failed: ${reason}`);
    }
  };

  const onTick = () => {
    if (halted) return;
    const tickTime = Date.now();
    if (sawTraffic) {
      // Traffic since the previous tick counts as activity "now".
      lastSeenAt = tickTime;
      sawTraffic = false;
    }
    const idleFor = tickTime - lastSeenAt;
    if (!(idleFor > idleMs)) return;

    log(`Closing idle connection after ${Math.floor(idleFor / 1e3)}s`);
    // Stop first so the handler can never be invoked twice.
    halted = true;
    clearInterval(intervalHandle);
    intervalHandle = null;
    fireTimeout();
  };

  intervalHandle = setInterval(onTick, checkMs);

  return {
    // Pure boolean write — cheap enough for the per-chunk hot path.
    touch() {
      sawTraffic = true;
    },
    // Idempotent: safe to call after the watchdog has already fired or been stopped.
    stop() {
      halted = true;
      if (!intervalHandle) return;
      clearInterval(intervalHandle);
      intervalHandle = null;
    }
  };
}
/**
 * Stores `value` under `cacheKey` in DNS_CACHE as the newest entry.
 *
 * The key is removed first so that re-inserting it moves it to the back of the Map's
 * insertion order. If the Map is then still at capacity (DNS_CACHE_MAX_SIZE), the entry at
 * the front — the oldest one — is evicted to make room.
 *
 * @param {string} cacheKey
 * @param {*} value
 */
function setDNSCacheEntry(cacheKey, value) {
  // L4: delete() is a no-op for a missing key, so no has() check is needed.
  DNS_CACHE.delete(cacheKey);
  const atCapacity = DNS_CACHE.size >= DNS_CACHE_MAX_SIZE;
  if (atCapacity) {
    const [oldestKey] = DNS_CACHE.keys();
    DNS_CACHE.delete(oldestKey);
  }
  DNS_CACHE.set(cacheKey, value);
}
/**
 * Builds the DoH query URLs for a domain's A and AAAA lookups.
 *
 * The `name` and `type` parameters are appended to `dohURL`. The separator is chosen from
 * the endpoint itself, so an endpoint that already carries a query string
 * (e.g. "https://host/dns-query?token=x") still yields a well-formed URL instead of a
 * second "?". For endpoints without a query string the output is unchanged.
 *
 * @param {string} domain - Name to resolve; URI-component-encoded before insertion.
 * @param {boolean} [onlyIPv4=false] - When true no AAAA URL is produced.
 * @param {string} [dohURL="https://cloudflare-dns.com/dns-query"] - DoH endpoint.
 * @returns {{ipv4: string, ipv6: (string|null)}} `ipv6` is null when `onlyIPv4` is true.
 */
function buildDNSURLs(domain, onlyIPv4 = false, dohURL = "https://cloudflare-dns.com/dns-query") {
  const endpoint = String(dohURL);
  let separator = "?";
  if (endpoint.includes("?")) {
    // Already has a query string: continue it, unless it ends ready for the next parameter.
    separator = endpoint.endsWith("?") || endpoint.endsWith("&") ? "" : "&";
  }
  const base = `${endpoint}${separator}name=${encodeURIComponent(domain)}`;
  return {
    ipv4: `${base}&type=A`,
    ipv6: onlyIPv4 ? null : `${base}&type=AAAA`
  };
}
// Timeout, in milliseconds, applied to each individual DoH record fetch made by resolveDNS
// (one A lookup and, unless IPv4-only, one AAAA lookup per provider).
const DNS_TIMEOUT = 500; // 500ms fast-fail — snappier fallback in censored networks (was 1000ms)
// R1: IP-based fallback — "dns.google" requires DNS to resolve, which defeats the purpose
// of a fallback when DNS itself is degraded. 8.8.8.8/dns-query is IP-direct, no DNS
// lookup needed. 8.8.8.8 is already in the bypass list so it will not loop.
// resolveDNS races this provider in parallel with the caller's DoH endpoint, and skips the
// race when the caller's endpoint is this same URL.
const DNS_FALLBACK_DOH = "https://8.8.8.8/dns-query";
async function resolveDNS(domain, onlyIPv4 = false, dohURL = "https://cloudflare-dns.com/dns-query") {
const cacheKey = `${dohURL}|${domain}|${onlyIPv4 ? "4" : "46"}`;
// BF4 (v71x): DNS_IN_FLIGHT saturation — spin-wait up to 500ms instead of throwing.
// v70x threw immediately when in-flight count exceeded 200, which the UDP IIFE caught
// and silently dropped the DNS packet. During browser-startup bursts (many tabs opening
// simultaneously) 200 distinct in-flight domains is reachable, causing visible DNS
// lookup stalls (1–5s per domain) inside the VPN tunnel.
// Fix: wait in 25ms increments for up to 500ms. In practice DoH responses return in
// 50–200ms so the queue drains well within the wait window.
// BF4 (v70x): DNS_IN_FLIGHT saturation guard: spin-wait up to 1000ms before giving up.
// IMPORTANT: this check runs BEFORE the SWR cache lookup — a throw here drops the packet
// entirely. Threshold 500 / 1000ms is intentionally generous to survive browser-startup
// bursts without premature failure. Do NOT lower the threshold — it does NOT trigger SWR.
if (DNS_IN_FLIGHT.size > 500) {
let waited = 0;
while (DNS_IN_FLIGHT.size > 500 && waited < 1000) {
await _sleep(25);
waited += 25;
}
if (DNS_IN_FLIGHT.size > 500) {
throw new Error(`DNS resolution queue full (${DNS_IN_FLIGHT.size} in-flight) after ${waited}ms wait`);
}
}
const cached = DNS_CACHE.get(cacheKey);
if (cached) {
const ttl = cached.isFailure
? (cached.isNxdomain ? 60000 : NEGATIVE_DNS_CACHE_TTL) // NXDOMAIN: 60s (permanent), network error: 5s (transient)
: (cached.ttlMs ?? DNS_CACHE_TTL); // R1 (v77x): use actual per-record TTL when available
if (Date.now() - cached.timestamp < ttl) {
// BF3 (v70x): LRU touch — move to back of Map (newest) so frequently-used entries
// survive eviction pressure. Without this, a hot entry at position 0 was evicted
// before a stale entry inserted more recently (FIFO, not LRU).
DNS_CACHE.delete(cacheKey);
DNS_CACHE.set(cacheKey, cached);
if (cached.isFailure) {
throw new Error(cached.error || `DNS lookup failed for ${domain}`);
}
return cached.data;
}
// CL1 (v85x): Stale-while-revalidate grace window — corrected comment.
// If the cached entry has valid data and is past its TTL, return it immediately
// (avoids 50–300ms DNS stall) and extend the cache entry by 60s so subsequent
// callers within the grace window also get instant results without triggering fetches.
// NOTE: No background refresh is started here. The 60s grace window simply defers the
// refresh to the next caller that arrives after the window expires — at that point a
// cold-miss triggers a fresh DoH fetch via the normal path below. This is intentional
// for single-user low-traffic isolates: a true always-on background refresh would fire
// a DoH request on every TTL expiry even when no traffic needs the result.
// Single user — CDN IPs rarely change intra-session; this avoids blocking latency.
if (!cached.isFailure && cached.data) {
DNS_CACHE.delete(cacheKey);
const entryTtlMs = cached.ttlMs ?? DNS_CACHE_TTL; // R1 (v77x): use actual TTL for grace window
// Advance timestamp so the entry appears fresh for 60s — subsequent requests return instantly
DNS_CACHE.set(cacheKey, { ...cached, timestamp: Date.now() - entryTtlMs + 60000 });
return cached.data;
}
// C1: Fall through to in-flight check BEFORE deleting stale entry.
// Old code deleted the stale fallback entry first, losing it if the in-flight request also failed.
}
// C1: Check in-flight FIRST — join existing fetch rather than losing stale fallback data
if (DNS_IN_FLIGHT.has(cacheKey)) {
return DNS_IN_FLIGHT.get(cacheKey);
}
// No in-flight — safe to evict stale entry now and start fresh fetch
if (cached) DNS_CACHE.delete(cacheKey);
// #9: fetchWithTimeout now calls controller.abort() in finally to free network resources promptly.
// externalSignal: optional AbortSignal from mutual-abort parallel racing (P1 v69x); if it fires
// before the timeout, the fetch is cancelled and an AbortError propagates naturally.
const fetchWithTimeout = async (url, recordType, timeout, externalSignal) => {
const controller = new AbortController();
// If an external signal (from the parallel-race loser abort) fires, abort our controller too.
const onExternalAbort = () => controller.abort();
if (externalSignal) externalSignal.addEventListener("abort", onExternalAbort, { once: true });
const timeoutId = setTimeout(() => controller.abort(), timeout);
try {
return await fetchDNSRecords(url, recordType, controller.signal);
} catch (error) {
if (error?.name === "AbortError") {
throw new Error("DNS timeout");
}
throw error;
} finally {
clearTimeout(timeoutId);
if (externalSignal) externalSignal.removeEventListener("abort", onExternalAbort);
controller.abort(); // #9: Always abort to release underlying fetch resources
}
};
// #11: Try a DoH provider; returns { ipv4, ipv6 } or throws.
// signal: optional AbortSignal from the mutual-abort parallel racing logic (P1 v69x).
const tryProvider = async (providerURL, signal) => {
const urls = buildDNSURLs(domain, onlyIPv4, providerURL);
const results = await Promise.allSettled([
fetchWithTimeout(urls.ipv4, 1, DNS_TIMEOUT, signal),
onlyIPv4 || !urls.ipv6 ? Promise.resolve({ records: [], ttlMs: DNS_CACHE_TTL }) : fetchWithTimeout(urls.ipv6, 28, DNS_TIMEOUT, signal)
]);
// R1 (v77x): unpack { records, ttlMs } from each result; use minimum TTL across both.
const ipv4Result = results[0].status === "fulfilled" ? results[0].value : { records: [], ttlMs: DNS_CACHE_TTL };
const ipv6Result = results[1].status === "fulfilled" ? results[1].value : { records: [], ttlMs: DNS_CACHE_TTL };
const ipv4 = ipv4Result.records;
const ipv6 = ipv6Result.records;
if (ipv4.length === 0 && ipv6.length === 0) {
const err = results[0].status === "rejected" ? results[0].reason : new Error("No DNS records found");
throw err;
}
const ttlMs = Math.min(ipv4Result.ttlMs, ipv6Result.ttlMs);
return { ipv4, ipv6, ttlMs };
};
const fetchPromise = (async () => {
try {
// P1 (v69x): Parallel DNS with mutual abort — reverts v68x sequential regression.
// v68x used sequential primary-then-fallback to avoid abandoned fetches holding
// connection slots. But sequential adds up to DNS_TIMEOUT (500ms) of extra latency
// when the primary is blocked — the worst case for censored-network users.
// Fix: run both providers in parallel; the winner immediately aborts the loser via
// AbortController, so the losing slot is released as soon as one resolves.
// DNS resolves once then caches for 5 min, so the extra slot is held for ~100-300ms
// at most — a far better trade-off than guaranteed 500ms added latency on cold lookups.
let result;
if (DNS_FALLBACK_DOH === dohURL) {
// Primary and fallback are identical — no point racing.
result = await tryProvider(dohURL);
} else {
const abortPrimary = new AbortController();
const abortFallback = new AbortController();
const tryProviderWithAbort = async (url, ownAbort, otherAbort) => {
try {
const r = await tryProvider(url, ownAbort.signal);
otherAbort.abort(); // cancel the loser immediately
return r;
} catch (err) {
// fetchWithTimeout converts AbortError -> Error("DNS timeout"), so err.name
// is never "AbortError" here. Re-throw regardless — Promise.any handles it.
throw err;
}
};
try {
result = await Promise.any([
tryProviderWithAbort(dohURL, abortPrimary, abortFallback),
tryProviderWithAbort(DNS_FALLBACK_DOH, abortFallback, abortPrimary)
]);
} catch (aggregateErr) {
// Both failed — extract the underlying errors from the AggregateError
const errors = aggregateErr?.errors ?? [aggregateErr];
const msgs = errors.map(e => (e instanceof Error ? e.message : String(e))).join("; ");
// C1 (v78x): propagate isNxdomain if ANY provider confirmed NXDOMAIN (Status=3).
// When both providers fail, the new Error would drop the cause chain. Re-attach it
// so the outer catch can still set the correct 60s (NXDOMAIN) vs 5s cache TTL.
const isNxdomain = errors.some(e => e?.cause?.isNxdomain === true);
throw new Error(`All DNS providers failed: ${msgs}`, isNxdomain ? { cause: { isNxdomain: true } } : undefined);
}
}
// C1 (v79x): destructure ttlMs out of result so cached.data = { ipv4, ipv6 } only.
// Previously ttlMs leaked into cached.data too (unused noise alongside cached.ttlMs).
const { ttlMs: resolvedTtlMs, ...dnsData } = result;
setDNSCacheEntry(cacheKey, { data: dnsData, ttlMs: resolvedTtlMs, timestamp: Date.now(), isFailure: false });
// C1 (v80x): return dnsData (without ttlMs) so cold-miss and cache-hit return the
// same shape { ipv4, ipv6 }. Previously returned result (which includes ttlMs),
// creating an inconsistency: cache-hit returns cached.data (no ttlMs), cold-miss
// returns result (with ttlMs). All callers destructure { ipv4, ipv6 } only so there
// is zero behavioral impact today, but the inconsistency is a latent trap.
return dnsData;
} catch (error) {
const message2 = error instanceof Error ? error.message : String(error);
const dnsErrorMessage = message2.length > 200 ? `${message2.slice(0, 200)}...` : message2;
if (cached && !cached.isFailure) return cached.data;
setDNSCacheEntry(cacheKey, {