-
Notifications
You must be signed in to change notification settings - Fork 16
Expand file tree
/
Copy pathmlx-model-report-20260225_105306.html
More file actions
1069 lines (1013 loc) · 124 KB
/
mlx-model-report-20260225_105306.html
File metadata and controls
1069 lines (1013 loc) · 124 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>MLX Model Test Report — 2026-02-25 10:53</title>
<!-- MathJax for LaTeX rendering -->
<script>
  // MathJax configuration object. It is defined before the MathJax loader
  // script tag that follows, so the library can pick it up when it starts.
  window.MathJax = {
    tex: {
      // Delimiters recognised as inline math: $...$ and \(...\).
      inlineMath: [['$', '$'], ['\\(', '\\)']],
      // Delimiters recognised as display math: $$...$$ and \[...\].
      displayMath: [['$$', '$$'], ['\\[', '\\]']],
      processEscapes: true
    },
    svg: { fontCache: 'global' },
    // Initial typeset is switched off here; presumably the page triggers
    // typesetting itself once content is rendered (confirm further down).
    startup: { typeset: false }
  };
</script>
<script src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-svg.js" async></script>
<!-- marked.js for Markdown rendering -->
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
<style>
/* Reset: drop default margins/padding and use border-box sizing everywhere */
* { margin: 0; padding: 0; box-sizing: border-box; }
/* Page base: dark background, light text, system font stack */
body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; background: #0d1117; color: #c9d1d9; padding: 2rem; }
/* Headings; h2 carries a bottom rule to separate report sections */
h1 { color: #58a6ff; margin-bottom: 0.5rem; }
h2 { color: #58a6ff; margin: 2rem 0 1rem; border-bottom: 1px solid #21262d; padding-bottom: 0.5rem; }
/* Summary cards: wrapping flex row of label/value boxes, with color modifiers for the value */
.summary { display: flex; gap: 1.5rem; margin: 1.5rem 0; flex-wrap: wrap; }
.card { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 1.2rem 1.5rem; min-width: 160px; }
.card .label { font-size: 0.85rem; color: #8b949e; margin-bottom: 0.3rem; }
.card .value { font-size: 1.8rem; font-weight: 700; }
.card .value.green { color: #3fb950; }
.card .value.red { color: #f85149; }
.card .value.blue { color: #58a6ff; }
.card .value.yellow { color: #d29922; }
/* Data tables: full width, sticky header cells, row highlight on hover */
table { width: 100%; border-collapse: collapse; margin: 1rem 0; }
th { background: #161b22; color: #58a6ff; text-align: left; padding: 0.7rem 1rem; font-weight: 600; border-bottom: 2px solid #30363d; position: sticky; top: 0; z-index: 10; }
td { padding: 0.6rem 1rem; border-bottom: 1px solid #21262d; }
tr:hover { background: #161b22; }
/* Status text colors (green for ok, red for fail) */
.status-ok { color: #3fb950; font-weight: 600; }
.status-fail { color: #f85149; font-weight: 600; }
/* Horizontal bar: .bar is the track, .bar-fill the colored portion, .bar-label the text pinned to the right edge */
.bar { background: #21262d; border-radius: 4px; height: 20px; position: relative; overflow: hidden; min-width: 60px; }
.bar-fill { height: 100%; border-radius: 4px; transition: width 0.3s; }
.bar-label { position: absolute; right: 6px; top: 1px; font-size: 0.75rem; color: #c9d1d9; font-weight: 600; }
/* Monospace text helper */
.mono { font-family: 'SF Mono', Menlo, monospace; font-size: 0.85rem; }
/* Single-line cells that truncate overflowing text with an ellipsis */
.preview { max-width: 400px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; font-size: 0.8rem; color: #8b949e; }
.error-text { color: #f85149; font-size: 0.85rem; max-width: 500px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
/* Muted secondary text and narrow centered table columns */
.meta { color: #8b949e; font-size: 0.9rem; margin-bottom: 1.5rem; }
.rank { color: #8b949e; font-size: 0.85rem; width: 30px; text-align: center; }
.ai-score { text-align: center; font-size: 1rem; width: 35px; }
/* Config badges */
.config-panel { margin-bottom: 1rem; display: flex; flex-wrap: wrap; gap: 0.4rem; }
.config-badge { display: inline-block; font-size: 0.75rem; padding: 0.2rem 0.6rem; border: 1px solid #30363d; border-radius: 12px; color: #c9d1d9; font-family: 'SF Mono', Menlo, monospace; white-space: nowrap; }
.config-badge strong { color: #e6edf3; }
/* Prompt box */
.prompt-box { background: #161b22; border: 1px solid #30363d; border-radius: 6px; padding: 0.8rem 1rem; margin-bottom: 1rem; font-size: 0.85rem; color: #c9d1d9; line-height: 1.5; }
.prompt-label { display: inline-block; background: #58a6ff; color: #0d1117; font-size: 0.65rem; font-weight: 700; padding: 0.1rem 0.4rem; border-radius: 3px; margin-right: 0.5rem; vertical-align: middle; letter-spacing: 0.05em; }
/* Variant / AFM tags */
.variant-tag { display: inline-block; font-size: 0.7rem; padding: 0.1rem 0.4rem; background: #a371f7; color: #0d1117; border-radius: 3px; margin-left: 0.4rem; font-weight: 600; font-family: -apple-system, sans-serif; }
.afm-tag { display: inline-block; font-size: 0.7rem; padding: 0.1rem 0.4rem; background: #21262d; border: 1px solid #f0883e; color: #f0883e; border-radius: 3px; font-family: 'SF Mono', Menlo, monospace; }
.temp-tag { display: inline-block; font-size: 0.7rem; padding: 0.1rem 0.4rem; background: #21262d; border: 1px solid #d29922; color: #d29922; border-radius: 3px; font-family: 'SF Mono', Menlo, monospace; }
/* Response sections */
.response-section { margin: 0.5rem 0; border: 1px solid #21262d; border-radius: 8px; overflow: hidden; }
.response-header { display: flex; align-items: center; gap: 0.8rem; padding: 0.8rem 1.2rem; background: #161b22; cursor: pointer; user-select: none; color: #c9d1d9; font-size: 0.95rem; font-weight: 500; }
.response-header:hover { background: #1c2129; }
.toggle-icon { font-size: 0.7rem; color: #8b949e; transition: transform 0.2s; display: inline-block; width: 1rem; }
.toggle-icon.open { transform: rotate(90deg); }
.response-meta { margin-left: auto; color: #8b949e; font-size: 0.8rem; font-family: -apple-system, sans-serif; }
.response-body { padding: 1.5rem; background: #0d1117; border-top: 1px solid #21262d; }
/* Rendered markdown content */
.rendered-content { line-height: 1.7; font-size: 0.95rem; }
.rendered-content h1, .rendered-content h2, .rendered-content h3 { color: #58a6ff; margin: 1.2rem 0 0.6rem; }
.rendered-content h1 { font-size: 1.4rem; border-bottom: 1px solid #21262d; padding-bottom: 0.3rem; }
.rendered-content h2 { font-size: 1.2rem; }
.rendered-content h3 { font-size: 1.05rem; }
.rendered-content p { margin: 0.6rem 0; }
.rendered-content ul, .rendered-content ol { margin: 0.6rem 0 0.6rem 1.5rem; }
.rendered-content li { margin: 0.3rem 0; }
.rendered-content code { background: #161b22; padding: 0.15rem 0.4rem; border-radius: 4px; font-family: 'SF Mono', Menlo, monospace; font-size: 0.85em; color: #e6edf3; }
.rendered-content pre { background: #161b22; border: 1px solid #30363d; border-radius: 6px; padding: 1rem; overflow-x: auto; margin: 0.8rem 0; }
.rendered-content pre code { background: none; padding: 0; }
.rendered-content blockquote { border-left: 3px solid #30363d; padding-left: 1rem; color: #8b949e; margin: 0.6rem 0; }
.rendered-content strong { color: #e6edf3; }
.rendered-content hr { border: none; border-top: 1px solid #21262d; margin: 1rem 0; }
.rendered-content table { border: 1px solid #30363d; }
.rendered-content th, .rendered-content td { border: 1px solid #30363d; padding: 0.4rem 0.8rem; }
/* MathJax overrides for dark theme */
mjx-container { color: #c9d1d9 !important; }
mjx-container svg { fill: #c9d1d9; }
/* .expand-all control: button-like box with pointer cursor and a lighter hover background */
.expand-all { background: #21262d; color: #c9d1d9; border: 1px solid #30363d; border-radius: 6px; padding: 0.5rem 1rem; cursor: pointer; font-size: 0.85rem; margin-bottom: 1rem; }
.expand-all:hover { background: #30363d; }
</style>
</head>
<body>
<h1>MLX Model Test Report</h1>
<p class="meta">Generated 2026-02-25 10:53 · AFM MLX Backend · <strong>v0.9.5-0cfba17</strong></p>
<p class="meta" style="margin-top:0.3rem"><code style="background:#161b22;padding:0.3rem 0.6rem;border-radius:4px;font-size:0.8rem">mlx-model-test.sh --prompts /Users/syl/bin/test-stop-sequences.txt --smart claude,codex</code></p>
<!-- Headline figures for this run, one card per metric -->
<div class="summary">
  <div class="card">
    <div class="label">Test Runs</div>
    <div class="value blue">34</div>
  </div>
  <div class="card">
    <div class="label">Passed</div>
    <div class="value green">28</div>
  </div>
  <div class="card">
    <div class="label">Failed</div>
    <div class="value red">6</div>
  </div>
  <div class="card">
    <div class="label">Best tok/s</div>
    <div class="value yellow">44.1</div>
  </div>
  <div class="card">
    <div class="label">Fastest</div>
    <div class="value" style="font-size:1rem;color:#d29922">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-code-fence</div>
  </div>
</div>
<h2>Performance Ranking (by tokens/sec)</h2>
<p style="color:#8b949e;font-size:0.85rem;margin-bottom:0.5rem">Click a row to jump to its full response below.</p>
<table>
<!-- Column headers for the ranking table; scope="col" ties each header to the cells below it for assistive technology -->
<tr>
<th scope="col">#</th>
<th scope="col">Model / Config</th>
<th scope="col">Status</th>
<th scope="col" title="codex score">codex</th>
<th scope="col" title="claude score">claude</th>
<th scope="col">Temp</th>
<th scope="col">Load (s)</th>
<th scope="col">Tokens</th>
<th scope="col">Gen (s)</th>
<th scope="col" style="min-width:200px">Tokens/sec</th>
<th scope="col">Prompt</th>
</tr>
<tr onclick="scrollToResponse(0)" style="cursor:pointer" title="Click to view full response">
<td class="rank">1</td>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-code-fence <span class="variant-tag">stop-code-fence</span></td>
<td class="status-ok">OK</td>
<td class="ai-score" style="color:#f0883e;font-weight:700">2</td><td class="ai-score" style="color:#f0883e;font-weight:700">2</td>
<td>0.0</td>
<td>2.0</td>
<td>122</td>
<td>2.76</td>
<td><div class="bar"><div class="bar-fill" style="width:100%;background:#3fb950"></div><div class="bar-label">44.1</div></div></td>
<td class="preview" title="Write a Python function that computes factorial, inside a code block.">Write a Python function that computes factorial, inside a co...</td>
</tr>
<tr onclick="scrollToResponse(1)" style="cursor:pointer" title="Click to view full response">
<td class="rank">2</td>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-guided-json-no-match <span class="variant-tag">stop-guided-json-no-match</span><br><span class="afm-tag">--guided-json '{"type":"object","properties":{"name":{"type":"string"},"age":{"type":"integer"}},"required":["name","age"]}'</span></td>
<td class="status-ok">OK</td>
<td class="ai-score" style="color:#f0883e;font-weight:700">2</td><td class="ai-score" style="color:#3fb950;font-weight:700">5</td>
<td>0.0</td>
<td>1.0</td>
<td>1150</td>
<td>27.85</td>
<td><div class="bar"><div class="bar-fill" style="width:94%;background:#3fb950"></div><div class="bar-label">41.3</div></div></td>
<td class="preview" title="Generate a person record for Bob aged 25.">Generate a person record for Bob aged 25.</td>
</tr>
<tr onclick="scrollToResponse(2)" style="cursor:pointer" title="Click to view full response">
<td class="rank">3</td>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-guided-json-brace <span class="variant-tag">stop-guided-json-brace</span><br><span class="afm-tag">--guided-json '{"type":"object","properties":{"color":{"type":"string"},"hex":{"type":"string"}},"required":["color","hex"]}'</span></td>
<td class="status-ok">OK</td>
<td class="ai-score" style="color:#d29922;font-weight:700">3</td><td class="ai-score" style="color:#58a6ff;font-weight:700">4</td>
<td>0.0</td>
<td>1.0</td>
<td>819</td>
<td>19.84</td>
<td><div class="bar"><div class="bar-fill" style="width:93%;background:#3fb950"></div><div class="bar-label">41.3</div></div></td>
<td class="preview" title="Describe the color blue with its hex code.">Describe the color blue with its hex code.</td>
</tr>
<tr onclick="scrollToResponse(3)" style="cursor:pointer" title="Click to view full response">
<td class="rank">4</td>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-no-match <span class="variant-tag">stop-no-match</span></td>
<td class="status-ok">OK</td>
<td class="ai-score" style="color:#f0883e;font-weight:700">2</td><td class="ai-score" style="color:#3fb950;font-weight:700">5</td>
<td>0.0</td>
<td>1.0</td>
<td>134</td>
<td>3.37</td>
<td><div class="bar"><div class="bar-fill" style="width:90%;background:#3fb950"></div><div class="bar-label">39.8</div></div></td>
<td class="preview" title="What is 2+2? Answer briefly.">What is 2+2? Answer briefly.</td>
</tr>
<tr onclick="scrollToResponse(4)" style="cursor:pointer" title="Click to view full response">
<td class="rank">5</td>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-api-word <span class="variant-tag">stop-api-word</span></td>
<td class="status-ok">OK</td>
<td class="ai-score" style="color:#f0883e;font-weight:700">2</td><td class="ai-score" style="color:#f0883e;font-weight:700">2</td>
<td>0.0</td>
<td>1.0</td>
<td>90</td>
<td>2.34</td>
<td><div class="bar"><div class="bar-fill" style="width:87%;background:#3fb950"></div><div class="bar-label">38.4</div></div></td>
<td class="preview" title="Name 5 programming languages and briefly describe each one.">Name 5 programming languages and briefly describe each one.</td>
</tr>
<tr onclick="scrollToResponse(5)" style="cursor:pointer" title="Click to view full response">
<td class="rank">6</td>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-system-numbered <span class="variant-tag">stop-system-numbered</span></td>
<td class="status-ok">OK</td>
<td class="ai-score" style="color:#f0883e;font-weight:700">2</td><td class="ai-score" style="color:#f0883e;font-weight:700">2</td>
<td>0.0</td>
<td>1.0</td>
<td>138</td>
<td>3.9</td>
<td><div class="bar"><div class="bar-fill" style="width:80%;background:#3fb950"></div><div class="bar-label">35.4</div></div></td>
<td class="preview" title="What are the main benefits of exercise?">What are the main benefits of exercise?</td>
</tr>
<tr onclick="scrollToResponse(6)" style="cursor:pointer" title="Click to view full response">
<td class="rank">7</td>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-streaming <span class="variant-tag">stop-streaming</span></td>
<td class="status-ok">OK</td>
<td class="ai-score" style="color:#f85149;font-weight:700">1</td><td class="ai-score" style="color:#f0883e;font-weight:700">2</td>
<td>0.0</td>
<td>1.0</td>
<td>161</td>
<td>4.79</td>
<td><div class="bar"><div class="bar-fill" style="width:76%;background:#3fb950"></div><div class="bar-label">33.6</div></div></td>
<td class="preview" title="List 10 planets or celestial objects, numbered 1 through 10, one per line.">List 10 planets or celestial objects, numbered 1 through 10,...</td>
</tr>
<tr onclick="scrollToResponse(7)" style="cursor:pointer" title="Click to view full response">
<td class="rank">8</td>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-non-streaming <span class="variant-tag">stop-non-streaming</span><br><span class="afm-tag">--no-streaming</span></td>
<td class="status-ok">OK</td>
<td class="ai-score" style="color:#f0883e;font-weight:700">2</td><td class="ai-score" style="color:#f0883e;font-weight:700">2</td>
<td>0.0</td>
<td>2.0</td>
<td>161</td>
<td>5.0</td>
<td><div class="bar"><div class="bar-fill" style="width:73%;background:#3fb950"></div><div class="bar-label">32.2</div></div></td>
<td class="preview" title="List 10 planets or celestial objects, numbered 1 through 10, one per line.">List 10 planets or celestial objects, numbered 1 through 10,...</td>
</tr>
<!-- Double quotes in the prompt are written as &quot; inside the title attribute so the tooltip keeps the whole prompt -->
<tr onclick="scrollToResponse(8)" style="cursor:pointer" title="Click to view full response">
<td class="rank">9</td>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-multi-word <span class="variant-tag">stop-multi-word</span></td>
<td class="status-ok">OK</td>
<td class="ai-score" style="color:#f0883e;font-weight:700">2</td><td class="ai-score" style="color:#f0883e;font-weight:700">2</td>
<td>0.0</td>
<td>2.0</td>
<td>111</td>
<td>3.48</td>
<td><div class="bar"><div class="bar-fill" style="width:72%;background:#3fb950"></div><div class="bar-label">31.9</div></div></td>
<td class="preview" title="Write a 5-step recipe for making tea. Label each step as &quot;Step 1&quot;, &quot;Step 2&quot;, etc.">Write a 5-step recipe for making tea. Label each step as "St...</td>
</tr>
<tr onclick="scrollToResponse(9)" style="cursor:pointer" title="Click to view full response">
<td class="rank">10</td>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-top-p <span class="variant-tag">stop-top-p</span></td>
<td class="status-ok">OK</td>
<td class="ai-score" style="color:#f0883e;font-weight:700">2</td><td class="ai-score" style="color:#f0883e;font-weight:700">2</td>
<td>0.0</td>
<td>1.0</td>
<td>111</td>
<td>3.54</td>
<td><div class="bar"><div class="bar-fill" style="width:71%;background:#3fb950"></div><div class="bar-label">31.4</div></div></td>
<td class="preview" title="List 10 rivers, numbered 1 through 10, one per line.">List 10 rivers, numbered 1 through 10, one per line.</td>
</tr>
<!-- Double quotes in the prompt are written as &quot; inside the title attribute so the tooltip keeps the whole prompt -->
<tr onclick="scrollToResponse(10)" style="cursor:pointer" title="Click to view full response">
<td class="rank">11</td>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-long-phrase <span class="variant-tag">stop-long-phrase</span></td>
<td class="status-ok">OK</td>
<td class="ai-score" style="color:#f85149;font-weight:700">1</td><td class="ai-score" style="color:#f0883e;font-weight:700">2</td>
<td>0.0</td>
<td>1.0</td>
<td>44</td>
<td>1.41</td>
<td><div class="bar"><div class="bar-fill" style="width:71%;background:#3fb950"></div><div class="bar-label">31.3</div></div></td>
<td class="preview" title="Write a 3-paragraph essay about renewable energy. Start the last paragraph with &quot;In conclusion&quot;.">Write a 3-paragraph essay about renewable energy. Start the ...</td>
</tr>
<tr onclick="scrollToResponse(11)" style="cursor:pointer" title="Click to view full response">
<td class="rank">12</td>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-guided-json-comma <span class="variant-tag">stop-guided-json-comma</span><br><span class="afm-tag">--guided-json '{"type":"object","properties":{"name":{"type":"string"},"age":{"type":"integer"},"city":{"type":"string"}},"required":["name","age","city"]}'</span></td>
<td class="status-ok">OK</td>
<td class="ai-score" style="color:#f0883e;font-weight:700">2</td><td class="ai-score" style="color:#f0883e;font-weight:700">2</td>
<td>0.0</td>
<td>1.0</td>
<td>58</td>
<td>1.89</td>
<td><div class="bar"><div class="bar-fill" style="width:69%;background:#3fb950"></div><div class="bar-label">30.7</div></div></td>
<td class="preview" title="Generate a person profile for someone named Alice who is 30 and lives in Paris.">Generate a person profile for someone named Alice who is 30 ...</td>
</tr>
<!-- The prompt mentions HTML tags; they are escaped as &lt; and &gt; so they display as text instead of becoming real list elements in the cell -->
<tr onclick="scrollToResponse(12)" style="cursor:pointer" title="Click to view full response">
<td class="rank">13</td>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-html-tag <span class="variant-tag">stop-html-tag</span></td>
<td class="status-ok">OK</td>
<td class="ai-score" style="color:#f0883e;font-weight:700">2</td><td class="ai-score" style="color:#f0883e;font-weight:700">2</td>
<td>0.0</td>
<td>1.0</td>
<td>107</td>
<td>3.54</td>
<td><div class="bar"><div class="bar-fill" style="width:68%;background:#3fb950"></div><div class="bar-label">30.2</div></div></td>
<td class="preview" title="Write an HTML unordered list of 5 fruits using &lt;ul&gt; and &lt;li&gt; tags.">Write an HTML unordered list of 5 fruits using &lt;ul&gt; and &lt;li&gt;...</td>
</tr>
<tr onclick="scrollToResponse(13)" style="cursor:pointer" title="Click to view full response">
<td class="rank">14</td>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-unicode <span class="variant-tag">stop-unicode</span></td>
<td class="status-ok">OK</td>
<td class="ai-score" style="color:#f0883e;font-weight:700">2</td><td class="ai-score" style="color:#f0883e;font-weight:700">2</td>
<td>0.0</td>
<td>1.0</td>
<td>38</td>
<td>1.27</td>
<td><div class="bar"><div class="bar-fill" style="width:68%;background:#3fb950"></div><div class="bar-label">30.0</div></div></td>
<td class="preview" title="List 5 items about space using bullet points (•).">List 5 items about space using bullet points (•).</td>
</tr>
<tr onclick="scrollToResponse(14)" style="cursor:pointer" title="Click to view full response">
<td class="rank">15</td>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-api-multi <span class="variant-tag">stop-api-multi</span></td>
<td class="status-ok">OK</td>
<td class="ai-score" style="color:#f0883e;font-weight:700">2</td><td class="ai-score" style="color:#f0883e;font-weight:700">2</td>
<td>0.0</td>
<td>1.0</td>
<td>98</td>
<td>3.45</td>
<td><div class="bar"><div class="bar-fill" style="width:64%;background:#3fb950"></div><div class="bar-label">28.4</div></div></td>
<td class="preview" title="List 10 animals, numbered 1 through 10, one per line.">List 10 animals, numbered 1 through 10, one per line.</td>
</tr>
<tr onclick="scrollToResponse(15)" style="cursor:pointer" title="Click to view full response">
<td class="rank">16</td>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-high-temp <span class="variant-tag">stop-high-temp</span></td>
<td class="status-ok">OK</td>
<td class="ai-score" style="color:#f0883e;font-weight:700">2</td><td class="ai-score" style="color:#f0883e;font-weight:700">2</td>
<td>1.0</td>
<td>1.0</td>
<td>144</td>
<td>5.08</td>
<td><div class="bar"><div class="bar-fill" style="width:64%;background:#3fb950"></div><div class="bar-label">28.3</div></div></td>
<td class="preview" title="List 10 random words, numbered 1 through 10, one per line.">List 10 random words, numbered 1 through 10, one per line.</td>
</tr>
<tr onclick="scrollToResponse(16)" style="cursor:pointer" title="Click to view full response">
<td class="rank">17</td>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-seed-run2 <span class="variant-tag">stop-seed-run2</span></td>
<td class="status-ok">OK</td>
<td class="ai-score" style="color:#f0883e;font-weight:700">2</td><td class="ai-score" style="color:#f0883e;font-weight:700">2</td>
<td>0.0</td>
<td>1.0</td>
<td>80</td>
<td>2.83</td>
<td><div class="bar"><div class="bar-fill" style="width:64%;background:#3fb950"></div><div class="bar-label">28.3</div></div></td>
<td class="preview" title="List 10 flowers, numbered 1 through 10, one per line.">List 10 flowers, numbered 1 through 10, one per line.</td>
</tr>
<tr onclick="scrollToResponse(17)" style="cursor:pointer" title="Click to view full response">
<td class="rank">18</td>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-api-single <span class="variant-tag">stop-api-single</span></td>
<td class="status-ok">OK</td>
<td class="ai-score" style="color:#d29922;font-weight:700">3</td><td class="ai-score" style="color:#f0883e;font-weight:700">2</td>
<td>0.0</td>
<td>7.0</td>
<td>90</td>
<td>3.19</td>
<td><div class="bar"><div class="bar-fill" style="width:64%;background:#3fb950"></div><div class="bar-label">28.3</div></div></td>
<td class="preview" title="List 10 colors, numbered 1 through 10, one per line.">List 10 colors, numbered 1 through 10, one per line.</td>
</tr>
<tr onclick="scrollToResponse(18)" style="cursor:pointer" title="Click to view full response">
<td class="rank">19</td>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-guided-json-value <span class="variant-tag">stop-guided-json-value</span><br><span class="afm-tag">--guided-json '{"type":"object","properties":{"cities":{"type":"array","items":{"type":"string"}}},"required":["cities"]}'</span></td>
<td class="status-ok">OK</td>
<td class="ai-score" style="color:#f0883e;font-weight:700">2</td><td class="ai-score" style="color:#f0883e;font-weight:700">2</td>
<td>0.0</td>
<td>2.0</td>
<td>38</td>
<td>1.4</td>
<td><div class="bar"><div class="bar-fill" style="width:62%;background:#3fb950"></div><div class="bar-label">27.2</div></div></td>
<td class="preview" title="List 5 major world cities as a JSON array. Include Tokyo.">List 5 major world cities as a JSON array. Include Tokyo.</td>
</tr>
<tr onclick="scrollToResponse(19)" style="cursor:pointer" title="Click to view full response">
<td class="rank">20</td>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-low-max-tokens <span class="variant-tag">stop-low-max-tokens</span></td>
<td class="status-ok">OK</td>
<td class="ai-score" style="color:#f0883e;font-weight:700">2</td><td class="ai-score" style="color:#8b949e">—</td>
<td>0.0</td>
<td>1.0</td>
<td>40</td>
<td>1.5</td>
<td><div class="bar"><div class="bar-fill" style="width:60%;background:#3fb950"></div><div class="bar-label">26.6</div></div></td>
<td class="preview" title="List 10 mountains, numbered 1 through 10, one per line.">List 10 mountains, numbered 1 through 10, one per line.</td>
</tr>
<tr onclick="scrollToResponse(20)" style="cursor:pointer" title="Click to view full response">
<td class="rank">21</td>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-system-pirate <span class="variant-tag">stop-system-pirate</span></td>
<td class="status-ok">OK</td>
<td class="ai-score" style="color:#f0883e;font-weight:700">2</td><td class="ai-score" style="color:#f0883e;font-weight:700">2</td>
<td>0.0</td>
<td>2.0</td>
<td>374</td>
<td>15.15</td>
<td><div class="bar"><div class="bar-fill" style="width:56%;background:#d29922"></div><div class="bar-label">24.7</div></div></td>
<td class="preview" title="Tell me about treasure hunting on the high seas.">Tell me about treasure hunting on the high seas.</td>
</tr>
<tr onclick="scrollToResponse(21)" style="cursor:pointer" title="Click to view full response">
<td class="rank">22</td>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-seed-run1 <span class="variant-tag">stop-seed-run1</span></td>
<td class="status-ok">OK</td>
<td class="ai-score" style="color:#f0883e;font-weight:700">2</td><td class="ai-score" style="color:#f0883e;font-weight:700">2</td>
<td>0.0</td>
<td>1.0</td>
<td>80</td>
<td>3.32</td>
<td><div class="bar"><div class="bar-fill" style="width:55%;background:#d29922"></div><div class="bar-label">24.1</div></div></td>
<td class="preview" title="List 10 flowers, numbered 1 through 10, one per line.">List 10 flowers, numbered 1 through 10, one per line.</td>
</tr>
<tr onclick="scrollToResponse(22)" style="cursor:pointer" title="Click to view full response">
<td class="rank">23</td>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-four-max <span class="variant-tag">stop-four-max</span></td>
<td class="status-ok">OK</td>
<td class="ai-score" style="color:#f0883e;font-weight:700">2</td><td class="ai-score" style="color:#f0883e;font-weight:700">2</td>
<td>0.0</td>
<td>2.0</td>
<td>102</td>
<td>4.56</td>
<td><div class="bar"><div class="bar-fill" style="width:51%;background:#d29922"></div><div class="bar-label">22.4</div></div></td>
<td class="preview" title="List 10 items, numbered 1 through 10. One per line.">List 10 items, numbered 1 through 10. One per line.</td>
</tr>
<tr onclick="scrollToResponse(23)" style="cursor:pointer" title="Click to view full response">
<td class="rank">24</td>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-immediate <span class="variant-tag">stop-immediate</span></td>
<td class="status-ok">OK</td>
<td class="ai-score" style="color:#58a6ff;font-weight:700">4</td><td class="ai-score" style="color:#f0883e;font-weight:700">2</td>
<td>0.0</td>
<td>1.0</td>
<td>8</td>
<td>0.43</td>
<td><div class="bar"><div class="bar-fill" style="width:42%;background:#d29922"></div><div class="bar-label">18.7</div></div></td>
<td class="preview" title="What is the capital of Japan?">What is the capital of Japan?</td>
</tr>
<tr onclick="scrollToResponse(24)" style="cursor:pointer" title="Click to view full response">
<td class="rank">25</td>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-api-double-newline <span class="variant-tag">stop-api-double-newline</span></td>
<td class="status-ok">OK</td>
<td class="ai-score" style="color:#f0883e;font-weight:700">2</td><td class="ai-score" style="color:#f0883e;font-weight:700">2</td>
<td>0.0</td>
<td>1.0</td>
<td>6</td>
<td>0.35</td>
<td><div class="bar"><div class="bar-fill" style="width:38%;background:#d29922"></div><div class="bar-label">16.9</div></div></td>
<td class="preview" title="Write a short paragraph about the ocean. Then write a second paragraph about mountains.">Write a short paragraph about the ocean. Then write a second...</td>
</tr>
<tr onclick="scrollToResponse(25)" style="cursor:pointer" title="Click to view full response">
<td class="rank">26</td>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-api-period <span class="variant-tag">stop-api-period</span></td>
<td class="status-ok">OK</td>
<td class="ai-score" style="color:#f0883e;font-weight:700">2</td><td class="ai-score" style="color:#f0883e;font-weight:700">2</td>
<td>0.0</td>
<td>1.0</td>
<td>6</td>
<td>0.38</td>
<td><div class="bar"><div class="bar-fill" style="width:36%;background:#d29922"></div><div class="bar-label">15.9</div></div></td>
<td class="preview" title="Tell me about the sun in three sentences.">Tell me about the sun in three sentences.</td>
</tr>
<tr onclick="scrollToResponse(26)" style="cursor:pointer" title="Click to view full response">
<td class="rank">27</td>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-special-chars <span class="variant-tag">stop-special-chars</span></td>
<td class="status-ok">OK</td>
<td class="ai-score" style="color:#f0883e;font-weight:700">2</td><td class="ai-score" style="color:#f0883e;font-weight:700">2</td>
<td>0.0</td>
<td>1.0</td>
<td>7</td>
<td>0.44</td>
<td><div class="bar"><div class="bar-fill" style="width:36%;background:#d29922"></div><div class="bar-label">15.7</div></div></td>
<td class="preview" title="List 3 facts about the moon. Use **bold** markdown for emphasis.">List 3 facts about the moon. Use **bold** markdown for empha...</td>
</tr>
<tr onclick="scrollToResponse(27)" style="cursor:pointer" title="Click to view full response">
<td class="rank">28</td>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-api-newline <span class="variant-tag">stop-api-newline</span></td>
<td class="status-ok">OK</td>
<td class="ai-score" style="color:#f0883e;font-weight:700">2</td><td class="ai-score" style="color:#f0883e;font-weight:700">2</td>
<td>0.0</td>
<td>2.0</td>
<td>6</td>
<td>0.46</td>
<td><div class="bar"><div class="bar-fill" style="width:29%;background:#da6d28"></div><div class="bar-label">12.9</div></div></td>
<td class="preview" title="What is the capital of France? Answer in one sentence.">What is the capital of France? Answer in one sentence.</td>
</tr>
</table>
<h2>Failed Runs</h2>
<table>
<!-- Failed runs: one row per variant that returned an error instead of a response.
     Columns: model @ variant, error message, per-reviewer AI scores, request config, load time. -->
<tr><th scope="col">Model</th><th scope="col">Error</th><th scope="col" title="codex score">codex</th><th scope="col" title="claude score">claude</th><th scope="col">Config</th><th scope="col">Load (s)</th></tr>
<!-- CLI variants: rejected because the CLI does not recognise the stop option. -->
<tr>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-cli-only <span class="variant-tag">stop-cli-only</span></td>
<td class="error-text" title="Error: Unknown option '--stop'. Did you mean '--top-p'?">Error: Unknown option '--stop'. Did you mean '--top-p'?</td>
<td class="ai-score" style="color:#f0883e;font-weight:700">2</td><td class="ai-score" style="color:#f85149;font-weight:700">1</td>
<td><span class="temp-tag">t=0.0</span> <span class="afm-tag">--stop "3."</span></td>
<td>1</td>
</tr>
<tr>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-cli-multi <span class="variant-tag">stop-cli-multi</span></td>
<td class="error-text" title="Error: Unknown option '--stop'. Did you mean '--top-p'?">Error: Unknown option '--stop'. Did you mean '--top-p'?</td>
<td class="ai-score" style="color:#f85149;font-weight:700">1</td><td class="ai-score" style="color:#f85149;font-weight:700">1</td>
<td><span class="temp-tag">t=0.0</span> <span class="afm-tag">--stop "```,END"</span></td>
<td>1</td>
</tr>
<tr>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-cli-api-merge <span class="variant-tag">stop-cli-api-merge</span></td>
<td class="error-text" title="Error: Unknown option '--stop'. Did you mean '--top-p'?">Error: Unknown option '--stop'. Did you mean '--top-p'?</td>
<td class="ai-score" style="color:#f85149;font-weight:700">1</td><td class="ai-score" style="color:#f85149;font-weight:700">1</td>
<td><span class="temp-tag">t=0.0</span> <span class="afm-tag">--stop "5."</span></td>
<td>1</td>
</tr>
<tr>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-cli-api-dedup <span class="variant-tag">stop-cli-api-dedup</span></td>
<td class="error-text" title="Error: Unknown option '--stop'. Did you mean '--top-p'?">Error: Unknown option '--stop'. Did you mean '--top-p'?</td>
<td class="ai-score" style="color:#f85149;font-weight:700">1</td><td class="ai-score" style="color:#f85149;font-weight:700">1</td>
<td><span class="temp-tag">t=0.0</span> <span class="afm-tag">--stop "3."</span></td>
<td>1</td>
</tr>
<!-- json_object variants: the request came back as HTTP 400 with a Jinja template error. -->
<tr>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-json-object-key <span class="variant-tag">stop-json-object-key</span></td>
<td class="error-text" title="Error code: 400 - {'error': {'message': 'The operation couldn’t be completed. (Jinja.TemplateException error 1.)', 'type': 'mlx_error'}}">Error code: 400 - {'error': {'message': 'The operation couldn’t be completed. (Jinja.TemplateException error 1.)', 'type': 'mlx_error'}}</td>
<td class="ai-score" style="color:#58a6ff;font-weight:700">4</td><td class="ai-score" style="color:#f85149;font-weight:700">1</td>
<td><span class="temp-tag">t=0.0</span> </td>
<td>2.0</td>
</tr>
<tr>
<td class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-json-object-no-match <span class="variant-tag">stop-json-object-no-match</span></td>
<td class="error-text" title="Error code: 400 - {'error': {'message': 'The operation couldn’t be completed. (Jinja.TemplateException error 1.)', 'type': 'mlx_error'}}">Error code: 400 - {'error': {'message': 'The operation couldn’t be completed. (Jinja.TemplateException error 1.)', 'type': 'mlx_error'}}</td>
<td class="ai-score" style="color:#f85149;font-weight:700">1</td><td class="ai-score" style="color:#f85149;font-weight:700">1</td>
<td><span class="temp-tag">t=0.0</span> </td>
<td>1.0</td>
</tr>
</table>
<h2>AI Analysis (--smart)</h2>
<!-- Collapsible panel for the codex analysis. Clicking the header calls toggleSmart_0();
     the body starts hidden (display:none). -->
<div class="response-section">
<h3 class="response-header" onclick="toggleSmart_0()">
<span class="toggle-icon" id="smart-icon-0">▶</span>
<span>codex Analysis · avg score: 2.0/5</span>
</h3>
<div class="response-body" id="smart-body-0" style="display:none">
<!-- Empty in the markup; toggleSmart_0() fills it from smartData_0 the first time the panel is opened. -->
<div class="rendered-content" id="smart-content-0"></div>
</div>
</div>
<script>
var smartData_0 = "## AFM Compatibility Report (Qwen3.5-35B-A3B-4bit variants)\n\n### 1) Broken Models\nAll hard failures are **feature-path failures**, not model-load failures.\n\n| Error type | Variants | Count | Likely cause | AFM bug vs model incompat |\n|---|---|---:|---|---|\n| CLI option parse error: `Unknown option '--stop'` | `stop-cli-only`, `stop-cli-multi`, `stop-cli-api-merge`, `stop-cli-api-dedup` | 4 | AFM CLI does not accept `--stop` though tests expect it | **Likely AFM bug / CLI-contract mismatch** |\n| `response_format=json_object` + stop => `Jinja.TemplateException` 400 | `stop-json-object-key`, `stop-json-object-no-match` | 2 | Template/rendering path crashes when combining stop + json_object | **Likely AFM bug** |\n\n### 2) Anomalies & Red Flags\n- **Major regression:** most `status=OK` runs return **empty `content`** and only internal reasoning text in `reasoning_content`.\n - Representative snippet (first ~100 chars): `\"Thinking Process:\\n\\n1. **Analyze the Request:**\\n * Task: List 10 colors...\"`\n - Affects nearly all stop tests (API, system, seed, top-p, streaming).\n- **Stop-sequence over-triggering/truncation:** many outputs stop inside planning text:\n - `stop-api-newline`: snippet `\"Thinking Process:\"`\n - `stop-api-period`: snippet `\"Thinking Process:\\n\\n1\"`\n - `stop-immediate`: snippet `\"Thinking Process:\\n\\n1. 
**\"`\n- **Guided JSON not enforced consistently:**\n - `stop-guided-json-no-match` returns freeform prose + markdown JSON block, not schema-constrained object.\n - `stop-guided-json-brace` returns plain paragraph (`Blue is a primary color...`) despite guided-json schema.\n- **No clear repetition loops/gibberish** observed.\n- **Thinking-budget exhaustion pattern:** **not observed** (empty `content` was generally from stop-trigger behavior, not near-max-token reasoning-only runs).\n\n### 3) Variant Comparison\n| Variant pair/group | Better | Worse | Notes |\n|---|---|---|---|\n| `stop-streaming` vs `stop-non-streaming` | Tie | Tie | Nearly identical reasoning-only output; issue is independent of streaming mode. |\n| `stop-seed-run1` vs `stop-seed-run2` | Tie | Tie | Deterministic (same output), but both reasoning-only and truncated. |\n| Guided JSON variants | `stop-guided-json-brace` (has usable final content) | `stop-guided-json-value`, `stop-guided-json-comma` (empty content), `...no-match` (schema drift) | Structured-output path is inconsistent under stop constraints. |\n| Stop matching | `stop-no-match` | Most explicit stop variants | When stop token never matches, model can return actual answer (`\"4\"`). 
|\n\n### 4) Quality Assessment (Coherence/Relevance 1-5)\nFlagged below 3 on either metric are marked `\u26a0`.\n\n| Variant | Coh | Rel | Notes |\n|---|---:|---:|---|\n| stop-api-single | 3 | 2 \u26a0 | Reasoning only, no user answer |\n| stop-api-multi | 3 | 2 \u26a0 | Reasoning only |\n| stop-api-newline | 2 \u26a0 | 1 \u26a0 | Immediate truncation |\n| stop-api-double-newline | 2 \u26a0 | 1 \u26a0 | Immediate truncation |\n| stop-api-word | 3 | 2 \u26a0 | Reasoning only |\n| stop-api-period | 2 \u26a0 | 1 \u26a0 | Truncated at period stop |\n| stop-streaming | 3 | 2 \u26a0 | Reasoning only |\n| stop-non-streaming | 3 | 2 \u26a0 | Reasoning only |\n| stop-guided-json-value | 2 \u26a0 | 1 \u26a0 | Truncated + invalid JSON (`is_valid_json=false`) |\n| stop-guided-json-comma | 2 \u26a0 | 1 \u26a0 | Truncated reasoning only |\n| stop-guided-json-no-match | 4 | 3 | Usable but ignores schema intent |\n| stop-guided-json-brace | 4 | 4 | Good answer content, but not schema-shaped |\n| stop-long-phrase | 2 \u26a0 | 1 \u26a0 | Truncated at `\"In conclusion\"` stop |\n| stop-multi-word | 3 | 2 \u26a0 | Stops before usable final answer |\n| stop-code-fence | 3 | 2 \u26a0 | Plans code, outputs none |\n| stop-no-match | 4 | 5 | Correct final answer `\"4\"` |\n| stop-immediate | 2 \u26a0 | 1 \u26a0 | Immediate stop hit |\n| stop-special-chars | 2 \u26a0 | 1 \u26a0 | Immediate stop hit |\n| stop-html-tag | 3 | 2 \u26a0 | Stops in planning markup |\n| stop-unicode | 2 \u26a0 | 1 \u26a0 | Immediate stop hit |\n| stop-four-max | 3 | 2 \u26a0 | Stops at numbered token |\n| stop-system-pirate | 3 | 2 \u26a0 | Persona planning only |\n| stop-system-numbered | 3 | 2 \u26a0 | Planning only |\n| stop-high-temp | 3 | 2 \u26a0 | Planning only |\n| stop-seed-run1 | 3 | 2 \u26a0 | Planning only |\n| stop-seed-run2 | 3 | 2 \u26a0 | Planning only |\n| stop-top-p | 3 | 2 \u26a0 | Planning only |\n| stop-low-max-tokens | 2 \u26a0 | 1 \u26a0 | Stops at `\"2.\"` before answer |\n\n### 5) 
Performance Summary (sorted by tokens/sec)\n| Variant | tok/s | Status | Flag |\n|---|---:|---|---|\n| stop-code-fence | 44.15 | OK | Suspiciously fast for reasoning-only/no final answer |\n| stop-guided-json-no-match | 41.29 | OK | High throughput with schema drift |\n| stop-guided-json-brace | 41.28 | OK | High throughput; structured-output mismatch |\n| stop-no-match | 39.80 | OK | Healthy content return |\n| stop-api-word | 38.38 | OK | Reasoning-only |\n| stop-system-numbered | 35.37 | OK | Reasoning-only |\n| stop-streaming | 33.59 | OK | Reasoning-only |\n| stop-non-streaming | 32.20 | OK | Reasoning-only |\n| stop-multi-word | 31.92 | OK | Truncated |\n| stop-top-p | 31.40 | OK | Reasoning-only |\n| stop-long-phrase | 31.28 | OK | Truncated |\n| stop-guided-json-comma | 30.67 | OK | Truncated |\n| stop-html-tag | 30.22 | OK | Truncated |\n| stop-unicode | 30.02 | OK | Immediate stop |\n| stop-api-multi | 28.43 | OK | Reasoning-only |\n| stop-high-temp | 28.32 | OK | Reasoning-only |\n| stop-seed-run2 | 28.31 | OK | Reasoning-only |\n| stop-api-single | 28.26 | OK | Reasoning-only |\n| stop-guided-json-value | 27.19 | OK | Invalid/truncated |\n| stop-low-max-tokens | 26.61 | OK | Early stop |\n| stop-system-pirate | 24.69 | OK | Reasoning-only |\n| stop-seed-run1 | 24.13 | OK | Reasoning-only |\n| stop-four-max | 22.39 | OK | Early stop |\n| stop-immediate | 18.70 | OK | Immediate stop |\n| stop-api-double-newline | 16.95 | OK | Immediate stop |\n| stop-api-period | 15.95 | OK | Immediate stop |\n| stop-special-chars | 15.74 | OK | Immediate stop |\n| stop-api-newline | 12.93 | OK | Immediate stop |\n| stop-cli-only | n/a | FAIL | CLI option error |\n| stop-cli-multi | n/a | FAIL | CLI option error |\n| stop-cli-api-merge | n/a | FAIL | CLI option error |\n| stop-cli-api-dedup | n/a | FAIL | CLI option error |\n| stop-json-object-key | n/a | FAIL | Jinja template crash |\n| stop-json-object-no-match | n/a | FAIL | Jinja template crash |\n\n### 6) 
Recommendations (Prioritized)\n\n**Likely AFM bug**\n1. **Stop token handling leaks/truncates on reasoning stream** across most API stop tests; user-visible `content` is empty while hidden reasoning is emitted.\n2. **CLI/API parity:** implement or document `--stop` support; current CLI rejects expected flag.\n3. **`response_format=json_object` + stop crash** (`Jinja.TemplateException`) should be fixed urgently.\n4. **Guided JSON + stop interaction** is inconsistent (invalid JSON, schema bypass).\n\n**Model quality issue**\n1. No primary evidence this is model-quality-specific; failures are mostly harness/runtime behavior, not intrinsic text degeneration.\n\n**Working well**\n1. `stop-no-match` (returns correct final answer).\n2. `stop-guided-json-brace` has coherent content for prompt semantics, though structured-output contract still questionable.";
var smartRendered_0 = false;
/**
 * Expand or collapse the codex analysis panel (#smart-body-0).
 *
 * On the first expansion the markdown held in smartData_0 is rendered into
 * #smart-content-0; smartRendered_0 records that so later toggles only
 * show/hide the already-rendered panel. When the `marked` global is not
 * available, the raw markdown is shown verbatim inside a <pre>.
 */
function toggleSmart_0() {
  var body = document.getElementById('smart-body-0');
  var icon = document.getElementById('smart-icon-0');
  if (body.style.display === 'none') {
    body.style.display = 'block';
    icon.classList.add('open');
    if (!smartRendered_0) {
      smartRendered_0 = true;
      var el = document.getElementById('smart-content-0');
      if (typeof marked !== 'undefined') {
        // NOTE(review): marked does not sanitize its HTML output and this text is
        // model-generated; consider passing it through a sanitizer before insertion.
        el.innerHTML = marked.parse(smartData_0);
      } else {
        // Plain-text fallback. textContent never parses markup, so any '<', '>'
        // or '&' in the report is displayed literally instead of becoming DOM.
        var pre = document.createElement('pre');
        pre.textContent = smartData_0;
        el.innerHTML = '';
        el.appendChild(pre);
      }
    }
  } else {
    body.style.display = 'none';
    icon.classList.remove('open');
  }
}
</script>
<!-- Collapsible panel for the claude analysis. Clicking the header calls toggleSmart_1();
     the body starts hidden (display:none). -->
<div class="response-section">
<h3 class="response-header" onclick="toggleSmart_1()">
<span class="toggle-icon" id="smart-icon-1">▶</span>
<span>claude Analysis · avg score: 2.1/5</span>
</h3>
<div class="response-body" id="smart-body-1" style="display:none">
<!-- Empty in the markup; toggleSmart_1() fills it from smartData_1 the first time the panel is opened. -->
<div class="rendered-content" id="smart-content-1"></div>
</div>
</div>
<script>
var smartData_1 = "# Stop Sequence Test Report \u2014 Qwen3.5-35B-A3B-4bit\n\n**AFM Version:** v0.9.5-0cfba17 | **Date:** 2026-02-25 | **Model:** mlx-community/Qwen3.5-35B-A3B-4bit\n\n## 1. Broken Models\n\n### CLI `--stop` flag not implemented (4 failures)\n\n| Variant | Error |\n|---------|-------|\n| stop-cli-only | `Unknown option '--stop'. Did you mean '--top-p'?` |\n| stop-cli-multi | Same |\n| stop-cli-api-merge | Same |\n| stop-cli-api-dedup | Same |\n\n**Verdict: AFM bug.** The `--stop` CLI flag does not exist. Stop sequences only work via the API `stop` field. This is a missing feature, not a model issue.\n\n### `response_format: json_object` + stop sequences (2 failures)\n\n| Variant | Error |\n|---------|-------|\n| stop-json-object-key | `Jinja.TemplateException error 1` |\n| stop-json-object-no-match | Same |\n\n**Verdict: AFM bug.** The Jinja template rendering crashes when `response_format: json_object` is used. This is independent of stop sequences \u2014 the stop field just happens to be present. The template error likely occurs because the `json_object` response format injects a system prompt that the Qwen3.5 chat template can't handle.\n\n## 2. Anomalies & Red Flags\n\n### Critical: Stop sequences match inside `<think>` reasoning, not just visible content\n\nNearly every API stop-sequence test stopped generation inside the model's `<think>` block, producing **empty `content`** with only `reasoning_content`. The stop sequences are being applied to the raw token stream (including think tags), not just the user-visible output.\n\nExamples:\n- **stop-api-newline** (`stop: [\"\\n\"]`): Stopped after just \"Thinking Process:\" \u2014 6 tokens, empty content\n- **stop-api-double-newline** (`stop: [\"\\n\\n\"]`): Same \u2014 6 tokens, empty content\n- **stop-api-period** (`stop: [\".\"]`): Stopped at \"Thinking Process:\\n\\n1\" \u2014 6 tokens\n- **stop-immediate** (`stop: [\"The\",\"I\",\"A\"]`): Stopped at \"Thinking Process:\\n\\n1. 
**\" \u2014 8 tokens\n- **stop-special-chars** (`stop: [\"**\"]`): Stopped at \"Thinking Process:\\n\\n1.\" \u2014 7 tokens\n- **stop-unicode** (`stop: [\"\u2022\"]`): 38 tokens, still only reasoning\n\n**This is likely an AFM bug.** Stop sequences should arguably only apply to the visible `content` stream, not to reasoning tokens inside `<think>` tags. For thinking models, the stop sequence implementation needs to be aware of the think/content boundary.\n\n### Thinking-Budget Exhaustion Pattern\n\nSeveral tests show the model spending its entire (effective) budget on reasoning with no visible content. This is **not** budget exhaustion in the max_tokens sense \u2014 it's stop sequences cutting off generation while still in the reasoning phase.\n\n| Variant | Tokens | Content | Stopped By |\n|---------|--------|---------|------------|\n| stop-api-single | 90 | empty | \"3.\" in reasoning |\n| stop-api-multi | 98 | empty | \"4.\" in reasoning |\n| stop-streaming | 161 | empty | \"3.\" in reasoning |\n| stop-non-streaming | 161 | empty | \"3.\" in reasoning |\n| stop-long-phrase | 44 | empty | \"In conclusion\" in reasoning |\n| stop-multi-word | 111 | empty | \"Step 3\" in reasoning |\n| stop-code-fence | 122 | empty | \"```\" in reasoning |\n| stop-html-tag | 107 | empty | \"</li>\" in reasoning |\n\n### Tests that worked correctly\n\n| Variant | Tokens | Content | Notes |\n|---------|--------|---------|-------|\n| stop-no-match (`XYZZY`) | 134 | \"4\" | Correct \u2014 stop string never matched, model completed |\n| stop-guided-json-no-match (`XYZZY`) | 1150 | Full response | Correct \u2014 guided JSON + non-matching stop |\n| stop-guided-json-brace (`}`) | 819 | Full response | Correct \u2014 stop at `}` but model finished before hitting it in content |\n\n## 3. 
Variant Comparison\n\n### Streaming vs Non-Streaming (stop-streaming vs stop-non-streaming)\nSame prompt, same stop `[\"3.\"]` \u2014 **identical behavior**: both produced 161 tokens, empty content, reasoning stopped at same point. Stop sequence handling is consistent across modes.\n\n### Seed Reproducibility (stop-seed-run1 vs stop-seed-run2)\nSame prompt, same seed 42, same stop `[\"3.\"]` \u2014 **identical**: 80 tokens each, same reasoning content. Deterministic generation confirmed.\n\n### Guided JSON variants\n- **stop-guided-json-value** (`stop: [\"Tokyo\"]`): 38 tokens, empty content \u2014 stopped in reasoning\n- **stop-guided-json-comma** (`stop: [\",\"]`): 58 tokens, empty content \u2014 stopped in reasoning\n- **stop-guided-json-no-match** (`stop: [\"XYZZY\"]`): 1150 tokens, **full response with content** \u2014 correctly unaffected\n- **stop-guided-json-brace** (`stop: [\"}\"]`): 819 tokens, **full response with content** \u2014 interestingly worked\n\nThe `}` and `XYZZY` cases produced full content because the model's reasoning didn't contain those strings, allowing it to exit `<think>` and produce visible output.\n\n### Temperature (stop-high-temp, temp=1.0)\n144 tokens, empty content, stopped in reasoning. Higher temperature didn't change the fundamental stop-in-think issue.\n\n## 4. Quality Assessment\n\n| Variant | Coherence | Relevance | Notes |\n|---------|-----------|-----------|-------|\n| stop-no-match | 5 | 5 | Clean, correct answer \"4\" |\n| stop-guided-json-no-match | 4 | 4 | Good response, extra plain-text version unnecessary |\n| stop-guided-json-brace | 5 | 5 | Clean description with hex code |\n| All others with content=empty | N/A | N/A | Cannot rate \u2014 stopped in reasoning |\n\nModels scoring below 3: None had bad quality *when they produced output*. The issue is stop sequences preventing output entirely.\n\n## 5. 
Performance Summary\n\n| Variant | tok/s | Tokens | Notes |\n|---------|-------|--------|-------|\n| stop-code-fence | 44.15 | 122 | |\n| stop-guided-json-no-match | 41.29 | 1150 | Best sustained throughput |\n| stop-guided-json-brace | 41.28 | 819 | |\n| stop-no-match | 39.80 | 134 | |\n| stop-api-word | 38.38 | 90 | |\n| stop-system-numbered | 35.37 | 138 | |\n| stop-streaming | 33.59 | 161 | |\n| stop-non-streaming | 32.20 | 161 | |\n| stop-multi-word | 31.92 | 111 | |\n| stop-top-p | 31.40 | 111 | |\n| stop-long-phrase | 31.28 | 44 | |\n| stop-guided-json-comma | 30.67 | 58 | |\n| stop-html-tag | 30.22 | 107 | |\n| stop-unicode | 30.02 | 38 | |\n| stop-api-multi | 28.43 | 98 | |\n| stop-high-temp | 28.32 | 144 | |\n| stop-api-single | 28.26 | 90 | |\n| stop-seed-run2 | 28.31 | 80 | |\n| stop-low-max-tokens | 26.61 | 40 | |\n| stop-system-pirate | 24.69 | 374 | |\n| stop-seed-run1 | 24.13 | 80 | |\n| stop-four-max | 22.39 | 102 | |\n| stop-immediate | 18.70 | 8 | Low token count = startup overhead |\n| stop-api-double-newline | 16.95 | 6 | |\n| stop-api-period | 15.95 | 6 | |\n| stop-special-chars | 15.74 | 7 | |\n| stop-api-newline | 12.93 | 6 | |\n\nPerformance is consistent at ~28-44 tok/s for sustained generation. Low tok/s values (12-19) correlate with very short generations (<10 tokens) where startup overhead dominates. No suspicious outliers.\n\n## 6. Recommendations\n\n### Likely AFM Bugs (Priority Order)\n\n1. **Stop sequences match inside `<think>` tags** \u2014 This is the highest-priority issue. For thinking models, stop sequences should only apply to content *after* the closing `</think>` tag. This causes 80%+ of tests to produce empty responses. The fix should be in the streaming loop where stop sequence matching occurs.\n\n2. **`--stop` CLI flag missing** \u2014 4 tests fail because the CLI doesn't accept `--stop`. Either add it as a CLI option or document that stop sequences are API-only.\n\n3. 
**`response_format: json_object` crashes with Jinja error** \u2014 Template rendering fails for this model when json_object format is requested. Likely needs a model-specific template fix or fallback.\n\n### Model Quality Issues\n- None. When the model produces output (stop-no-match, guided-json-no-match, guided-json-brace), quality is good.\n\n### Working Well\n- API stop sequences correctly stop generation when the stop string appears (the matching itself works)\n- Streaming and non-streaming produce identical results\n- Seed-based reproducibility works\n- Guided JSON + stop sequences coexist (when no template error)\n- Non-matching stop sequences correctly allow full generation";
var smartRendered_1 = false;
/**
 * Expand or collapse the claude analysis panel (#smart-body-1).
 *
 * On the first expansion the markdown held in smartData_1 is rendered into
 * #smart-content-1; smartRendered_1 records that so later toggles only
 * show/hide the already-rendered panel. When the `marked` global is not
 * available, the raw markdown is shown verbatim inside a <pre>.
 */
function toggleSmart_1() {
  var body = document.getElementById('smart-body-1');
  var icon = document.getElementById('smart-icon-1');
  if (body.style.display === 'none') {
    body.style.display = 'block';
    icon.classList.add('open');
    if (!smartRendered_1) {
      smartRendered_1 = true;
      var el = document.getElementById('smart-content-1');
      if (typeof marked !== 'undefined') {
        // NOTE(review): marked does not sanitize its HTML output and this text is
        // model-generated; consider passing it through a sanitizer before insertion.
        el.innerHTML = marked.parse(smartData_1);
      } else {
        // Plain-text fallback. The report text contains literal tags such as
        // <think> and </li>; textContent keeps them as visible text rather than
        // letting the browser parse them as elements.
        var pre = document.createElement('pre');
        pre.textContent = smartData_1;
        el.innerHTML = '';
        el.appendChild(pre);
      }
    }
  } else {
    body.style.display = 'none';
    icon.classList.remove('open');
  }
}
</script>
<h2>Full Responses</h2>
<!-- type="button": in-page toggle only; a button without a type defaults to "submit". -->
<button class="expand-all" type="button" onclick="toggleAll()">Expand / Collapse All</button>
<script>var responseData = {};</script>
<div class="response-section" id="resp-0">
<h3 class="response-header" onclick="toggleResponse(0)">
<span class="toggle-icon" id="icon-0">▶</span>
<span class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-code-fence</span>
<span class="response-meta">122 tokens · 44.1 tok/s</span>
</h3>
<div class="response-body" id="body-0" style="display:none">
<div class="config-panel"><span class="config-badge" style="border-color:#d29922">temp: <strong>0.0</strong></span> <span class="config-badge" style="border-color:#58a6ff">max_tokens: <strong>4096</strong></span> <span class="config-badge" style="border-color:#a371f7">variant: <strong>stop-code-fence</strong></span> <span class="config-badge" style="border-color:#d29922">stop: <strong>```</strong></span> <span class="config-badge" style="border-color:#3fb950">finish: <strong>stop</strong></span> <span class="config-badge" style="border-color:#8b949e">load: <strong>2.0s</strong></span> <span class="config-badge" style="border-color:#8b949e">gen: <strong>2.76s</strong></span> <span class="config-badge" style="border-color:#8b949e">prompt_tok: <strong>18</strong></span> <span class="config-badge" style="border-color:#8b949e">comp_tok: <strong>122</strong></span> <span class="config-badge" style="border-color:#d29922">tok/s: <strong>44.1</strong></span></div>
<div class="prompt-box"><span class="prompt-label">PROMPT</span> Write a Python function that computes factorial, inside a code block.</div>
<div class="rendered-content" id="content-0"></div>
</div>
</div>
<script>responseData[0] = "<details><summary><strong>\ud83e\udde0 Reasoning</strong></summary>\n\nThe user wants me to write a Python function that computes factorial. I should provide this inside a code block as requested.\n\nLet me write a clear, well-documented function that calculates the factorial of a number. I'll include:\n1. Input validation\n2. The factorial calculation\n3. Some example usage\n\nI'll provide both an iterative and recursive approach, or just one clear solution. Let me go with a clean iterative approach as it's more efficient for larger numbers.\n\n</details>\n\nThe user wants me to write a Python function that computes factorial. I should provide this inside a code block as requested. Let me write a clear, well-documented function that calculates the factorial of a number. I'll include: 1. Input validation 2. The factorial calculation 3. Some example usag";</script>
<div class="response-section" id="resp-1">
<h3 class="response-header" onclick="toggleResponse(1)">
<span class="toggle-icon" id="icon-1">▶</span>
<span class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-guided-json-no-match</span>
<span class="response-meta">1150 tokens · 41.3 tok/s</span>
</h3>
<div class="response-body" id="body-1" style="display:none">
<div class="config-panel"><span class="config-badge" style="border-color:#d29922">temp: <strong>0.0</strong></span> <span class="config-badge" style="border-color:#58a6ff">max_tokens: <strong>4096</strong></span> <span class="config-badge" style="border-color:#a371f7">variant: <strong>stop-guided-json-no-match</strong></span> <span class="config-badge" style="border-color:#d29922">stop: <strong>XYZZY</strong></span> <span class="config-badge" style="border-color:#f0883e">afm: <strong>--guided-json '{"type":"object","properties":{"name":{"type":"string"},"age":{"type":"integer"}},"required":["name","age"]}'</strong></span> <span class="config-badge" style="border-color:#3fb950">finish: <strong>stop</strong></span> <span class="config-badge" style="border-color:#8b949e">load: <strong>1.0s</strong></span> <span class="config-badge" style="border-color:#8b949e">gen: <strong>27.85s</strong></span> <span class="config-badge" style="border-color:#8b949e">prompt_tok: <strong>32</strong></span> <span class="config-badge" style="border-color:#8b949e">comp_tok: <strong>1150</strong></span> <span class="config-badge" style="border-color:#d29922">tok/s: <strong>41.3</strong></span></div>
<div class="prompt-box"><span class="prompt-label">PROMPT</span> Generate a person record for Bob aged 25.</div>
<div class="rendered-content" id="content-1"></div>
</div>
</div>
<script>responseData[1] = "<details><summary><strong>\ud83e\udde0 Reasoning</strong></summary>\n\nThinking Process:\n\n1. **Analyze the Request:**\n * Task: Generate a person record.\n * Subject: Bob.\n * Age: 25.\n * Goal: Create a structured or formatted representation of a person's data.\n\n2. **Determine the Format:**\n * There's no specific format requested (e.g., JSON, SQL, CSV, plain text).\n * Best approach: Provide a clear, common format like JSON or a simple key-value list, as these are standard for \"records.\" I'll provide a few common formats to be helpful.\n\n3. **Draft Content:**\n * Name: Bob\n * Age: 25\n * Need to add some plausible additional fields to make it a realistic \"record\" (e.g., ID, Email, Address, Date of Birth).\n * *Self-Correction:* Keep it simple but realistic. Don't invent too much sensitive data, but enough to look like a record.\n\n4. **Drafting the Record:**\n * *Option 1 (JSON):* Good for programming contexts.\n * *Option 2 (SQL/Database):* Good for database contexts.\n * *Option 3 (Plain Text):* Good for general reading.\n\n Let's go with a JSON-like structure as it's very common for \"records\" in tech contexts, but also offer a plain text version for clarity.\n\n * Name: Bob\n * Age: 25\n * ID: (Generate a dummy ID)\n * Email: (Generate a dummy email)\n * Status: Active\n\n5. **Refining the Output:**\n * Make it look professional.\n * Ensure the age is exactly 25 as requested.\n\n6. **Final Polish:**\n * Create a JSON object.\n * Add a brief explanation.\n\n *Draft:*\n ```json\n {\n \"id\": 1001,\n \"first_name\": \"Bob\",\n \"last_name\": \"Smith\",\n \"age\": 25,\n \"email\": \"bob.smith@example.com\",\n \"created_at\": \"2023-10-27\"\n }\n ```\n\n7. **Final Decision:** Provide a clear, structured JSON format as it's the most standard way to represent a \"record\" in modern contexts, but keep the data generic enough.\n\n *Wait, keep it simple.* Just a clear text block or JSON is fine. 
I'll provide a JSON format as it's the most versatile \"record\" format.\n\n *Revised Plan:*\n 1. State the record.\n * Use JSON format.\n * Include basic fields (ID, Name, Age, Email, Status).\n\n8. **Execution:** (Writing the response)\n * Header: Person Record\n * Content: JSON block.\n * Ensure \"Bob\" and \"25\" are prominent.\n\n *Wait, one more check:* Is there any safety concern? No, generating a dummy record for \"Bob\" is harmless.\n\n *Let's make it slightly more detailed to be useful.*\n Fields: ID, First Name, Last Name, Age, Email, Address, Created Date.\n\n *Let's write it.*\n ```json\n {\n \"id\": \"USR-2023-001\",\n \"first_name\": \"Bob\",\n \"last_name\": \"Smith\",\n \"age\": 25,\n \"email\": \"bob.smith@example.com\",\n \"status\": \"active\"\n }\n ```\n\n *Actually, let's keep it even simpler to directly answer the prompt without over-engineering.*\n Just a clear representation.\n\n *Decision:* I will provide a JSON format as it is the standard for data records.\n\n9. **Final Output Generation:** (Matches the thought process above).\n * Name: Bob\n * Age: 25\n * Add dummy fields for completeness.\n\n *Wait, I should check if there are any constraints I missed.* No.\n *Okay, proceed.*\n\n *Wait, I need to make sure I don't hallucinate specific real-world data.* Use placeholders or generic data.\n\n *Okay, ready to generate.*\n\n</details>\n\nHere is a sample person record for Bob, formatted in JSON:\n\n```json\n{\n \"id\": \"P-2023-8492\",\n \"first_name\": \"Bob\",\n \"last_name\": \"Smith\",\n \"age\": 25,\n \"email\": \"bob.smith@example.com\",\n \"status\": \"active\",\n \"created_at\": \"2023-10-27T14:30:00Z\"\n}\n```\n\n**Plain Text Version:**\n* **ID:** P-2023-8492\n* **Name:** Bob Smith\n* **Age:** 25\n* **Email:** bob.smith@example.com\n* **Status:** Active";</script>
<div class="response-section" id="resp-2">
<h3 class="response-header" onclick="toggleResponse(2)">
<span class="toggle-icon" id="icon-2">▶</span>
<span class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-guided-json-brace</span>
<span class="response-meta">819 tokens · 41.3 tok/s</span>
</h3>
<div class="response-body" id="body-2" style="display:none">
<div class="config-panel"><span class="config-badge" style="border-color:#d29922">temp: <strong>0.0</strong></span> <span class="config-badge" style="border-color:#58a6ff">max_tokens: <strong>4096</strong></span> <span class="config-badge" style="border-color:#a371f7">variant: <strong>stop-guided-json-brace</strong></span> <span class="config-badge" style="border-color:#d29922">stop: <strong>}</strong></span> <span class="config-badge" style="border-color:#f0883e">afm: <strong>--guided-json '{"type":"object","properties":{"color":{"type":"string"},"hex":{"type":"string"}},"required":["color","hex"]}'</strong></span> <span class="config-badge" style="border-color:#3fb950">finish: <strong>stop</strong></span> <span class="config-badge" style="border-color:#8b949e">load: <strong>1.0s</strong></span> <span class="config-badge" style="border-color:#8b949e">gen: <strong>19.84s</strong></span> <span class="config-badge" style="border-color:#8b949e">prompt_tok: <strong>30</strong></span> <span class="config-badge" style="border-color:#8b949e">comp_tok: <strong>819</strong></span> <span class="config-badge" style="border-color:#d29922">tok/s: <strong>41.3</strong></span></div>
<div class="prompt-box"><span class="prompt-label">PROMPT</span> Describe the color blue with its hex code.</div>
<div class="rendered-content" id="content-2"></div>
</div>
</div>
<script>responseData[2] = "<details><summary><strong>\ud83e\udde0 Reasoning</strong></summary>\n\nThinking Process:\n\n1. **Analyze the Request:**\n * Task: Describe the color blue.\n * Requirement: Include its hex code.\n * Tone: Helpful, informative.\n\n2. **Identify the Color:**\n * Color: Blue.\n * Hex Code: There isn't just one \"blue.\" There are many shades (royal blue, navy, sky blue, etc.). However, the standard \"blue\" in web colors (HTML/CSS) is usually `#0000FF`. I should mention that there are variations but provide the standard one.\n\n3. **Draft the Description:**\n * *Visual/Emotional:* Cool, calm, trustworthy, vast (sky, ocean), cold, serene.\n * *Technical:* Hex code `#0000FF`.\n * *Context:* Primary color, subtractive color model (CMYK), etc.\n\n4. **Refine the Output:**\n * Make it clear and concise.\n * Provide the standard hex code.\n * Add some descriptive flair about what blue represents.\n\n5. **Drafting the Content:**\n * Start with the hex code.\n * Describe the visual appearance (pure blue).\n * Add symbolic meaning (calm, depth).\n * Mention variations (optional but helpful).\n\n6. **Final Polish:**\n * \"Blue is a primary color often associated with the sky and the ocean.\"\n * \"Its standard hex code is #0000FF.\"\n * Combine into a cohesive paragraph.\n\n7. **Final Review:** Does it meet all constraints? Yes.\n\n * *Self-Correction on Hex Code:* While `#0000FF` is the web standard \"Blue,\" sometimes people mean a generic blue. I should specify that this is the pure spectral blue used in digital design.\n\n8. **Final Output Generation:** (Matches the drafted thought process).\n * Hex: #0000FF\n * Description: Cool, deep, digital blue.\n\n *Wait, let's make it a bit more descriptive.*\n \"Blue is a cool, calming color...\"\n \"Hex Code: #0000FF\"\n\n Let's combine for a nice response.\n\n *Revised Plan:*\n 1. 
State the hex code clearly.\n * Describe the visual and emotional qualities.\n * Mention it's a primary color.\n\n *Draft:*\n Blue is a primary color that evokes feelings of calmness, stability, and trust. It is often associated with the sky and the sea. In digital design, the standard web color for blue is represented by the hex code **#0000FF**. This specific shade is a vivid, saturated blue that sits between cyan and violet on the color spectrum.\n\n *Wait, keep it simple as per the prompt \"Describe the color blue with its hex code.\"*\n\n *Final Version:*\n Blue is a primary color associated with the sky, the ocean, and depth. It is often perceived as cool, serene, and trustworthy.\n\n **Hex Code:** `#0000FF`\n\n This specific code represents pure, saturated blue in the RGB color model.\n\n *Looks good.*\n\n9. **Final Output Construction:** (Proceed to generate response).\n\n</details>\n\nBlue is a primary color often associated with the sky, the ocean, and depth. It is typically perceived as cool, calm, and trustworthy, making it a popular choice for branding and design.\n\n**Hex Code:** `#0000FF`\n\nThis specific code represents pure, saturated blue in the RGB color model used for screens and the web.";</script>
<div class="response-section" id="resp-3">
<h3 class="response-header" onclick="toggleResponse(3)">
<span class="toggle-icon" id="icon-3">▶</span>
<span class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-no-match</span>
<span class="response-meta">134 tokens · 39.8 tok/s</span>
</h3>
<div class="response-body" id="body-3" style="display:none">
<div class="config-panel"><span class="config-badge" style="border-color:#d29922">temp: <strong>0.0</strong></span> <span class="config-badge" style="border-color:#58a6ff">max_tokens: <strong>256</strong></span> <span class="config-badge" style="border-color:#a371f7">variant: <strong>stop-no-match</strong></span> <span class="config-badge" style="border-color:#d29922">stop: <strong>XYZZY_NEVER_MATCH</strong></span> <span class="config-badge" style="border-color:#3fb950">finish: <strong>stop</strong></span> <span class="config-badge" style="border-color:#8b949e">load: <strong>1.0s</strong></span> <span class="config-badge" style="border-color:#8b949e">gen: <strong>3.37s</strong></span> <span class="config-badge" style="border-color:#8b949e">prompt_tok: <strong>31</strong></span> <span class="config-badge" style="border-color:#8b949e">comp_tok: <strong>134</strong></span> <span class="config-badge" style="border-color:#d29922">tok/s: <strong>39.8</strong></span></div>
<div class="prompt-box"><span class="prompt-label">PROMPT</span> What is 2+2? Answer briefly.</div>
<div class="rendered-content" id="content-3"></div>
</div>
</div>
<script>responseData[3] = "<details><summary><strong>\ud83e\udde0 Reasoning</strong></summary>\n\nThinking Process:\n\n1. **Analyze the Request:**\n * Question: \"What is 2+2?\"\n * Constraint: \"Answer briefly.\"\n\n2. **Determine the Answer:**\n * Mathematical fact: 2 + 2 = 4.\n\n3. **Formulate the Output:**\n * Keep it short and direct.\n * Draft: \"4.\" or \"The answer is 4.\"\n\n4. **Final Selection:** \"4.\" is the briefest and most accurate.\n\n5. **Output:** 4.cw\n\n</details>\n\n4";</script>
<div class="response-section" id="resp-4">
<h3 class="response-header" onclick="toggleResponse(4)">
<span class="toggle-icon" id="icon-4">▶</span>
<span class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-api-word</span>
<span class="response-meta">90 tokens · 38.4 tok/s</span>
</h3>
<div class="response-body" id="body-4" style="display:none">
<div class="config-panel"><span class="config-badge" style="border-color:#d29922">temp: <strong>0.0</strong></span> <span class="config-badge" style="border-color:#58a6ff">max_tokens: <strong>4096</strong></span> <span class="config-badge" style="border-color:#a371f7">variant: <strong>stop-api-word</strong></span> <span class="config-badge" style="border-color:#d29922">stop: <strong>Python</strong></span> <span class="config-badge" style="border-color:#3fb950">finish: <strong>stop</strong></span> <span class="config-badge" style="border-color:#8b949e">load: <strong>1.0s</strong></span> <span class="config-badge" style="border-color:#8b949e">gen: <strong>2.34s</strong></span> <span class="config-badge" style="border-color:#8b949e">prompt_tok: <strong>16</strong></span> <span class="config-badge" style="border-color:#8b949e">comp_tok: <strong>90</strong></span> <span class="config-badge" style="border-color:#d29922">tok/s: <strong>38.4</strong></span></div>
<div class="prompt-box"><span class="prompt-label">PROMPT</span> Name 5 programming languages and briefly describe each one.</div>
<div class="rendered-content" id="content-4"></div>
</div>
</div>
<script>responseData[4] = "<details><summary><strong>\ud83e\udde0 Reasoning</strong></summary>\n\nThinking Process:\n\n1. **Analyze the Request:**\n * Task: Name 5 programming languages.\n * Requirement: Briefly describe each one.\n * Constraint: Keep it concise and informative.\n\n2. **Select Programming Languages:**\n * I need to choose 5 popular, distinct, and widely recognized languages to provide a good overview.\n * Options:\n\n</details>\n\nThinking Process: 1. **Analyze the Request:** * Task: Name 5 programming languages. * Requirement: Briefly describe each one. * Constraint: Keep it concise and informative. 2. **Select Programming Languages:** * I need to choose 5 popular, distinct, and widely recognized ";</script>
<div class="response-section" id="resp-5">
<h3 class="response-header" onclick="toggleResponse(5)">
<span class="toggle-icon" id="icon-5">▶</span>
<span class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-system-numbered</span>
<span class="response-meta">138 tokens · 35.4 tok/s</span>
</h3>
<div class="response-body" id="body-5" style="display:none">
<div class="config-panel"><span class="config-badge" style="border-color:#d29922">temp: <strong>0.0</strong></span> <span class="config-badge" style="border-color:#58a6ff">max_tokens: <strong>4096</strong></span> <span class="config-badge" style="border-color:#a371f7">variant: <strong>stop-system-numbered</strong></span> <span class="config-badge" style="border-color:#d29922">stop: <strong>4.</strong></span> <span class="config-badge" style="border-color:#3fb950">system: <strong>Always respond with numbered lists. Never use bullet points.</strong></span> <span class="config-badge" style="border-color:#3fb950">finish: <strong>stop</strong></span> <span class="config-badge" style="border-color:#8b949e">load: <strong>1.0s</strong></span> <span class="config-badge" style="border-color:#8b949e">gen: <strong>3.9s</strong></span> <span class="config-badge" style="border-color:#8b949e">prompt_tok: <strong>28</strong></span> <span class="config-badge" style="border-color:#8b949e">comp_tok: <strong>138</strong></span> <span class="config-badge" style="border-color:#d29922">tok/s: <strong>35.4</strong></span></div>
<div class="prompt-box"><span class="prompt-label">PROMPT</span> What are the main benefits of exercise?</div>
<div class="rendered-content" id="content-5"></div>
</div>
</div>
<script>responseData[5] = "<details><summary><strong>\ud83e\udde0 Reasoning</strong></summary>\n\nThinking Process:\n\n1. **Analyze the Request:**\n * Topic: Main benefits of exercise.\n * Constraint 1: Always respond with numbered lists.\n * Constraint 2: Never use bullet points.\n\n2. **Identify the Content:**\n * I need to list the key benefits of exercising.\n * Common benefits include: physical health (weight, heart, muscles), mental health (mood, stress), longevity, sleep, energy, etc.\n\n3. **Format the Output:**\n * Must be a numbered list (1., 2., 3., ...).\n * Must NOT use bullet points (-, *, etc.).\n\n</details>\n\nThinking Process: 1. **Analyze the Request:** * Topic: Main benefits of exercise. * Constraint 1: Always respond with numbered lists. * Constraint 2: Never use bullet points. 2. **Identify the Content:** * I need to list the key benefits of exercising. * Common bene";</script>
<div class="response-section" id="resp-6">
<h3 class="response-header" onclick="toggleResponse(6)">
<span class="toggle-icon" id="icon-6">▶</span>
<span class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-streaming</span>
<span class="response-meta">161 tokens · 33.6 tok/s</span>
</h3>
<div class="response-body" id="body-6" style="display:none">
<div class="config-panel"><span class="config-badge" style="border-color:#d29922">temp: <strong>0.0</strong></span> <span class="config-badge" style="border-color:#58a6ff">max_tokens: <strong>4096</strong></span> <span class="config-badge" style="border-color:#a371f7">variant: <strong>stop-streaming</strong></span> <span class="config-badge" style="border-color:#d29922">stop: <strong>3.</strong></span> <span class="config-badge" style="border-color:#3fb950">finish: <strong>stop</strong></span> <span class="config-badge" style="border-color:#8b949e">load: <strong>1.0s</strong></span> <span class="config-badge" style="border-color:#8b949e">gen: <strong>4.79s</strong></span> <span class="config-badge" style="border-color:#8b949e">prompt_tok: <strong>20</strong></span> <span class="config-badge" style="border-color:#8b949e">comp_tok: <strong>161</strong></span> <span class="config-badge" style="border-color:#d29922">tok/s: <strong>33.6</strong></span></div>
<div class="prompt-box"><span class="prompt-label">PROMPT</span> List 10 planets or celestial objects, numbered 1 through 10, one per line.</div>
<div class="rendered-content" id="content-6"></div>
</div>
</div>
<script>responseData[6] = "<details><summary><strong>\ud83e\udde0 Reasoning</strong></summary>\n\nThinking Process:\n\n1. **Analyze the Request:**\n * Task: List 10 planets or celestial objects.\n * Format: Numbered 1 through 10.\n * Constraint: One per line.\n\n2. **Identify Candidates:**\n * Planets in our solar system: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune. (8 total)\n * Other celestial objects: Pluto (dwarf planet), Moon (Earth's), Sun (star), Mars' moons (Phobos, Deimos), Jupiter's moons (Io, Europa, etc.), Comets, Asteroids, Exoplanets, etc.\n * Selection: I need 10 items. I can use the 8 planets plus 2 other notable celestial objects (like the Sun and the Moon, or Pluto).\n\n</details>\n\nThinking Process: 1. **Analyze the Request:** * Task: List 10 planets or celestial objects. * Format: Numbered 1 through 10. * Constraint: One per line. 2. **Identify Candidates:** * Planets in our solar system: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptun";</script>
<div class="response-section" id="resp-7">
<h3 class="response-header" onclick="toggleResponse(7)">
<span class="toggle-icon" id="icon-7">▶</span>
<span class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-non-streaming</span>
<span class="response-meta">161 tokens · 32.2 tok/s</span>
</h3>
<div class="response-body" id="body-7" style="display:none">
<div class="config-panel"><span class="config-badge" style="border-color:#d29922">temp: <strong>0.0</strong></span> <span class="config-badge" style="border-color:#58a6ff">max_tokens: <strong>4096</strong></span> <span class="config-badge" style="border-color:#a371f7">variant: <strong>stop-non-streaming</strong></span> <span class="config-badge" style="border-color:#d29922">stop: <strong>3.</strong></span> <span class="config-badge" style="border-color:#f0883e">afm: <strong>--no-streaming</strong></span> <span class="config-badge" style="border-color:#3fb950">finish: <strong>stop</strong></span> <span class="config-badge" style="border-color:#8b949e">load: <strong>2.0s</strong></span> <span class="config-badge" style="border-color:#8b949e">gen: <strong>5.0s</strong></span> <span class="config-badge" style="border-color:#8b949e">prompt_tok: <strong>20</strong></span> <span class="config-badge" style="border-color:#8b949e">comp_tok: <strong>161</strong></span> <span class="config-badge" style="border-color:#d29922">tok/s: <strong>32.2</strong></span></div>
<div class="prompt-box"><span class="prompt-label">PROMPT</span> List 10 planets or celestial objects, numbered 1 through 10, one per line.</div>
<div class="rendered-content" id="content-7"></div>
</div>
</div>
<script>responseData[7] = "<details><summary><strong>\ud83e\udde0 Reasoning</strong></summary>\n\nThinking Process:\n\n1. **Analyze the Request:**\n * Task: List 10 planets or celestial objects.\n * Format: Numbered 1 through 10.\n * Constraint: One per line.\n\n2. **Identify Candidates:**\n * Planets in our solar system: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune. (8 total)\n * Other celestial objects: Pluto (dwarf planet), Moon (Earth's), Sun (star), Mars' moons (Phobos, Deimos), Jupiter's moons (Io, Europa, etc.), Comets, Asteroids, Exoplanets, etc.\n * Selection: I need 10 items. I can use the 8 planets plus 2 other notable celestial objects (like the Sun and the Moon, or Pluto).\n\n</details>\n\nThinking Process: 1. **Analyze the Request:** * Task: List 10 planets or celestial objects. * Format: Numbered 1 through 10. * Constraint: One per line. 2. **Identify Candidates:** * Planets in our solar system: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptun";</script>
<div class="response-section" id="resp-8">
<h3 class="response-header" onclick="toggleResponse(8)">
<span class="toggle-icon" id="icon-8">▶</span>
<span class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-multi-word</span>
<span class="response-meta">111 tokens · 31.9 tok/s</span>
</h3>
<div class="response-body" id="body-8" style="display:none">
<div class="config-panel"><span class="config-badge" style="border-color:#d29922">temp: <strong>0.0</strong></span> <span class="config-badge" style="border-color:#58a6ff">max_tokens: <strong>4096</strong></span> <span class="config-badge" style="border-color:#a371f7">variant: <strong>stop-multi-word</strong></span> <span class="config-badge" style="border-color:#d29922">stop: <strong>Step 3</strong></span> <span class="config-badge" style="border-color:#3fb950">finish: <strong>stop</strong></span> <span class="config-badge" style="border-color:#8b949e">load: <strong>2.0s</strong></span> <span class="config-badge" style="border-color:#8b949e">gen: <strong>3.48s</strong></span> <span class="config-badge" style="border-color:#8b949e">prompt_tok: <strong>22</strong></span> <span class="config-badge" style="border-color:#8b949e">comp_tok: <strong>111</strong></span> <span class="config-badge" style="border-color:#d29922">tok/s: <strong>31.9</strong></span></div>
<div class="prompt-box"><span class="prompt-label">PROMPT</span> Write a 5-step recipe for making tea. Label each step as "Step 1", "Step 2", etc.</div>
<div class="rendered-content" id="content-8"></div>
</div>
</div>
<script>responseData[8] = "<details><summary><strong>\ud83e\udde0 Reasoning</strong></summary>\n\nThinking Process:\n\n1. **Analyze the Request:**\n * Task: Write a 5-step recipe for making tea.\n * Constraint: Label each step as \"Step 1\", \"Step 2\", etc.\n * Goal: Provide clear, actionable instructions for making tea.\n\n2. **Determine the Content:**\n * What are the essential steps for making a standard cup of tea (e.g., black or green tea)?\n * Step 1: Boil water.\n * Step 2: Prepare the cup/teapot.\n *\n\n</details>\n\nThinking Process: 1. **Analyze the Request:** * Task: Write a 5-step recipe for making tea. * Constraint: Label each step as \"Step 1\", \"Step 2\", etc. * Goal: Provide clear, actionable instructions for making tea. 2. **Determine the Content:** * What are the essential step";</script>
<div class="response-section" id="resp-9">
<h3 class="response-header" onclick="toggleResponse(9)">
<span class="toggle-icon" id="icon-9">▶</span>
<span class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-top-p</span>
<span class="response-meta">111 tokens · 31.4 tok/s</span>
</h3>
<div class="response-body" id="body-9" style="display:none">
<div class="config-panel"><span class="config-badge" style="border-color:#d29922">temp: <strong>0.0</strong></span> <span class="config-badge" style="border-color:#58a6ff">max_tokens: <strong>4096</strong></span> <span class="config-badge" style="border-color:#a371f7">variant: <strong>stop-top-p</strong></span> <span class="config-badge" style="border-color:#58a6ff">top_p: <strong>0.9</strong></span> <span class="config-badge" style="border-color:#d29922">stop: <strong>3.</strong></span> <span class="config-badge" style="border-color:#3fb950">finish: <strong>stop</strong></span> <span class="config-badge" style="border-color:#8b949e">load: <strong>1.0s</strong></span> <span class="config-badge" style="border-color:#8b949e">gen: <strong>3.54s</strong></span> <span class="config-badge" style="border-color:#8b949e">prompt_tok: <strong>14</strong></span> <span class="config-badge" style="border-color:#8b949e">comp_tok: <strong>111</strong></span> <span class="config-badge" style="border-color:#d29922">tok/s: <strong>31.4</strong></span></div>
<div class="prompt-box"><span class="prompt-label">PROMPT</span> List 10 rivers, numbered 1 through 10, one per line.</div>
<div class="rendered-content" id="content-9"></div>
</div>
</div>
<script>responseData[9] = "<details><summary><strong>\ud83e\udde0 Reasoning</strong></summary>\n\nThinking Process:\n\n1. **Analyze the Request:**\n * Task: List 10 rivers.\n * Format: Numbered 1 through 10.\n * Constraint: One river per line.\n\n2. **Select Rivers:**\n * I need 10 well-known rivers to ensure accuracy and recognition.\n * Options: Nile, Amazon, Yangtze, Mississippi, Congo, Yenisei, Yellow, Ob, Parana, Ganges. (Or mix of continents).\n * Selection:\n 1. Nile\n 2. Amazon\n\n</details>\n\nThinking Process: 1. **Analyze the Request:** * Task: List 10 rivers. * Format: Numbered 1 through 10. * Constraint: One river per line. 2. **Select Rivers:** * I need 10 well-known rivers to ensure accuracy and recognition. * Options: Nile, Amazon, Yangtze, Mississ";</script>
<div class="response-section" id="resp-10">
<h3 class="response-header" onclick="toggleResponse(10)">
<span class="toggle-icon" id="icon-10">▶</span>
<span class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-long-phrase</span>
<span class="response-meta">44 tokens · 31.3 tok/s</span>
</h3>
<div class="response-body" id="body-10" style="display:none">
<div class="config-panel"><span class="config-badge" style="border-color:#d29922">temp: <strong>0.0</strong></span> <span class="config-badge" style="border-color:#58a6ff">max_tokens: <strong>4096</strong></span> <span class="config-badge" style="border-color:#a371f7">variant: <strong>stop-long-phrase</strong></span> <span class="config-badge" style="border-color:#d29922">stop: <strong>In conclusion</strong></span> <span class="config-badge" style="border-color:#3fb950">finish: <strong>stop</strong></span> <span class="config-badge" style="border-color:#8b949e">load: <strong>1.0s</strong></span> <span class="config-badge" style="border-color:#8b949e">gen: <strong>1.41s</strong></span> <span class="config-badge" style="border-color:#8b949e">prompt_tok: <strong>25</strong></span> <span class="config-badge" style="border-color:#8b949e">comp_tok: <strong>44</strong></span> <span class="config-badge" style="border-color:#d29922">tok/s: <strong>31.3</strong></span></div>
<div class="prompt-box"><span class="prompt-label">PROMPT</span> Write a 3-paragraph essay about renewable energy. Start the last paragraph with "In conclusion".</div>
<div class="rendered-content" id="content-10"></div>
</div>
</div>
<script>responseData[10] = "<details><summary><strong>\ud83e\udde0 Reasoning</strong></summary>\n\nThinking Process:\n\n1. **Analyze the Request:**\n * Topic: Renewable energy.\n * Format: 3-paragraph essay.\n * Constraint: The last paragraph must start with \"\n\n</details>\n\nThinking Process: 1. **Analyze the Request:** * Topic: Renewable energy. * Format: 3-paragraph essay. * Constraint: The last paragraph must start with \"";</script>
<!-- Response 11 card (variant "stop-guided-json-comma"): header click calls toggleResponse(11); the body starts hidden. -->
<div class="response-section" id="resp-11">
  <h3 class="response-header" onclick="toggleResponse(11)">
    <span class="toggle-icon" id="icon-11">▶</span>
    <span class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-guided-json-comma</span>
    <span class="response-meta">58 tokens · 30.7 tok/s</span>
  </h3>
  <div class="response-body" id="body-11" style="display:none">
    <!-- Run configuration and timing badges -->
    <div class="config-panel">
      <span class="config-badge" style="border-color:#d29922">temp: <strong>0.0</strong></span>
      <span class="config-badge" style="border-color:#58a6ff">max_tokens: <strong>4096</strong></span>
      <span class="config-badge" style="border-color:#a371f7">variant: <strong>stop-guided-json-comma</strong></span>
      <span class="config-badge" style="border-color:#d29922">stop: <strong>,</strong></span>
      <span class="config-badge" style="border-color:#f0883e">afm: <strong>--guided-json '{"type":"object","properties":{"name":{"type":"string"},"age":{"type":"integer"},"city":{"type":"string"}},"required":["name","age","city"]}'</strong></span>
      <span class="config-badge" style="border-color:#3fb950">finish: <strong>stop</strong></span>
      <span class="config-badge" style="border-color:#8b949e">load: <strong>1.0s</strong></span>
      <span class="config-badge" style="border-color:#8b949e">gen: <strong>1.89s</strong></span>
      <span class="config-badge" style="border-color:#8b949e">prompt_tok: <strong>21</strong></span>
      <span class="config-badge" style="border-color:#8b949e">comp_tok: <strong>58</strong></span>
      <span class="config-badge" style="border-color:#d29922">tok/s: <strong>30.7</strong></span>
    </div>
    <div class="prompt-box"><span class="prompt-label">PROMPT</span> Generate a person profile for someone named Alice who is 30 and lives in Paris.</div>
    <div class="rendered-content" id="content-11"></div>
  </div>
</div>
<!-- Payload for response 11, stored in responseData (declared elsewhere); presumably rendered into #content-11 by script outside this section (confirm). -->
<script>
  responseData[11] = "<details><summary><strong>\ud83e\udde0 Reasoning</strong></summary>\n\nThinking Process:\n\n1. **Analyze the Request:**\n * **Task:** Generate a person profile.\n * **Name:** Alice.\n * **Age:** 30.\n * **Location:** Paris.\n * **Implicit Needs:** The profile should be realistic\n\n</details>\n\nThinking Process: 1. **Analyze the Request:** * **Task:** Generate a person profile. * **Name:** Alice. * **Age:** 30. * **Location:** Paris. * **Implicit Needs:** The profile should be realistic";
</script>
<!-- Response 12 card (variant "stop-html-tag"): header click calls toggleResponse(12); the body starts hidden. -->
<div class="response-section" id="resp-12">
  <h3 class="response-header" onclick="toggleResponse(12)">
    <span class="toggle-icon" id="icon-12">▶</span>
    <span class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-html-tag</span>
    <span class="response-meta">107 tokens · 30.2 tok/s</span>
  </h3>
  <div class="response-body" id="body-12" style="display:none">
    <!-- Run configuration and timing badges -->
    <div class="config-panel">
      <span class="config-badge" style="border-color:#d29922">temp: <strong>0.0</strong></span>
      <span class="config-badge" style="border-color:#58a6ff">max_tokens: <strong>4096</strong></span>
      <span class="config-badge" style="border-color:#a371f7">variant: <strong>stop-html-tag</strong></span>
      <!-- The stop sequence is the literal text "</li>"; it is escaped so it shows as text instead of being parsed as a stray end tag. -->
      <span class="config-badge" style="border-color:#d29922">stop: <strong>&lt;/li&gt;</strong></span>
      <span class="config-badge" style="border-color:#3fb950">finish: <strong>stop</strong></span>
      <span class="config-badge" style="border-color:#8b949e">load: <strong>1.0s</strong></span>
      <span class="config-badge" style="border-color:#8b949e">gen: <strong>3.54s</strong></span>
      <span class="config-badge" style="border-color:#8b949e">prompt_tok: <strong>18</strong></span>
      <span class="config-badge" style="border-color:#8b949e">comp_tok: <strong>107</strong></span>
      <span class="config-badge" style="border-color:#d29922">tok/s: <strong>30.2</strong></span>
    </div>
    <!-- Tag names in the prompt are escaped; left raw they open real <ul>/<li> elements inside the prompt box. -->
    <div class="prompt-box"><span class="prompt-label">PROMPT</span> Write an HTML unordered list of 5 fruits using &lt;ul&gt; and &lt;li&gt; tags.</div>
    <div class="rendered-content" id="content-12"></div>
  </div>
</div>
<!-- Payload for response 12, stored in responseData (declared elsewhere); presumably rendered into #content-12 by script outside this section (confirm). The string is kept exactly as generated. -->
<script>
  responseData[12] = "<details><summary><strong>\ud83e\udde0 Reasoning</strong></summary>\n\nThinking Process:\n\n1. **Analyze the Request:**\n * Task: Write an HTML unordered list.\n * Content: 5 fruits.\n * Tags required:<ul>` (unordered list) and<li>` (list item).\n * Quantity: 5 items.\n\n2. **Determine the Content:**\n * Need 5 fruit names.\n * Examples: Apple, Banana, Orange, Grape, Strawberry.\n\n3. **Construct the HTML:**\n * Opening tag:<ul>`\n * Items:\n * <li>Apple\n\n</details>\n\nThinking Process: 1. **Analyze the Request:** * Task: Write an HTML unordered list. * Content: 5 fruits. * Tags required:<ul>` (unordered list) and<li>` (list item). * Quantity: 5 items. 2. **Determine the Content:** * Need 5 fruit names. * Examples: Apple, Ba";
</script>
<!-- Response 13 card (variant "stop-unicode"): header click calls toggleResponse(13); the body starts hidden. -->
<div class="response-section" id="resp-13">
  <h3 class="response-header" onclick="toggleResponse(13)">
    <span class="toggle-icon" id="icon-13">▶</span>
    <span class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-unicode</span>
    <span class="response-meta">38 tokens · 30.0 tok/s</span>
  </h3>
  <div class="response-body" id="body-13" style="display:none">
    <!-- Run configuration and timing badges -->
    <div class="config-panel">
      <span class="config-badge" style="border-color:#d29922">temp: <strong>0.0</strong></span>
      <span class="config-badge" style="border-color:#58a6ff">max_tokens: <strong>4096</strong></span>
      <span class="config-badge" style="border-color:#a371f7">variant: <strong>stop-unicode</strong></span>
      <span class="config-badge" style="border-color:#d29922">stop: <strong>•</strong></span>
      <span class="config-badge" style="border-color:#3fb950">finish: <strong>stop</strong></span>
      <span class="config-badge" style="border-color:#8b949e">load: <strong>1.0s</strong></span>
      <span class="config-badge" style="border-color:#8b949e">gen: <strong>1.27s</strong></span>
      <span class="config-badge" style="border-color:#8b949e">prompt_tok: <strong>13</strong></span>
      <span class="config-badge" style="border-color:#8b949e">comp_tok: <strong>38</strong></span>
      <span class="config-badge" style="border-color:#d29922">tok/s: <strong>30.0</strong></span>
    </div>
    <div class="prompt-box"><span class="prompt-label">PROMPT</span> List 5 items about space using bullet points (•).</div>
    <div class="rendered-content" id="content-13"></div>
  </div>
</div>
<!-- Payload for response 13, stored in responseData (declared elsewhere); presumably rendered into #content-13 by script outside this section (confirm). -->
<script>
  responseData[13] = "<details><summary><strong>\ud83e\udde0 Reasoning</strong></summary>\n\nThinking Process:\n\n1. **Analyze the Request:**\n * Topic: Space.\n * Quantity: 5 items.\n * Format: Bullet points using the symbol \"\n\n</details>\n\nThinking Process: 1. **Analyze the Request:** * Topic: Space. * Quantity: 5 items. * Format: Bullet points using the symbol \"";
</script>
<!-- Response 14 card (variant "stop-api-multi"): header click calls toggleResponse(14); the body starts hidden. -->
<div class="response-section" id="resp-14">
  <h3 class="response-header" onclick="toggleResponse(14)">
    <span class="toggle-icon" id="icon-14">▶</span>
    <span class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-api-multi</span>
    <span class="response-meta">98 tokens · 28.4 tok/s</span>
  </h3>
  <div class="response-body" id="body-14" style="display:none">
    <!-- Run configuration and timing badges -->
    <div class="config-panel">
      <span class="config-badge" style="border-color:#d29922">temp: <strong>0.0</strong></span>
      <span class="config-badge" style="border-color:#58a6ff">max_tokens: <strong>4096</strong></span>
      <span class="config-badge" style="border-color:#a371f7">variant: <strong>stop-api-multi</strong></span>
      <span class="config-badge" style="border-color:#d29922">stop: <strong>4., five</strong></span>
      <span class="config-badge" style="border-color:#3fb950">finish: <strong>stop</strong></span>
      <span class="config-badge" style="border-color:#8b949e">load: <strong>1.0s</strong></span>
      <span class="config-badge" style="border-color:#8b949e">gen: <strong>3.45s</strong></span>
      <span class="config-badge" style="border-color:#8b949e">prompt_tok: <strong>14</strong></span>
      <span class="config-badge" style="border-color:#8b949e">comp_tok: <strong>98</strong></span>
      <span class="config-badge" style="border-color:#d29922">tok/s: <strong>28.4</strong></span>
    </div>
    <div class="prompt-box"><span class="prompt-label">PROMPT</span> List 10 animals, numbered 1 through 10, one per line.</div>
    <div class="rendered-content" id="content-14"></div>
  </div>
</div>
<!-- Payload for response 14, stored in responseData (declared elsewhere); presumably rendered into #content-14 by script outside this section (confirm). -->
<script>
  responseData[14] = "<details><summary><strong>\ud83e\udde0 Reasoning</strong></summary>\n\nThinking Process:\n\n1. **Analyze the Request:**\n * Task: List 10 animals.\n * Format: Numbered 1 through 10.\n * Constraint: One animal per line.\n\n2. **Draft the Content:**\n * Need 10 distinct animals.\n * Need to number them 1 to 10.\n * Need to ensure each is on a new line.\n\n * Draft list:\n 1. Lion\n 2. Elephant\n 3. Tiger\n\n</details>\n\nThinking Process: 1. **Analyze the Request:** * Task: List 10 animals. * Format: Numbered 1 through 10. * Constraint: One animal per line. 2. **Draft the Content:** * Need 10 distinct animals. * Need to number them 1 to 10. * Need to ensure each is on a new li";
</script>
<!-- Response 15 card (variant "stop-high-temp"): header click calls toggleResponse(15); the body starts hidden. -->
<div class="response-section" id="resp-15">
  <h3 class="response-header" onclick="toggleResponse(15)">
    <span class="toggle-icon" id="icon-15">▶</span>
    <span class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-high-temp</span>
    <span class="response-meta">144 tokens · 28.3 tok/s</span>
  </h3>
  <div class="response-body" id="body-15" style="display:none">
    <!-- Run configuration and timing badges -->
    <div class="config-panel">
      <span class="config-badge" style="border-color:#d29922">temp: <strong>1.0</strong></span>
      <span class="config-badge" style="border-color:#58a6ff">max_tokens: <strong>4096</strong></span>
      <span class="config-badge" style="border-color:#a371f7">variant: <strong>stop-high-temp</strong></span>
      <span class="config-badge" style="border-color:#d29922">stop: <strong>3.</strong></span>
      <span class="config-badge" style="border-color:#3fb950">finish: <strong>stop</strong></span>
      <span class="config-badge" style="border-color:#8b949e">load: <strong>1.0s</strong></span>
      <span class="config-badge" style="border-color:#8b949e">gen: <strong>5.08s</strong></span>
      <span class="config-badge" style="border-color:#8b949e">prompt_tok: <strong>16</strong></span>
      <span class="config-badge" style="border-color:#8b949e">comp_tok: <strong>144</strong></span>
      <span class="config-badge" style="border-color:#d29922">tok/s: <strong>28.3</strong></span>
    </div>
    <div class="prompt-box"><span class="prompt-label">PROMPT</span> List 10 random words, numbered 1 through 10, one per line.</div>
    <div class="rendered-content" id="content-15"></div>
  </div>
</div>
<!-- Payload for response 15, stored in responseData (declared elsewhere); presumably rendered into #content-15 by script outside this section (confirm). -->
<script>
  responseData[15] = "<details><summary><strong>\ud83e\udde0 Reasoning</strong></summary>\n\nThinking Process:\n\n1. **Analyze the Request:**\n * Task: List 10 random words.\n * Format: Numbered 1 through 10.\n * Structure: One word per line.\n\n2. **Generate Words:**\n * I need to choose 10 distinct, common, or interesting words.\n * Word 1: Sunlight\n * Word 2: Journey\n * Word 3: Mountain\n * Word 4: Whisper\n * Word 5: Garden\n * Word 6: Puzzle\n * Word 6: Canvas (Wait, I skipped 5) -> Word 5: Canvas\n * Word 6: Ocean\n * Word 7: Magic\n * Word 8: Clock\n * Word 9: Bread\n * Word 10: Rocket\n\n</details>\n\nThinking Process: 1. **Analyze the Request:** * Task: List 10 random words. * Format: Numbered 1 through 10. * Structure: One word per line. 2. **Generate Words:** * I need to choose 10 distinct, common, or interesting words. * Word 1: Sunlight * Word 2: Journ";
</script>
<!-- Response 16 card (variant "stop-seed-run2"): header click calls toggleResponse(16); the body starts hidden. -->
<div class="response-section" id="resp-16">
  <h3 class="response-header" onclick="toggleResponse(16)">
    <span class="toggle-icon" id="icon-16">▶</span>
    <span class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-seed-run2</span>
    <span class="response-meta">80 tokens · 28.3 tok/s</span>
  </h3>
  <div class="response-body" id="body-16" style="display:none">
    <!-- Run configuration and timing badges -->
    <div class="config-panel">
      <span class="config-badge" style="border-color:#d29922">temp: <strong>0.0</strong></span>
      <span class="config-badge" style="border-color:#58a6ff">max_tokens: <strong>4096</strong></span>
      <span class="config-badge" style="border-color:#a371f7">variant: <strong>stop-seed-run2</strong></span>
      <span class="config-badge" style="border-color:#a371f7">seed: <strong>42</strong></span>
      <span class="config-badge" style="border-color:#d29922">stop: <strong>3.</strong></span>
      <span class="config-badge" style="border-color:#3fb950">finish: <strong>stop</strong></span>
      <span class="config-badge" style="border-color:#8b949e">load: <strong>1.0s</strong></span>
      <span class="config-badge" style="border-color:#8b949e">gen: <strong>2.83s</strong></span>
      <span class="config-badge" style="border-color:#8b949e">prompt_tok: <strong>14</strong></span>
      <span class="config-badge" style="border-color:#8b949e">comp_tok: <strong>80</strong></span>
      <span class="config-badge" style="border-color:#d29922">tok/s: <strong>28.3</strong></span>
    </div>
    <div class="prompt-box"><span class="prompt-label">PROMPT</span> List 10 flowers, numbered 1 through 10, one per line.</div>
    <div class="rendered-content" id="content-16"></div>
  </div>
</div>
<!-- Payload for response 16, stored in responseData (declared elsewhere); presumably rendered into #content-16 by script outside this section (confirm). -->
<script>
  responseData[16] = "<details><summary><strong>\ud83e\udde0 Reasoning</strong></summary>\n\nThinking Process:\n\n1. **Analyze the Request:**\n * Task: List 10 flowers.\n * Format: Numbered 1 through 10.\n * Constraint: One per line.\n\n2. **Generate Content:**\n * I need 10 distinct flower names.\n * Examples: Rose, Tulip, Sunflower, Daisy, Lily, Orchid, Violet, Iris, Peony, Carnation.\n\n</details>\n\nThinking Process: 1. **Analyze the Request:** * Task: List 10 flowers. * Format: Numbered 1 through 10. * Constraint: One per line. 2. **Generate Content:** * I need 10 distinct flower names. * Examples: Rose, Tulip, Sunflower, Daisy, Lily, Orchid, Violet, Iris, Peo";
</script>
<!-- Response 17 card (variant "stop-api-single"): header click calls toggleResponse(17); the body starts hidden. -->
<div class="response-section" id="resp-17">
  <h3 class="response-header" onclick="toggleResponse(17)">
    <span class="toggle-icon" id="icon-17">▶</span>
    <span class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-api-single</span>
    <span class="response-meta">90 tokens · 28.3 tok/s</span>
  </h3>
  <div class="response-body" id="body-17" style="display:none">
    <!-- Run configuration and timing badges -->
    <div class="config-panel">
      <span class="config-badge" style="border-color:#d29922">temp: <strong>0.0</strong></span>
      <span class="config-badge" style="border-color:#58a6ff">max_tokens: <strong>4096</strong></span>
      <span class="config-badge" style="border-color:#a371f7">variant: <strong>stop-api-single</strong></span>
      <span class="config-badge" style="border-color:#d29922">stop: <strong>3.</strong></span>
      <span class="config-badge" style="border-color:#3fb950">finish: <strong>stop</strong></span>
      <span class="config-badge" style="border-color:#8b949e">load: <strong>7.0s</strong></span>
      <span class="config-badge" style="border-color:#8b949e">gen: <strong>3.19s</strong></span>
      <span class="config-badge" style="border-color:#8b949e">prompt_tok: <strong>14</strong></span>
      <span class="config-badge" style="border-color:#8b949e">comp_tok: <strong>90</strong></span>
      <span class="config-badge" style="border-color:#d29922">tok/s: <strong>28.3</strong></span>
    </div>
    <div class="prompt-box"><span class="prompt-label">PROMPT</span> List 10 colors, numbered 1 through 10, one per line.</div>
    <div class="rendered-content" id="content-17"></div>
  </div>
</div>
<!-- Payload for response 17, stored in responseData (declared elsewhere); presumably rendered into #content-17 by script outside this section (confirm). -->
<script>
  responseData[17] = "<details><summary><strong>\ud83e\udde0 Reasoning</strong></summary>\n\nThinking Process:\n\n1. **Analyze the Request:**\n * Task: List 10 colors.\n * Format: Numbered 1 through 10.\n * Constraint: One color per line.\n\n2. **Generate Content:**\n * I need 10 distinct colors.\n * Common colors: Red, Blue, Green, Yellow, Orange, Purple, Pink, Brown, Black, White.\n * Let's make them clear and distinct.\n\n</details>\n\nThinking Process: 1. **Analyze the Request:** * Task: List 10 colors. * Format: Numbered 1 through 10. * Constraint: One color per line. 2. **Generate Content:** * I need 10 distinct colors. * Common colors: Red, Blue, Green, Yellow, Orange, Purple, Pink, Brown, Bla";
</script>
<!-- Response 18 card (variant "stop-guided-json-value"): header click calls toggleResponse(18); the body starts hidden. -->
<div class="response-section" id="resp-18">
  <h3 class="response-header" onclick="toggleResponse(18)">
    <span class="toggle-icon" id="icon-18">▶</span>
    <span class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-guided-json-value</span>
    <span class="response-meta">38 tokens · 27.2 tok/s</span>
  </h3>
  <div class="response-body" id="body-18" style="display:none">
    <!-- Run configuration and timing badges; this run also carries a JSON validity badge -->
    <div class="config-panel">
      <span class="config-badge" style="border-color:#d29922">temp: <strong>0.0</strong></span>
      <span class="config-badge" style="border-color:#58a6ff">max_tokens: <strong>4096</strong></span>
      <span class="config-badge" style="border-color:#a371f7">variant: <strong>stop-guided-json-value</strong></span>
      <span class="config-badge" style="border-color:#d29922">stop: <strong>Tokyo</strong></span>
      <span class="config-badge" style="border-color:#f0883e">afm: <strong>--guided-json '{"type":"object","properties":{"cities":{"type":"array","items":{"type":"string"}}},"required":["cities"]}'</strong></span>
      <span class="config-badge" style="border-color:#3fb950">finish: <strong>stop</strong></span>
      <span class="config-badge" style="border-color:#f85149">json: <strong>INVALID</strong></span>
      <span class="config-badge" style="border-color:#8b949e">load: <strong>2.0s</strong></span>
      <span class="config-badge" style="border-color:#8b949e">gen: <strong>1.4s</strong></span>
      <span class="config-badge" style="border-color:#8b949e">prompt_tok: <strong>16</strong></span>
      <span class="config-badge" style="border-color:#8b949e">comp_tok: <strong>38</strong></span>
      <span class="config-badge" style="border-color:#d29922">tok/s: <strong>27.2</strong></span>
    </div>
    <div class="prompt-box"><span class="prompt-label">PROMPT</span> List 5 major world cities as a JSON array. Include Tokyo.</div>
    <div class="rendered-content" id="content-18"></div>
  </div>
</div>
<!-- Payload for response 18, stored in responseData (declared elsewhere); presumably rendered into #content-18 by script outside this section (confirm). -->
<script>
  responseData[18] = "<details><summary><strong>\ud83e\udde0 Reasoning</strong></summary>\n\nThinking Process:\n\n1. **Analyze the Request:**\n * Task: List 5 major world cities.\n * Format: JSON array.\n * Constraint: Include\n\n</details>\n\nThinking Process: 1. **Analyze the Request:** * Task: List 5 major world cities. * Format: JSON array. * Constraint: Include";
</script>
<!-- Response 19 card (variant "stop-low-max-tokens"): header click calls toggleResponse(19); the body starts hidden. -->
<div class="response-section" id="resp-19">
  <h3 class="response-header" onclick="toggleResponse(19)">
    <span class="toggle-icon" id="icon-19">▶</span>
    <span class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-low-max-tokens</span>
    <span class="response-meta">40 tokens · 26.6 tok/s</span>
  </h3>
  <div class="response-body" id="body-19" style="display:none">
    <!-- Run configuration and timing badges -->
    <div class="config-panel">
      <span class="config-badge" style="border-color:#d29922">temp: <strong>0.0</strong></span>
      <span class="config-badge" style="border-color:#58a6ff">max_tokens: <strong>100</strong></span>
      <span class="config-badge" style="border-color:#a371f7">variant: <strong>stop-low-max-tokens</strong></span>
      <span class="config-badge" style="border-color:#d29922">stop: <strong>2.</strong></span>
      <span class="config-badge" style="border-color:#3fb950">finish: <strong>stop</strong></span>
      <span class="config-badge" style="border-color:#8b949e">load: <strong>1.0s</strong></span>
      <span class="config-badge" style="border-color:#8b949e">gen: <strong>1.5s</strong></span>
      <span class="config-badge" style="border-color:#8b949e">prompt_tok: <strong>15</strong></span>
      <span class="config-badge" style="border-color:#8b949e">comp_tok: <strong>40</strong></span>
      <span class="config-badge" style="border-color:#d29922">tok/s: <strong>26.6</strong></span>
    </div>
    <div class="prompt-box"><span class="prompt-label">PROMPT</span> List 10 mountains, numbered 1 through 10, one per line.</div>
    <div class="rendered-content" id="content-19"></div>
  </div>
</div>
<!-- Payload for response 19, stored in responseData (declared elsewhere); presumably rendered into #content-19 by script outside this section (confirm). -->
<script>
  responseData[19] = "<details><summary><strong>\ud83e\udde0 Reasoning</strong></summary>\n\nThinking Process:\n\n1. **Analyze the Request:**\n * Task: List 10 mountains.\n * Format: Numbered 1 through 10.\n * Constraint: One per line.\n\n</details>\n\nThinking Process: 1. **Analyze the Request:** * Task: List 10 mountains. * Format: Numbered 1 through 10. * Constraint: One per line.";
</script>
<!-- Response 20 card (variant "stop-system-pirate"): header click calls toggleResponse(20); the body starts hidden. -->
<div class="response-section" id="resp-20">
  <h3 class="response-header" onclick="toggleResponse(20)">
    <span class="toggle-icon" id="icon-20">▶</span>
    <span class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-system-pirate</span>
    <span class="response-meta">374 tokens · 24.7 tok/s</span>
  </h3>
  <div class="response-body" id="body-20" style="display:none">
    <!-- Run configuration and timing badges; this run also shows its system prompt -->
    <div class="config-panel">
      <span class="config-badge" style="border-color:#d29922">temp: <strong>0.0</strong></span>
      <span class="config-badge" style="border-color:#58a6ff">max_tokens: <strong>4096</strong></span>
      <span class="config-badge" style="border-color:#a371f7">variant: <strong>stop-system-pirate</strong></span>
      <span class="config-badge" style="border-color:#d29922">stop: <strong>Arrr</strong></span>
      <span class="config-badge" style="border-color:#3fb950">system: <strong>You are a pirate. Speak like a pirate in all responses.</strong></span>
      <span class="config-badge" style="border-color:#3fb950">finish: <strong>stop</strong></span>
      <span class="config-badge" style="border-color:#8b949e">load: <strong>2.0s</strong></span>
      <span class="config-badge" style="border-color:#8b949e">gen: <strong>15.15s</strong></span>
      <span class="config-badge" style="border-color:#8b949e">prompt_tok: <strong>29</strong></span>
      <span class="config-badge" style="border-color:#8b949e">comp_tok: <strong>374</strong></span>
      <span class="config-badge" style="border-color:#d29922">tok/s: <strong>24.7</strong></span>
    </div>
    <div class="prompt-box"><span class="prompt-label">PROMPT</span> Tell me about treasure hunting on the high seas.</div>
    <div class="rendered-content" id="content-20"></div>
  </div>
</div>
<!-- Payload for response 20, stored in responseData (declared elsewhere); presumably rendered into #content-20 by script outside this section (confirm). -->
<script>
  responseData[20] = "<details><summary><strong>\ud83e\udde0 Reasoning</strong></summary>\n\nThinking Process:\n\n1. **Analyze the Request:**\n * **User:** Asks about \"treasure hunting on the high seas.\"\n * **Constraint:** \"Speak like a pirate in all responses.\"\n * **Role:** Pirate.\n * **Task:** Provide information about treasure hunting while maintaining the pirate persona.\n\n2. **Adopt the Persona:**\n * **Tone:** Rough, adventurous, nautical, enthusiastic, slightly archaic (but understandable), using pirate slang (ahoy, matey, arrr, ship, sea, gold, doubloons, etc.).\n * **Vocabulary:** Sea, waves, compass, map, chest, gold, doubloons, kraken, mutiny, captain, crew, loot, plunder, etc.\n * **Grammar/Syntax:** Informal, maybe some dropped 'g's, direct address (\"ye\", \"ye scallywag\", \"ye landlubber\").\n * **Attitude:** Passionate about the sea, treasure, and the life of a pirate.\n\n3. **Drafting the Content:**\n * *Greeting:* Ahoy! Welcome aboard, matey!\n * *Topic:* Treasure hunting.\n * *Key Points to Cover:*\n * The thrill of the hunt.\n * Tools needed (maps, compass, shovel).\n * Dangers (storms, navy, other pirates).\n * The reward (gold, jewels, freedom).\n * Warning/Advice (don't get caught).\n * *Closing:* A pirate sign-off.\n\n4. **Refining the Voice (Iterative Process):**\n * *Draft 1:* Ahoy matey. I know about treasure. You need a map. Look for X marks the spot. Be careful of the navy.\n * *Critique 1:* Too plain. Needs more flavor.\n * *Draft 2:*\n\n</details>\n\nThinking Process: 1. **Analyze the Request:** * **User:** Asks about \"treasure hunting on the high seas.\" * **Constraint:** \"Speak like a pirate in all responses.\" * **Role:** Pirate. * **Task:** Provide information about treasure hunting while maintaining the pirate person";
</script>
<!-- Response 21 card (variant "stop-seed-run1"): header click calls toggleResponse(21); the body starts hidden. -->
<div class="response-section" id="resp-21">
  <h3 class="response-header" onclick="toggleResponse(21)">
    <span class="toggle-icon" id="icon-21">▶</span>
    <span class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-seed-run1</span>
    <span class="response-meta">80 tokens · 24.1 tok/s</span>
  </h3>
  <div class="response-body" id="body-21" style="display:none">
    <!-- Run configuration and timing badges -->
    <div class="config-panel">
      <span class="config-badge" style="border-color:#d29922">temp: <strong>0.0</strong></span>
      <span class="config-badge" style="border-color:#58a6ff">max_tokens: <strong>4096</strong></span>
      <span class="config-badge" style="border-color:#a371f7">variant: <strong>stop-seed-run1</strong></span>
      <span class="config-badge" style="border-color:#a371f7">seed: <strong>42</strong></span>
      <span class="config-badge" style="border-color:#d29922">stop: <strong>3.</strong></span>
      <span class="config-badge" style="border-color:#3fb950">finish: <strong>stop</strong></span>
      <span class="config-badge" style="border-color:#8b949e">load: <strong>1.0s</strong></span>
      <span class="config-badge" style="border-color:#8b949e">gen: <strong>3.32s</strong></span>
      <span class="config-badge" style="border-color:#8b949e">prompt_tok: <strong>14</strong></span>
      <span class="config-badge" style="border-color:#8b949e">comp_tok: <strong>80</strong></span>
      <span class="config-badge" style="border-color:#d29922">tok/s: <strong>24.1</strong></span>
    </div>
    <div class="prompt-box"><span class="prompt-label">PROMPT</span> List 10 flowers, numbered 1 through 10, one per line.</div>
    <div class="rendered-content" id="content-21"></div>
  </div>
</div>
<!-- Payload for response 21, stored in responseData (declared elsewhere); presumably rendered into #content-21 by script outside this section (confirm). -->
<script>
  responseData[21] = "<details><summary><strong>\ud83e\udde0 Reasoning</strong></summary>\n\nThinking Process:\n\n1. **Analyze the Request:**\n * Task: List 10 flowers.\n * Format: Numbered 1 through 10.\n * Constraint: One per line.\n\n2. **Generate Content:**\n * I need 10 distinct flower names.\n * Examples: Rose, Tulip, Sunflower, Daisy, Lily, Orchid, Violet, Iris, Peony, Carnation.\n\n</details>\n\nThinking Process: 1. **Analyze the Request:** * Task: List 10 flowers. * Format: Numbered 1 through 10. * Constraint: One per line. 2. **Generate Content:** * I need 10 distinct flower names. * Examples: Rose, Tulip, Sunflower, Daisy, Lily, Orchid, Violet, Iris, Peo";
</script>
<!-- Response 22 card (variant "stop-four-max"): header click calls toggleResponse(22); the body starts hidden. -->
<div class="response-section" id="resp-22">
  <h3 class="response-header" onclick="toggleResponse(22)">
    <span class="toggle-icon" id="icon-22">▶</span>
    <span class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-four-max</span>
    <span class="response-meta">102 tokens · 22.4 tok/s</span>
  </h3>
  <div class="response-body" id="body-22" style="display:none">
    <!-- Run configuration and timing badges -->
    <div class="config-panel">
      <span class="config-badge" style="border-color:#d29922">temp: <strong>0.0</strong></span>
      <span class="config-badge" style="border-color:#58a6ff">max_tokens: <strong>4096</strong></span>
      <span class="config-badge" style="border-color:#a371f7">variant: <strong>stop-four-max</strong></span>
      <span class="config-badge" style="border-color:#d29922">stop: <strong>3., three, Third, III</strong></span>
      <span class="config-badge" style="border-color:#3fb950">finish: <strong>stop</strong></span>
      <span class="config-badge" style="border-color:#8b949e">load: <strong>2.0s</strong></span>
      <span class="config-badge" style="border-color:#8b949e">gen: <strong>4.56s</strong></span>
      <span class="config-badge" style="border-color:#8b949e">prompt_tok: <strong>14</strong></span>
      <span class="config-badge" style="border-color:#8b949e">comp_tok: <strong>102</strong></span>
      <span class="config-badge" style="border-color:#d29922">tok/s: <strong>22.4</strong></span>
    </div>
    <div class="prompt-box"><span class="prompt-label">PROMPT</span> List 10 items, numbered 1 through 10. One per line.</div>
    <div class="rendered-content" id="content-22"></div>
  </div>
</div>
<!-- Payload for response 22, stored in responseData (declared elsewhere); presumably rendered into #content-22 by script outside this section (confirm). -->
<script>
  responseData[22] = "<details><summary><strong>\ud83e\udde0 Reasoning</strong></summary>\n\nThinking Process:\n\n1. **Analyze the Request:**\n * Task: List 10 items.\n * Format: Numbered 1 through 10.\n * Constraint: One item per line.\n\n2. **Determine the Content:**\n * The prompt doesn't specify *what* kind of items (e.g., fruits, numbers, objects).\n * Decision: I'll choose common, generic items to ensure clarity and usefulness. Let's use everyday objects or concepts.\n\n</details>\n\nThinking Process: 1. **Analyze the Request:** * Task: List 10 items. * Format: Numbered 1 through 10. * Constraint: One item per line. 2. **Determine the Content:** * The prompt doesn't specify *what* kind of items (e.g., fruits, numbers, objects). * Decision: I'll ";
</script>
<!-- Response 23 card (variant "stop-immediate"): header click calls toggleResponse(23); the body starts hidden. -->
<div class="response-section" id="resp-23">
  <h3 class="response-header" onclick="toggleResponse(23)">
    <span class="toggle-icon" id="icon-23">▶</span>
    <span class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-immediate</span>
    <span class="response-meta">8 tokens · 18.7 tok/s</span>
  </h3>
  <div class="response-body" id="body-23" style="display:none">
    <!-- Run configuration and timing badges -->
    <div class="config-panel">
      <span class="config-badge" style="border-color:#d29922">temp: <strong>0.0</strong></span>
      <span class="config-badge" style="border-color:#58a6ff">max_tokens: <strong>4096</strong></span>
      <span class="config-badge" style="border-color:#a371f7">variant: <strong>stop-immediate</strong></span>
      <span class="config-badge" style="border-color:#d29922">stop: <strong>The, I, A</strong></span>
      <span class="config-badge" style="border-color:#3fb950">finish: <strong>stop</strong></span>
      <span class="config-badge" style="border-color:#8b949e">load: <strong>1.0s</strong></span>
      <span class="config-badge" style="border-color:#8b949e">gen: <strong>0.43s</strong></span>
      <span class="config-badge" style="border-color:#8b949e">prompt_tok: <strong>9</strong></span>
      <span class="config-badge" style="border-color:#8b949e">comp_tok: <strong>8</strong></span>
      <span class="config-badge" style="border-color:#d29922">tok/s: <strong>18.7</strong></span>
    </div>
    <div class="prompt-box"><span class="prompt-label">PROMPT</span> What is the capital of Japan?</div>
    <div class="rendered-content" id="content-23"></div>
  </div>
</div>
<!-- Payload for response 23, stored in responseData (declared elsewhere); presumably rendered into #content-23 by script outside this section (confirm). -->
<script>
  responseData[23] = "<details><summary><strong>\ud83e\udde0 Reasoning</strong></summary>\n\nThinking Process:\n\n1. **\n\n</details>\n\nThinking Process: 1. **";
</script>
<!-- Response 24 card (variant "stop-api-double-newline"): header click calls toggleResponse(24); the body starts hidden. -->
<div class="response-section" id="resp-24">
  <h3 class="response-header" onclick="toggleResponse(24)">
    <span class="toggle-icon" id="icon-24">▶</span>
    <span class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-api-double-newline</span>
    <span class="response-meta">6 tokens · 16.9 tok/s</span>
  </h3>
  <div class="response-body" id="body-24" style="display:none">
    <!-- Run configuration and timing badges -->
    <div class="config-panel">
      <span class="config-badge" style="border-color:#d29922">temp: <strong>0.0</strong></span>
      <span class="config-badge" style="border-color:#58a6ff">max_tokens: <strong>4096</strong></span>
      <span class="config-badge" style="border-color:#a371f7">variant: <strong>stop-api-double-newline</strong></span>
      <!-- The stop value was emitted as a raw line break, which collapses to nothing when rendered; it is shown in escaped form instead.
           NOTE(review): "\n\n" is inferred from the variant name; confirm against the run configuration. -->
      <span class="config-badge" style="border-color:#d29922">stop: <strong>\n\n</strong></span>
      <span class="config-badge" style="border-color:#3fb950">finish: <strong>stop</strong></span>
      <span class="config-badge" style="border-color:#8b949e">load: <strong>1.0s</strong></span>
      <span class="config-badge" style="border-color:#8b949e">gen: <strong>0.35s</strong></span>
      <span class="config-badge" style="border-color:#8b949e">prompt_tok: <strong>23</strong></span>
      <span class="config-badge" style="border-color:#8b949e">comp_tok: <strong>6</strong></span>
      <span class="config-badge" style="border-color:#d29922">tok/s: <strong>16.9</strong></span>
    </div>
    <div class="prompt-box"><span class="prompt-label">PROMPT</span> Write a short paragraph about the ocean. Then write a second paragraph about mountains.</div>
    <div class="rendered-content" id="content-24"></div>
  </div>
</div>
<!-- Payload for response 24, stored in responseData (declared elsewhere); presumably rendered into #content-24 by script outside this section (confirm). -->
<script>
  responseData[24] = "<details><summary><strong>\ud83e\udde0 Reasoning</strong></summary>\n\nThinking Process:\n\n</details>\n\nThinking Process:";
</script>
<!-- Response 25: result card for the "stop-api-period" variant; the body starts collapsed (display:none) and is toggled by toggleResponse(25). -->
<div class="response-section" id="resp-25">
<h3 class="response-header" onclick="toggleResponse(25)">
<!-- The toggle is a real button so it is focusable; Enter/Space fire a click that bubbles to the h3 handler above. aria-expanded is intentionally omitted: toggleResponse would have to keep it in sync. -->
<button type="button" class="toggle-icon" id="icon-25" aria-label="Toggle response 25" aria-controls="body-25" style="background:none;border:0;padding:0;font:inherit;color:inherit;cursor:pointer">▶</button>
<span class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-api-period</span>
<span class="response-meta">6 tokens · 15.9 tok/s</span>
</h3>
<div class="response-body" id="body-25" style="display:none">
<div class="config-panel"><span class="config-badge" style="border-color:#d29922">temp: <strong>0.0</strong></span> <span class="config-badge" style="border-color:#58a6ff">max_tokens: <strong>4096</strong></span> <span class="config-badge" style="border-color:#a371f7">variant: <strong>stop-api-period</strong></span> <span class="config-badge" style="border-color:#d29922">stop: <strong>.</strong></span> <span class="config-badge" style="border-color:#3fb950">finish: <strong>stop</strong></span> <span class="config-badge" style="border-color:#8b949e">load: <strong>1.0s</strong></span> <span class="config-badge" style="border-color:#8b949e">gen: <strong>0.38s</strong></span> <span class="config-badge" style="border-color:#8b949e">prompt_tok: <strong>12</strong></span> <span class="config-badge" style="border-color:#8b949e">comp_tok: <strong>6</strong></span> <span class="config-badge" style="border-color:#d29922">tok/s: <strong>15.9</strong></span></div>
<div class="prompt-box"><span class="prompt-label">PROMPT</span> Tell me about the sun in three sentences.</div>
<!-- Empty placeholder; presumably filled from responseData[25] by script outside this block (TODO confirm). -->
<div class="rendered-content" id="content-25"></div>
</div>
</div>
<!-- Stored text for response 25: a collapsible "Reasoning" details block (containing "Thinking Process:" and "1") followed by the visible answer text "Thinking Process: 1". Presumably rendered into #content-25 by script outside this chunk (TODO confirm). -->
<script>responseData[25] = "<details><summary><strong>\ud83e\udde0 Reasoning</strong></summary>\n\nThinking Process:\n\n1\n\n</details>\n\nThinking Process: 1";</script>
<!-- Response 26: result card for the "stop-special-chars" variant; the body starts collapsed (display:none) and is toggled by toggleResponse(26). -->
<div class="response-section" id="resp-26">
<h3 class="response-header" onclick="toggleResponse(26)">
<!-- The toggle is a real button so it is focusable; Enter/Space fire a click that bubbles to the h3 handler above. aria-expanded is intentionally omitted: toggleResponse would have to keep it in sync. -->
<button type="button" class="toggle-icon" id="icon-26" aria-label="Toggle response 26" aria-controls="body-26" style="background:none;border:0;padding:0;font:inherit;color:inherit;cursor:pointer">▶</button>
<span class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-special-chars</span>
<span class="response-meta">7 tokens · 15.7 tok/s</span>
</h3>
<div class="response-body" id="body-26" style="display:none">
<div class="config-panel"><span class="config-badge" style="border-color:#d29922">temp: <strong>0.0</strong></span> <span class="config-badge" style="border-color:#58a6ff">max_tokens: <strong>4096</strong></span> <span class="config-badge" style="border-color:#a371f7">variant: <strong>stop-special-chars</strong></span> <span class="config-badge" style="border-color:#d29922">stop: <strong>**</strong></span> <span class="config-badge" style="border-color:#3fb950">finish: <strong>stop</strong></span> <span class="config-badge" style="border-color:#8b949e">load: <strong>1.0s</strong></span> <span class="config-badge" style="border-color:#8b949e">gen: <strong>0.44s</strong></span> <span class="config-badge" style="border-color:#8b949e">prompt_tok: <strong>17</strong></span> <span class="config-badge" style="border-color:#8b949e">comp_tok: <strong>7</strong></span> <span class="config-badge" style="border-color:#d29922">tok/s: <strong>15.7</strong></span></div>
<div class="prompt-box"><span class="prompt-label">PROMPT</span> List 3 facts about the moon. Use **bold** markdown for emphasis.</div>
<!-- Empty placeholder; presumably filled from responseData[26] by script outside this block (TODO confirm). -->
<div class="rendered-content" id="content-26"></div>
</div>
</div>
<!-- Stored text for response 26: a collapsible "Reasoning" details block (containing "Thinking Process:" and "1.") followed by the visible answer text "Thinking Process: 1.". Presumably rendered into #content-26 by script outside this chunk (TODO confirm). -->
<script>responseData[26] = "<details><summary><strong>\ud83e\udde0 Reasoning</strong></summary>\n\nThinking Process:\n\n1.\n\n</details>\n\nThinking Process: 1.";</script>
<!-- Response 27: result card for the "stop-api-newline" variant; the body starts collapsed (display:none) and is toggled by toggleResponse(27). -->
<div class="response-section" id="resp-27">
<h3 class="response-header" onclick="toggleResponse(27)">
<!-- The toggle is a real button so it is focusable; Enter/Space fire a click that bubbles to the h3 handler above. aria-expanded is intentionally omitted: toggleResponse would have to keep it in sync. -->
<button type="button" class="toggle-icon" id="icon-27" aria-label="Toggle response 27" aria-controls="body-27" style="background:none;border:0;padding:0;font:inherit;color:inherit;cursor:pointer">▶</button>
<span class="mono">mlx-community/Qwen3.5-35B-A3B-4bit @ stop-api-newline</span>
<span class="response-meta">6 tokens · 12.9 tok/s</span>
</h3>
<div class="response-body" id="body-27" style="display:none">
<!-- The stop sequence is a newline; it is shown in escaped form (\n) because a raw line break collapses to whitespace and leaves the badge blank. -->
<div class="config-panel"><span class="config-badge" style="border-color:#d29922">temp: <strong>0.0</strong></span> <span class="config-badge" style="border-color:#58a6ff">max_tokens: <strong>4096</strong></span> <span class="config-badge" style="border-color:#a371f7">variant: <strong>stop-api-newline</strong></span> <span class="config-badge" style="border-color:#d29922">stop: <strong>\n</strong></span> <span class="config-badge" style="border-color:#3fb950">finish: <strong>stop</strong></span> <span class="config-badge" style="border-color:#8b949e">load: <strong>2.0s</strong></span> <span class="config-badge" style="border-color:#8b949e">gen: <strong>0.46s</strong></span> <span class="config-badge" style="border-color:#8b949e">prompt_tok: <strong>15</strong></span> <span class="config-badge" style="border-color:#8b949e">comp_tok: <strong>6</strong></span> <span class="config-badge" style="border-color:#d29922">tok/s: <strong>12.9</strong></span></div>
<div class="prompt-box"><span class="prompt-label">PROMPT</span> What is the capital of France? Answer in one sentence.</div>
<!-- Empty placeholder; presumably filled from responseData[27] by script outside this block (TODO confirm). -->
<div class="rendered-content" id="content-27"></div>
</div>
</div>
<!-- Stored text for response 27: a collapsible "Reasoning" details block followed by the visible answer text. Presumably rendered into #content-27 by script outside this chunk (TODO confirm). The answer text repeats the reasoning text "Thinking Process:". -->
<script>responseData[27] = "<details><summary><strong>\ud83e\udde0 Reasoning</strong></summary>\n\nThinking Process:\n\n</details>\n\nThinking Process:";</script>
<script>
// Toggle individual response
function toggleResponse(idx) {
var body = document.getElementById('body-' + idx);
var icon = document.getElementById('icon-' + idx);
if (body.style.display === 'none') {
body.style.display = 'block';