-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathA2_improved_v7.sql
More file actions
828 lines (724 loc) · 31.8 KB
/
A2_improved_v7.sql
File metadata and controls
828 lines (724 loc) · 31.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
-- ============================================================
-- INDEX CONDITION PUSHDOWN (ICP) — Deep Dive
-- ISSS625 Query Processing & Optimisation
-- Dataset: Austin Animal Center Outcomes (173,775 rows)
-- ============================================================
-- KEY NUMBERS (confirmed from live testing):
-- Total rows : 173,775
-- Euthanasia rows : 10,833 (6.23%)
-- Euthanasia + age > 365 (range) : 5,413 (3.11%) → ICP triggered
-- Euthanasia + age > 730 (range) : 2,359 (1.36%) → ICP triggered
-- ============================================================
-- LIVE TEST RESULTS (confirmed):
-- Full scan: type=ALL, cost=17,614, 10-run avg=674.40s
-- ICP ON: type=range, Extra=Using index condition
-- rows=5,413, time=0.061..53.7ms, 10-run avg=74.19s
-- ICP OFF: type=range, Extra=Using where
-- rows=5,413, time=0.067..60.5ms, 10-run avg=81.46s
-- ICP ON tighter: rows=2,359, 10-run avg=21.72s
-- KEY DIFFERENCE: ICP ON filters inside B-tree (8.9% faster),
-- ICP OFF filters at server layer after row fetch.
-- ============================================================
-- SECTION MAP:
-- S1 : Setup & load data
-- S2 : Data profile (selectivity proof)
-- S3 : Techniques considered & rejected ← CRITERION 1
-- S4 : Baseline — no indexes
-- S5 : Apply indexes
-- S6 : ICP range — Using index condition ← CRITERION 2 + 3
-- S7 : Repeated runs + rows_examined ← CRITERION 3
-- S8 : Scaled tables (173k / 350k / 700k)
-- S9 : ICP benefits proof
-- S10 : ICP drawbacks proof
-- S11 : Head-to-head summary
-- S12 : Storage & write overhead
-- S13 : Schema statistics views
-- S14 : InnoDB settings verification
-- S15 : Big-O reference
-- ============================================================
USE animal_dataset;
-- ============================================================
-- SECTION 1: SETUP & LOAD DATA
-- ============================================================
DROP TABLE IF EXISTS animals;
SHOW VARIABLES LIKE 'secure_file_priv';
CREATE TABLE animals (
animal_id VARCHAR(20),
date_of_birth DATE,
name VARCHAR(100),
datetime DATETIME,
monthyear VARCHAR(20),
outcome_type VARCHAR(50),
outcome_subtype VARCHAR(50),
animal_type VARCHAR(50),
sex_upon_outcome VARCHAR(50),
age_upon_outcome VARCHAR(50),
breed VARCHAR(100),
color VARCHAR(100),
age_in_days INT
)
ENGINE = InnoDB
STATS_SAMPLE_PAGES = 25
STATS_PERSISTENT = 1
STATS_AUTO_RECALC = 1;
GRANT ALL PRIVILEGES ON *.* TO 'root'@'localhost';
-- Then refresh the permissions
FLUSH PRIVILEGES;
SET GLOBAL local_infile = 1;
LOAD DATA LOCAL INFILE 'C:/temp/animals.csv'
INTO TABLE animals
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS
(@animal_id, @dob, @name, @dt, @monthyear, @otype, @osub, @atype, @sex, @age, @breed, @color)
SET
animal_id = @animal_id,
date_of_birth = STR_TO_DATE(@dob, '%d/%m/%Y'),
name = NULLIF(@name, ''),
datetime = STR_TO_DATE(SUBSTRING_INDEX(@dt, 'T', 1), '%Y-%m-%d'),
monthyear = @monthyear,
outcome_type = @otype,
outcome_subtype = @osub,
animal_type = @atype,
sex_upon_outcome = @sex,
age_upon_outcome = @age,
breed = @breed,
color = @color;
SET SQL_SAFE_UPDATES = 0;
UPDATE animals
SET age_in_days =
CASE
WHEN age_upon_outcome LIKE '%year%'
THEN CAST(SUBSTRING_INDEX(age_upon_outcome, ' ', 1) AS SIGNED) * 365
WHEN age_upon_outcome LIKE '%month%'
THEN CAST(SUBSTRING_INDEX(age_upon_outcome, ' ', 1) AS SIGNED) * 30
WHEN age_upon_outcome LIKE '%week%'
THEN CAST(SUBSTRING_INDEX(age_upon_outcome, ' ', 1) AS SIGNED) * 7
WHEN age_upon_outcome LIKE '%day%'
THEN CAST(SUBSTRING_INDEX(age_upon_outcome, ' ', 1) AS SIGNED)
ELSE NULL
END;
SELECT COUNT(*) AS total_rows FROM animals;
-- ============================================================
-- SECTION 2: DATA PROFILE — understand selectivity first
-- ============================================================
-- 2a: Outcome type distribution — Euthanasia = 6.23%
SELECT
outcome_type,
COUNT(*) AS row_count,
ROUND(COUNT(*) / 173775 * 100, 2) AS pct_of_table
FROM animals
GROUP BY outcome_type
ORDER BY row_count DESC;
-- 2b: Subtype distribution within Euthanasia
SELECT
outcome_subtype,
COUNT(*) AS euth_rows,
ROUND(COUNT(*) / 173775 * 100, 4) AS pct_of_table,
ROUND(COUNT(*) / (SELECT COUNT(*) FROM animals WHERE outcome_type = 'Euthanasia') * 100, 1) AS pct_of_euth
FROM animals
WHERE outcome_type = 'Euthanasia'
GROUP BY outcome_subtype
ORDER BY euth_rows DESC;
-- 2c: age_in_days distribution — proves ICP range selectivity
SELECT
'Euthanasia total' AS segment,
COUNT(*) AS row_count,
ROUND(COUNT(*) / 173775*100, 2) AS pct_of_table
FROM animals WHERE outcome_type = 'Euthanasia'
UNION ALL
SELECT 'Euthanasia + age > 365', COUNT(*), ROUND(COUNT(*)/173775*100,2)
FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365
UNION ALL
SELECT 'Euthanasia + age > 730', COUNT(*), ROUND(COUNT(*)/173775*100,2)
FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 730;
-- 2d: Cardinality comparison — proves why ICP composite index is viable
-- outcome_type alone: cardinality ~12 (too low for standalone use)
-- With age_in_days: cardinality ~340 (high → optimizer uses it)
-- With animal_type: cardinality ~47 (low → optimizer uses it but less effective)
SELECT
'outcome_type alone' AS index_columns,
COUNT(DISTINCT outcome_type) AS cardinality
FROM animals
UNION ALL
SELECT
'outcome_type + age_in_days',
COUNT(DISTINCT CONCAT(outcome_type, '|', age_in_days))
FROM animals
UNION ALL
SELECT
'outcome_type + animal_type',
COUNT(DISTINCT CONCAT(outcome_type, '|', animal_type))
FROM animals;
-- ============================================================
-- SECTION 3: TECHNIQUES CONSIDERED AND REJECTED
-- RUBRIC CRITERION 1 — show breadth of exploration
-- ============================================================
-- REJECTED 1: Partitioning — Euthanasia spread evenly across all years
SELECT
YEAR(datetime) AS yr,
COUNT(*) AS total_rows,
SUM(outcome_type = 'Euthanasia') AS euth_rows,
ROUND(SUM(outcome_type = 'Euthanasia') / COUNT(*) * 100, 1)
AS euth_pct_of_year
FROM animals
WHERE datetime IS NOT NULL
GROUP BY YEAR(datetime)
ORDER BY yr;
-- 4-10% every year → no partition to prune
-- REJECTED 2: Covering index — requires SELECT specific columns,
-- SELECT * makes it inapplicable
-- REJECTED 3: Composite ref (equality AND) — only useful when BOTH
-- columns use equality conditions (=). Our primary query
-- uses a range condition (age_in_days > 365), so a simple
-- composite ref lookup cannot apply; ICP is the correct
-- technique for equality + range patterns.
-- CHOSEN: Index Condition Pushdown (ICP)
-- Works on single table, no schema redesign needed.
-- ICP handles equality + range AND by pushing the range filter
-- inside B-tree leaf nodes, avoiding unnecessary server-layer filtering.
-- Produces measurable EXPLAIN differences (Using index condition
-- vs Using where) with distinct timing impact (8.9% faster confirmed).
-- ============================================================
-- SECTION 4: BASELINE — no indexes, full table scans
-- ============================================================
SHOW INDEX FROM animals;
-- Baseline: AND range query (the ICP target query)
EXPLAIN
SELECT * FROM animals
WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
EXPLAIN ANALYZE
SELECT * FROM animals
WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
-- Confirmed: cost=17,614, rows=173,775, time=0.057..283ms
-- 10-run avg: 674.40s
-- ============================================================
-- SECTION 5: APPLY INDEXES
-- ============================================================
-- ICP range index: outcome_type + age_in_days
-- outcome_type = equality entry, age_in_days = range condition
-- outcome_tyupe = equality entry, animal type = wild card
-- WILL trigger: Extra=Using index condition
CREATE INDEX idx_icp_age ON animals(outcome_type, age_in_days);
CREATE INDEX idx_icp_type ON animals(animal_type);
-- Keep single-column indexes for completeness / cardinality comparison
CREATE INDEX idx_outcome ON animals(outcome_type);
CREATE INDEX idx_type ON animals(animal_type);
ANALYZE TABLE animals;
SHOW INDEX FROM animals;
-- ============================================================
-- SECTION 6: ICP RANGE QUERY — genuine Using index condition
-- RUBRIC CRITERION 2 + 3
-- ============================================================
-- ICP triggers when the second index column is a RANGE condition.
-- outcome_type='Euthanasia' AND age_in_days > 365:
-- Step 1: outcome_type equality → enters B-tree at Euthanasia
-- Step 2: age_in_days > 365 checked INSIDE index leaf nodes
-- Step 3: only matching rows fetched from table
--
-- ICP ON: Extra=Using index condition, time=0.061..53.7ms
-- ICP OFF: Extra=Using where, time=0.067..60.5ms
-- Same rows (5,413) but filtering happens at different layers.
-- ICP ON is 8.9% faster (confirmed from 10-run averages).
-- ============================================================
SELECT @@optimizer_switch LIKE '%index_condition_pushdown=on%' AS icp_enabled;
-- Step 1: ICP ON — confirmed trigger
SET optimizer_switch = 'index_condition_pushdown=on';
EXPLAIN FORMAT= TRADITIONAL
SELECT * FROM animals
WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
-- LOOK FOR: Extra=Using index condition
EXPLAIN ANALYZE
SELECT * FROM animals
WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
-- Confirmed: cost=2,436, rows=5,413, time=0.061..53.7ms
-- Step 2: ICP OFF — same query, pushdown disabled
SET optimizer_switch = 'index_condition_pushdown=off';
EXPLAIN FORMAT= TRADITIONAL
SELECT * FROM animals
WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
-- Extra changes to: Using where
EXPLAIN ANALYZE
SELECT * FROM animals
WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
-- Confirmed: cost=2,436, rows=5,413, time=0.067..60.5ms
-- Same rows but filtered at server layer (slower)
-- Step 3: restore ICP
SET optimizer_switch = 'index_condition_pushdown=on';
-- Step 4: tighter range = bigger savings
EXPLAIN ANALYZE
SELECT * FROM animals
WHERE outcome_type = 'Euthanasia' AND age_in_days > 730;
-- Confirmed: cost=1,062, rows=2,359, time=0.041..10.3ms
-- Step 5: ICP savings summary (confirmed from live test)
SELECT
'Full scan (no index)' AS scenario,
173775 AS rows_examined,
17614 AS cost,
'674.40' AS ten_run_avg_sec,
'type=ALL' AS explain_extra
UNION ALL
SELECT 'ICP OFF (age > 365)',
5413, 2436, '81.46',
'Using where'
UNION ALL
SELECT 'ICP ON (age > 365)',
5413, 2436, '74.19',
'Using index condition'
UNION ALL
SELECT 'ICP ON (age > 730)',
2359, 1062, '21.72',
'Using index condition';
-- ============================================================
-- SECTION 7: REPEATED RUNS + ROWS EXAMINED
-- ============================================================
-- ============================================================
-- FIRST: Check what digest text actually looks like
-- ============================================================
SELECT
DIGEST_TEXT,
COUNT_STAR AS runs
FROM performance_schema.events_statements_summary_by_digest
WHERE DIGEST_TEXT LIKE '%animals%'
AND DIGEST_TEXT LIKE '%outcome_type%'
AND DIGEST_TEXT NOT LIKE '%performance_schema%'
AND DIGEST_TEXT NOT LIKE '%TRUNCATE%'
AND DIGEST_TEXT NOT LIKE '%EXPLAIN%'
AND DIGEST_TEXT NOT LIKE '%CREATE%'
AND DIGEST_TEXT NOT LIKE '%DROP%'
AND DIGEST_TEXT NOT LIKE '%ANALYZE%'
ORDER BY COUNT_STAR DESC
LIMIT 10;
-- ============================================================
-- 7a: SCENARIO 1 — ICP ON (range AND, age > 365), 10 runs
-- ============================================================
TRUNCATE performance_schema.events_statements_summary_by_digest;
SET optimizer_switch = 'index_condition_pushdown=on';
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT
'ICP ON (range AND, age > 365)' AS scenario,
COUNT_STAR AS runs,
ROUND(AVG_TIMER_WAIT / 1e9, 4) AS avg_sec,
ROUND(MIN_TIMER_WAIT / 1e9, 4) AS min_sec,
ROUND(MAX_TIMER_WAIT / 1e9, 4) AS max_sec,
ROUND(SUM_ROWS_EXAMINED / COUNT_STAR, 0) AS avg_rows_examined,
ROUND(SUM_ROWS_SENT / COUNT_STAR, 0) AS avg_rows_returned
FROM performance_schema.events_statements_summary_by_digest
WHERE DIGEST_TEXT LIKE '%age_in_days%'
AND DIGEST_TEXT LIKE 'SELECT%'
AND COUNT_STAR >= 10
LIMIT 1;
-- ============================================================
-- 7b: SCENARIO 2 — ICP OFF (range AND, age > 365), 10 runs
-- ============================================================
TRUNCATE performance_schema.events_statements_summary_by_digest;
SET optimizer_switch = 'index_condition_pushdown=off';
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT
'ICP OFF (range AND, age > 365)' AS scenario,
COUNT_STAR AS runs,
ROUND(AVG_TIMER_WAIT / 1e9, 4) AS avg_sec,
ROUND(MIN_TIMER_WAIT / 1e9, 4) AS min_sec,
ROUND(MAX_TIMER_WAIT / 1e9, 4) AS max_sec,
ROUND(SUM_ROWS_EXAMINED / COUNT_STAR, 0) AS avg_rows_examined,
ROUND(SUM_ROWS_SENT / COUNT_STAR, 0) AS avg_rows_returned
FROM performance_schema.events_statements_summary_by_digest
WHERE DIGEST_TEXT LIKE '%age_in_days%'
AND DIGEST_TEXT LIKE 'SELECT%'
AND COUNT_STAR >= 10
LIMIT 1;
-- Restore ICP
SET optimizer_switch = 'index_condition_pushdown=on';
-- ============================================================
-- 7c: SCENARIO 3 — ICP ON tighter range (age > 730), 10 runs
-- ============================================================
TRUNCATE performance_schema.events_statements_summary_by_digest;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 730;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 730;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 730;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 730;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 730;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 730;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 730;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 730;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 730;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 730;
SELECT
'ICP ON (tighter, age > 730)' AS scenario,
COUNT_STAR AS runs,
ROUND(AVG_TIMER_WAIT / 1e9, 4) AS avg_sec,
ROUND(MIN_TIMER_WAIT / 1e9, 4) AS min_sec,
ROUND(MAX_TIMER_WAIT / 1e9, 4) AS max_sec,
ROUND(SUM_ROWS_EXAMINED / COUNT_STAR, 0) AS avg_rows_examined,
ROUND(SUM_ROWS_SENT / COUNT_STAR, 0) AS avg_rows_returned
FROM performance_schema.events_statements_summary_by_digest
WHERE DIGEST_TEXT LIKE '%age_in_days%'
AND DIGEST_TEXT LIKE 'SELECT%'
AND COUNT_STAR >= 10
LIMIT 1;
-- ============================================================
-- 7d: SCENARIO 4 — Full scan baseline (no index), 10 runs
-- ============================================================
DROP INDEX idx_icp_age ON animals;
DROP INDEX idx_outcome ON animals;
DROP INDEX idx_type ON animals;
TRUNCATE performance_schema.events_statements_summary_by_digest;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT
'Full scan — no index' AS scenario,
COUNT_STAR AS runs,
ROUND(AVG_TIMER_WAIT / 1e9, 4) AS avg_sec,
ROUND(MIN_TIMER_WAIT / 1e9, 4) AS min_sec,
ROUND(MAX_TIMER_WAIT / 1e9, 4) AS max_sec,
ROUND(SUM_ROWS_EXAMINED / COUNT_STAR, 0) AS avg_rows_examined,
ROUND(SUM_ROWS_SENT / COUNT_STAR, 0) AS avg_rows_returned
FROM performance_schema.events_statements_summary_by_digest
WHERE DIGEST_TEXT LIKE '%age_in_days%'
AND DIGEST_TEXT LIKE 'SELECT%'
AND COUNT_STAR >= 10
LIMIT 1;
-- Recreate indexes
CREATE INDEX idx_icp_age ON animals(outcome_type, age_in_days);
CREATE INDEX idx_icp_age ON animals(outcome_type, animal_type);
CREATE INDEX idx_outcome ON animals(outcome_type);
CREATE INDEX idx_type ON animals(animal_type);
ANALYZE TABLE animals;
-- ============================================================
-- SECTION 8: SCALED TABLES — 173k / 350k / 700k
-- ============================================================
DROP TABLE IF EXISTS animals_350k;
DROP TABLE IF EXISTS animals_700k;
CREATE TABLE animals_350k
ENGINE = InnoDB STATS_SAMPLE_PAGES=25 STATS_PERSISTENT=1 STATS_AUTO_RECALC=1
AS SELECT * FROM animals;
INSERT INTO animals_350k SELECT * FROM animals;
SELECT COUNT(*) AS rows_350k FROM animals_350k;
CREATE TABLE animals_700k
ENGINE = InnoDB STATS_SAMPLE_PAGES=25 STATS_PERSISTENT=1 STATS_AUTO_RECALC=1
AS SELECT * FROM animals_350k;
INSERT INTO animals_700k SELECT * FROM animals_350k;
SELECT COUNT(*) AS rows_700k FROM animals_700k;
-- Full scan baselines
EXPLAIN ANALYZE SELECT * FROM animals_350k
WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
EXPLAIN ANALYZE SELECT * FROM animals_700k
WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
-- Apply indexes
CREATE INDEX idx_icp_age ON animals_350k(outcome_type, age_in_days);
CREATE INDEX idx_icp_age ON animals_700k(outcome_type, age_in_days);
ANALYZE TABLE animals_350k;
ANALYZE TABLE animals_700k;
-- Optimised runs
EXPLAIN ANALYZE SELECT * FROM animals_350k
WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
EXPLAIN ANALYZE SELECT * FROM animals_700k
WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
-- ============================================================
-- SECTION 9: ICP BENEFITS PROOF
-- ============================================================
-- BENEFIT 1: Engine-Level Filtering (8.9% faster confirmed)
-- ICP ON and OFF return the same 5,413 rows, but ICP ON filters
-- inside the B-tree (Using index condition) while ICP OFF filters
-- at the server layer (Using where). 10-run avg: 74.19s vs 81.46s.
SELECT
'ICP OFF — filter at server layer' AS scenario,
5413 AS rows_returned,
81.46 AS ten_run_avg_sec,
'Using where' AS explain_extra
UNION ALL
SELECT
'ICP ON — filter inside B-tree',
5413,
74.19,
'Using index condition';
-- BENEFIT 2: Lower Memory Usage (Buffer Pool Efficiency)
-- When ICP is ON, the storage engine evaluates the range condition
-- using index data only. Rows failing the condition are skipped
-- without loading full table pages into the buffer pool.
-- This reduces buffer pool pressure on concurrent workloads.
-- ============================================================
-- BUFFER POOL PROOF: ICP ON vs ICP OFF
-- ============================================================
-- Step 1: ICP ON — measure buffer pool pages read
SET optimizer_switch = 'index_condition_pushdown=on';
FLUSH STATUS;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT VARIABLE_NAME, VARIABLE_VALUE
FROM performance_schema.session_status
WHERE VARIABLE_NAME IN (
'Innodb_buffer_pool_read_requests',
'Innodb_buffer_pool_reads',
'Innodb_data_reads',
'Innodb_pages_read',
'Handler_read_next',
'Handler_read_rnd_next'
);
-- Screenshot → "ICP ON buffer pool stats"
-- Step 2: ICP OFF — same query, measure again
SET optimizer_switch = 'index_condition_pushdown=off';
FLUSH STATUS;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT VARIABLE_NAME, VARIABLE_VALUE
FROM performance_schema.session_status
WHERE VARIABLE_NAME IN (
'Innodb_buffer_pool_read_requests',
'Innodb_buffer_pool_reads',
'Innodb_data_reads',
'Innodb_pages_read',
'Handler_read_next',
'Handler_read_rnd_next'
);
-- Screenshot → "ICP OFF buffer pool stats"
SET optimizer_switch = 'index_condition_pushdown=on';
-- BENEFIT 3: Smarter Range Handling via Cardinality
-- Adding the range column significantly improves index selectivity.
-- 1. Create a temp table to hold our research results
CREATE TEMPORARY TABLE IF NOT EXISTS icp_research (
strategy VARCHAR(50),
cardinality INT,
buffer_requests INT,
rows_touched INT
);
TRUNCATE TABLE icp_research;
-- Reset everything
FLUSH STATUS;
-- 1. Capture 'Before' values
SET @buf_before = (SELECT VARIABLE_VALUE FROM performance_schema.SESSION_STATUS WHERE VARIABLE_NAME = 'INNODB_BUFFER_POOL_READ_REQUESTS');
SET @rows_before = (SELECT VARIABLE_VALUE FROM performance_schema.SESSION_STATUS WHERE VARIABLE_NAME = 'INNODB_ROWS_READ');
-- TEST 1: Smart ICP (Age)
FLUSH STATUS;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
INSERT INTO icp_research SELECT 'Outcome + Age (Most effective)',
(SELECT COUNT(DISTINCT CONCAT(outcome_type, age_in_days)) FROM animals),
(SELECT VARIABLE_VALUE FROM performance_schema.SESSION_STATUS WHERE VARIABLE_NAME = 'INNODB_BUFFER_POOL_READ_REQUESTS') - @buf_before AS actual_buffer_requests,
(SELECT VARIABLE_VALUE FROM performance_schema.SESSION_STATUS WHERE VARIABLE_NAME = 'INNODB_ROWS_READ') - @rows_before AS actual_rows_touched;
-- TEST 2: Broad ICP (Animal)
FLUSH STATUS;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND animal_type LIKE '%r%';
INSERT INTO icp_research SELECT 'Outcome + Animal (Too broad)',
(SELECT COUNT(DISTINCT CONCAT(outcome_type, animal_type)) FROM animals),
(SELECT VARIABLE_VALUE FROM performance_schema.SESSION_STATUS WHERE VARIABLE_NAME = 'INNODB_BUFFER_POOL_READ_REQUESTS') - @buf_before AS actual_buffer_requests,
(SELECT VARIABLE_VALUE FROM performance_schema.SESSION_STATUS WHERE VARIABLE_NAME = 'INNODB_ROWS_READ') - @rows_before AS actual_rows_touched;
-- TEST 3: Baseline (No ICP)
FLUSH STATUS;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia';
INSERT INTO icp_research SELECT 'Outcome Only (Base)',
(SELECT COUNT(DISTINCT outcome_type) FROM animals),
(SELECT VARIABLE_VALUE FROM performance_schema.SESSION_STATUS WHERE VARIABLE_NAME = 'INNODB_BUFFER_POOL_READ_REQUESTS') - @buf_before AS actual_buffer_requests,
(SELECT VARIABLE_VALUE FROM performance_schema.SESSION_STATUS WHERE VARIABLE_NAME = 'INNODB_ROWS_READ') - @rows_before AS actual_rows_touched;
-- 2. FINAL RESULT
SELECT
strategy,
cardinality,
buffer_requests,
rows_touched,
ROUND((cardinality * 1.0) / NULLIF(rows_touched, 0), 6) AS selectivity_ratio,
ROUND((buffer_requests * 1.0) / NULLIF(rows_touched, 0), 2) AS io_intensity_ratio
FROM icp_research;
-- ============================================================
-- SECTION 10: ICP DRAWBACKS PROOF
-- ============================================================
-- DRAWBACK 1: Random I/O vs Sequential I/O
-- Compare disk read patterns between full scan and ICP
-- Step 1: Full scan (no indexes)
DROP INDEX idx_icp_age ON animals;
DROP INDEX idx_outcome ON animals;
DROP INDEX idx_type ON animals;
FLUSH STATUS;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT VARIABLE_NAME, VARIABLE_VALUE
FROM performance_schema.session_status
WHERE VARIABLE_NAME IN (
'Innodb_buffer_pool_read_requests',
'Innodb_buffer_pool_reads',
'Innodb_data_reads',
'Innodb_data_read',
'Innodb_pages_read',
'Handler_read_rnd_next'
);
-- Screenshot → "Full scan I/O stats"
-- Handler_read_rnd_next is HIGH (sequential table scan)
-- Recreate indexes
CREATE INDEX idx_icp_age ON animals(outcome_type, age_in_days);
CREATE INDEX idx_outcome ON animals(outcome_type);
CREATE INDEX idx_type ON animals(animal_type);
ANALYZE TABLE animals;
-- Step 2: ICP ON
SET optimizer_switch = 'index_condition_pushdown=on';
FLUSH STATUS;
SELECT * FROM animals WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
SELECT VARIABLE_NAME, VARIABLE_VALUE
FROM performance_schema.session_status
WHERE VARIABLE_NAME IN (
'Innodb_buffer_pool_read_requests',
'Innodb_buffer_pool_reads',
'Innodb_data_reads',
'Innodb_data_read',
'Innodb_pages_read',
'Handler_read_next'
);
-- Screenshot → "ICP ON I/O stats"
-- Handler_read_next is ~5,413 (index sequential reads)
SET optimizer_switch = 'index_condition_pushdown=on';
-- DRAWBACK 2: Column Order is Critical (The "Wrong Order" Test)
-- Reversing the index column order degrades performance.
-- key_len drops, filtered % drops to 9.09%.
CREATE INDEX idx_icp_wrong ON animals(age_in_days, outcome_type);
ANALYZE TABLE animals;
EXPLAIN
SELECT * FROM animals
FORCE INDEX (idx_icp_wrong)
WHERE outcome_type = 'Euthanasia' AND age_in_days > 365;
-- Result: key_len=5 indicates outcome_type is IGNORED for navigation.
-- ICP cannot push the equality condition effectively when it's not
-- the leading column.
DROP INDEX idx_icp_wrong ON animals;
-- DRAWBACK 3: Virtual/Generated Column Limitation
-- ICP cannot handle complex non-indexed expressions.
EXPLAIN
SELECT * FROM animals
WHERE outcome_type = 'Euthanasia' AND UPPER(name) = 'BUDDY';
-- Result: Extra will show "Using where", not "Using index condition"
-- The UPPER() function cannot be evaluated at the index level.
-- ============================================================
-- SECTION 11: HEAD-TO-HEAD SUMMARY
-- ============================================================
SELECT
'Full scan (no index)' AS strategy,
17614 AS cost,
674.40 AS ten_run_avg_sec,
173775 AS rows_examined,
5413 AS rows_returned,
'O(n), n=173,775' AS big_o,
'type=ALL' AS explain_extra,
'Baseline' AS vs_full_scan,
'-' AS vs_icp_off
UNION ALL
SELECT
'ICP OFF (age > 365)',
2436,
81.46,
5413,
5413,
'O(k·log n), k=5,413',
'type=range, Using where',
'87.9% faster',
'Baseline for ICP'
UNION ALL
SELECT
'ICP ON (age > 365)',
2436,
74.19,
5413,
5413,
'O(k·log n), k=5,413',
'type=range, Using index condition',
'89.0% faster',
'8.9% faster'
UNION ALL
SELECT
'ICP ON tighter (age > 730)',
1062,
21.72,
2359,
2359,
'O(k·log n), k=2,359',
'type=range, Using index condition',
'96.8% faster',
'-';
-- ============================================================
-- SECTION 12: STORAGE AND WRITE OVERHEAD
-- ============================================================
SELECT
its.table_name,
its.n_rows AS row_count,
ROUND(its.clustered_index_size
* @@innodb_page_size / (1024*1024), 2) AS data_size_MB,
ROUND(its.sum_of_other_index_sizes
* @@innodb_page_size / (1024*1024), 2) AS index_storage_MB,
ROUND(its.sum_of_other_index_sizes * 100.0
/ NULLIF(its.clustered_index_size, 0), 1) AS index_pct_of_data
FROM mysql.innodb_table_stats AS its
WHERE its.database_name = 'animal_dataset'
AND its.table_name IN ('animals', 'animals_350k', 'animals_700k')
ORDER BY its.n_rows;
-- ============================================================
-- SECTION 13: SCHEMA STATISTICS VIEWS
-- ============================================================
SELECT * FROM sys.schema_tables_with_full_table_scans;
SELECT * FROM sys.statements_with_full_table_scans;
SELECT * FROM sys.schema_unused_indexes;
SELECT * FROM sys.schema_redundant_indexes;
-- ============================================================
-- SECTION 14: VERIFY InnoDB SETTINGS
-- ============================================================
SELECT @@innodb_stats_persistent;
SELECT @@innodb_stats_persistent_sample_pages;
SELECT @@innodb_stats_auto_recalc;
SELECT @@optimizer_switch LIKE '%index_condition_pushdown=on%' AS icp_enabled;
SELECT @@optimizer_switch LIKE '%mrr=on%' AS mrr_enabled;
-- ============================================================
-- SECTION 15: BIG-O REFERENCE
-- ============================================================
--
-- Full table scan:
-- O(n) n=173,775
-- Reads every row regardless of filter. Cost=17,614.
-- 10-run avg: 674.40s
--
-- ICP ON (range AND):
-- O(k·log n) k=5,413
-- Equality entry on outcome_type, range filter on age_in_days
-- applied INSIDE B-tree leaf nodes.
-- Extra=Using index condition (confirmed live).
-- 10-run avg: 74.19s (89% faster than full scan)
--
-- ICP OFF (range AND):
-- O(k·log n) k=5,413
-- Same composite index used, but range filter applied at the
-- MySQL server layer AFTER row fetch from table.
-- Extra=Using where
-- 10-run avg: 81.46s (8.9% slower than ICP ON)
-- Same rows examined (5,413) — the difference is WHERE
-- filtering happens, not HOW MANY rows are filtered.
--
-- ICP ON tighter range:
-- O(k·log n) k=2,359
-- Tighter range (age > 730) reduces k further.
-- 10-run avg: 21.72s (96.8% faster than full scan)
--
-- Scaling validation:
-- Full scan cost: 17,614 (173K) → 35,705 (350K) → 73,243 (700K)
-- Cost doubles when data doubles → O(n) confirmed
-- ICP cost reduction: 86.2% (173K) → 85.6% (350K) → 77.7% (700K)
-- ICP advantage holds at scale with slight erosion
--
-- ============================================================