-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathcontrasts.html
911 lines (624 loc) · 84.3 KB
/
contrasts.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
<!doctype html>
<html lang="en" class="no-js">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<link rel="icon" href="_static/favicon.ico">
<title>Patsy: Contrast Coding Systems for categorical variables - statsmodels 0.15.0 (+617)</title>
<link rel="icon" type="image/png" sizes="32x32" href="_static/icons/favicon-32x32.png">
<link rel="icon" type="image/png" sizes="16x16" href="_static/icons/favicon-16x16.png">
<link rel="manifest" href="_static/icons/site.webmanifest">
<link rel="mask-icon" href="_static/icons/safari-pinned-tab.svg" color="#919191">
<meta name="msapplication-TileColor" content="#2b5797">
<meta name="msapplication-config" content="_static/icons/browserconfig.xml">
<link rel="stylesheet" href="_static/stylesheets/examples.css">
<link rel="stylesheet" href="_static/stylesheets/deprecation.css">
<meta name="theme-color" content="#4051b5">
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
<link rel="stylesheet" type="text/css" href="_static/sphinx_immaterial_theme.02cb18745d09eea51.min.css?v=ff456132" />
<link rel="stylesheet" type="text/css" href="_static/graphviz.css?v=fd3f3429" />
<link rel="stylesheet" type="text/css" href="_static/plot_directive.css" />
<script>__md_scope=new URL(".",location),__md_hash=e=>[...e].reduce((e,_)=>(e<<5)-e+_.charCodeAt(0),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
</head>
<body dir="ltr" data-md-color-scheme="" data-md-color-primary="indigo" data-md-color-accent="blue">
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
<label class="md-overlay" for="__drawer"></label>
<div data-md-component="skip">
</div>
<div data-md-component="announce">
</div>
<div data-md-component="outdated" hidden>
</div>
<header class="md-header" data-md-component="header">
<nav class="md-header__inner md-grid" aria-label="Header">
<a href="index.html" title="statsmodels 0.15.0 (+617)" class="md-header__button md-logo" aria-label="statsmodels 0.15.0 (+617)" data-md-component="logo">
<img src="_static/statsmodels-logo-v2-bw.svg" alt="logo">
</a>
<label class="md-header__button md-icon" for="__drawer">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3V6m0 5h18v2H3v-2m0 5h18v2H3v-2Z"/></svg>
</label>
<div class="md-header__title" data-md-component="header-title">
<div class="md-header__ellipsis">
<div class="md-header__topic">
<span class="md-ellipsis">
statsmodels 0.15.0 (+617)
</span>
</div>
<div class="md-header__topic" data-md-component="header-topic">
<span class="md-ellipsis">
Patsy: Contrast Coding Systems for categorical variables
</span>
</div>
</div>
</div>
<label class="md-header__button md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg>
</label>
<div class="md-search" data-md-component="search" role="dialog">
<label class="md-search__overlay" for="__search"></label>
<div class="md-search__inner" role="search">
<form class="md-search__form" name="search">
<input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
<label class="md-search__icon md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11h12Z"/></svg>
</label>
<nav class="md-search__options" aria-label="Search">
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12 19 6.41Z"/></svg>
</button>
</nav>
</form>
<div class="md-search__output">
<div class="md-search__scrollwrap" data-md-scrollfix>
<div class="md-search-result" data-md-component="search-result">
<div class="md-search-result__meta">
Initializing search
</div>
<ol class="md-search-result__list"></ol>
</div>
</div>
</div>
</div>
</div>
<div class="md-header__source">
<a href="https://github.com/statsmodels/statsmodels/" title="Go to repository" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 496 512"><!--! Font Awesome Free 6.2.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2022 Fonticons, Inc.--><path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z"/></svg>
</div>
<div class="md-source__repository">
statsmodels
</div>
</a>
</div>
</nav>
</header>
<div class="md-container" data-md-component="container">
<main class="md-main" data-md-component="main">
<div class="md-main__inner md-grid">
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href="index.html" title="statsmodels 0.15.0 (+617)" class="md-nav__button md-logo" aria-label="statsmodels 0.15.0 (+617)" data-md-component="logo">
<img src="_static/statsmodels-logo-v2-bw.svg" alt="logo">
</a>
statsmodels 0.15.0 (+617)
</label>
<div class="md-nav__source">
<a href="https://github.com/statsmodels/statsmodels/" title="Go to repository" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 496 512"><!--! Font Awesome Free 6.2.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2022 Fonticons, Inc.--><path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z"/></svg>
</div>
<div class="md-source__repository">
statsmodels
</div>
</a>
</div>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="install.html" class="md-nav__link">
<span title="/install.rst (reference label)" class="md-ellipsis">Installing statsmodels</span>
</a>
</li>
<li class="md-nav__item">
<a href="gettingstarted.html" class="md-nav__link">
<span title="/gettingstarted.rst (reference label)" class="md-ellipsis">Getting started</span>
</a>
</li>
<li class="md-nav__item">
<a href="user-guide.html" class="md-nav__link">
<span title="/user-guide.rst (reference label)" class="md-ellipsis">User Guide</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/index.html" class="md-nav__link">
<span title="/examples/index.rst (reference label)" class="md-ellipsis">Examples</span>
</a>
</li>
<li class="md-nav__item">
<a href="api.html" class="md-nav__link">
<span title="/api.rst (reference label)" class="md-ellipsis">API Reference</span>
</a>
</li>
<li class="md-nav__item">
<a href="about.html" class="md-nav__link">
<span title="/about.rst (reference label)" class="md-ellipsis">About statsmodels</span>
</a>
</li>
<li class="md-nav__item">
<a href="dev/index.html" class="md-nav__link">
<span title="/dev/index.rst (reference label)" class="md-ellipsis">Developer Page</span>
</a>
</li>
<li class="md-nav__item">
<a href="release/index.html" class="md-nav__link">
<span title="/release/index.rst (reference label)" class="md-ellipsis">Release Notes</span>
</a>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--secondary">
</nav>
</div>
</div>
</div>
<div class="md-content" data-md-component="content">
<article class="md-content__inner md-typeset" role="main">
<h1 id="patsy-contrast-coding-systems-for-categorical-variables">Patsy: Contrast Coding Systems for categorical variables<a class="headerlink" href="#patsy-contrast-coding-systems-for-categorical-variables" title="Link to this heading">¶</a></h1>
<div class="note admonition">
<p class="admonition-title">Note</p>
<p>This document is based on <a class="reference external" href="https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/">this excellent resource from UCLA</a>.</p>
</div>
<p>A categorical variable of K categories, or levels, usually enters a regression as a sequence of K-1 dummy variables. This amounts to a linear hypothesis on the level means. That is, each test statistic for these variables amounts to testing whether the mean for that level is statistically significantly different from the mean of the base category. This dummy coding is called Treatment coding in R parlance, and we will follow this convention. There are, however, different coding methods that amount to different sets of linear hypotheses.</p>
<p>In fact, the dummy coding is not technically a contrast coding. This is because the dummy variables add to one and are not functionally independent of the model’s intercept. On the other hand, a set of <em>contrasts</em> for a categorical variable with <cite>k</cite> levels is a set of <cite>k-1</cite> functionally independent linear combinations of the factor level means that are also independent of the sum of the dummy variables. The dummy coding is not wrong <em>per se</em>. It captures all of the coefficients, but it complicates matters when the model assumes independence of the coefficients such as in ANOVA. Linear regression models do not assume independence of the coefficients and thus dummy coding is often the only coding that is taught in this context.</p>
<p>To have a look at the contrast matrices in Patsy, we will use data from UCLA ATS. First let’s load the data.</p>
<h2 id="example-data">Example Data<a class="headerlink" href="#example-data" title="Link to this heading">¶</a></h2>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><code><span class="gp">In [1]: </span><span class="kn">import</span><span class="w"> </span><span class="nn">pandas</span>
<span class="gp">In [2]: </span><span class="n">url</span> <span class="o">=</span> <span class="s1">'https://stats.idre.ucla.edu/stat/data/hsb2.csv'</span>
<span class="gp">In [3]: </span><span class="n">hsb2</span> <span class="o">=</span> <span class="n">pandas</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">url</span><span class="p">)</span>
</code></pre></div>
</div>
<p>It will be instructive to look at the mean of the dependent variable, write, for each level of race (1 = Hispanic, 2 = Asian, 3 = African American and 4 = Caucasian).</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><code><span class="gp">In [4]: </span><span class="n">hsb2</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="s1">'race'</span><span class="p">)[</span><span class="s1">'write'</span><span class="p">]</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span>
<span class="gh">Out[4]: </span>
<span class="go">race</span>
<span class="go">1 46.458333</span>
<span class="go">2 58.000000</span>
<span class="go">3 48.200000</span>
<span class="go">4 54.055172</span>
<span class="go">Name: write, dtype: float64</span>
</code></pre></div>
</div>
<h2 id="treatment-dummy-coding">Treatment (Dummy) Coding<a class="headerlink" href="#treatment-dummy-coding" title="Link to this heading">¶</a></h2>
<p>Dummy coding is likely the best-known coding scheme. It compares each level of the categorical variable to a base reference level. The mean of the base reference level is the value of the intercept. Treatment coding is the default contrast in Patsy for unordered categorical factors. The Treatment contrast matrix for race would be</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><code><span class="gp">In [5]: </span><span class="kn">from</span><span class="w"> </span><span class="nn">patsy.contrasts</span><span class="w"> </span><span class="kn">import</span> <span class="n">Treatment</span>
<span class="gp">In [6]: </span><span class="n">levels</span> <span class="o">=</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">4</span><span class="p">]</span>
<span class="gp">In [7]: </span><span class="n">contrast</span> <span class="o">=</span> <span class="n">Treatment</span><span class="p">(</span><span class="n">reference</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">code_without_intercept</span><span class="p">(</span><span class="n">levels</span><span class="p">)</span>
<span class="gp">In [8]: </span><span class="nb">print</span><span class="p">(</span><span class="n">contrast</span><span class="o">.</span><span class="n">matrix</span><span class="p">)</span>
<span class="go">[[0. 0. 0.]</span>
<span class="go"> [1. 0. 0.]</span>
<span class="go"> [0. 1. 0.]</span>
<span class="go"> [0. 0. 1.]]</span>
</code></pre></div>
</div>
<p>Here we used <cite>reference=0</cite>, which implies that the first level, Hispanic, is the reference category against which the other level effects are measured. As mentioned above, the columns do not sum to zero and are thus not independent of the intercept. To be explicit, let’s look at how this would encode the <cite>race</cite> variable.</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><code><span class="gp">In [9]: </span><span class="n">contrast</span><span class="o">.</span><span class="n">matrix</span><span class="p">[</span><span class="n">hsb2</span><span class="o">.</span><span class="n">race</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="p">:][:</span><span class="mi">20</span><span class="p">]</span>
<span class="gh">Out[9]: </span>
<span class="go">array([[0., 0., 1.],</span>
<span class="go"> [0., 0., 1.],</span>
<span class="go"> [0., 0., 1.],</span>
<span class="go"> [0., 0., 1.],</span>
<span class="go"> [0., 0., 1.],</span>
<span class="go"> [0., 0., 1.],</span>
<span class="go"> [0., 1., 0.],</span>
<span class="go"> [0., 0., 0.],</span>
<span class="go"> [0., 0., 1.],</span>
<span class="go"> [0., 1., 0.],</span>
<span class="go"> [0., 0., 1.],</span>
<span class="go"> [0., 0., 1.],</span>
<span class="go"> [0., 0., 1.],</span>
<span class="go"> [0., 0., 1.],</span>
<span class="go"> [0., 1., 0.],</span>
<span class="go"> [0., 0., 1.],</span>
<span class="go"> [0., 0., 1.],</span>
<span class="go"> [0., 0., 1.],</span>
<span class="go"> [0., 0., 1.],</span>
<span class="go"> [0., 0., 1.]])</span>
</code></pre></div>
</div>
<p>This is a bit of a trick, as subtracting one from the <cite>race</cite> codes conveniently maps them to zero-based indices. When the levels do not map to indices so directly, the conversion happens under the hood, so this trick will not work in general, but it is nonetheless a useful exercise to fix ideas. The output below illustrates a regression using the three contrasts above.</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><code><span class="gp">In [10]: </span><span class="kn">from</span><span class="w"> </span><span class="nn">statsmodels.formula.api</span><span class="w"> </span><span class="kn">import</span> <span class="n">ols</span>
<span class="gp">In [11]: </span><span class="n">mod</span> <span class="o">=</span> <span class="n">ols</span><span class="p">(</span><span class="s2">"write ~ C(race, Treatment)"</span><span class="p">,</span> <span class="n">data</span><span class="o">=</span><span class="n">hsb2</span><span class="p">)</span>
<span class="gp">In [12]: </span><span class="n">res</span> <span class="o">=</span> <span class="n">mod</span><span class="o">.</span><span class="n">fit</span><span class="p">()</span>
<span class="gp">In [13]: </span><span class="nb">print</span><span class="p">(</span><span class="n">res</span><span class="o">.</span><span class="n">summary</span><span class="p">())</span>
<span class="go"> OLS Regression Results </span>
<span class="go">==============================================================================</span>
<span class="go">Dep. Variable: write R-squared: 0.107</span>
<span class="go">Model: OLS Adj. R-squared: 0.093</span>
<span class="go">Method: Least Squares F-statistic: 7.833</span>
<span class="go">Date: Wed, 19 Feb 2025 Prob (F-statistic): 5.78e-05</span>
<span class="go">Time: 12:52:08 Log-Likelihood: -721.77</span>
<span class="go">No. Observations: 200 AIC: 1452.</span>
<span class="go">Df Residuals: 196 BIC: 1465.</span>
<span class="go">Df Model: 3 </span>
<span class="go">Covariance Type: nonrobust </span>
<span class="go">===========================================================================================</span>
<span class="go"> coef std err t P>|t| [0.025 0.975]</span>
<span class="gt">-------------------------------------------------------------------------------------------</span>
<span class="n">Intercept</span> <span class="mf">46.4583</span> <span class="mf">1.842</span> <span class="mf">25.218</span> <span class="mf">0.000</span> <span class="mf">42.825</span> <span class="mf">50.091</span>
<span class="n">C</span><span class="p">(</span><span class="n">race</span><span class="p">,</span> <span class="n">Treatment</span><span class="p">)[</span><span class="n">T</span><span class="mf">.2</span><span class="p">]</span> <span class="mf">11.5417</span> <span class="mf">3.286</span> <span class="mf">3.512</span> <span class="mf">0.001</span> <span class="mf">5.061</span> <span class="mf">18.022</span>
<span class="n">C</span><span class="p">(</span><span class="n">race</span><span class="p">,</span> <span class="n">Treatment</span><span class="p">)[</span><span class="n">T</span><span class="mf">.3</span><span class="p">]</span> <span class="mf">1.7417</span> <span class="mf">2.732</span> <span class="mf">0.637</span> <span class="mf">0.525</span> <span class="o">-</span><span class="mf">3.647</span> <span class="mf">7.131</span>
<span class="n">C</span><span class="p">(</span><span class="n">race</span><span class="p">,</span> <span class="n">Treatment</span><span class="p">)[</span><span class="n">T</span><span class="mf">.4</span><span class="p">]</span> <span class="mf">7.5968</span> <span class="mf">1.989</span> <span class="mf">3.820</span> <span class="mf">0.000</span> <span class="mf">3.675</span> <span class="mf">11.519</span>
<span class="o">==============================================================================</span>
<span class="ne">Omnibus</span>: 10.487 Durbin-Watson: 1.779
<span class="n">Prob</span><span class="p">(</span><span class="n">Omnibus</span><span class="p">):</span> <span class="mf">0.005</span> <span class="n">Jarque</span><span class="o">-</span><span class="n">Bera</span> <span class="p">(</span><span class="n">JB</span><span class="p">):</span> <span class="mf">11.031</span>
<span class="ne">Skew</span>: -0.551 Prob(JB): 0.00402
<span class="ne">Kurtosis</span>: 2.670 Cond. No. 8.25
<span class="o">==============================================================================</span>
<span class="ne">Notes</span>:
<span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="n">Standard</span> <span class="n">Errors</span> <span class="n">assume</span> <span class="n">that</span> <span class="n">the</span> <span class="n">covariance</span> <span class="n">matrix</span> <span class="n">of</span> <span class="n">the</span> <span class="n">errors</span> <span class="ow">is</span> <span class="n">correctly</span> <span class="n">specified</span><span class="o">.</span>
</code></pre></div>
</div>
<p>We explicitly gave the contrast for race; however, since Treatment is the default, we could have omitted this.</p>
<h2 id="simple-coding">Simple Coding<a class="headerlink" href="#simple-coding" title="Link to this heading">¶</a></h2>
<p>Like Treatment Coding, Simple Coding compares each level to a fixed reference level. However, with simple coding, the intercept is the grand mean of all the levels of the factors. See <a class="reference internal" href="#user-defined"><span class="std std-ref">User-Defined Coding</span></a> for how to implement the Simple contrast.</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><code><span class="gp">In [14]: </span><span class="n">contrast</span> <span class="o">=</span> <span class="n">Simple</span><span class="p">()</span><span class="o">.</span><span class="n">code_without_intercept</span><span class="p">(</span><span class="n">levels</span><span class="p">)</span>
<span class="gp">In [15]: </span><span class="nb">print</span><span class="p">(</span><span class="n">contrast</span><span class="o">.</span><span class="n">matrix</span><span class="p">)</span>
<span class="go">[[-0.25 -0.25 -0.25]</span>
<span class="go"> [ 0.75 -0.25 -0.25]</span>
<span class="go"> [-0.25 0.75 -0.25]</span>
<span class="go"> [-0.25 -0.25 0.75]]</span>
<span class="gp">In [16]: </span><span class="n">mod</span> <span class="o">=</span> <span class="n">ols</span><span class="p">(</span><span class="s2">"write ~ C(race, Simple)"</span><span class="p">,</span> <span class="n">data</span><span class="o">=</span><span class="n">hsb2</span><span class="p">)</span>
<span class="gp">In [17]: </span><span class="n">res</span> <span class="o">=</span> <span class="n">mod</span><span class="o">.</span><span class="n">fit</span><span class="p">()</span>
<span class="gp">In [18]: </span><span class="nb">print</span><span class="p">(</span><span class="n">res</span><span class="o">.</span><span class="n">summary</span><span class="p">())</span>
<span class="go"> OLS Regression Results </span>
<span class="go">==============================================================================</span>
<span class="go">Dep. Variable: write R-squared: 0.107</span>
<span class="go">Model: OLS Adj. R-squared: 0.093</span>
<span class="go">Method: Least Squares F-statistic: 7.833</span>
<span class="go">Date: Wed, 19 Feb 2025 Prob (F-statistic): 5.78e-05</span>
<span class="go">Time: 12:52:08 Log-Likelihood: -721.77</span>
<span class="go">No. Observations: 200 AIC: 1452.</span>
<span class="go">Df Residuals: 196 BIC: 1465.</span>
<span class="go">Df Model: 3 </span>
<span class="go">Covariance Type: nonrobust </span>
<span class="go">===========================================================================================</span>
<span class="go"> coef std err t P>|t| [0.025 0.975]</span>
<span class="gt">-------------------------------------------------------------------------------------------</span>
<span class="n">Intercept</span> <span class="mf">51.6784</span> <span class="mf">0.982</span> <span class="mf">52.619</span> <span class="mf">0.000</span> <span class="mf">49.741</span> <span class="mf">53.615</span>
<span class="n">C</span><span class="p">(</span><span class="n">race</span><span class="p">,</span> <span class="n">Simple</span><span class="p">)[</span><span class="n">Simp</span><span class="mf">.1</span><span class="p">]</span> <span class="mf">11.5417</span> <span class="mf">3.286</span> <span class="mf">3.512</span> <span class="mf">0.001</span> <span class="mf">5.061</span> <span class="mf">18.022</span>
<span class="n">C</span><span class="p">(</span><span class="n">race</span><span class="p">,</span> <span class="n">Simple</span><span class="p">)[</span><span class="n">Simp</span><span class="mf">.2</span><span class="p">]</span> <span class="mf">1.7417</span> <span class="mf">2.732</span> <span class="mf">0.637</span> <span class="mf">0.525</span> <span class="o">-</span><span class="mf">3.647</span> <span class="mf">7.131</span>
<span class="n">C</span><span class="p">(</span><span class="n">race</span><span class="p">,</span> <span class="n">Simple</span><span class="p">)[</span><span class="n">Simp</span><span class="mf">.3</span><span class="p">]</span> <span class="mf">7.5968</span> <span class="mf">1.989</span> <span class="mf">3.820</span> <span class="mf">0.000</span> <span class="mf">3.675</span> <span class="mf">11.519</span>
<span class="o">==============================================================================</span>
<span class="ne">Omnibus</span>: 10.487 Durbin-Watson: 1.779
<span class="n">Prob</span><span class="p">(</span><span class="n">Omnibus</span><span class="p">):</span> <span class="mf">0.005</span> <span class="n">Jarque</span><span class="o">-</span><span class="n">Bera</span> <span class="p">(</span><span class="n">JB</span><span class="p">):</span> <span class="mf">11.031</span>
<span class="ne">Skew</span>: -0.551 Prob(JB): 0.00402
<span class="ne">Kurtosis</span>: 2.670 Cond. No. 7.03
<span class="o">==============================================================================</span>
<span class="ne">Notes</span>:
<span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="n">Standard</span> <span class="n">Errors</span> <span class="n">assume</span> <span class="n">that</span> <span class="n">the</span> <span class="n">covariance</span> <span class="n">matrix</span> <span class="n">of</span> <span class="n">the</span> <span class="n">errors</span> <span class="ow">is</span> <span class="n">correctly</span> <span class="n">specified</span><span class="o">.</span>
</code></pre></div>
</div>
<h2 id="sum-deviation-coding">Sum (Deviation) Coding<a class="headerlink" href="#sum-deviation-coding" title="Link to this heading">¶</a></h2>
<p>Sum coding compares the mean of the dependent variable for a given level to the overall mean of the dependent variable over all the levels. That is, it uses contrasts between each of the first k-1 levels and level k. In this example, level 1 is compared to all the others, level 2 to all the others, and level 3 to all the others.</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><code><span class="gp">In [19]: </span><span class="kn">from</span><span class="w"> </span><span class="nn">patsy.contrasts</span><span class="w"> </span><span class="kn">import</span> <span class="n">Sum</span>
<span class="gp">In [20]: </span><span class="n">contrast</span> <span class="o">=</span> <span class="n">Sum</span><span class="p">()</span><span class="o">.</span><span class="n">code_without_intercept</span><span class="p">(</span><span class="n">levels</span><span class="p">)</span>
<span class="gp">In [21]: </span><span class="nb">print</span><span class="p">(</span><span class="n">contrast</span><span class="o">.</span><span class="n">matrix</span><span class="p">)</span>
<span class="go">[[ 1. 0. 0.]</span>
<span class="go"> [ 0. 1. 0.]</span>
<span class="go"> [ 0. 0. 1.]</span>
<span class="go"> [-1. -1. -1.]]</span>
<span class="gp">In [22]: </span><span class="n">mod</span> <span class="o">=</span> <span class="n">ols</span><span class="p">(</span><span class="s2">"write ~ C(race, Sum)"</span><span class="p">,</span> <span class="n">data</span><span class="o">=</span><span class="n">hsb2</span><span class="p">)</span>
<span class="gp">In [23]: </span><span class="n">res</span> <span class="o">=</span> <span class="n">mod</span><span class="o">.</span><span class="n">fit</span><span class="p">()</span>
<span class="gp">In [24]: </span><span class="nb">print</span><span class="p">(</span><span class="n">res</span><span class="o">.</span><span class="n">summary</span><span class="p">())</span>
<span class="go"> OLS Regression Results </span>
<span class="go">==============================================================================</span>
<span class="go">Dep. Variable: write R-squared: 0.107</span>
<span class="go">Model: OLS Adj. R-squared: 0.093</span>
<span class="go">Method: Least Squares F-statistic: 7.833</span>
<span class="go">Date: Wed, 19 Feb 2025 Prob (F-statistic): 5.78e-05</span>
<span class="go">Time: 12:52:08 Log-Likelihood: -721.77</span>
<span class="go">No. Observations: 200 AIC: 1452.</span>
<span class="go">Df Residuals: 196 BIC: 1465.</span>
<span class="go">Df Model: 3 </span>
<span class="go">Covariance Type: nonrobust </span>
<span class="go">=====================================================================================</span>
<span class="go"> coef std err t P>|t| [0.025 0.975]</span>
<span class="gt">-------------------------------------------------------------------------------------</span>
<span class="n">Intercept</span> <span class="mf">51.6784</span> <span class="mf">0.982</span> <span class="mf">52.619</span> <span class="mf">0.000</span> <span class="mf">49.741</span> <span class="mf">53.615</span>
<span class="n">C</span><span class="p">(</span><span class="n">race</span><span class="p">,</span> <span class="n">Sum</span><span class="p">)[</span><span class="n">S</span><span class="mf">.1</span><span class="p">]</span> <span class="o">-</span><span class="mf">5.2200</span> <span class="mf">1.631</span> <span class="o">-</span><span class="mf">3.200</span> <span class="mf">0.002</span> <span class="o">-</span><span class="mf">8.437</span> <span class="o">-</span><span class="mf">2.003</span>
<span class="n">C</span><span class="p">(</span><span class="n">race</span><span class="p">,</span> <span class="n">Sum</span><span class="p">)[</span><span class="n">S</span><span class="mf">.2</span><span class="p">]</span> <span class="mf">6.3216</span> <span class="mf">2.160</span> <span class="mf">2.926</span> <span class="mf">0.004</span> <span class="mf">2.061</span> <span class="mf">10.582</span>
<span class="n">C</span><span class="p">(</span><span class="n">race</span><span class="p">,</span> <span class="n">Sum</span><span class="p">)[</span><span class="n">S</span><span class="mf">.3</span><span class="p">]</span> <span class="o">-</span><span class="mf">3.4784</span> <span class="mf">1.732</span> <span class="o">-</span><span class="mf">2.008</span> <span class="mf">0.046</span> <span class="o">-</span><span class="mf">6.895</span> <span class="o">-</span><span class="mf">0.062</span>
<span class="o">==============================================================================</span>
<span class="ne">Omnibus</span>: 10.487 Durbin-Watson: 1.779
<span class="n">Prob</span><span class="p">(</span><span class="n">Omnibus</span><span class="p">):</span> <span class="mf">0.005</span> <span class="n">Jarque</span><span class="o">-</span><span class="n">Bera</span> <span class="p">(</span><span class="n">JB</span><span class="p">):</span> <span class="mf">11.031</span>
<span class="ne">Skew</span>: -0.551 Prob(JB): 0.00402
<span class="ne">Kurtosis</span>: 2.670 Cond. No. 6.72
<span class="o">==============================================================================</span>
<span class="ne">Notes</span>:
<span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="n">Standard</span> <span class="n">Errors</span> <span class="n">assume</span> <span class="n">that</span> <span class="n">the</span> <span class="n">covariance</span> <span class="n">matrix</span> <span class="n">of</span> <span class="n">the</span> <span class="n">errors</span> <span class="ow">is</span> <span class="n">correctly</span> <span class="n">specified</span><span class="o">.</span>
</code></pre></div>
</div>
<p>This corresponds to a parameterization that forces all the coefficients to sum to zero. Notice that the intercept here is the grand mean, where the grand mean is the mean of means of the dependent variable by each level.</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><code><span class="gp">In [25]: </span><span class="n">hsb2</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="s1">'race'</span><span class="p">)[</span><span class="s1">'write'</span><span class="p">]</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span>
<span class="gh">Out[25]: </span><span class="go">np.float64(51.67837643678162)</span>
</code></pre></div>
</div>
<h2 id="backward-difference-coding">Backward Difference Coding<a class="headerlink" href="#backward-difference-coding" title="Link to this heading">¶</a></h2>
<p>In backward difference coding, the mean of the dependent variable for a level is compared with the mean of the dependent variable for the prior level. This type of coding may be useful for a nominal or an ordinal variable.</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><code><span class="gp">In [26]: </span><span class="kn">from</span><span class="w"> </span><span class="nn">patsy.contrasts</span><span class="w"> </span><span class="kn">import</span> <span class="n">Diff</span>
<span class="gp">In [27]: </span><span class="n">contrast</span> <span class="o">=</span> <span class="n">Diff</span><span class="p">()</span><span class="o">.</span><span class="n">code_without_intercept</span><span class="p">(</span><span class="n">levels</span><span class="p">)</span>
<span class="gp">In [28]: </span><span class="nb">print</span><span class="p">(</span><span class="n">contrast</span><span class="o">.</span><span class="n">matrix</span><span class="p">)</span>
<span class="go">[[-0.75 -0.5 -0.25]</span>
<span class="go"> [ 0.25 -0.5 -0.25]</span>
<span class="go"> [ 0.25 0.5 -0.25]</span>
<span class="go"> [ 0.25 0.5 0.75]]</span>
<span class="gp">In [29]: </span><span class="n">mod</span> <span class="o">=</span> <span class="n">ols</span><span class="p">(</span><span class="s2">"write ~ C(race, Diff)"</span><span class="p">,</span> <span class="n">data</span><span class="o">=</span><span class="n">hsb2</span><span class="p">)</span>
<span class="gp">In [30]: </span><span class="n">res</span> <span class="o">=</span> <span class="n">mod</span><span class="o">.</span><span class="n">fit</span><span class="p">()</span>
<span class="gp">In [31]: </span><span class="nb">print</span><span class="p">(</span><span class="n">res</span><span class="o">.</span><span class="n">summary</span><span class="p">())</span>
<span class="go"> OLS Regression Results </span>
<span class="go">==============================================================================</span>
<span class="go">Dep. Variable: write R-squared: 0.107</span>
<span class="go">Model: OLS Adj. R-squared: 0.093</span>
<span class="go">Method: Least Squares F-statistic: 7.833</span>
<span class="go">Date: Wed, 19 Feb 2025 Prob (F-statistic): 5.78e-05</span>
<span class="go">Time: 12:52:08 Log-Likelihood: -721.77</span>
<span class="go">No. Observations: 200 AIC: 1452.</span>
<span class="go">Df Residuals: 196 BIC: 1465.</span>
<span class="go">Df Model: 3 </span>
<span class="go">Covariance Type: nonrobust </span>
<span class="go">======================================================================================</span>
<span class="go"> coef std err t P>|t| [0.025 0.975]</span>
<span class="gt">--------------------------------------------------------------------------------------</span>
<span class="n">Intercept</span> <span class="mf">51.6784</span> <span class="mf">0.982</span> <span class="mf">52.619</span> <span class="mf">0.000</span> <span class="mf">49.741</span> <span class="mf">53.615</span>
<span class="n">C</span><span class="p">(</span><span class="n">race</span><span class="p">,</span> <span class="n">Diff</span><span class="p">)[</span><span class="n">D</span><span class="mf">.1</span><span class="p">]</span> <span class="mf">11.5417</span> <span class="mf">3.286</span> <span class="mf">3.512</span> <span class="mf">0.001</span> <span class="mf">5.061</span> <span class="mf">18.022</span>
<span class="n">C</span><span class="p">(</span><span class="n">race</span><span class="p">,</span> <span class="n">Diff</span><span class="p">)[</span><span class="n">D</span><span class="mf">.2</span><span class="p">]</span> <span class="o">-</span><span class="mf">9.8000</span> <span class="mf">3.388</span> <span class="o">-</span><span class="mf">2.893</span> <span class="mf">0.004</span> <span class="o">-</span><span class="mf">16.481</span> <span class="o">-</span><span class="mf">3.119</span>
<span class="n">C</span><span class="p">(</span><span class="n">race</span><span class="p">,</span> <span class="n">Diff</span><span class="p">)[</span><span class="n">D</span><span class="mf">.3</span><span class="p">]</span> <span class="mf">5.8552</span> <span class="mf">2.153</span> <span class="mf">2.720</span> <span class="mf">0.007</span> <span class="mf">1.610</span> <span class="mf">10.101</span>
<span class="o">==============================================================================</span>
<span class="ne">Omnibus</span>: 10.487 Durbin-Watson: 1.779
<span class="n">Prob</span><span class="p">(</span><span class="n">Omnibus</span><span class="p">):</span> <span class="mf">0.005</span> <span class="n">Jarque</span><span class="o">-</span><span class="n">Bera</span> <span class="p">(</span><span class="n">JB</span><span class="p">):</span> <span class="mf">11.031</span>
<span class="ne">Skew</span>: -0.551 Prob(JB): 0.00402
<span class="ne">Kurtosis</span>: 2.670 Cond. No. 8.30
<span class="o">==============================================================================</span>
<span class="ne">Notes</span>:
<span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="n">Standard</span> <span class="n">Errors</span> <span class="n">assume</span> <span class="n">that</span> <span class="n">the</span> <span class="n">covariance</span> <span class="n">matrix</span> <span class="n">of</span> <span class="n">the</span> <span class="n">errors</span> <span class="ow">is</span> <span class="n">correctly</span> <span class="n">specified</span><span class="o">.</span>
</code></pre></div>
</div>
<p>For example, here the coefficient on level 1 is the mean of <cite>write</cite> at level 2 compared with the mean at level 1. I.e.,</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><code><span class="gp">In [32]: </span><span class="n">res</span><span class="o">.</span><span class="n">params</span><span class="p">[</span><span class="s2">"C(race, Diff)[D.1]"</span><span class="p">]</span>
<span class="gh">Out[32]: </span><span class="go">np.float64(11.541666666666575)</span>
<span class="gp">In [33]: </span><span class="n">hsb2</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="s1">'race'</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()[</span><span class="s2">"write"</span><span class="p">]</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span> <span class="o">-</span> \
<span class="gp"> ....: </span> <span class="n">hsb2</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="s1">'race'</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()[</span><span class="s2">"write"</span><span class="p">]</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
<span class="gp"> ....: </span>
<span class="gh">Out[33]: </span><span class="go">np.float64(11.541666666666664)</span>
</code></pre></div>
</div>
<h2 id="helmert-coding">Helmert Coding<a class="headerlink" href="#helmert-coding" title="Link to this heading">¶</a></h2>
<p>Our version of Helmert coding is sometimes referred to as Reverse Helmert Coding. The mean of the dependent variable for a level is compared to the mean of the dependent variable over all previous levels. Hence the name ‘reverse’ is sometimes applied to differentiate it from forward Helmert coding. This comparison does not make much sense for a nominal variable such as race, but we would use the Helmert contrast like so:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><code><span class="gp">In [34]: </span><span class="kn">from</span><span class="w"> </span><span class="nn">patsy.contrasts</span><span class="w"> </span><span class="kn">import</span> <span class="n">Helmert</span>
<span class="gp">In [35]: </span><span class="n">contrast</span> <span class="o">=</span> <span class="n">Helmert</span><span class="p">()</span><span class="o">.</span><span class="n">code_without_intercept</span><span class="p">(</span><span class="n">levels</span><span class="p">)</span>
<span class="gp">In [36]: </span><span class="nb">print</span><span class="p">(</span><span class="n">contrast</span><span class="o">.</span><span class="n">matrix</span><span class="p">)</span>
<span class="go">[[-1. -1. -1.]</span>
<span class="go"> [ 1. -1. -1.]</span>
<span class="go"> [ 0. 2. -1.]</span>
<span class="go"> [ 0. 0. 3.]]</span>
<span class="gp">In [37]: </span><span class="n">mod</span> <span class="o">=</span> <span class="n">ols</span><span class="p">(</span><span class="s2">"write ~ C(race, Helmert)"</span><span class="p">,</span> <span class="n">data</span><span class="o">=</span><span class="n">hsb2</span><span class="p">)</span>
<span class="gp">In [38]: </span><span class="n">res</span> <span class="o">=</span> <span class="n">mod</span><span class="o">.</span><span class="n">fit</span><span class="p">()</span>
<span class="gp">In [39]: </span><span class="nb">print</span><span class="p">(</span><span class="n">res</span><span class="o">.</span><span class="n">summary</span><span class="p">())</span>
<span class="go"> OLS Regression Results </span>
<span class="go">==============================================================================</span>
<span class="go">Dep. Variable: write R-squared: 0.107</span>
<span class="go">Model: OLS Adj. R-squared: 0.093</span>
<span class="go">Method: Least Squares F-statistic: 7.833</span>
<span class="go">Date: Wed, 19 Feb 2025 Prob (F-statistic): 5.78e-05</span>
<span class="go">Time: 12:52:08 Log-Likelihood: -721.77</span>
<span class="go">No. Observations: 200 AIC: 1452.</span>
<span class="go">Df Residuals: 196 BIC: 1465.</span>
<span class="go">Df Model: 3 </span>
<span class="go">Covariance Type: nonrobust </span>
<span class="go">=========================================================================================</span>
<span class="go"> coef std err t P>|t| [0.025 0.975]</span>
<span class="gt">-----------------------------------------------------------------------------------------</span>
<span class="n">Intercept</span> <span class="mf">51.6784</span> <span class="mf">0.982</span> <span class="mf">52.619</span> <span class="mf">0.000</span> <span class="mf">49.741</span> <span class="mf">53.615</span>
<span class="n">C</span><span class="p">(</span><span class="n">race</span><span class="p">,</span> <span class="n">Helmert</span><span class="p">)[</span><span class="n">H</span><span class="mf">.2</span><span class="p">]</span> <span class="mf">5.7708</span> <span class="mf">1.643</span> <span class="mf">3.512</span> <span class="mf">0.001</span> <span class="mf">2.530</span> <span class="mf">9.011</span>
<span class="n">C</span><span class="p">(</span><span class="n">race</span><span class="p">,</span> <span class="n">Helmert</span><span class="p">)[</span><span class="n">H</span><span class="mf">.3</span><span class="p">]</span> <span class="o">-</span><span class="mf">1.3431</span> <span class="mf">0.867</span> <span class="o">-</span><span class="mf">1.548</span> <span class="mf">0.123</span> <span class="o">-</span><span class="mf">3.054</span> <span class="mf">0.368</span>
<span class="n">C</span><span class="p">(</span><span class="n">race</span><span class="p">,</span> <span class="n">Helmert</span><span class="p">)[</span><span class="n">H</span><span class="mf">.4</span><span class="p">]</span> <span class="mf">0.7923</span> <span class="mf">0.372</span> <span class="mf">2.130</span> <span class="mf">0.034</span> <span class="mf">0.059</span> <span class="mf">1.526</span>
<span class="o">==============================================================================</span>
<span class="ne">Omnibus</span>: 10.487 Durbin-Watson: 1.779
<span class="n">Prob</span><span class="p">(</span><span class="n">Omnibus</span><span class="p">):</span> <span class="mf">0.005</span> <span class="n">Jarque</span><span class="o">-</span><span class="n">Bera</span> <span class="p">(</span><span class="n">JB</span><span class="p">):</span> <span class="mf">11.031</span>
<span class="ne">Skew</span>: -0.551 Prob(JB): 0.00402
<span class="ne">Kurtosis</span>: 2.670 Cond. No. 7.26
<span class="o">==============================================================================</span>
<span class="ne">Notes</span>:
<span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="n">Standard</span> <span class="n">Errors</span> <span class="n">assume</span> <span class="n">that</span> <span class="n">the</span> <span class="n">covariance</span> <span class="n">matrix</span> <span class="n">of</span> <span class="n">the</span> <span class="n">errors</span> <span class="ow">is</span> <span class="n">correctly</span> <span class="n">specified</span><span class="o">.</span>
</code></pre></div>
</div>
<p>To illustrate, the comparison on level 4 is the mean of the dependent variable at the previous three levels subtracted from the mean at level 4:</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><code><span class="gp">In [40]: </span><span class="n">grouped</span> <span class="o">=</span> <span class="n">hsb2</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="s1">'race'</span><span class="p">)</span>
<span class="gp">In [41]: </span><span class="n">grouped</span><span class="o">.</span><span class="n">mean</span><span class="p">()[</span><span class="s2">"write"</span><span class="p">]</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="mi">4</span><span class="p">]</span> <span class="o">-</span> <span class="n">grouped</span><span class="o">.</span><span class="n">mean</span><span class="p">()[</span><span class="s2">"write"</span><span class="p">]</span><span class="o">.</span><span class="n">loc</span><span class="p">[:</span><span class="mi">3</span><span class="p">]</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span>
<span class="gh">Out[41]: </span><span class="go">np.float64(3.169061302681982)</span>
</code></pre></div>
</div>
<p>As you can see, these are only equal up to a constant. Other versions of the Helmert contrast give the actual difference in means. Regardless, the hypothesis tests are the same.</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><code><span class="gp">In [42]: </span><span class="n">k</span> <span class="o">=</span> <span class="mi">4</span>
<span class="gp">In [43]: </span><span class="mf">1.</span><span class="o">/</span><span class="n">k</span> <span class="o">*</span> <span class="p">(</span><span class="n">grouped</span><span class="o">.</span><span class="n">mean</span><span class="p">()[</span><span class="s2">"write"</span><span class="p">]</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="o">-</span> <span class="n">grouped</span><span class="o">.</span><span class="n">mean</span><span class="p">()[</span><span class="s2">"write"</span><span class="p">]</span><span class="o">.</span><span class="n">loc</span><span class="p">[:</span><span class="n">k</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">mean</span><span class="p">())</span>
<span class="gh">Out[43]: </span><span class="go">np.float64(0.7922653256704955)</span>
<span class="gp">In [44]: </span><span class="n">k</span> <span class="o">=</span> <span class="mi">3</span>
<span class="gp">In [45]: </span><span class="mf">1.</span><span class="o">/</span><span class="n">k</span> <span class="o">*</span> <span class="p">(</span><span class="n">grouped</span><span class="o">.</span><span class="n">mean</span><span class="p">()[</span><span class="s2">"write"</span><span class="p">]</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="o">-</span> <span class="n">grouped</span><span class="o">.</span><span class="n">mean</span><span class="p">()[</span><span class="s2">"write"</span><span class="p">]</span><span class="o">.</span><span class="n">loc</span><span class="p">[:</span><span class="n">k</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">mean</span><span class="p">())</span>
<span class="gh">Out[45]: </span><span class="go">np.float64(-1.3430555555555561)</span>
</code></pre></div>
</div>
<h2 id="orthogonal-polynomial-coding">Orthogonal Polynomial Coding<a class="headerlink" href="#orthogonal-polynomial-coding" title="Link to this heading">¶</a></h2>
<p>The coefficients taken on by polynomial coding for <cite>k=4</cite> levels are the linear, quadratic, and cubic trends in the categorical variable. The categorical variable here is assumed to be represented by an underlying, equally spaced numeric variable. Therefore, this type of encoding is used only for ordered categorical variables with equal spacing. In general, the polynomial contrast produces polynomials of order <cite>k-1</cite>. Since <cite>race</cite> is not an ordered factor variable, let’s use <cite>read</cite> as an example. First we need to create an ordered categorical from <cite>read</cite>.</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><code><span class="gp">In [46]: </span><span class="n">_</span><span class="p">,</span> <span class="n">bins</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">histogram</span><span class="p">(</span><span class="n">hsb2</span><span class="o">.</span><span class="n">read</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span>
<span class="gp">In [47]: </span><span class="k">try</span><span class="p">:</span> <span class="c1"># requires numpy main</span>
<span class="gp"> ....: </span> <span class="n">readcat</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">digitize</span><span class="p">(</span><span class="n">hsb2</span><span class="o">.</span><span class="n">read</span><span class="p">,</span> <span class="n">bins</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
<span class="gp"> ....: </span><span class="k">except</span><span class="p">:</span>
<span class="gp"> ....: </span> <span class="n">readcat</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">digitize</span><span class="p">(</span><span class="n">hsb2</span><span class="o">.</span><span class="n">read</span><span class="p">,</span> <span class="n">bins</span><span class="p">)</span>
<span class="gp"> ....: </span>
<span class="gp">In [48]: </span><span class="n">hsb2</span><span class="p">[</span><span class="s1">'readcat'</span><span class="p">]</span> <span class="o">=</span> <span class="n">readcat</span>
<span class="gp">In [49]: </span><span class="n">hsb2</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="s1">'readcat'</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()[</span><span class="s1">'write'</span><span class="p">]</span>
<span class="gh">Out[49]: </span>
<span class="go">readcat</span>
<span class="go">0 46.000000</span>
<span class="go">1 44.980392</span>
<span class="go">2 53.356436</span>
<span class="go">3 60.127660</span>
<span class="go">Name: write, dtype: float64</span>
</code></pre></div>
</div>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><code><span class="gp">In [50]: </span><span class="kn">from</span><span class="w"> </span><span class="nn">patsy.contrasts</span><span class="w"> </span><span class="kn">import</span> <span class="n">Poly</span>
<span class="gp">In [51]: </span><span class="n">levels</span> <span class="o">=</span> <span class="n">hsb2</span><span class="o">.</span><span class="n">readcat</span><span class="o">.</span><span class="n">unique</span><span class="p">()</span><span class="o">.</span><span class="n">tolist</span><span class="p">()</span>
<span class="gp">In [52]: </span><span class="n">contrast</span> <span class="o">=</span> <span class="n">Poly</span><span class="p">()</span><span class="o">.</span><span class="n">code_without_intercept</span><span class="p">(</span><span class="n">levels</span><span class="p">)</span>
<span class="gp">In [53]: </span><span class="nb">print</span><span class="p">(</span><span class="n">contrast</span><span class="o">.</span><span class="n">matrix</span><span class="p">)</span>
<span class="go">[[-0.6708 0.5 -0.2236]</span>
<span class="go"> [-0.2236 -0.5 0.6708]</span>
<span class="go"> [ 0.2236 -0.5 -0.6708]</span>
<span class="go"> [ 0.6708 0.5 0.2236]]</span>
<span class="gp">In [54]: </span><span class="n">mod</span> <span class="o">=</span> <span class="n">ols</span><span class="p">(</span><span class="s2">"write ~ C(readcat, Poly)"</span><span class="p">,</span> <span class="n">data</span><span class="o">=</span><span class="n">hsb2</span><span class="p">)</span>
<span class="gp">In [55]: </span><span class="n">res</span> <span class="o">=</span> <span class="n">mod</span><span class="o">.</span><span class="n">fit</span><span class="p">()</span>
<span class="gp">In [56]: </span><span class="nb">print</span><span class="p">(</span><span class="n">res</span><span class="o">.</span><span class="n">summary</span><span class="p">())</span>
<span class="go"> OLS Regression Results </span>
<span class="go">==============================================================================</span>
<span class="go">Dep. Variable: write R-squared: 0.320</span>
<span class="go">Model: OLS Adj. R-squared: 0.309</span>
<span class="go">Method: Least Squares F-statistic: 30.73</span>
<span class="go">Date: Wed, 19 Feb 2025 Prob (F-statistic): 2.51e-16</span>
<span class="go">Time: 12:52:09 Log-Likelihood: -694.54</span>
<span class="go">No. Observations: 200 AIC: 1397.</span>
<span class="go">Df Residuals: 196 BIC: 1410.</span>
<span class="go">Df Model: 3 </span>
<span class="go">Covariance Type: nonrobust </span>
<span class="go">==============================================================================================</span>
<span class="go"> coef std err t P>|t| [0.025 0.975]</span>
<span class="gt">----------------------------------------------------------------------------------------------</span>
<span class="n">Intercept</span> <span class="mf">51.1161</span> <span class="mf">2.018</span> <span class="mf">25.324</span> <span class="mf">0.000</span> <span class="mf">47.135</span> <span class="mf">55.097</span>
<span class="n">C</span><span class="p">(</span><span class="n">readcat</span><span class="p">,</span> <span class="n">Poly</span><span class="p">)</span><span class="o">.</span><span class="n">Linear</span> <span class="mf">11.3501</span> <span class="mf">5.348</span> <span class="mf">2.122</span> <span class="mf">0.035</span> <span class="mf">0.803</span> <span class="mf">21.897</span>
<span class="n">C</span><span class="p">(</span><span class="n">readcat</span><span class="p">,</span> <span class="n">Poly</span><span class="p">)</span><span class="o">.</span><span class="n">Quadratic</span> <span class="mf">3.8954</span> <span class="mf">4.037</span> <span class="mf">0.965</span> <span class="mf">0.336</span> <span class="o">-</span><span class="mf">4.066</span> <span class="mf">11.857</span>
<span class="n">C</span><span class="p">(</span><span class="n">readcat</span><span class="p">,</span> <span class="n">Poly</span><span class="p">)</span><span class="o">.</span><span class="n">Cubic</span> <span class="o">-</span><span class="mf">2.4598</span> <span class="mf">1.998</span> <span class="o">-</span><span class="mf">1.231</span> <span class="mf">0.220</span> <span class="o">-</span><span class="mf">6.400</span> <span class="mf">1.480</span>
<span class="o">==============================================================================</span>
<span class="ne">Omnibus</span>: 9.741 Durbin-Watson: 1.699
<span class="n">Prob</span><span class="p">(</span><span class="n">Omnibus</span><span class="p">):</span> <span class="mf">0.008</span> <span class="n">Jarque</span><span class="o">-</span><span class="n">Bera</span> <span class="p">(</span><span class="n">JB</span><span class="p">):</span> <span class="mf">10.263</span>
<span class="ne">Skew</span>: -0.535 Prob(JB): 0.00591
<span class="ne">Kurtosis</span>: 2.703 Cond. No. 13.7
<span class="o">==============================================================================</span>
<span class="ne">Notes</span>:
<span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="n">Standard</span> <span class="n">Errors</span> <span class="n">assume</span> <span class="n">that</span> <span class="n">the</span> <span class="n">covariance</span> <span class="n">matrix</span> <span class="n">of</span> <span class="n">the</span> <span class="n">errors</span> <span class="ow">is</span> <span class="n">correctly</span> <span class="n">specified</span><span class="o">.</span>
</code></pre></div>
</div>
<p>As you can see, <cite>readcat</cite> has a significant linear effect on the dependent variable <cite>write</cite>, but its quadratic and cubic effects are not significant.</p>
<h2 id="user-defined-coding"><span id="user-defined"></span>User-Defined Coding<a class="headerlink" href="#user-defined-coding" title="Link to this heading">¶</a></h2>
<p>If you want to use your own coding, you must do so by writing a coding class that contains a code_with_intercept and a code_without_intercept method, each of which returns a <cite>patsy.contrasts.ContrastMatrix</cite> instance.</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><code><span class="gp">In [57]: </span><span class="kn">from</span><span class="w"> </span><span class="nn">patsy.contrasts</span><span class="w"> </span><span class="kn">import</span> <span class="n">ContrastMatrix</span>
<span class="gp"> ....: </span>
<span class="gp"> ....: </span><span class="k">def</span><span class="w"> </span><span class="nf">_name_levels</span><span class="p">(</span><span class="n">prefix</span><span class="p">,</span> <span class="n">levels</span><span class="p">):</span>
<span class="gp"> ....: </span> <span class="k">return</span> <span class="p">[</span><span class="s2">"[</span><span class="si">%s%s</span><span class="s2">]"</span> <span class="o">%</span> <span class="p">(</span><span class="n">prefix</span><span class="p">,</span> <span class="n">level</span><span class="p">)</span> <span class="k">for</span> <span class="n">level</span> <span class="ow">in</span> <span class="n">levels</span><span class="p">]</span>
<span class="gp"> ....: </span>
<span class="gp">In [58]: </span><span class="k">class</span><span class="w"> </span><span class="nc">Simple</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
<span class="gp"> ....: </span> <span class="k">def</span><span class="w"> </span><span class="nf">_simple_contrast</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">levels</span><span class="p">):</span>
<span class="gp"> ....: </span> <span class="n">nlevels</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">levels</span><span class="p">)</span>
<span class="gp"> ....: </span> <span class="n">contr</span> <span class="o">=</span> <span class="o">-</span><span class="mf">1.</span><span class="o">/</span><span class="n">nlevels</span> <span class="o">*</span> <span class="n">np</span><span class="o">.</span><span class="n">ones</span><span class="p">((</span><span class="n">nlevels</span><span class="p">,</span> <span class="n">nlevels</span><span class="o">-</span><span class="mi">1</span><span class="p">))</span>
<span class="gp"> ....: </span> <span class="n">contr</span><span class="p">[</span><span class="mi">1</span><span class="p">:][</span><span class="n">np</span><span class="o">.</span><span class="n">diag_indices</span><span class="p">(</span><span class="n">nlevels</span><span class="o">-</span><span class="mi">1</span><span class="p">)]</span> <span class="o">=</span> <span class="p">(</span><span class="n">nlevels</span><span class="o">-</span><span class="mf">1.</span><span class="p">)</span><span class="o">/</span><span class="n">nlevels</span>
<span class="gp"> ....: </span> <span class="k">return</span> <span class="n">contr</span>
<span class="gp"> ....: </span>
<span class="gp"> ....: </span> <span class="k">def</span><span class="w"> </span><span class="nf">code_with_intercept</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">levels</span><span class="p">):</span>
<span class="gp"> ....: </span> <span class="n">contrast</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">column_stack</span><span class="p">((</span><span class="n">np</span><span class="o">.</span><span class="n">ones</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">levels</span><span class="p">)),</span>
<span class="gp"> ....: </span> <span class="bp">self</span><span class="o">.</span><span class="n">_simple_contrast</span><span class="p">(</span><span class="n">levels</span><span class="p">)))</span>
<span class="gp"> ....: </span> <span class="k">return</span> <span class="n">ContrastMatrix</span><span class="p">(</span><span class="n">contrast</span><span class="p">,</span> <span class="n">_name_levels</span><span class="p">(</span><span class="s2">"Simp."</span><span class="p">,</span> <span class="n">levels</span><span class="p">))</span>
<span class="gp"> ....: </span>
<span class="gp"> ....: </span> <span class="k">def</span><span class="w"> </span><span class="nf">code_without_intercept</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">levels</span><span class="p">):</span>
<span class="gp"> ....: </span> <span class="n">contrast</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_simple_contrast</span><span class="p">(</span><span class="n">levels</span><span class="p">)</span>
<span class="gp"> ....: </span> <span class="k">return</span> <span class="n">ContrastMatrix</span><span class="p">(</span><span class="n">contrast</span><span class="p">,</span> <span class="n">_name_levels</span><span class="p">(</span><span class="s2">"Simp."</span><span class="p">,</span> <span class="n">levels</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">]))</span>
<span class="gp"> ....: </span>
<span class="go"> File &lt;tokenize&gt;:13</span>
<span class="go"> def code_without_intercept(self, levels):</span>
<span class="go"> ^</span>
<span class="go">IndentationError: unindent does not match any outer indentation level</span>
<span class="gp">In [60]: </span><span class="n">mod</span> <span class="o">=</span> <span class="n">ols</span><span class="p">(</span><span class="s2">"write ~ C(race, Simple)"</span><span class="p">,</span> <span class="n">data</span><span class="o">=</span><span class="n">hsb2</span><span class="p">)</span>
<span class="gp"> ....: </span><span class="n">res</span> <span class="o">=</span> <span class="n">mod</span><span class="o">.</span><span class="n">fit</span><span class="p">()</span>
<span class="gp"> ....: </span><span class="nb">print</span><span class="p">(</span><span class="n">res</span><span class="o">.</span><span class="n">summary</span><span class="p">())</span>
<span class="gp"> ....: </span>
<span class="go"> OLS Regression Results </span>
<span class="go">==============================================================================</span>
<span class="go">Dep. Variable: write R-squared: 0.107</span>
<span class="go">Model: OLS Adj. R-squared: 0.093</span>
<span class="go">Method: Least Squares F-statistic: 7.833</span>
<span class="go">Date: Wed, 19 Feb 2025 Prob (F-statistic): 5.78e-05</span>
<span class="go">Time: 12:52:09 Log-Likelihood: -721.77</span>
<span class="go">No. Observations: 200 AIC: 1452.</span>
<span class="go">Df Residuals: 196 BIC: 1465.</span>
<span class="go">Df Model: 3 </span>
<span class="go">Covariance Type: nonrobust </span>
<span class="go">===========================================================================================</span>
<span class="go"> coef std err t P&gt;|t| [0.025 0.975]</span>
<span class="gt">-------------------------------------------------------------------------------------------</span>
<span class="n">Intercept</span> <span class="mf">51.6784</span> <span class="mf">0.982</span> <span class="mf">52.619</span> <span class="mf">0.000</span> <span class="mf">49.741</span> <span class="mf">53.615</span>
<span class="n">C</span><span class="p">(</span><span class="n">race</span><span class="p">,</span> <span class="n">Simple</span><span class="p">)[</span><span class="n">Simp</span><span class="mf">.1</span><span class="p">]</span> <span class="mf">11.5417</span> <span class="mf">3.286</span> <span class="mf">3.512</span> <span class="mf">0.001</span> <span class="mf">5.061</span> <span class="mf">18.022</span>
<span class="n">C</span><span class="p">(</span><span class="n">race</span><span class="p">,</span> <span class="n">Simple</span><span class="p">)[</span><span class="n">Simp</span><span class="mf">.2</span><span class="p">]</span> <span class="mf">1.7417</span> <span class="mf">2.732</span> <span class="mf">0.637</span> <span class="mf">0.525</span> <span class="o">-</span><span class="mf">3.647</span> <span class="mf">7.131</span>
<span class="n">C</span><span class="p">(</span><span class="n">race</span><span class="p">,</span> <span class="n">Simple</span><span class="p">)[</span><span class="n">Simp</span><span class="mf">.3</span><span class="p">]</span> <span class="mf">7.5968</span> <span class="mf">1.989</span> <span class="mf">3.820</span> <span class="mf">0.000</span> <span class="mf">3.675</span> <span class="mf">11.519</span>
<span class="o">==============================================================================</span>
<span class="ne">Omnibus</span>: 10.487 Durbin-Watson: 1.779
<span class="n">Prob</span><span class="p">(</span><span class="n">Omnibus</span><span class="p">):</span> <span class="mf">0.005</span> <span class="n">Jarque</span><span class="o">-</span><span class="n">Bera</span> <span class="p">(</span><span class="n">JB</span><span class="p">):</span> <span class="mf">11.031</span>
<span class="ne">Skew</span>: -0.551 Prob(JB): 0.00402
<span class="ne">Kurtosis</span>: 2.670 Cond. No. 7.03
<span class="o">==============================================================================</span>
<span class="ne">Notes</span>:
<span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="n">Standard</span> <span class="n">Errors</span> <span class="n">assume</span> <span class="n">that</span> <span class="n">the</span> <span class="n">covariance</span> <span class="n">matrix</span> <span class="n">of</span> <span class="n">the</span> <span class="n">errors</span> <span class="ow">is</span> <span class="n">correctly</span> <span class="n">specified</span><span class="o">.</span>
</code></pre></div>
</div>
<hr>
<div class="md-source-file">
<small>
Last update:
Feb 19, 2025
</small>
</div>
</article>
</div>
</div>
</main>
<footer class="md-footer">
<div class="md-footer-meta md-typeset">
<div class="md-footer-meta__inner md-grid">
<div class="md-copyright">
<div class="md-footer-copyright__highlight">
© Copyright 2009-2025, Josef Perktold, Skipper Seabold, Jonathan Taylor, statsmodels-developers.
</div>
Created using
<a href="https://www.sphinx-doc.org/" target="_blank" rel="noopener">Sphinx</a>
7.3.7
and
<a href="https://github.com/jbms/sphinx-immaterial/" target="_blank" rel="noopener">Sphinx-Immaterial</a>
</div>
<div class="md-social">
<a href="https://github.com/statsmodels/statsmodels/" target="_blank" rel="noopener" title="Source on github.com" class="md-social__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 496 512"><!--! Font Awesome Free 6.2.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2022 Fonticons, Inc.--><path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z"/></svg>
</a>
<a href="https://pypi.org/project/statsmodels/" target="_blank" rel="noopener" title="pypi.org" class="md-social__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.2.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2022 Fonticons, Inc.--><path d="M439.8 200.5c-7.7-30.9-22.3-54.2-53.4-54.2h-40.1v47.4c0 36.8-31.2 67.8-66.8 67.8H172.7c-29.2 0-53.4 25-53.4 54.3v101.8c0 29 25.2 46 53.4 54.3 33.8 9.9 66.3 11.7 106.8 0 26.9-7.8 53.4-23.5 53.4-54.3v-40.7H226.2v-13.6h160.2c31.1 0 42.6-21.7 53.4-54.2 11.2-33.5 10.7-65.7 0-108.6zM286.2 404c11.1 0 20.1 9.1 20.1 20.3 0 11.3-9 20.4-20.1 20.4-11 0-20.1-9.2-20.1-20.4.1-11.3 9.1-20.3 20.1-20.3zM167.8 248.1h106.8c29.7 0 53.4-24.5 53.4-54.3V91.9c0-29-24.4-50.7-53.4-55.6-35.8-5.9-74.7-5.6-106.8.1-45.2 8-53.4 24.7-53.4 55.6v40.7h106.9v13.6h-147c-31.1 0-58.3 18.7-66.8 54.2-9.8 40.7-10.2 66.1 0 108.6 7.6 31.6 25.7 54.2 56.8 54.2H101v-48.8c0-35.3 30.5-66.4 66.8-66.4zm-6.7-142.6c-11.1 0-20.1-9.1-20.1-20.3.1-11.3 9-20.4 20.1-20.4 11 0 20.1 9.2 20.1 20.4s-9 20.3-20.1 20.3z"/></svg>
</a>
<a href="https://doi.org/10.5281/zenodo.593847" target="_blank" rel="noopener" title="doi.org" class="md-social__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.2.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2022 Fonticons, Inc.--><path d="M0 216C0 149.7 53.7 96 120 96h8c17.7 0 32 14.3 32 32s-14.3 32-32 32h-8c-30.9 0-56 25.1-56 56v8h64c35.3 0 64 28.7 64 64v64c0 35.3-28.7 64-64 64H64c-35.3 0-64-28.7-64-64V216zm256 0c0-66.3 53.7-120 120-120h8c17.7 0 32 14.3 32 32s-14.3 32-32 32h-8c-30.9 0-56 25.1-56 56v8h64c35.3 0 64 28.7 64 64v64c0 35.3-28.7 64-64 64h-64c-35.3 0-64-28.7-64-64V216z"/></svg>
</a>
</div>
</div>
</div>
</footer>
</div>
<div class="md-dialog" data-md-component="dialog">
<div class="md-dialog__inner md-typeset"></div>
</div>
<script id="__config" type="application/json">{"base": ".", "features": [], "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.config.lang": "en", "search.config.pipeline": "trimmer, stopWordFilter", "search.config.separator": "[\\s\\-]+", "search.placeholder": "Search", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version.title": "Select version"}, "version": {"provider": "mike", "staticVersions": null, "versionPath": "../versions-v3.json"}}</script>
<script src="_static/sphinx_immaterial_theme.f9d9eeeb247ace16c.min.js?v=8ec58cb5"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
</body>
</html>