Skip to content

Commit df07158

Browse files
committed
Work-around for Stata crash when number of fixed effects is very large: require # of FE as input, and don't represent them as linked list.
1 parent 249e41b commit df07158

File tree

4 files changed

+147
-40
lines changed

4 files changed

+147
-40
lines changed

boottest.ado

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
*! boottest 2.1.8 3 August 2018
1+
*! boottest 2.1.9 4 August 2018
22
*! Copyright (C) 2015-18 David Roodman
33

44
* This program is free software: you can redistribute it and/or modify
@@ -33,7 +33,7 @@ program define _boottest, rclass sortpreserve
3333
version 11
3434

3535
mata st_local("StataVersion", boottestStataVersion()); st_local("CodeVersion", boottestVersion())
36-
if `StataVersion' != c(stata_version) | "`CodeVersion'" < "02.01.00" {
36+
if `StataVersion' != c(stata_version) | "`CodeVersion'" < "02.01.09" {
3737
cap findfile "lboottest.mlib"
3838
while !_rc {
3939
erase "`r(fn)'"
@@ -227,7 +227,8 @@ program define _boottest, rclass sortpreserve
227227
local scoreBS = "`boottype'"=="score"
228228

229229
local FEname = cond(inlist("`cmd'","xtreg","xtivreg","xtivreg2"), "`e(ivar)'", "`e(absvar)'`e(absvars)'")
230-
230+
local NFE = cond(inlist("`cmd'","xtreg","xtivreg","xtivreg2"), e(N_g) , 0`e(k_absorb)'`e(K1)')
231+
231232
if `"`seed'"'!="" set seed `seed'
232233

233234
tempname p padj se stat df df_r hold C C0 CC0 b V keepC keepW repsname repsFeasname
@@ -322,7 +323,7 @@ program define _boottest, rclass sortpreserve
322323

323324
mata _boottestp = J(`cons',1,`k') \ order(tokens("`colnames'")', 1)[invorder(order(tokens("`Xnames_exog' `Xnames_endog'")', 1))] \ `k'+1
324325

325-
if "`FEname'"!="" & `cons' {
326+
if 0`NFE' & `cons' {
326327
mata _boottestp = _boottestp[|2\.|]
327328
local cons 0
328329
local _cons
@@ -414,7 +415,7 @@ program define _boottest, rclass sortpreserve
414415
_estimates unhold `hold'
415416
if r(k_autoCns) mat `C0' = `C0'[r(k_autoCns)+1...,1...]
416417

417-
if "`FEname'" != "" {
418+
if `NFE' {
418419
mata st_local("rc", strofreal(any(0:!=select(st_matrix("`C0'"),st_matrixcolstripe("`C0'")[,2]':=="_cons"))))
419420

420421
if `rc' {
@@ -577,7 +578,7 @@ program define _boottest, rclass sortpreserve
577578
mata boottest_stata("`stat'", "`df'", "`df_r'", "`p'", "`padj'", "`cimat'", "`plotmat'", "`peakmat'", `level', `ML', `LIML', 0`fuller', `K', `ar', `null', `scoreBS', "`weighttype'", "`ptype'", ///
578579
"`madjust'", `N_h0s', "`Xnames_exog'", "`Xnames_endog'", 0`cons', ///
579580
"`Ynames'", "`b'", "`V'", "`W'", "`ZExclnames'", "`hold'", "`scnames'", `hasrobust', "`allclustvars'", `:word count `bootcluster'', `:word count `clustvars'', ///
580-
"`FEname'", "`wtname'", "`wtype'", "`C'", "`C0'", `reps', "`repsname'", "`repsFeasname'", `small', "`svmat'", "`dist'", ///
581+
"`FEname'", 0`NFE', "`wtname'", "`wtype'", "`C'", "`C0'", `reps', "`repsname'", "`repsFeasname'", `small', "`svmat'", "`dist'", ///
581582
`gridmin', `gridmax', `gridpoints', `matsizegb')
582583
_estimates unhold `hold'
583584

@@ -693,6 +694,7 @@ program define _boottest, rclass sortpreserve
693694
end
694695

695696
* Version history
697+
* 2.1.9 Work-around for Stata crash when number of fixed effects is very large: require # of FE as input, and don't represent them as linked list.
696698
* 2.1.8 Fixed 2.1.6 crash with FE. Fixed parsing of matsizegb() option.
697699
* 2.1.6 Changed to faster computation for WCR with many clusters, but not quite WB. In multiway only applies to intersection of all clusters.
698700
* 2.1.4 Fixed CI scaling issue introduced in 2.0.5 that affects scoretest, waldtest

boottest.mata

Lines changed: 35 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
*! boottest 2.1.8 3 August 2018
1+
*! boottest 2.1.9 4 August 2018
22
*! Copyright (C) 2015-18 David Roodman
33

44
* This program is free software: you can redistribute it and/or modify
@@ -20,7 +20,7 @@ mata set mataoptimize on
2020
mata set matalnum off
2121
2222
string scalar boottestStataVersion() return("`c(stata_version)'")
23-
string scalar boottestVersion() return("02.01.00")
23+
string scalar boottestVersion() return("02.01.09")
2424
2525
struct smatrix {
2626
real matrix M
@@ -68,7 +68,7 @@ class boottestModel {
6868
pointer (class AnalyticalModel scalar) scalar pM_Repl, pM
6969
struct smatrix matrix denom
7070
struct smatrix colvector Kcd, XEndstar, XExXEndstar, ZExclXEndstar, XZi, eZi, euZVR0
71-
pointer (struct structFE scalar) scalar FEs
71+
struct structFE rowvector FEs
7272
pointer (real matrix) matrix pQ
7373
pointer (struct boottest_clust scalar) scalar pBootClust
7474
@@ -386,8 +386,8 @@ void boottestModel::setID (real matrix ID, | real scalar NBootClustVar, rea
386386
this.pID = &ID; this.NBootClustVar = editmissing(NBootClustVar,1); this.NErrClust=editmissing(NErrClust,1); setdirty(1)
387387
if (cols(ID)) this.robust = 1
388388
}
389-
void boottestModel::setFEID (real matrix ID) {
390-
this.pFEID = &ID; setdirty(1)
389+
void boottestModel::setFEID(real matrix ID, real scalar NFE) {
390+
this.pFEID = &ID; this.NFE = NFE; setdirty(1)
391391
}
392392
void boottestModel::setlevel (real scalar level )
393393
this.level = level
@@ -602,7 +602,7 @@ void boottestModel::boottest() {
602602
real colvector rAll, sortID, o, _FEID
603603
real rowvector val, ClustCols
604604
real matrix RAll, L, LAll, vec, Combs, t, IDErr
605-
real scalar i, j, c, minN, sumN, _reps, g
605+
real scalar i, j, c, minN, sumN, _reps, g, i_FE
606606
pointer (real matrix) scalar _pR0, pIDAll
607607
class AnalyticalModel scalar M_WRE
608608
pragma unset vec; pragma unset val; pragma unused M_WRE
@@ -713,37 +713,36 @@ void boottestModel::boottest() {
713713
J_ClustN_NBootClust = J(Clust.N, NBootClust, 0)
714714
}
715715
716-
if (cols(*pFEID)) { // fixed effect prep
716+
if (NFE) {
717717
sortID = (*pFEID)[o = order(*pFEID, 1)]
718-
NFE = 1; FEboot = reps>0; j = Nobs; _FEID = wtFE = J(Nobs, 1, 1)
718+
i_FE = 1; FEboot = reps>0; j = Nobs; _FEID = wtFE = J(Nobs, 1, 1)
719+
FEs = structFE(NFE)
719720
for (i=Nobs-1;i;i--) {
720721
if (sortID[i] != sortID[i+1]) {
721-
NFE++
722-
next = FEs; (FEs = &(structFE()))->next = next // add new FE to linked list
723-
FEs->is = o[|i+1\j|]
722+
FEs[i_FE].is = o[|i+1\j|]
724723
if (weights) {
725-
FEs->wt = (*pwt)[FEs->is]
726-
FEs->wt = FEs->wt / colsum(FEs->wt)
724+
t = (*pwt)[FEs[i_FE].is]
725+
FEs[i_FE].wt = t / colsum(t)
727726
} else
728-
FEs->wt = J(j-i,1,1/(j-i))
729-
wtFE[FEs->is] = FEs->wt
727+
FEs[i_FE].wt = J(j-i,1,1/(j-i))
728+
wtFE[FEs[i_FE].is] = FEs[i_FE].wt
730729
j = i
731730
732731
if (FEboot & NClustVar) { // are all of this FE's obs in same bootstrapping cluster?
733-
t = (*pID)[FEs->is, 1..NBootClustVar]
732+
t = (*pID)[FEs[i_FE].is, 1..NBootClustVar]
734733
FEboot = all(t :== t[1,])
735734
}
735+
++i_FE
736736
}
737-
_FEID[o[i]] = NFE
737+
_FEID[o[i]] = i_FE
738738
}
739-
next = FEs; (FEs = &(structFE()))->next = next
740-
FEs->is = FEs->is = o[|.\j|]
739+
FEs[NFE].is = FEs[NFE].is = o[|.\j|]
741740
if (weights) {
742-
FEs->wt = (*pwt)[FEs->is]
743-
FEs->wt = FEs->wt / colsum(FEs->wt)
741+
t = (*pwt)[FEs[NFE].is]
742+
FEs[NFE].wt = t / colsum(t)
744743
} else
745-
FEs->wt = J(j-i,1,1/(j-i))
746-
wtFE[FEs->is] = FEs->wt
744+
FEs[NFE].wt = J(j-i,1,1/(j-i))
745+
wtFE[FEs[NFE].is] = FEs[NFE].wt
747746
pFEID = &_FEID // ordinal fixed effect ID
748747
if (robust & !(scoreBS | FEboot) & granular < NErrClustCombs)
749748
infoBootAll = _panelsetup(*pIDAll, 1..NBootClustVar) // info for bootstrapping clusters wrt data collapsed to intersections of all bootstrapping & error clusters
@@ -1094,9 +1093,12 @@ real scalar boottestModel::MakeNonWRENumers(real scalar thisWeightGrpStart, real
10941093
10951094
10961095
1096+
1097+
1098+
1099+
10971100
void boottestModel::MakeNonWREStats(real scalar thisWeightGrpStart, real scalar thisWeightGrpStop) {
1098-
real scalar d, i, c, j, l; real matrix eu, eueu, t; real colvector numer_l
1099-
pointer (real matrix) scalar peZVR0, pVR0
1101+
real scalar d, i, c, j, l; real matrix eu, eueu, t; real colvector numer_l; pointer (real matrix) scalar peZVR0, pVR0
11001102
11011103
if (robust) {
11021104
if (granular < NErrClustCombs & thisWeightGrpStart==1) { // if pure robust and no multi-way clustering, initialized stuff that depends on r0 but not u
@@ -1169,7 +1171,7 @@ void boottestModel::MakeNonWREStats(real scalar thisWeightGrpStart, real scalar
11691171
denom[i,j].M = colsum(u :* QQ * u)
11701172
}
11711173
else { // alternative core computational loop, avoiding computing Q'Q which has cubic time cost in numbers of bootstrapping clusters
1172-
if (granular) // prep optimized treatment when bootstrapping by small groups
1174+
if (granular) // prep optimized treatment when bootstrapping by many/small groups
11731175
if (purerobust) {
11741176
eu = *M_DGP.partialFE(&(pM->e :* u)) - *pX * betadev
11751177
eueu = eu:*eu
@@ -1332,13 +1334,12 @@ real matrix boottestModel::count_binary(real scalar N, real scalar lo, real scal
13321334
13331335
// partial fixed effects out of a data matrix
13341336
pointer(real matrix) scalar AnalyticalModel::partialFE(pointer(real matrix) scalar pIn) {
1337+
real matrix Out, t; real scalar i
13351338
if (parent->NFE & pIn!=NULL) {
1336-
real matrix Out, t; pointer (struct structFE scalar) scalar thisFE
1337-
thisFE = parent->FEs; Out = J(rows(*pIn), cols(*pIn), .)
1338-
while (thisFE != NULL) {
1339-
t = (*pIn)[thisFE->is,]
1340-
Out[thisFE->is,] = t :- cross(thisFE->wt, t)
1341-
thisFE = thisFE->next
1339+
Out = *pIn
1340+
for (i=parent->NFE;i;i--) {
1341+
t = Out[parent->FEs[i].is,]
1342+
Out[parent->FEs[i].is,] = t :- cross(parent->FEs[i].wt, t)
13421343
}
13431344
return(&Out)
13441345
}
@@ -1516,7 +1517,7 @@ void boottest_stata(string scalar statname, string scalar dfname, string scalar
15161517
real scalar K, real scalar AR, real scalar null, real scalar scoreBS, string scalar weighttype, string scalar ptype, string scalar madjtype, real scalar NumH0s,
15171518
string scalar XExnames, string scalar XEndnames, real scalar hascons, string scalar Ynames, string scalar bname, string scalar Vname, string scalar Wname,
15181519
string scalar ZExclnames, string scalar samplename, string scalar scnames, real scalar robust, string scalar IDnames, real scalar NBootClustVar, real scalar NErrClust,
1519-
string scalar FEname, string scalar wtname, string scalar wttype, string scalar Cname, string scalar C0name, real scalar reps, string scalar repsname, string scalar repsFeasname,
1520+
string scalar FEname, real scalar NFE, string scalar wtname, string scalar wttype, string scalar Cname, string scalar C0name, real scalar reps, string scalar repsname, string scalar repsFeasname,
15201521
real scalar small, string scalar diststat, string scalar distname, real scalar gridmin, real scalar gridmax, real scalar gridpoints, real scalar MaxMatSize) {
15211522
15221523
real matrix C, R, C0, R0, ZExcl, ID, FEID, sc, XEnd, XEx
@@ -1548,7 +1549,7 @@ void boottest_stata(string scalar statname, string scalar dfname, string scalar
15481549
M.setZExcl(ZExcl)
15491550
M.setwt (wt)
15501551
M.setID(ID, NBootClustVar, NErrClust)
1551-
M.setFEID(FEID)
1552+
M.setFEID(FEID, NFE)
15521553
M.setR (R , r )
15531554
M.setR0(R0, r0)
15541555
M.setnull(null)

crosstabs.do

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
cap mata mata drop CT1()
2+
cap mata mata drop CT2()
3+
4+
mata
5+
mata clear
6+
mata set matastrict on
7+
mata set mataoptimize on
8+
mata set matalnum off
9+
10+
real matrix CT1(real colvector X, real colvector ID1, real colvector ID2) {
11+
transmorphic CT, inds1, inds2, loc1, loc2; real scalar i, i1, i2, ind1, ind2; real colvector key; real matrix retval
12+
13+
CT = asarray_create("real", 2); asarray_notfound(CT, 0)
14+
inds1 = asarray_create("real", 1)
15+
inds2 = asarray_create("real", 1)
16+
17+
for (i=rows(X);i;i--) {
18+
key = (ind1 = ID1[i]), (ind2 = ID2[i])
19+
asarray(CT, key, asarray(CT, key) + X[i])
20+
asarray(inds1, ID1[i], 1) // add to catalogs of observed instances
21+
asarray(inds2, ID2[i], 1)
22+
}
23+
24+
retval = J(asarray_elements(inds1), asarray_elements(inds2), 0)
25+
i1 = 0
26+
for (loc1=asarray_first(inds1); loc1!=NULL; loc1=asarray_next(inds1, loc1)) {
27+
ind1 = asarray_key(inds1, loc1)
28+
i1++
29+
i2 = 0
30+
for (loc2=asarray_first(inds2); loc2!=NULL; loc2=asarray_next(inds2, loc2)) {
31+
ind2 = asarray_key(inds2, loc2)
32+
retval[i1,++i2] = asarray(CT, (ind1,ind2))
33+
}
34+
}
35+
return(retval)
36+
}
37+
38+
real matrix CT2(real colvector X, real colvector ID1, real colvector ID2) {
39+
class Factor scalar F
40+
F = _factor((ID1,ID2))
41+
return(rowshape(aggregate_sum(F, F.sort(X), ., ""), rows(uniqrows(F.keys[,1]))))
42+
}
43+
44+
real matrix CT3(real colvector X, real colvector ID1, real colvector ID2) {
45+
real scalar i, j, t, N, N1, N2; real matrix retval; real colvector sortID1, sortID2, sortX, _ID2, _ID2i, Xi, o1, o2; real matrix info
46+
47+
N = rows(X)
48+
49+
sortID1 = ID1[o1 = order(ID1, 1)]
50+
sortID2 = ID2[o1]
51+
sortX = X[o1]
52+
53+
N1 = rows(info = _panelsetup(sortID1, 1))
54+
55+
sortID2 = sortID2[o2 = order(sortID2, 1)]
56+
57+
N2 = 1; _ID2 = J(N, 1, 1)
58+
for (i=N-1;i;i--) {
59+
if (sortID2[i] != sortID2[i+1])
60+
N2++
61+
_ID2[o2[i]] = N2 // ordinal version of ID2 expressed with respect to ID1-sorted data
62+
}
63+
64+
retval = J(N1, N2, 0)
65+
for (i=N1;i;i--) {
66+
_ID2i = panelsubmatrix(_ID2, i, info)
67+
Xi = panelsubmatrix(sortX , i, info)
68+
for (j=rows(_ID2i);j;j--) {
69+
t = _ID2i[j]
70+
retval[i,t] = retval[i,t] + Xi[j]
71+
}
72+
}
73+
return(retval)
74+
}
75+
76+
real matrix CT4(real colvector X, real colvector ID1, real colvector ID2) {
77+
real colvector ID, o; real matrix info
78+
ID = ID1, ID2
79+
ID = ID[o = order(ID, (1,2)),]
80+
info = _panelsetup(ID, (1,2))
81+
return(1)
82+
// return(_panelsum(X[o], _panelsetup(ID, (1,2))))
83+
}
84+
85+
86+
rseed(9821374)
87+
N = 1000000
88+
N1 = 5
89+
N2 = 5
90+
X = runiform(N,1)
91+
ID1 = rdiscrete(N,1,J(N1,1,1/N1)) :+ 5
92+
ID2 = rdiscrete(N,1,J(N2,1,1/N2)) :+ 10
93+
//CT1(X,ID1,ID2)
94+
CT2(X,ID1,ID2)
95+
CT3(X,ID1,ID2)
96+
CT4(X,ID1,ID2)
97+
98+
timer_clear()
99+
//timer_on(1);(void) CT1(X,ID1,ID2);timer_off(1)
100+
timer_on(2);for(i=1;i;i--) (void) CT2(X,ID1,ID2);timer_off(2)
101+
timer_on(3);for(i=1;i;i--) (void) CT3(X,ID1,ID2);timer_off(3)
102+
timer_on(4);for(i=1;i;i--) (void) CT4(X,ID1,ID2);timer_off(4)
103+
timer()
104+
end

lboottest.mlib

-136 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)