say more about "BY" -- fixing PR#17136

maechler · maechler · commit a581735087d7 · 2026-01-21T10:58:05.000Z
git-svn-id: https://svn.r-project.org/R/trunk@89312 00db46b3-68df-0310-9c12-caf00c1e9a41
diff --git a/src/library/stats/man/p.adjust.Rd b/src/library/stats/man/p.adjust.Rd
@@ -1,6 +1,6 @@
 % File src/library/stats/man/p.adjust.Rd
 % Part of the R package, https://www.R-project.org
-% Copyright 1995-2025 R Core Team
+% Copyright 1995-2026 R Core Team
 % Distributed under GPL 2 or later
 
 \name{p.adjust}
@@ -55,10 +55,18 @@ p.adjust.methods
 
   The \code{"BH"} (aka \code{"fdr"}) and \code{"BY"} methods of
   \I{Benjamini}, \I{Hochberg}, and \I{Yekutieli} control the false discovery rate,
-  the expected proportion of false discoveries amongst the rejected
-  hypotheses.  The false discovery rate is a less stringent condition
-  than the family-wise error rate, so these methods are more powerful
-  than the others.
+  \I{FDR}, the expected proportion of false discoveries amongst the
+  rejected hypotheses.
+
+  The \code{"BY"} correction modifies the \I{BH} procedure by replacing the
+  target level \eqn{q} with \eqn{q / \sum_{i=1}^{m} 1/i}, where \eqn{m} is
+  the number of tests (Theorem 1.3 in the reference), which controls the
+  \I{FDR} under the most general form of dependence structure.  This is
+  more conservative than \code{"BH"} and, for small \code{p}, even more
+  conservative than \I{Bonferroni}; see the examples.
+  Controlling the \I{FDR}, as the \code{"BH"} method does, is a less
+  stringent condition than controlling the family-wise error rate, so
+  \code{"BH"} is typically more powerful than the other methods.
 
   Note that you can set \code{n} larger than \code{length(p)} which
   means the unobserved p-values are assumed to be greater than all the
@@ -99,15 +107,44 @@ p.adjust.M <- p.adjust.methods[p.adjust.methods != "fdr"]
 p.adj    <- sapply(p.adjust.M, function(meth) p.adjust(p, meth))
 p.adj.60 <- sapply(p.adjust.M, function(meth) p.adjust(p, meth, n = 60))
 stopifnot(identical(p.adj[,"none"], p), p.adj <= p.adj.60)
+
 round(p.adj, 3)
 ## or a bit nicer:
-noquote(apply(p.adj, 2, format.pval, digits = 3))
-
-
-## and a graphic:
+head(round(100 * p.adj[,c(7,1:6)], 2), n=21) # in [percent]:
+##       none  holm hochberg hommel bonferroni   BH    BY
+##  [1,] 0.00  0.00     0.00   0.00       0.00 0.00  0.01 *)
+##  [2,] 0.00  0.10     0.10   0.10       0.11 0.04  0.19 *)
+##  [3,] 0.00  0.12     0.12   0.12       0.13 0.04  0.19 *)
+##  [4,] 0.01  0.46     0.46   0.42       0.49 0.09  0.43
+##  [5,] 0.01  0.48     0.48   0.45       0.53 0.09  0.43
+##  .... ..........    ............       ...............
+##  .... ..........    ............      ................
+## [18,] 0.88 29.06    29.06  27.30      44.03 2.45 11.00
+## [19,] 0.94 30.08    30.08  29.14      47.01 2.47 11.13
+## [20,] 1.13 35.02    35.02  33.89      56.49 2.82 12.71
+## [21,] 2.12 63.45    63.45  57.11     100.00 5.04 22.66
+##
+## *) The smallest 3 Bonferroni values are smaller than the "BY" ones,
+##    (John Maindonald, PR#17136)
+
+## number of rejected H0 ("P" < 0.05):
+colSums(p.adj < 0.05)
+## holm   hochberg     hommel bonferroni         BH         BY       none 
+##   11         11         11         11         20         12         22 
+
+## visual comparison
 matplot(p, p.adj, ylab="p.adjust(p, meth)", type = "l", asp = 1, lty = 1:6,
-        main = "P-value adjustments")
-legend(0.7, 0.6, p.adjust.M, col = 1:6, lty = 1:6)
+        col = 1:7, main = "P-value adjustments")
+legR <- function() {
+  legend("bottomright", p.adjust.M, col = 1:7, lty = 1:6, bty = "n", inset = 0.05)
+  rug(p) }
+legR()
+
+## zoom in & log scale
+lim <- c(7e-7, .20)
+matplot(p, p.adj, ylab="p.adjust(p, meth)", type = "l", asp = 1, lty = 1:6, col = 1:7,
+        main = "P-value adjustments [log-log]", log = "xy", xlim=lim, ylim=lim, las=1)
+legR()
 
 ## Can work with NAs:
 pN <- p; iN <- c(46, 47); pN[iN] <- NA
diff --git a/tests/Examples/stats-Ex.Rout.save b/tests/Examples/stats-Ex.Rout.save
@@ -1,6 +1,6 @@
 
-R Under development (unstable) (2025-12-26 r89234) -- "Unsuffered Consequences"
-Copyright (C) 2025 The R Foundation for Statistical Computing
+R Under development (unstable) (2026-01-21 r89311) -- "Unsuffered Consequences"
+Copyright (C) 2026 The R Foundation for Statistical Computing
 Platform: x86_64-pc-linux-gnu
 
 R is free software and comes with ABSOLUTELY NO WARRANTY.
@@ -3353,7 +3353,7 @@ Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm) :
  [1] 2 2 3 6 6 6 5 5 3 3 3
 > 
 > ## 'ties' + NAs -- notably NAs for tied x[], situation as PR#17604
->
+> 
 > x <- c(2:3, 3:5, 5:7)
 > y <- c(1,NA,2:4,NA,1,0)
 > ## allapprox() [defined above] for all variants :
@@ -8370,10 +8370,10 @@ Residual Deviance: 0.01267 	AIC: 27.03
 > ## tree from the cluster centers.
 > hc <- hclust(dist(USArrests)^2, "cen")
 > memb <- cutree(hc, k = 10)
-> cent <- NULL
-> for(k in 1:10){
-+   cent <- rbind(cent, colMeans(USArrests[memb == k, , drop = FALSE]))
-+ }
+> cent <- matrix(numeric(), 10, 4)
+> for(k in 1:10)
++   cent[k,] <- colMeans(USArrests[memb == k, , drop = FALSE])
+> 
 > hc1 <- hclust(dist(cent)^2, method = "cen", members = table(memb))
 > opar <- par(mfrow = c(1, 2))
 > plot(hc,  labels = FALSE, hang = -1, main = "Original Tree")
@@ -11875,6 +11875,7 @@ $objective
 > p.adj    <- sapply(p.adjust.M, function(meth) p.adjust(p, meth))
 > p.adj.60 <- sapply(p.adjust.M, function(meth) p.adjust(p, meth, n = 60))
 > stopifnot(identical(p.adj[,"none"], p), p.adj <= p.adj.60)
+> 
 > round(p.adj, 3)
        holm hochberg hommel bonferroni    BH    BY  none
  [1,] 0.000    0.000  0.000      0.000 0.000 0.000 0.000
@@ -11928,64 +11929,66 @@ $objective
 [49,] 1.000    0.944  0.944      1.000 0.930 1.000 0.912
 [50,] 1.000    0.944  0.944      1.000 0.944 1.000 0.944
 > ## or a bit nicer:
-> noquote(apply(p.adj, 2, format.pval, digits = 3))
-      holm     hochberg hommel   bonferroni BH       BY      none    
- [1,] 1.18e-05 1.18e-05 1.18e-05 1.18e-05   1.18e-05 5.3e-05 2.35e-07
- [2,] 0.00103  0.00103  0.00101  0.00105    0.000429 0.00193 2.10e-05
- [3,] 0.00124  0.00124  0.00124  0.00129    0.000429 0.00193 2.58e-05
- [4,] 0.00461  0.00461  0.00422  0.00491    0.000947 0.00426 9.81e-05
- [5,] 0.00484  0.00484  0.00453  0.00526    0.000947 0.00426 0.000105
- [6,] 0.00559  0.00559  0.00521  0.00621    0.000947 0.00426 0.000124
- [7,] 0.00583  0.00583  0.00557  0.00663    0.000947 0.00426 0.000133
- [8,] 0.00674  0.00674  0.00659  0.00784    0.000980 0.00441 0.000157
- [9,] 0.00947  0.00947  0.00924  0.01127    0.001253 0.00564 0.000225
-[10,] 0.01556  0.01556  0.01518  0.01898    0.001898 0.00854 0.000380
-[11,] 0.02446  0.02446  0.02446  0.03057    0.002780 0.01251 0.000611
-[12,] 0.06294  0.06294  0.05810  0.08070    0.006725 0.03026 0.001614
-[13,] 0.12549  0.12549  0.10898  0.16512    0.012637 0.05686 0.003302
-[14,] 0.13092  0.13092  0.11677  0.17692    0.012637 0.05686 0.003538
-[15,] 0.18853  0.18853  0.16758  0.26185    0.017457 0.07854 0.005237
-[16,] 0.23912  0.23912  0.21179  0.34160    0.020762 0.09341 0.006832
-[17,] 0.24001  0.24001  0.21884  0.35296    0.020762 0.09341 0.007059
-[18,] 0.29057  0.29057  0.27296  0.44026    0.024459 0.11004 0.008805
-[19,] 0.30083  0.30083  0.29143  0.47005    0.024740 0.11131 0.009401
-[20,] 0.35024  0.35024  0.33894  0.56490    0.028245 0.12708 0.011298
-[21,] 0.63451  0.63451  0.57105  1.00000    0.050358 0.22657 0.021150
-[22,] 1.00000  0.94379  0.94379  1.00000    0.111880 0.50337 0.049227
-[23,] 1.00000  0.94379  0.94379  1.00000    0.130463 0.58698 0.060533
-[24,] 1.00000  0.94379  0.94379  1.00000    0.130463 0.58698 0.062622
-[25,] 1.00000  0.94379  0.94379  1.00000    0.147903 0.66545 0.073952
-[26,] 1.00000  0.94379  0.94379  1.00000    0.159252 0.71651 0.082811
-[27,] 1.00000  0.94379  0.94379  1.00000    0.159877 0.71932 0.086333
-[28,] 1.00000  0.94379  0.94379  1.00000    0.212617 0.95661 0.119065
-[29,] 1.00000  0.94379  0.94379  1.00000    0.325999 1.00000 0.189080
-[30,] 1.00000  0.94379  0.94379  1.00000    0.343082 1.00000 0.205849
-[31,] 1.00000  0.94379  0.94379  1.00000    0.356325 1.00000 0.220921
-[32,] 1.00000  0.94379  0.94379  1.00000    0.446250 1.00000 0.285600
-[33,] 1.00000  0.94379  0.94379  1.00000    0.461954 1.00000 0.304889
-[34,] 1.00000  0.94379  0.94379  1.00000    0.683577 1.00000 0.466068
-[35,] 1.00000  0.94379  0.94379  1.00000    0.683577 1.00000 0.483081
-[36,] 1.00000  0.94379  0.94379  1.00000    0.683577 1.00000 0.492175
-[37,] 1.00000  0.94379  0.94379  1.00000    0.718845 1.00000 0.531945
-[38,] 1.00000  0.94379  0.94379  1.00000    0.741435 1.00000 0.575155
-[39,] 1.00000  0.94379  0.94379  1.00000    0.741435 1.00000 0.578319
-[40,] 1.00000  0.94379  0.94379  1.00000    0.762606 1.00000 0.618589
-[41,] 1.00000  0.94379  0.94379  1.00000    0.762606 1.00000 0.636362
-[42,] 1.00000  0.94379  0.94379  1.00000    0.762606 1.00000 0.644859
-[43,] 1.00000  0.94379  0.94379  1.00000    0.762606 1.00000 0.655841
-[44,] 1.00000  0.94379  0.94379  1.00000    0.782487 1.00000 0.688588
-[45,] 1.00000  0.94379  0.94379  1.00000    0.798874 1.00000 0.718986
-[46,] 1.00000  0.94379  0.94379  1.00000    0.880265 1.00000 0.817954
-[47,] 1.00000  0.94379  0.94379  1.00000    0.880265 1.00000 0.827449
-[48,] 1.00000  0.94379  0.94379  1.00000    0.930478 1.00000 0.897130
-[49,] 1.00000  0.94379  0.94379  1.00000    0.930478 1.00000 0.911868
-[50,] 1.00000  0.94379  0.94379  1.00000    0.943789 1.00000 0.943789
-> 
-> 
-> ## and a graphic:
+> head(round(100 * p.adj[,c(7,1:6)], 2), n=21) # in [percent]:
+      none  holm hochberg hommel bonferroni   BH    BY
+ [1,] 0.00  0.00     0.00   0.00       0.00 0.00  0.01
+ [2,] 0.00  0.10     0.10   0.10       0.11 0.04  0.19
+ [3,] 0.00  0.12     0.12   0.12       0.13 0.04  0.19
+ [4,] 0.01  0.46     0.46   0.42       0.49 0.09  0.43
+ [5,] 0.01  0.48     0.48   0.45       0.53 0.09  0.43
+ [6,] 0.01  0.56     0.56   0.52       0.62 0.09  0.43
+ [7,] 0.01  0.58     0.58   0.56       0.66 0.09  0.43
+ [8,] 0.02  0.67     0.67   0.66       0.78 0.10  0.44
+ [9,] 0.02  0.95     0.95   0.92       1.13 0.13  0.56
+[10,] 0.04  1.56     1.56   1.52       1.90 0.19  0.85
+[11,] 0.06  2.45     2.45   2.45       3.06 0.28  1.25
+[12,] 0.16  6.29     6.29   5.81       8.07 0.67  3.03
+[13,] 0.33 12.55    12.55  10.90      16.51 1.26  5.69
+[14,] 0.35 13.09    13.09  11.68      17.69 1.26  5.69
+[15,] 0.52 18.85    18.85  16.76      26.18 1.75  7.85
+[16,] 0.68 23.91    23.91  21.18      34.16 2.08  9.34
+[17,] 0.71 24.00    24.00  21.88      35.30 2.08  9.34
+[18,] 0.88 29.06    29.06  27.30      44.03 2.45 11.00
+[19,] 0.94 30.08    30.08  29.14      47.01 2.47 11.13
+[20,] 1.13 35.02    35.02  33.89      56.49 2.82 12.71
+[21,] 2.12 63.45    63.45  57.11     100.00 5.04 22.66
+> ##       none  holm hochberg hommel bonferroni   BH    BY
+> ##  [1,] 0.00  0.00     0.00   0.00       0.00 0.00  0.01 *)
+> ##  [2,] 0.00  0.10     0.10   0.10       0.11 0.04  0.19 *)
+> ##  [3,] 0.00  0.12     0.12   0.12       0.13 0.04  0.19 *)
+> ##  [4,] 0.01  0.46     0.46   0.42       0.49 0.09  0.43
+> ##  [5,] 0.01  0.48     0.48   0.45       0.53 0.09  0.43
+> ##  .... ..........    ............       ...............
+> ##  .... ..........    ............      ................
+> ## [18,] 0.88 29.06    29.06  27.30      44.03 2.45 11.00
+> ## [19,] 0.94 30.08    30.08  29.14      47.01 2.47 11.13
+> ## [20,] 1.13 35.02    35.02  33.89      56.49 2.82 12.71
+> ## [21,] 2.12 63.45    63.45  57.11     100.00 5.04 22.66
+> ##
+> ## *) The smallest 3 Bonferroni values are smaller than the "BY" ones,
+> ##    (John Maindonald, PR#17136)
+> 
+> ## number of rejected H0 ("P" < 0.05):
+> colSums(p.adj < 0.05)
+      holm   hochberg     hommel bonferroni         BH         BY       none 
+        11         11         11         11         20         12         22 
+> ## holm   hochberg     hommel bonferroni         BH         BY       none 
+> ##   11         11         11         11         20         12         22 
+> 
+> ## visual comparison
 > matplot(p, p.adj, ylab="p.adjust(p, meth)", type = "l", asp = 1, lty = 1:6,
-+         main = "P-value adjustments")
-> legend(0.7, 0.6, p.adjust.M, col = 1:6, lty = 1:6)
++         col = 1:7, main = "P-value adjustments")
+> legR <- function() {
++   legend("bottomright", p.adjust.M, col = 1:7, lty = 1:6, bty = "n", inset = 0.05)
++   rug(p) }
+> legR()
+> 
+> ## zoom in & log scale
+> lim <- c(7e-7, .20)
+> matplot(p, p.adj, ylab="p.adjust(p, meth)", type = "l", asp = 1, lty = 1:6, col = 1:7,
++         main = "P-value adjustments [log-log]", log = "xy", xlim=lim, ylim=lim, las=1)
+> legR()
+Warning in rug(p) : some values will be clipped
 > 
 > ## Can work with NAs:
 > pN <- p; iN <- c(46, 47); pN[iN] <- NA
@@ -19682,7 +19685,7 @@ Number of Fisher Scoring iterations: 6
 > cleanEx()
 > options(digits = 7L)
 > base::cat("Time elapsed: ", proc.time() - base::get("ptime", pos = 'CheckExEnv'),"\n")
-Time elapsed:  5.797 0.16 5.961 0 0 
+Time elapsed:  3.598 0.325 4.098 0 0 
 > grDevices::dev.off()
 null device 
           1