42
42
43
43
44
44
NNS.CDF <- function (variable , degree = 0 , target = NULL , type = " CDF" , plot = TRUE ){
45
-
46
- if (any(class(variable )%in% c(" tbl" ," data.table" )) && dim(variable )[2 ]== 1 ){
45
+ if (any(class(variable ) %in% c(" tbl" , " data.table" )) && dim(variable )[2 ] == 1 ){
47
46
variable <- as.vector(unlist(variable ))
48
47
}
49
- if (any(class(variable )%in% c(" tbl" ," data.table" ))){
48
+ if (any(class(variable ) %in% c(" tbl" , " data.table" ))){
50
49
variable <- as.data.frame(variable )
51
50
}
52
51
53
52
if (! is.null(target )){
54
- if (is.null(dim(variable )) || dim(variable )[2 ]== 1 ){
55
- if (target < min(variable ) || target > max(variable )){
53
+ if (is.null(dim(variable )) || dim(variable )[2 ] == 1 ){
54
+ if (target < min(variable ) || target > max(variable )){
56
55
stop(" Please make sure target is within the observed values of variable." )
57
56
}
58
57
} else {
59
- if (target [1 ]< min(variable [,1 ]) || target [1 ]> max(variable [,1 ])){
58
+ if (target [1 ] < min(variable [,1 ]) || target [1 ] > max(variable [,1 ])){
60
59
stop(" Please make sure target 1 is within the observed values of variable 1." )
61
60
}
62
- if (target [2 ]< min(variable [,2 ]) || target [2 ]> max(variable [,2 ])){
61
+ if (target [2 ] < min(variable [,2 ]) || target [2 ] > max(variable [,2 ])){
63
62
stop(" Please make sure target 2 is within the observed values of variable 2." )
64
63
}
65
64
}
66
65
}
66
+
67
67
type <- tolower(type )
68
- if (! (type %in% c(" cdf" ," survival" , " hazard" , " cumulative hazard" ))){
68
+ if (! (type %in% c(" cdf" , " survival" , " hazard" , " cumulative hazard" ))){
69
69
stop(paste(" Please select a type from: " , " `CDF`, " , " `survival`, " , " `hazard`, " , " `cumulative hazard`" ))
70
70
}
71
+
72
+ # Univariate Case
71
73
if (is.null(dim(variable )) || dim(variable )[2 ] == 1 ){
72
74
overall_target <- sort(variable )
73
75
x <- overall_target
74
- if (degree > 0 ){
75
- CDF <- LPM.ratio(degree , overall_target , variable )
76
- } else {
77
- cdf_fun <- ecdf(x )
78
- CDF <- cdf_fun(overall_target )
79
- }
76
+ CDF <- LPM.ratio(degree , overall_target , variable )
80
77
values <- cbind.data.frame(sort(variable ), CDF )
81
78
colnames(values ) <- c(deparse(substitute(variable )), " CDF" )
82
79
if (! is.null(target )){
@@ -88,106 +85,177 @@ NNS.CDF <- function(variable, degree = 0, target = NULL, type = "CDF", plot = TR
88
85
if (type == " survival" ){
89
86
CDF <- 1 - CDF
90
87
P <- 1 - P
91
- }else if (type == " hazard" ){
92
- CDF <- exp(log(density(x , n = length(x ))$ y )- log(1 - CDF ))
88
+ ylabel <- " S(x)"
89
+ } else if (type == " hazard" ){
90
+ n <- length(x )
91
+ window <- min(10 , n - 1 )
92
+ f_proxy <- numeric (n )
93
+ for (i in 1 : n ){
94
+ start <- max(1 , i - window %/% 2 )
95
+ end <- min(n , i + window %/% 2 )
96
+ dx <- x [end ] - x [start ]
97
+ f_proxy [i ] <- (CDF [end ] - CDF [start ]) / dx
98
+ }
99
+ f_proxy <- pmax(f_proxy , 1e-10 )
100
+ reg_fit <- NNS.reg(x , f_proxy , order = NULL , n.best = 1 , point.est = if (! is.null(target )) target else NULL , plot = FALSE )
101
+ dens <- pmax(reg_fit $ Fitted $ y.hat , 1e-10 )
102
+ S <- pmax(1e-10 , 1 - CDF )
103
+ CDF <- dens / S
104
+ CDF <- pmin(CDF , 1e6 )
105
+ CDF <- pmax(0 , CDF )
93
106
ylabel <- " h(x)"
94
- P <- NNS.reg(x [- length(x )], CDF [- length(x )], order = " max" , point.est = c(x [length(x )], target ), plot = FALSE )$ Point.est
95
- CDF [is.infinite(CDF )] <- P [1 ]
96
- P <- P [- 1 ]
97
- }else if (type == " cumulative hazard" ){
98
- CDF <- - log((1 - CDF ))
107
+ if (! is.null(target )){
108
+ P <- reg_fit $ Point.est / S [which.min(abs(x - target ))]
109
+ P <- min(P , 1e6 )
110
+ P <- max(0 , P )
111
+ CDF [is.infinite(CDF )] <- P
112
+ }
113
+ } else if (type == " cumulative hazard" ){
114
+ S <- pmax(1e-10 , 1 - CDF )
115
+ CDF <- - log(S )
116
+ CDF <- pmax(0 , CDF )
117
+ if (! is.null(target )){
118
+ reg_fit <- NNS.reg(x , CDF , order = NULL , n.best = 1 , point.est = target , plot = FALSE )
119
+ P <- reg_fit $ Point.est
120
+ }
99
121
ylabel <- " H(x)"
100
- P <- NNS.reg(x [- length(x )], CDF [- length(x )], order = " max" , point.est = c(x [length(x )], target ), plot = FALSE )$ Point.est
101
- CDF [is.infinite(CDF )] <- P [1 ]
102
- P <- P [- 1 ]
103
122
}
104
123
if (plot ){
105
124
plot(x , CDF , pch = 19 , col = ' steelblue' , xlab = deparse(substitute(variable )), ylab = ylabel , main = toupper(type ), type = " s" , lwd = 2 )
106
125
points(x , CDF , pch = 19 , col = ' steelblue' )
107
- lines(x , CDF , lty = 2 , col = ' steelblue' )
126
+ lines(x , CDF , lty = 2 , col = ' steelblue' )
108
127
if (! is.null(target )){
109
- segments(target ,0 , target ,P , col = " red" , lwd = 2 , lty = 2 )
128
+ segments(target , 0 , target , P , col = " red" , lwd = 2 , lty = 2 )
110
129
segments(min(variable ), P , target , P , col = " red" , lwd = 2 , lty = 2 )
111
130
points(target , P , col = " green" , pch = 19 )
112
- mtext(text = round(P ,4 ), col = " red" , side = 2 , at = P , las = 2 )
113
- mtext(text = round(target ,4 ), col = " red" , side = 1 , at = target , las = 1 )
131
+ mtext(text = round(P , 4 ), col = " red" , side = 2 , at = P , las = 2 )
132
+ mtext(text = round(target , 4 ), col = " red" , side = 1 , at = target , las = 1 )
114
133
}
115
134
}
116
135
values <- data.table :: data.table(cbind.data.frame(x , CDF ))
117
136
colnames(values ) <- c(deparse(substitute(variable )), ylabel )
118
137
return (
119
138
list (
120
- " Function" = values ,
139
+ " Function" = values ,
121
140
" target.value" = P
122
141
)
123
142
)
124
- } else {
125
- overall_target_1 <- (variable [,1 ])
126
- overall_target_2 <- (variable [,2 ])
127
- CDF <- (
128
- Co.LPM(degree , sort(variable [,1 ]), sort(variable [,2 ]), overall_target_1 , overall_target_2 ) /
129
- (
130
- Co.LPM(degree , sort(variable [,1 ]), sort(variable [,2 ]), overall_target_1 , overall_target_2 ) +
131
- Co.UPM(degree , sort(variable [,1 ]), sort(variable [,2 ]), overall_target_1 , overall_target_2 ) +
132
- D.UPM(degree ,degree , sort(variable [,1 ]), sort(variable [,2 ]), overall_target_1 , overall_target_2 ) +
133
- D.LPM(degree ,degree , sort(variable [,1 ]), sort(variable [,2 ]), overall_target_1 , overall_target_2 )
134
- )
143
+ }
144
+ # Bivariate Case
145
+ else {
146
+ overall_target_1 <- variable [,1 ]
147
+ overall_target_2 <- variable [,2 ]
148
+
149
+ sorted_indices <- order(variable [,1 ], variable [,2 ])
150
+ sorted_x <- variable [sorted_indices , 1 ]
151
+ sorted_y <- variable [sorted_indices , 2 ]
152
+
153
+ joint_cdf <- (
154
+ Co.LPM(degree , overall_target_1 , overall_target_2 , overall_target_1 , overall_target_2 ) /
155
+ (
156
+ Co.LPM(degree , overall_target_1 , overall_target_2 , overall_target_1 , overall_target_2 ) +
157
+ Co.UPM(degree , overall_target_1 , overall_target_2 , overall_target_1 , overall_target_2 ) +
158
+ D.UPM(degree , degree , overall_target_1 , overall_target_2 , overall_target_1 , overall_target_2 ) +
159
+ D.LPM(degree , degree , overall_target_1 , overall_target_2 , overall_target_1 , overall_target_2 )
160
+ )
135
161
)
162
+ CDF <- joint_cdf [sorted_indices ]
163
+
164
+ marginal_X <- LPM.ratio(degree , sorted_x , overall_target_1 )
165
+ marginal_Y <- LPM.ratio(degree , sorted_y , overall_target_2 )
166
+
167
+ ylabel <- " Probability"
136
168
if (type == " survival" ){
137
- CDF <- 1 - CDF
169
+ CDF <- 1 - marginal_X - marginal_Y + CDF
170
+ CDF <- pmax(0 , pmin(1 , CDF ))
171
+ ylabel <- " S(x, y)"
138
172
} else if (type == " hazard" ){
139
- CDF <- sort(variable ) / (1 - CDF )
173
+ data_points <- data.frame (x = sorted_x , y = sorted_y )
174
+ dens_proxy <- pmax(joint_cdf [sorted_indices ], 1e-10 )
175
+ reg_fit <- NNS.reg(data_points , dens_proxy , order = " max" ,
176
+ point.est = if (! is.null(target )) data.frame (x = target [1 ], y = target [2 ]) else NULL ,
177
+ plot = FALSE )
178
+ dens <- pmax(reg_fit $ Fitted $ y.hat , 1e-10 )
179
+ S_xy <- pmax(1e-10 , 1 - marginal_X - marginal_Y + CDF )
180
+ CDF <- dens / S_xy
181
+ CDF <- pmax(0 , CDF )
182
+ ylabel <- " h(x, y)"
140
183
} else if (type == " cumulative hazard" ){
141
- CDF <- - log((1 - CDF ))
184
+ S_xy <- pmax(1e-10 , 1 - marginal_X - marginal_Y + CDF )
185
+ CDF <- - log(S_xy )
186
+ CDF <- pmax(0 , CDF )
187
+ ylabel <- " H(x, y)"
142
188
}
189
+
143
190
if (! is.null(target )){
144
191
P <- (
145
- Co.LPM(degree , variable [, 1 ], variable [, 2 ] , target [1 ], target [2 ]) /
146
- (
147
- Co.LPM(degree , variable [, 1 ], variable [, 2 ] , target [1 ], target [2 ]) +
148
- Co.UPM(degree , variable [, 1 ], variable [, 2 ] , target [1 ], target [2 ]) +
149
- D.LPM(degree ,degree , variable [, 1 ], variable [, 2 ] , target [1 ], target [2 ]) +
150
- D.UPM(degree ,degree , variable [, 1 ], variable [, 2 ] , target [1 ], target [2 ])
151
- )
192
+ Co.LPM(degree , overall_target_1 , overall_target_2 , target [1 ], target [2 ]) /
193
+ (
194
+ Co.LPM(degree , overall_target_1 , overall_target_2 , target [1 ], target [2 ]) +
195
+ Co.UPM(degree , overall_target_1 , overall_target_2 , target [1 ], target [2 ]) +
196
+ D.LPM(degree , degree , overall_target_1 , overall_target_2 , target [1 ], target [2 ]) +
197
+ D.UPM(degree , degree , overall_target_1 , overall_target_2 , target [1 ], target [2 ])
198
+ )
152
199
)
200
+ P_marginal_X <- LPM.ratio(degree , target [1 ], overall_target_1 )
201
+ P_marginal_Y <- LPM.ratio(degree , target [2 ], overall_target_2 )
202
+ if (type == " survival" ){
203
+ P <- 1 - P_marginal_X - P_marginal_Y + P
204
+ P <- max(0 , min(1 , P ))
205
+ } else if (type == " hazard" ){
206
+ P_dens <- pmax(reg_fit $ Point.est , 1e-10 )
207
+ S_target <- max(1e-10 , 1 - P_marginal_X - P_marginal_Y + P )
208
+ P <- P_dens / S_target
209
+ P <- min(P , 1e6 )
210
+ P <- max(0 , P )
211
+ } else if (type == " cumulative hazard" ){
212
+ S_target <- max(1e-10 , 1 - P_marginal_X - P_marginal_Y + P )
213
+ P <- - log(S_target )
214
+ P <- max(0 , P )
215
+ }
153
216
} else {
154
217
P <- NULL
155
218
}
219
+
156
220
if (plot ){
157
221
plot3d(
158
- variable [, 1 ], variable [, 2 ], CDF , col = " steelblue" ,
222
+ variable [sorted_indices , 1 ], variable [sorted_indices , 2 ], CDF , col = " steelblue" ,
159
223
xlab = deparse(substitute(variable [,1 ])), ylab = deparse(substitute(variable [,2 ])),
160
- zlab = " Probability " , box = FALSE , pch = 19
224
+ zlab = ylabel , box = FALSE , pch = 19
161
225
)
162
- if (! is.null(target )){
226
+ if (! is.null(target ) && ! is.na( P ) ){
163
227
points3d(target [1 ], target [2 ], P , col = " green" , pch = 19 )
164
228
points3d(target [1 ], target [2 ], 0 , col = " red" , pch = 15 , cex = 2 )
165
229
lines3d(
166
- x = c(target [1 ], max(variable [,1 ])),
167
- y = c(target [2 ], max(variable [,2 ])),
168
- z = c(P , P ),
169
- col = " red" , lwd = 2 , lty = 3
230
+ x = c(target [1 ], max(variable [,1 ])),
231
+ y = c(target [2 ], max(variable [,2 ])),
232
+ z = c(P , P ),
233
+ col = " red" , lwd = 2 , lty = 3
170
234
)
171
235
lines3d(
172
- x = c(target [1 ], target [1 ]),
173
- y = c(target [2 ], target [2 ]),
174
- z = c(0 , P ),
175
- col = " red" , lwd = 1 , lty = 3
236
+ x = c(target [1 ], target [1 ]),
237
+ y = c(target [2 ], target [2 ]),
238
+ z = c(0 , P ),
239
+ col = " red" , lwd = 1 , lty = 3
176
240
)
177
241
text3d(
178
- max(variable [,1 ]), max(variable [,2 ]), P , texts = paste0(" P = " , round(P ,4 )), pos = 4 , col = " red"
242
+ max(variable [,1 ]), max(variable [,2 ]), P , texts = paste0(" P = " , round(P , 4 )), pos = 4 , col = " red"
179
243
)
180
244
}
181
-
182
245
}
183
246
247
+ return (list (
248
+ " Function" = data.table :: data.table(cbind(
249
+ data.frame (variable [sorted_indices , ], row.names = NULL ),
250
+ CDF = CDF
251
+ )),
252
+ " target.value" = P
253
+ ))
184
254
}
185
-
186
- return (list (" CDF" = data.table :: data.table(cbind((variable ), CDF = CDF )),
187
- " P" = P ))
188
255
}
189
256
190
257
258
+
191
259
# ' NNS moments
192
260
# '
193
261
# ' This function returns the first 4 moments of the distribution.
@@ -227,7 +295,7 @@ NNS.moments <- function(x, population = TRUE){
227
295
kurtosis <- ((n * (n + 1 )) / ((n - 1 )* (n - 2 )* (n - 3 ))) * ((n * kurt_base ) / (variance * (n / (n - 1 )))^ 2 ) - ( (3 * ((n - 1 )^ 2 )) / ((n - 2 )* (n - 3 )))
228
296
variance <- variance * (n / (n - 1 ))
229
297
}
230
-
298
+
231
299
return (list (" mean" = mean ,
232
300
" variance" = variance ,
233
301
" skewness" = skewness ,
0 commit comments