Skip to content

Commit 6da79b9

Browse files
committed
upload codebase
1 parent c7d65bd commit 6da79b9

29 files changed

+2348
-2
lines changed

.gitignore

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
*.DS_Store
2+
*.nc
3+
*.RData
4+
data

README.md

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,6 @@
1-
# FTE
2-
The Fraction of Threshold Exceedance (FTE) metric for verification of spatial structure in ensembles of forecast fields
1+
# Fraction of Threshold Exceedance
2+
3+
This project is licensed under the MIT License - see the [LICENSE.md](LICENSE.md) file for details.
4+
5+
## Suggested citation
6+

beta_params_range.R

+44
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
2+
## Demonstrate dependence on domain size by analyzing range of ratios for a1 = 1,3 and tau fixed at 1
3+
4+
library(tidyverse)
5+
library(reshape2)
6+
7+
## Beta param plot for a1 = 2
8+
cont_fit_tab <- read.table('./data/cont_fit_tab.RData')
9+
10+
df <- cont_fit_tab %>%
11+
filter((tau==1 & s1==1) | (tau==1 & s1==3)) %>%
12+
dplyr::select(-tau) %>%
13+
melt(id.vars=c("s1", "ratio"))
14+
15+
range_labs <- c(
16+
`1` = "a0 = 1",
17+
`3` = "a0 = 3"
18+
)
19+
20+
png("beta_params_range.png", units="in", height=3.3, width=6.4, res=200, pointsize=9)
21+
22+
ggplot(data=df, aes(x=ratio, y=value, color=variable, linetype=variable)) +
23+
facet_grid(~s1, labeller = as_labeller(range_labs)) +
24+
geom_hline(yintercept=1, linetype=3, size=0.3) +
25+
geom_line(size=0.3) +
26+
geom_point(size=0.8) +
27+
scale_colour_manual(values=c(a="darkred", b="skyblue3")) +
28+
scale_linetype_manual(breaks=c("a","b"), values=c(2, 1)) +
29+
scale_y_continuous(breaks = c(0.75, 1.0, 1.25)) +
30+
labs(x="Ratio", y="Parameter") +
31+
theme_bw() +
32+
theme(legend.title = element_blank(),
33+
strip.background = element_blank(),
34+
text = element_text(color="black"),
35+
strip.text= element_text(size=12),
36+
axis.text = element_text(size=9, color="black"),
37+
legend.text = element_text(size=10),
38+
panel.grid.minor = element_blank(),
39+
panel.grid.major = element_blank(),
40+
aspect.ratio = 1/1,
41+
plot.margin = unit(c(0,0,0,0.1), "cm"))
42+
43+
dev.off()
44+

beta_params_tau.R

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
2+
## Demonstrate dependence on tau by analyzing range of taus at low, even, and high ratios (a1 = 2)
3+
4+
library(tidyverse)
5+
library(reshape2)
6+
7+
cont_fit_tab <- read.table('./data/cont_fit_tab.RData')
8+
9+
df <- cont_fit_tab %>%
10+
filter(s1==2, ratio==0.5 | ratio==0.9 | ratio==1.0 | ratio == 1.1 | ratio==1.5) %>%
11+
select(-s1) %>%
12+
melt(id.vars=c("tau", "ratio"))
13+
14+
ratio_labs <- c(
15+
`0.5` = "ratio = 0.5",
16+
`0.9` = "ratio = 0.9",
17+
`1` = "ratio = 1.0",
18+
`1.1` = "ratio = 1.1",
19+
`1.5` = "ratio = 1.5"
20+
)
21+
22+
png("beta_params_tau.png", units="in", height=2.2, width=8.4, res=200, pointsize=10)
23+
24+
ggplot(data=df, aes(x=tau, y=value, color=variable, linetype=variable)) +
25+
facet_grid(~ratio, labeller = as_labeller(ratio_labs)) +
26+
geom_hline(yintercept=1, linetype=3, size=0.2) +
27+
geom_line(size=0.25) +
28+
geom_point(size=0.6) +
29+
scale_colour_manual(values=c(a="darkred", b="skyblue3")) +
30+
scale_linetype_manual(breaks=c("a","b"), values=c(2, 1)) +
31+
scale_y_continuous(breaks = c(0.5, 1.0, 1.5)) +
32+
labs(x="Threshold", y="Parameter") +
33+
theme_bw() +
34+
theme(legend.title = element_blank(),
35+
strip.background = element_blank(),
36+
text = element_text(color="black"),
37+
strip.text= element_text(size=12),
38+
axis.text = element_text(size=9, color="black"),
39+
legend.text = element_text(size=10),
40+
panel.grid.minor = element_blank(),
41+
panel.grid.major = element_blank(),
42+
aspect.ratio = 1/1,
43+
plot.margin = unit(c(0,0,0,0.1), "cm"))
44+
45+
dev.off()
46+

fte_downscaled.R

+113
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
2+
## Produce FTE histograms for downscaled GEFS data
3+
4+
library(lubridate)
5+
library(dplyr)
6+
library(tidyr)
7+
library(reshape2)
8+
library(ggplot2)
9+
library(fitdistrplus)
10+
library(latex2exp)
11+
12+
13+
# Threshold exceedence ranking --------------------------------------------
14+
15+
## import fields_dat dim(lat x lon x time x mem)
16+
load("./data/gefs_downscaled_fields.RData")
17+
18+
## compute the mean threshold exceedence of fields_df at array of thresholds
19+
## and return analysis ranks
20+
exceed_ranks <- function(dat_arr, tau){
21+
# dat_arr: 3d array (lon x lat x member)
22+
# tau: vector of thresholds
23+
ranks <- array(dim = length(tau))
24+
for (i in 1:length(tau)) {
25+
m <- apply(dat_arr, 3, function(field) mean(as.vector(field) > tau[i]))
26+
if(length(unique(m)) != 1) {
27+
ranks[i] <- rank(m, ties.method = "random")[1]
28+
} else {
29+
# exclude exact ties
30+
ranks[i] <- NA
31+
}
32+
}
33+
return(ranks)
34+
}
35+
36+
disagg_rank <- function(r) {
37+
return(runif(1, r-1/24, r+1/24))
38+
}
39+
40+
## iterate over time, compute ranks at different thresholds
41+
set.seed(10)
42+
tau <- c(5,10,20)
43+
ranks_df <- data.frame(t(apply(field_dat, 3, exceed_ranks, tau=tau))) # (day x tau)
44+
45+
46+
# FTE Histograms ----------------------------------------------------------
47+
48+
## stratify days by month and build histograms for each month-threshold pair
49+
names(ranks_df) <- paste('tau', tau, sep='')
50+
51+
m <- c(1, 4, 7, 10) # Jan, Apr, Jul, Oct
52+
dates <- seq.Date(as.Date('2002-01-02'), as.Date('2015-12-30'), by='day')
53+
date_idx <- (month(dates) %in% m)
54+
55+
## fit beta parameters to density fte hists
56+
set.seed(20)
57+
down_fit_tab <- ranks_df %>%
58+
mutate(month = month(dates[date_idx])) %>%
59+
melt(id.vars='month', variable.name='tau', value.name='rank') %>%
60+
mutate(rank = sapply((rank-0.5)/12, disagg_rank)) %>%
61+
group_by(tau, month) %>%
62+
drop_na() %>%
63+
summarise(params=paste(fitdist(rank,'beta')$estimate, collapse=" ")) %>%
64+
separate(params, c('a', 'b'), sep=" ") %>%
65+
mutate(a=round(as.numeric(a), 3), b=round(as.numeric(b),3)) %>%
66+
unite(params, a:b, sep = ", ")
67+
68+
## build dataframe with months and tau as factors to facet over
69+
month_labs <- rep("", 12)
70+
month_labs[c(1,4,7,10)] <- c("January", "April", "July", "October")
71+
tau_labs <- c(tau5=TeX(paste("$\\tau = $", "5 mm")),
72+
tau10=TeX(paste("$\\tau = $", "10 mm")),
73+
tau20=TeX(paste("$\\tau = $", "20 mm")))
74+
75+
ranks_df <- ranks_df %>%
76+
mutate(month = month(dates[date_idx])) %>%
77+
melt(id.vars='month', variable.name='tau', value.name='rank') %>%
78+
mutate(month = as.factor(month_labs[month])) %>%
79+
mutate(rank = (rank-0.5)/12)
80+
81+
ranks_df$month <- factor(ranks_df$month,
82+
levels = c("January", "April", "July", "October"),
83+
labels = c("bold(January)", "bold(April)", "bold(July)", "bold(October)"))
84+
levels(ranks_df$tau) <- tau_labs
85+
86+
87+
png("fte_downscaled.png", units="in", height=6.2, width=8, res=200, pointsize=10)
88+
89+
ranks_df %>%
90+
ggplot(aes(x=rank)) +
91+
geom_hline(yintercept=1, linetype=3, size=0.3, color="grey") +
92+
geom_histogram(aes(y=..density..), bins=12, fill="black", color="white") +
93+
scale_y_continuous(breaks=seq(0,2)) +
94+
facet_grid(rows=vars(tau), cols=vars(month), labeller=label_parsed) +
95+
annotate("text", x=0.48, y=2.5, size=4, label=down_fit_tab$params) +
96+
labs(y="", x="") +
97+
theme_bw() +
98+
theme(legend.title = element_blank(),
99+
strip.background = element_blank(),
100+
text = element_text(color="black"),
101+
strip.text = element_text(size=12),
102+
axis.text = element_text(size=9, color="black"),
103+
axis.text.x = element_blank(),
104+
axis.ticks.x = element_blank(),
105+
panel.grid.minor = element_blank(),
106+
panel.grid.major.x = element_blank(),
107+
panel.grid.major = element_blank(),
108+
aspect.ratio = 1/1,
109+
plot.margin = unit(c(0,0,0,0), "cm"))
110+
111+
dev.off()
112+
113+

gefs_downscaled.R

+85
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
2+
## Produce FTE histograms for downscaled GEFS data
3+
4+
library(ncdf4)
5+
library(lubridate)
6+
library(dplyr)
7+
library(reshape2)
8+
library(ggplot2)
9+
10+
11+
# Analyses ----------------------------------------------------------------
12+
13+
## NOTE: dates range from 2002010200 to 2015123000
14+
15+
## grab analyses and time data
16+
ncin <- nc_open('./data/gefs/original/refcstv2_precip_ccpav3_subset_066_to_072.nc')
17+
apcp_anal <- ncvar_get(ncin, 'apcp_anal')
18+
19+
## subset data to same region
20+
lons_anal <- ncvar_get(ncin, 'lons_anal')
21+
lats_anal <- ncvar_get(ncin, 'lats_anal')
22+
lon_idx <- which(lons_anal[,1] >= -91 & lons_anal[,1] <= -81)
23+
lat_idx <- which(lats_anal[1,] >= 30 & lats_anal[1,] <= 40)
24+
apcp_anal <- apcp_anal[lon_idx, lat_idx, ]
25+
rm(lons_anal, lats_anal, lon_idx, lat_idx)
26+
27+
## create an array to store all each analysis-ensemble pair dim(lon, lat, time, obs/ens_n)
28+
field_dat <- array(dim = c(dim(apcp_anal), 12))
29+
field_dat[,,,1] <- apcp_anal
30+
31+
nc_close(ncin)
32+
rm(ncin, apcp_anal)
33+
34+
35+
# Ensembles (downscaled) --------------------------------------------------
36+
37+
## NOTES: Each array has space for 31 days, but
38+
# - January 2002 starts from 01/02 (start timeframe one day earlier, and trim)
39+
# - April always has 30 days
40+
41+
m <- c(1, 4, 7, 10) # Jan, Apr, Jul, Oct
42+
dates <- seq.Date(as.Date('2002-01-02'), as.Date('2015-12-30'), by='day')
43+
44+
files <- list.files(path='./data', pattern='*.nc', full.names=TRUE)
45+
lapply(seq_along(files), function(i) {
46+
ncin <- nc_open(files[i])
47+
fcsts <- ncvar_get(ncin, 'downscaled')
48+
49+
lons_fcst <- ncvar_get(ncin, 'Longitude')
50+
lats_fcst <- ncvar_get(ncin, 'Latitude')
51+
lon_idx <- which(lons_fcst[,1] >= -91 & lons_fcst[,1] <= -81)
52+
lat_idx <- which(lats_fcst[1,] >= 30 & lats_fcst[1,] <= 40)
53+
54+
# april
55+
if(m[i]==4){
56+
fcsts <- fcsts[,,,1:30,]
57+
}
58+
59+
d <- dim(fcsts)
60+
dim(fcsts) <- c(d[1], d[2], d[3], d[4]*d[5])
61+
62+
# january
63+
if(m[i]==1){
64+
# nil <- seq(31, dim(fcsts)[4], by=31)
65+
fcsts <- fcsts[,,, -31]
66+
}
67+
68+
date_idx <- which(month(dates)==m[i])
69+
for(j in 1:11) {
70+
jj <- j+1
71+
# print(dim(field_dat[,, date_idx, jj]))
72+
# print(dim(fcsts[,,j,]))
73+
field_dat[,, date_idx, jj] <<- fcsts[lon_idx, lat_idx, j,]
74+
}
75+
76+
nc_close(ncin)
77+
rm(ncin)
78+
})
79+
rm(files)
80+
81+
date_idx <- (month(dates) %in% m)
82+
field_dat <- field_dat[,, date_idx,]
83+
save(field_dat, file="gefs_downscaled_fields.RData")
84+
85+

gefs_upscaled.R

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
2+
## Produce FTE histograms for upscaled GEFS reanalysis data
3+
4+
library(ncdf4)
5+
library(lubridate)
6+
library(dplyr)
7+
library(reshape2)
8+
library(ggplot2)
9+
10+
11+
# Fcst + Analysis ---------------------------------------------------------
12+
13+
## NOTE: dates range from 2002010200 to 2015123000
14+
15+
## grab field and time data
16+
ncin <- nc_open('./data/refcstv2_precip_ccpav3_subset_066_to_072.nc')
17+
fcst_ens <- ncvar_get(ncin, 'apcp_fcst_ens')
18+
anal_upsc <- ncvar_get(ncin, 'apcp_anal_upsc')
19+
init_anal <- ncvar_get(ncin, 'yyyymmddhh_init')
20+
21+
## get indices of data in upscaled region
22+
lons_fcst <- ncvar_get(ncin, 'lons_fcst')
23+
lats_fcst <- ncvar_get(ncin, 'lats_fcst')
24+
lon_idx <- which(lons_fcst[,1] >= -91 & lons_fcst[,1] <= -81)
25+
lat_idx <- which(lats_fcst[1,] >= 30 & lats_fcst[1,] <= 40)
26+
27+
## create an array to store each analysis-ensemble pair dim(lon, lat, time, obs/ens_n)
28+
field_dat <- array(dim = c(length(lon_idx), length(lat_idx), length(init_anal), 12))
29+
30+
## populate and trim to same regions
31+
field_dat[,,,1] <- anal_upsc[lon_idx, lat_idx, ]
32+
field_dat[,,,2:12] <- fcst_ens[lon_idx, lat_idx, , ]
33+
34+
nc_close(ncin)
35+
rm(ncin, anal_upsc, fcst_ens, init_anal, lats_fcst, lons_fcst, lat_idx, lon_idx)
36+

0 commit comments

Comments
 (0)