Skip to content

Commit 027c119

Browse files
committed
Rework noaa benchmarks to include subordinate stations
1 parent f5168ba commit 027c119

2 files changed

Lines changed: 219 additions & 59 deletions

File tree

benchmarks/noaa.ts

Lines changed: 217 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -4,117 +4,275 @@ import { createWriteStream } from 'fs'
44
import { join } from 'path'
55
import { findStation } from 'neaps'
66
import db from '@neaps/tide-database'
7+
import createFetch from 'make-fetch-happen'
8+
9+
process.env.TZ = 'UTC'
710

811
const __dirname = new URL('.', import.meta.url).pathname
12+
const fetch = createFetch.defaults({
13+
cachePath: join(__dirname, '.cache'),
14+
cache: 'force-cache',
15+
retry: 10
16+
})
917

1018
const stations = db
11-
.filter(
12-
(station) =>
13-
// TODO: Update this to test subordinate stations too.
14-
// Need to switch from `getWaterLevelAtTime` to `getExtremesPrediction` and compare time/level.
15-
station.type === 'reference' &&
16-
station.source.source_url.includes('noaa.gov')
17-
)
19+
.filter((station) => station.source.source_url.includes('noaa.gov'))
1820
.map((station) => station.source.id)
19-
.filter(() => Math.random() < 0.1)
2021

2122
// Create a directory for test cache
22-
await mkdir('./.test-cache', { recursive: true })
23+
await mkdir('.test-cache', { recursive: true })
2324

2425
interface Stat {
2526
station: string
26-
count: number
27-
mae: number
28-
rmse: number
29-
bias: number
27+
type: string
28+
start_utc: string
29+
end_utc: string
30+
events_noaa: number
31+
events_model: number
32+
matched: number
33+
missed: number
34+
extra: number
35+
med_abs_dt_min: number
36+
p95_abs_dt_min: number
37+
mean_dt_min: number
38+
mae_dh_m: number
39+
rmse_dh_m: number
40+
bias_dh_m: number
41+
p95_abs_dh_m: number
3042
}
3143

3244
const stats: Stat[] = []
45+
const MATCH_WINDOW = 3 * 60 * 60 * 1000 // 3 hours
3346

3447
console.log(`Testing tide predictions against ${stations.length} NOAA stations`)
3548

49+
type Extreme = {
50+
time: number
51+
level: number
52+
type: 'H' | 'L'
53+
}
54+
3655
for (const id of stations) {
3756
const station = findStation(id)
57+
const datum = station.defaultDatum ?? 'MTL'
3858

39-
// Catch error and return no levels if failing to fetch data. There is a test later to ensure enough stations are tested.
40-
const { levels } = await fetchNOAAdata(station.source.id).catch(() => ({
41-
levels: {}
59+
const noaaEvents: Extreme[] | undefined = (
60+
await fetchNOAAdata(id, datum)
61+
).predictions?.map((e: any) => ({
62+
time: new Date(e.t + ' GMT').getTime(),
63+
level: parseFloat(e.v),
64+
type: e.type
4265
}))
4366

4467
// No predictions available
45-
if (!levels.predictions) continue
68+
if (!noaaEvents) continue
4669

47-
let count = 0
48-
let sumError = 0
49-
let sumAbsError = 0
50-
let sumSqError = 0
70+
// Get start/end dates to match NOAA data
71+
const start = new Date(noaaEvents[0].time - MATCH_WINDOW)
72+
const end = new Date(noaaEvents[noaaEvents.length - 1].time + MATCH_WINDOW)
5173

52-
levels.predictions.forEach((prediction: { t: string; v: string }) => {
53-
const expected = parseFloat(prediction.v)
54-
55-
const { level: actual } = station.getWaterLevelAtTime({
56-
time: new Date(prediction.t),
57-
datum: false
74+
const neapsEvents: Extreme[] = station
75+
.getExtremesPrediction({
76+
start,
77+
end
5878
})
79+
.extremes.map((e) => ({
80+
time: e.time.getTime(),
81+
level: e.level,
82+
type: e.high ? 'H' : 'L'
83+
}))
5984

60-
const error = actual - expected
85+
let matched = 0
86+
let missed = 0
87+
let extra = 0
6188

62-
count += 1
63-
sumError += error
64-
sumAbsError += Math.abs(error)
65-
sumSqError += error * error
66-
})
89+
const dtMinutes: number[] = []
90+
const dhMeters: number[] = []
91+
92+
const noaa = Object.groupBy(noaaEvents, (e) => e.type)
93+
const neaps = Object.groupBy(neapsEvents, (e) => e.type)
94+
95+
const matchAndCollect = (noaaList: Extreme[], neapsList: Extreme[]) => {
96+
let j = 0
97+
98+
for (let i = 0; i < noaaList.length; i++) {
99+
const noaa = noaaList[i]
100+
101+
// Count model events that are too early to ever match this NOAA event as "extra"
102+
while (
103+
j < neapsList.length &&
104+
neapsList[j].time < noaa.time - MATCH_WINDOW
105+
) {
106+
extra += 1
107+
j += 1
108+
}
109+
110+
if (j >= neapsList.length) {
111+
missed += 1
112+
continue
113+
}
114+
115+
// Consider the closest of current or next model event
116+
let bestIndex = j
117+
let bestAbsDt = Math.abs(neapsList[j].time - noaa.time)
118+
119+
if (j + 1 < neapsList.length) {
120+
const nextAbsDt = Math.abs(neapsList[j + 1].time - noaa.time)
121+
if (nextAbsDt < bestAbsDt) {
122+
bestIndex = j + 1
123+
bestAbsDt = nextAbsDt
124+
}
125+
}
126+
127+
// If closest is outside the matching window, treat NOAA event as missed
128+
if (bestAbsDt > MATCH_WINDOW) {
129+
missed += 1
130+
continue
131+
}
132+
133+
const match = neapsList[bestIndex]
134+
135+
// Advance pointer past the matched event (one-to-one matching)
136+
j = bestIndex + 1
137+
138+
matched += 1
139+
140+
const dtMin = (match.time - noaa.time) / 60000
141+
const dh = match.level - noaa.level
67142

68-
const mae = sumAbsError / count
69-
const rmse = Math.sqrt(sumSqError / count)
70-
const bias = sumError / count
143+
dtMinutes.push(dtMin)
144+
dhMeters.push(dh)
145+
}
71146

72-
stats.push({ station: station.source.id, count, mae, rmse, bias })
147+
// Any remaining model events are "extra"
148+
extra += Math.max(0, neapsList.length - j)
149+
}
150+
151+
matchAndCollect(noaa.H ?? [], neaps.H ?? [])
152+
matchAndCollect(noaa.L ?? [], neaps.L ?? [])
153+
154+
const events_noaa = noaa.H.length + noaa.L.length
155+
const events_model = neaps.H.length + neaps.L.length
156+
157+
// Timing metrics (minutes)
158+
const absDt = sort(dtMinutes.map((v) => Math.abs(v)))
159+
const med_abs_dt_min = ntile(absDt, 0.5)
160+
const p95_abs_dt_min = ntile(absDt, 0.95)
161+
const mean_dt_min = mean(dtMinutes)
162+
163+
// Height metrics (meters) at matched events
164+
const absDh = dhMeters.map((v) => Math.abs(v))
165+
const mae_dh_m = mean(absDh)
166+
const rmse_dh_m = Math.sqrt(
167+
dhMeters.reduce((a, b) => a + b * b, 0) / dhMeters.length
168+
)
169+
const bias_dh_m = mean(dhMeters)
170+
const p95_abs_dh_m = ntile(sort(absDh), 0.95)
171+
172+
stats.push({
173+
station: station.source.id,
174+
type: station.type,
175+
start_utc: start.toISOString(),
176+
end_utc: end.toISOString(),
177+
events_noaa,
178+
events_model,
179+
matched,
180+
missed,
181+
extra,
182+
med_abs_dt_min,
183+
p95_abs_dt_min,
184+
mean_dt_min,
185+
mae_dh_m,
186+
rmse_dh_m,
187+
bias_dh_m,
188+
p95_abs_dh_m
189+
})
73190
process.stdout.write('.')
74191
}
75192

76193
// Write stats to file for later analysis
77194
const summary = createWriteStream(join(__dirname, 'noaa.csv'))
78-
summary.write('station,count,mae_m,rmse_m,bias_m\n')
79-
stats.forEach(({ station, count, mae, rmse, bias }) => {
195+
summary.write(
196+
'station,type,start_utc,end_utc,events_noaa,events_model,matched,missed,extra,med_abs_dt_min,p95_abs_dt_min,mean_dt_min,mae_dh_m,rmse_dh_m,bias_dh_m,p95_abs_dh_m\n'
197+
)
198+
199+
stats.forEach((s) => {
80200
summary.write(
81-
[station, count, mae.toFixed(4), rmse.toFixed(4), bias.toFixed(4)].join(
82-
','
83-
) + '\n'
201+
[
202+
s.station,
203+
s.type,
204+
s.start_utc,
205+
s.end_utc,
206+
s.events_noaa,
207+
s.events_model,
208+
s.matched,
209+
s.missed,
210+
s.extra,
211+
s.med_abs_dt_min.toFixed(2),
212+
s.p95_abs_dt_min.toFixed(2),
213+
s.mean_dt_min.toFixed(2),
214+
s.mae_dh_m.toFixed(4),
215+
s.rmse_dh_m.toFixed(4),
216+
s.bias_dh_m.toFixed(4),
217+
s.p95_abs_dh_m.toFixed(4)
218+
].join(',') + '\n'
84219
)
85220
})
86221
summary.end()
87222

88223
// Baseline expectations based on current performance. The goal should be to move these toward zero over time.
89-
const maeValues = stats.map((s) => s.mae).sort((a, b) => a - b)
90-
const p50MAE = maeValues[Math.floor(stats.length / 2)]
91-
const p90MAE = maeValues[Math.floor(stats.length * 0.9)]
92-
const p95MAE = maeValues[Math.floor(stats.length * 0.95)]
224+
const maeValues = sort(stats.map((s) => s.mae_dh_m))
225+
const p50MAE = ntile(maeValues, 0.5)
226+
const p90MAE = ntile(maeValues, 0.9)
227+
const p95MAE = ntile(maeValues, 0.95)
93228

94-
console.log('\n', { p50MAE, p90MAE, p95MAE })
229+
const medAbsDtValues = sort(stats.map((s) => s.med_abs_dt_min))
230+
const p95MedAbsDt = ntile(medAbsDtValues, 0.95)
231+
232+
console.log('\n', { count: stats.length, p50MAE, p90MAE, p95MAE, p95MedAbsDt })
95233

96234
expect(p50MAE, 'MAE p50').toBeLessThan(0.03) // 3 cm
97235
expect(p90MAE, 'MAE p90').toBeLessThan(0.06) // 6 cm
98236
expect(p95MAE, 'MAE p95').toBeLessThan(0.08) // 8 cm
237+
expect(p95MedAbsDt, 'Median |dt| p95 across stations').toBeLessThan(21)
99238

100-
async function makeRequest(url: string) {
101-
const res = await fetch(url)
102-
if (!res.ok) throw new Error(`Request failed with status ${res.status}`)
103-
return res.json()
104-
}
105-
106-
async function fetchNOAAdata(station: string) {
107-
const filePath = `./.test-cache/${station}.json`
239+
async function fetchNOAAdata(station: string, datum = 'MLLW') {
240+
const filePath = `./.test-cache/${station}-${datum}.json`
108241

109242
try {
110243
return await readFile(filePath, 'utf-8').then((data) => JSON.parse(data))
111-
} catch {
112-
const levels = await makeRequest(
113-
`https://api.tidesandcurrents.noaa.gov/api/prod/datagetter?date=recent&station=${station}&product=predictions&datum=MTL&time_zone=gmt&units=metric&format=json`
244+
} catch (e) {
245+
const url = new URL(
246+
'https://api.tidesandcurrents.noaa.gov/api/prod/datagetter'
114247
)
248+
url.search = new URLSearchParams({
249+
datum,
250+
station,
251+
date: 'recent',
252+
// TOOD: switch to range to get more data points
253+
// range: (24 * 7).toString(), // 7 days
254+
product: 'predictions',
255+
time_zone: 'gmt',
256+
units: 'metric',
257+
format: 'json',
258+
interval: 'hilo'
259+
}).toString()
115260

116-
const data = { levels }
261+
const res = await fetch(url.toString())
262+
const data = await res.json()
117263
await writeFile(filePath, JSON.stringify(data))
118264
return data
119265
}
120266
}
267+
268+
function sort(data: number[]) {
269+
return data.sort((a, b) => a - b)
270+
}
271+
272+
function ntile(data: number[], percent: number) {
273+
return data[Math.floor(data.length * percent)] ?? NaN
274+
}
275+
276+
function mean(data: number[]) {
277+
return data.reduce((a, b) => a + b, 0) / data.length
278+
}

package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,12 @@
1111
},
1212
"devDependencies": {
1313
"@eslint/js": "^9.39.2",
14+
"@types/make-fetch-happen": "^10.0.4",
1415
"@types/node": "^25.0.2",
1516
"@vitest/coverage-v8": "^4.0.15",
1617
"eslint": "^9.39.2",
1718
"eslint-config-prettier": "^10.1.8",
19+
"make-fetch-happen": "^15.0.3",
1820
"npm-run-all": "^4.1.5",
1921
"prettier": "^3.7.4",
2022
"tsdown": "^0.18.1",

0 commit comments

Comments
 (0)