@@ -4,117 +4,275 @@ import { createWriteStream } from 'fs'
44import { join } from 'path'
55import { findStation } from 'neaps'
66import db from '@neaps/tide-database'
7+ import createFetch from 'make-fetch-happen'
8+
9+ process . env . TZ = 'UTC'
710
811const __dirname = new URL ( '.' , import . meta. url ) . pathname
12+ const fetch = createFetch . defaults ( {
13+ cachePath : join ( __dirname , '.cache' ) ,
14+ cache : 'force-cache' ,
15+ retry : 10
16+ } )
917
1018const stations = db
11- . filter (
12- ( station ) =>
13- // TODO: Update this to test subordinate stations too.
14- // Need to switch from `getWaterLevelAtTime` to `getExtremesPrediction` and compare time/level.
15- station . type === 'reference' &&
16- station . source . source_url . includes ( 'noaa.gov' )
17- )
19+ . filter ( ( station ) => station . source . source_url . includes ( 'noaa.gov' ) )
1820 . map ( ( station ) => station . source . id )
19- . filter ( ( ) => Math . random ( ) < 0.1 )
2021
2122// Create a directory for test cache
22- await mkdir ( './. test-cache' , { recursive : true } )
23+ await mkdir ( '.test-cache' , { recursive : true } )
2324
2425interface Stat {
2526 station : string
26- count : number
27- mae : number
28- rmse : number
29- bias : number
27+ type : string
28+ start_utc : string
29+ end_utc : string
30+ events_noaa : number
31+ events_model : number
32+ matched : number
33+ missed : number
34+ extra : number
35+ med_abs_dt_min : number
36+ p95_abs_dt_min : number
37+ mean_dt_min : number
38+ mae_dh_m : number
39+ rmse_dh_m : number
40+ bias_dh_m : number
41+ p95_abs_dh_m : number
3042}
3143
3244const stats : Stat [ ] = [ ]
45+ const MATCH_WINDOW = 3 * 60 * 60 * 1000 // 3 hours
3346
3447console . log ( `Testing tide predictions against ${ stations . length } NOAA stations` )
3548
49+ type Extreme = {
50+ time : number
51+ level : number
52+ type : 'H' | 'L'
53+ }
54+
3655for ( const id of stations ) {
3756 const station = findStation ( id )
57+ const datum = station . defaultDatum ?? 'MTL'
3858
39- // Catch error and return no levels if failing to fetch data. There is a test later to ensure enough stations are tested.
40- const { levels } = await fetchNOAAdata ( station . source . id ) . catch ( ( ) => ( {
41- levels : { }
59+ const noaaEvents : Extreme [ ] | undefined = (
60+ await fetchNOAAdata ( id , datum )
61+ ) . predictions ?. map ( ( e : any ) => ( {
62+ time : new Date ( e . t + ' GMT' ) . getTime ( ) ,
63+ level : parseFloat ( e . v ) ,
64+ type : e . type
4265 } ) )
4366
4467 // No predictions available
45- if ( ! levels . predictions ) continue
68+ if ( ! noaaEvents ) continue
4669
47- let count = 0
48- let sumError = 0
49- let sumAbsError = 0
50- let sumSqError = 0
70+ // Get start/end dates to match NOAA data
71+ const start = new Date ( noaaEvents [ 0 ] . time - MATCH_WINDOW )
72+ const end = new Date ( noaaEvents [ noaaEvents . length - 1 ] . time + MATCH_WINDOW )
5173
52- levels . predictions . forEach ( ( prediction : { t : string ; v : string } ) => {
53- const expected = parseFloat ( prediction . v )
54-
55- const { level : actual } = station . getWaterLevelAtTime ( {
56- time : new Date ( prediction . t ) ,
57- datum : false
74+ const neapsEvents : Extreme [ ] = station
75+ . getExtremesPrediction ( {
76+ start,
77+ end
5878 } )
79+ . extremes . map ( ( e ) => ( {
80+ time : e . time . getTime ( ) ,
81+ level : e . level ,
82+ type : e . high ? 'H' : 'L'
83+ } ) )
5984
60- const error = actual - expected
85+ let matched = 0
86+ let missed = 0
87+ let extra = 0
6188
62- count += 1
63- sumError += error
64- sumAbsError += Math . abs ( error )
65- sumSqError += error * error
66- } )
89+ const dtMinutes : number [ ] = [ ]
90+ const dhMeters : number [ ] = [ ]
91+
92+ const noaa = Object . groupBy ( noaaEvents , ( e ) => e . type )
93+ const neaps = Object . groupBy ( neapsEvents , ( e ) => e . type )
94+
95+ const matchAndCollect = ( noaaList : Extreme [ ] , neapsList : Extreme [ ] ) => {
96+ let j = 0
97+
98+ for ( let i = 0 ; i < noaaList . length ; i ++ ) {
99+ const noaa = noaaList [ i ]
100+
101+ // Count model events that are too early to ever match this NOAA event as "extra"
102+ while (
103+ j < neapsList . length &&
104+ neapsList [ j ] . time < noaa . time - MATCH_WINDOW
105+ ) {
106+ extra += 1
107+ j += 1
108+ }
109+
110+ if ( j >= neapsList . length ) {
111+ missed += 1
112+ continue
113+ }
114+
115+ // Consider the closest of current or next model event
116+ let bestIndex = j
117+ let bestAbsDt = Math . abs ( neapsList [ j ] . time - noaa . time )
118+
119+ if ( j + 1 < neapsList . length ) {
120+ const nextAbsDt = Math . abs ( neapsList [ j + 1 ] . time - noaa . time )
121+ if ( nextAbsDt < bestAbsDt ) {
122+ bestIndex = j + 1
123+ bestAbsDt = nextAbsDt
124+ }
125+ }
126+
127+ // If closest is outside the matching window, treat NOAA event as missed
128+ if ( bestAbsDt > MATCH_WINDOW ) {
129+ missed += 1
130+ continue
131+ }
132+
133+ const match = neapsList [ bestIndex ]
134+
135+ // Advance pointer past the matched event (one-to-one matching)
136+ j = bestIndex + 1
137+
138+ matched += 1
139+
140+ const dtMin = ( match . time - noaa . time ) / 60000
141+ const dh = match . level - noaa . level
67142
68- const mae = sumAbsError / count
69- const rmse = Math . sqrt ( sumSqError / count )
70- const bias = sumError / count
143+ dtMinutes . push ( dtMin )
144+ dhMeters . push ( dh )
145+ }
71146
72- stats . push ( { station : station . source . id , count, mae, rmse, bias } )
147+ // Any remaining model events are "extra"
148+ extra += Math . max ( 0 , neapsList . length - j )
149+ }
150+
151+ matchAndCollect ( noaa . H ?? [ ] , neaps . H ?? [ ] )
152+ matchAndCollect ( noaa . L ?? [ ] , neaps . L ?? [ ] )
153+
154+ const events_noaa = noaa . H . length + noaa . L . length
155+ const events_model = neaps . H . length + neaps . L . length
156+
157+ // Timing metrics (minutes)
158+ const absDt = sort ( dtMinutes . map ( ( v ) => Math . abs ( v ) ) )
159+ const med_abs_dt_min = ntile ( absDt , 0.5 )
160+ const p95_abs_dt_min = ntile ( absDt , 0.95 )
161+ const mean_dt_min = mean ( dtMinutes )
162+
163+ // Height metrics (meters) at matched events
164+ const absDh = dhMeters . map ( ( v ) => Math . abs ( v ) )
165+ const mae_dh_m = mean ( absDh )
166+ const rmse_dh_m = Math . sqrt (
167+ dhMeters . reduce ( ( a , b ) => a + b * b , 0 ) / dhMeters . length
168+ )
169+ const bias_dh_m = mean ( dhMeters )
170+ const p95_abs_dh_m = ntile ( sort ( absDh ) , 0.95 )
171+
172+ stats . push ( {
173+ station : station . source . id ,
174+ type : station . type ,
175+ start_utc : start . toISOString ( ) ,
176+ end_utc : end . toISOString ( ) ,
177+ events_noaa,
178+ events_model,
179+ matched,
180+ missed,
181+ extra,
182+ med_abs_dt_min,
183+ p95_abs_dt_min,
184+ mean_dt_min,
185+ mae_dh_m,
186+ rmse_dh_m,
187+ bias_dh_m,
188+ p95_abs_dh_m
189+ } )
73190 process . stdout . write ( '.' )
74191}
75192
76193// Write stats to file for later analysis
77194const summary = createWriteStream ( join ( __dirname , 'noaa.csv' ) )
78- summary . write ( 'station,count,mae_m,rmse_m,bias_m\n' )
79- stats . forEach ( ( { station, count, mae, rmse, bias } ) => {
195+ summary . write (
196+ 'station,type,start_utc,end_utc,events_noaa,events_model,matched,missed,extra,med_abs_dt_min,p95_abs_dt_min,mean_dt_min,mae_dh_m,rmse_dh_m,bias_dh_m,p95_abs_dh_m\n'
197+ )
198+
199+ stats . forEach ( ( s ) => {
80200 summary . write (
81- [ station , count , mae . toFixed ( 4 ) , rmse . toFixed ( 4 ) , bias . toFixed ( 4 ) ] . join (
82- ','
83- ) + '\n'
201+ [
202+ s . station ,
203+ s . type ,
204+ s . start_utc ,
205+ s . end_utc ,
206+ s . events_noaa ,
207+ s . events_model ,
208+ s . matched ,
209+ s . missed ,
210+ s . extra ,
211+ s . med_abs_dt_min . toFixed ( 2 ) ,
212+ s . p95_abs_dt_min . toFixed ( 2 ) ,
213+ s . mean_dt_min . toFixed ( 2 ) ,
214+ s . mae_dh_m . toFixed ( 4 ) ,
215+ s . rmse_dh_m . toFixed ( 4 ) ,
216+ s . bias_dh_m . toFixed ( 4 ) ,
217+ s . p95_abs_dh_m . toFixed ( 4 )
218+ ] . join ( ',' ) + '\n'
84219 )
85220} )
86221summary . end ( )
87222
88223// Baseline expectations based on current performance. The goal should be to move these toward zero over time.
89- const maeValues = stats . map ( ( s ) => s . mae ) . sort ( ( a , b ) => a - b )
90- const p50MAE = maeValues [ Math . floor ( stats . length / 2 ) ]
91- const p90MAE = maeValues [ Math . floor ( stats . length * 0.9 ) ]
92- const p95MAE = maeValues [ Math . floor ( stats . length * 0.95 ) ]
224+ const maeValues = sort ( stats . map ( ( s ) => s . mae_dh_m ) )
225+ const p50MAE = ntile ( maeValues , 0.5 )
226+ const p90MAE = ntile ( maeValues , 0.9 )
227+ const p95MAE = ntile ( maeValues , 0.95 )
93228
94- console . log ( '\n' , { p50MAE, p90MAE, p95MAE } )
229+ const medAbsDtValues = sort ( stats . map ( ( s ) => s . med_abs_dt_min ) )
230+ const p95MedAbsDt = ntile ( medAbsDtValues , 0.95 )
231+
232+ console . log ( '\n' , { count : stats . length , p50MAE, p90MAE, p95MAE, p95MedAbsDt } )
95233
96234expect ( p50MAE , 'MAE p50' ) . toBeLessThan ( 0.03 ) // 3 cm
97235expect ( p90MAE , 'MAE p90' ) . toBeLessThan ( 0.06 ) // 6 cm
98236expect ( p95MAE , 'MAE p95' ) . toBeLessThan ( 0.08 ) // 8 cm
237+ expect ( p95MedAbsDt , 'Median |dt| p95 across stations' ) . toBeLessThan ( 21 )
99238
100- async function makeRequest ( url : string ) {
101- const res = await fetch ( url )
102- if ( ! res . ok ) throw new Error ( `Request failed with status ${ res . status } ` )
103- return res . json ( )
104- }
105-
106- async function fetchNOAAdata ( station : string ) {
107- const filePath = `./.test-cache/${ station } .json`
239+ async function fetchNOAAdata ( station : string , datum = 'MLLW' ) {
240+ const filePath = `./.test-cache/${ station } -${ datum } .json`
108241
109242 try {
110243 return await readFile ( filePath , 'utf-8' ) . then ( ( data ) => JSON . parse ( data ) )
111- } catch {
112- const levels = await makeRequest (
113- ` https://api.tidesandcurrents.noaa.gov/api/prod/datagetter?date=recent&station= ${ station } &product=predictions&datum=MTL&time_zone=gmt&units=metric&format=json`
244+ } catch ( e ) {
245+ const url = new URL (
246+ ' https://api.tidesandcurrents.noaa.gov/api/prod/datagetter'
114247 )
248+ url . search = new URLSearchParams ( {
249+ datum,
250+ station,
251+ date : 'recent' ,
252+ // TOOD: switch to range to get more data points
253+ // range: (24 * 7).toString(), // 7 days
254+ product : 'predictions' ,
255+ time_zone : 'gmt' ,
256+ units : 'metric' ,
257+ format : 'json' ,
258+ interval : 'hilo'
259+ } ) . toString ( )
115260
116- const data = { levels }
261+ const res = await fetch ( url . toString ( ) )
262+ const data = await res . json ( )
117263 await writeFile ( filePath , JSON . stringify ( data ) )
118264 return data
119265 }
120266}
267+
268+ function sort ( data : number [ ] ) {
269+ return data . sort ( ( a , b ) => a - b )
270+ }
271+
272+ function ntile ( data : number [ ] , percent : number ) {
273+ return data [ Math . floor ( data . length * percent ) ] ?? NaN
274+ }
275+
276+ function mean ( data : number [ ] ) {
277+ return data . reduce ( ( a , b ) => a + b , 0 ) / data . length
278+ }
0 commit comments