Skip to content

Commit 4cd3cec

Browse files
committed
matching: normalize route_short_name by parsing FahrtBezeichner [todo]
1 parent 3c9bc3a commit 4cd3cec

File tree

5 files changed

+74
-20
lines changed

5 files changed

+74
-20
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import last from 'lodash/last.js'
2+
3+
// Normalizes a string identifying a source of realtime data, extracted from a {Ist,Soll}Fahrt.FahrtID.FahrtBezeichner, so that it can be used in metric labels, logs, etc.
4+
// Note: The results are used for two purposes:
5+
// - In the matching process, it is used to determine the route_short_name of a VDV Fahrt.
6+
// - As a "data source" label in some metrics.
7+
// The normalization prevents too many metric label combinations, which would cause many Prometheus time series to be created. (https://prometheus.io/docs/practices/naming/#labels)
8+
/**
 * Derives a short, normalized data source identifier from a VDV FahrtBezeichner.
 *
 * The data source is the text after the last `#`, e.g. `ODEG` for `75861#DLr-D#ODEG`.
 *
 * @param {*} fahrtBezeichner - a {Ist,Soll}Fahrt.FahrtID.FahrtBezeichner value; nullish and non-string values are tolerated
 * @returns {string} the trimmed data source (at most 4 characters), `'NJL'`, `'unknown'` or `'-too-long-'`
 */
const extractDataSourceFromFahrtBezeichner = (fahrtBezeichner) => {
	// Coerce to a string, so that an unexpected non-string value (e.g. a number) yields 'unknown' instead of a TypeError.
	// Note: There might be FahrtBezeichner values like `75861#DLr-D#ODEG`.
	const parts = String(fahrtBezeichner ?? '').split('#')

	// Without any `#` separator, there is no data source segment at all.
	const src = parts.length > 1
		? parts[parts.length - 1].trim()
		: ''

	if (src === '' || src === '!ADD!') {
		return 'unknown'
	}
	if (src.toLowerCase() === 'nahverkehrsgesellschaft jerichower land') {
		return 'NJL'
	}
	if (src.length > 4) {
		// We assume this case to be rather rare. In case a realtime data source is added or changed to such a long identifier, we'll detect that and adapt the mapping here.
		return '-too-long-'
	}
	return src
}
24+
25+
export {
26+
extractDataSourceFromFahrtBezeichner,
27+
}

lib/gtfs-matching.js

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ import {
1111
connectToNats,
1212
JSONCodec,
1313
} from './nats.js'
14+
import {
15+
extractDataSourceFromFahrtBezeichner as defaultExtractDataSourceFromFahrtBezeichner,
16+
} from './extract-data-srv-from-vdv-fahrtbezeichner.js'
1417
import {
1518
createMatchWithGtfs,
1619
} from './raw-match.js'
@@ -42,6 +45,7 @@ const runGtfsMatching = async (cfg, opt = {}) => {
4245
natsConsumerName,
4346
natsAckWait, // in milliseconds
4447
matchConcurrency,
48+
extractDataSourceFromFahrtBezeichner,
4549
publishUnmatchedTripUpdates,
4650
} = {
4751
natsConsumerName: process.env.MATCHING_CONSUMER_NAME
@@ -59,6 +63,7 @@ const runGtfsMatching = async (cfg, opt = {}) => {
5963
// SELECT num_cores FROM cpu_cores LIMIT 1
6064
// same as with hafas-gtfs-rt-feed: https://github.com/derhuerst/hafas-gtfs-rt-feed/blob/8.2.6/lib/match.js#L54-L61
6165
: Math.ceil(1 + osCpus().length * 1.2),
66+
extractDataSourceFromFahrtBezeichner: defaultExtractDataSourceFromFahrtBezeichner,
6267
publishUnmatchedTripUpdates: process.env.MATCHING_PUBLISH_UNMATCHED_TRIPUPDATES
6368
? process.env.MATCHING_PUBLISH_UNMATCHED_TRIPUPDATES === 'true'
6469
: false,
@@ -123,6 +128,7 @@ const runGtfsMatching = async (cfg, opt = {}) => {
123128
stop: stopMatching,
124129
} = await createMatchWithGtfs({
125130
logger,
131+
extractDataSourceFromFahrtBezeichner,
126132
})
127133

128134
const publishGtfsRtTripUpdateToNats = (gtfsRtTripUpdate, logCtx) => {

lib/raw-match.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,14 @@ const formattingLogger = createLogger('formatting', {
1414
const createMatchWithGtfs = async (cfg) => {
1515
const {
1616
logger,
17+
extractDataSourceFromFahrtBezeichner,
1718
} = cfg
1819

1920
const {
2021
formatVdvAusIstFahrtAsGtfsRtTripUpdate,
2122
} = await createFormatVdvAusIstFahrtAsGtfsRtTripUpdate({
2223
logger: formattingLogger,
24+
extractDataSourceFromFahrtBezeichner,
2325
})
2426
const {
2527
matchGtfsRtTripUpdateWithScheduleStopTimes,

lib/vdv-aus-istfahrt-as-gtfs-rt-tripupdate.js

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import {ok, deepStrictEqual} from 'node:assert/strict'
22
import { strictEqual } from 'node:assert'
3+
import {OPERATORS} from './operators.js'
34
import {
45
unixTimestampFromIso8601,
56
} from './util.js'
@@ -23,6 +24,35 @@ const stripDataProviderPrefixFromAusHaltID = (ausHaltId) => {
2324
: ausHaltId
2425
}
2526

27+
// VBBr trips have a dual naming scheme, with both a letter and a line number, e.g.
28+
// - "B/522" from Fontanestr. to Wilhelmsdorf/Malge
29+
// AUS just uses the letter, while the GTFS uses both.
30+
const vbbrRouteShortNames = new Map([
	['B', 'B/522'],
])

/**
 * Computes the route_short_name of a VDV AUS IstFahrt.
 *
 * @param {object} cfg
 * @param {object} [cfg.logger] - logger with a `warn(ctx, msg)` method; if omitted, the warning is dropped
 * @param {object} cfg.istFahrt - the IstFahrt; its `LinienText` and `FahrtID.FahrtBezeichner` are read
 * @param {function} cfg.extractDataSourceFromFahrtBezeichner - maps a FahrtBezeichner to a data source identifier
 * @returns {string|null} the trimmed LinienText, the mapped name for a known VBBr line, or `null` if LinienText is missing or blank
 */
const getRouteShortName = (cfg) => {
	const {
		logger,
		istFahrt,
		extractDataSourceFromFahrtBezeichner,
	} = cfg

	// Trim before checking for emptiness, so that a whitespace-only LinienText is treated as missing instead of yielding ''.
	const linienText = istFahrt.LinienText
		? istFahrt.LinienText.trim()
		: ''
	if (linienText === '') {
		// The logger is optional here, so that a caller not passing one gets `null` rather than a TypeError.
		logger?.warn({
			istFahrt,
		}, 'cannot compute route_short_name without IstFahrt.LinienText')
		return null
	}

	// FahrtID may be absent, in which case the extractor receives `undefined`.
	const src = extractDataSourceFromFahrtBezeichner(istFahrt.FahrtID?.FahrtBezeichner)
	if (src === 'VBBr' && vbbrRouteShortNames.has(linienText)) {
		return vbbrRouteShortNames.get(linienText)
	}

	return linienText
}
55+
2656
const formatIstHaltAsStopTimeEvent = (istHalt, abfahrtAnkunft) => {
2757
const plannedIso = istHalt[`${abfahrtAnkunft}szeit`] || null
2858
if (plannedIso === null) {
@@ -108,6 +138,7 @@ const formatVdvAusIstHaltAsGtfsRtStopTimeUpdate = (istHalt) => {
108138
const createFormatVdvAusIstFahrtAsGtfsRtTripUpdate = async (cfg) => {
109139
const {
110140
logger,
141+
extractDataSourceFromFahrtBezeichner,
111142
} = cfg
112143

113144
const formatVdvAusIstFahrtAsGtfsRtTripUpdate = (istFahrt) => {
@@ -202,7 +233,10 @@ const createFormatVdvAusIstFahrtAsGtfsRtTripUpdate = async (cfg) => {
202233
}
203234

204235
// not part of the GTFS Realtime spec, we just use it for matching and/or debug-logging
205-
const route_short_name = istFahrt.LinienText || null
236+
const route_short_name = getRouteShortName({
237+
istFahrt,
238+
extractDataSourceFromFahrtBezeichner,
239+
})
206240
Object.defineProperty(tripUpdate.trip, kRouteShortName, {value: route_short_name})
207241
Object.defineProperty(tripUpdate, kFahrtID, {value: istFahrt.FahrtID ?? null})
208242
Object.defineProperty(tripUpdate, kUmlaufID, {value: istFahrt.UmlaufID ?? null})

lib/vdv-reconciliation.js

Lines changed: 4 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ import {
33
Counter,
44
Summary,
55
} from 'prom-client'
6-
import last from 'lodash/last.js'
76
import Redlock from 'redlock'
87
import {createLogger} from './logger.js'
98
import {register} from './metrics.js'
@@ -13,6 +12,9 @@ import {
1312
connectToNats,
1413
JSONCodec,
1514
} from './nats.js'
15+
import {
16+
extractDataSourceFromFahrtBezeichner as defaultExtractDataSourceFromFahrtBezeichner,
17+
} from './extract-data-srv-from-vdv-fahrtbezeichner.js'
1618
import {connectToRedis} from './redis.js'
1719
import {
1820
computeVdvFahrtId,
@@ -58,24 +60,7 @@ const runVdvReconciliation = async (cfg, opt = {}) => {
5860
reconciliationConcurrency: process.env.RECONCILIATION_CONCURRENCY
5961
? parseInt(process.env.RECONCILIATION_CONCURRENCY)
6062
: 30,
61-
// Normalizes a string identifying a source of realtime data, extracted from a {Ist,Soll}Fahrt.FahrtID.FahrtBezeichner, so that it can be used in metric labels, logs, etc.
62-
// Prevents too many metric label combinations, which would cause many Prometheus time series to be created. (https://prometheus.io/docs/practices/naming/#labels)
63-
extractDataSourceFromFahrtBezeichner: (fahrtBezeichner) => {
64-
// note: There might be FahrtBezeichner values like `75861#DLr-D#ODEG`.
65-
const _parts = (fahrtBezeichner ?? '').split('#')
66-
const src = _parts.length > 1 && last(_parts).trim() || null
67-
if (src === null || src === '!ADD!') {
68-
return 'unknown'
69-
}
70-
if (src.toLowerCase() === 'nahverkehrsgesellschaft jerichower land') {
71-
return 'NJL'
72-
}
73-
if (src.length > 4) {
74-
// We assume this case to be rather rare. In case a realtime data source is added or changed to such a long identifier, we'll detect that and adapt the mapping here.
75-
return '-too-long-'
76-
}
77-
return src
78-
},
63+
extractDataSourceFromFahrtBezeichner: defaultExtractDataSourceFromFahrtBezeichner,
7964
...opt,
8065
}
8166
ok(Number.isInteger(natsAckWait), 'opt.natsAckWait must be an integer')

0 commit comments

Comments
 (0)