Skip to content

Commit ca0763b

Browse files
Improve GTFS chapter
1 parent b10b2b0 commit ca0763b

File tree

1 file changed

+44
-59
lines changed

1 file changed

+44
-59
lines changed

docs/GTFS.xml

+44-59
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,6 @@
2525
<para><varname>calendar_dates.txt</varname> defines exceptions to the default service patterns defined in <varname>calendar.txt</varname>. There are two types of exceptions: 1 means that the service has been added for the specified date, and 2 means that the service has been removed for the specified date.</para>
2626
</listitem>
2727

28-
<listitem>
29-
<para><varname>route_types.txt</varname> contains transportation types used on routes, such as bus, metro, tramway, etc.</para>
30-
</listitem>
31-
3228
<listitem>
3329
<para><varname>routes.txt</varname> contains transit routes. A route is a group of trips that are displayed to riders as a single service.</para>
3430
</listitem>
@@ -85,7 +81,6 @@ CREATE TABLE calendar (
8581
end_date date NOT NULL,
8682
CONSTRAINT calendar_pkey PRIMARY KEY (service_id)
8783
);
88-
CREATE INDEX calendar_service_id ON calendar (service_id);
8984

9085
CREATE TABLE exception_types (
9186
exception_type int PRIMARY KEY,
@@ -97,19 +92,14 @@ CREATE TABLE calendar_dates (
9792
date date NOT NULL,
9893
exception_type int REFERENCES exception_types(exception_type)
9994
);
100-
CREATE INDEX calendar_dates_dateidx ON calendar_dates (date);
101-
102-
CREATE TABLE route_types (
103-
route_type int PRIMARY KEY,
104-
description text
105-
);
95+
CREATE INDEX calendar_dates_date_idx ON calendar_dates (date);
10696

10797
CREATE TABLE routes (
10898
route_id text,
10999
route_short_name text DEFAULT '',
110100
route_long_name text DEFAULT '',
111101
route_desc text DEFAULT '',
112-
route_type int REFERENCES route_types(route_type),
102+
route_type int,
113103
route_url text,
114104
route_color text,
115105
route_text_color text,
@@ -120,17 +110,17 @@ CREATE TABLE shapes (
120110
shape_id text NOT NULL,
121111
shape_pt_lat double precision NOT NULL,
122112
shape_pt_lon double precision NOT NULL,
123-
shape_pt_sequence int NOT NULL
113+
shape_pt_sequence int NOT NULL,
114+
shape_dist_traveled float NOT NULL
124115
);
125-
CREATE INDEX shapes_shape_key ON shapes (shape_id);
116+
CREATE INDEX shapes_shape_id_idx ON shapes (shape_id);
126117

127118
-- Create a table to store the shape geometries
128119
CREATE TABLE shape_geoms (
129120
shape_id text NOT NULL,
130-
shape_geom geometry('LINESTRING', 4326),
121+
shape_geom geometry('LINESTRING', 3857),
131122
CONSTRAINT shape_geom_pkey PRIMARY KEY (shape_id)
132123
);
133-
CREATE INDEX shape_geoms_key ON shapes (shape_id);
134124

135125
CREATE TABLE location_types (
136126
location_type int PRIMARY KEY,
@@ -146,9 +136,9 @@ CREATE TABLE stops (
146136
stop_lon double precision,
147137
zone_id text,
148138
stop_url text,
149-
location_type integer REFERENCES location_types(location_type),
139+
location_type integer REFERENCES location_types(location_type),
150140
parent_station integer,
151-
stop_geom geometry('POINT', 4326),
141+
stop_geom geometry('POINT', 3857),
152142
platform_code text DEFAULT NULL,
153143
CONSTRAINT stops_pkey PRIMARY KEY (stop_id)
154144
);
@@ -160,7 +150,7 @@ CREATE TABLE pickup_dropoff_types (
160150

161151
CREATE TABLE stop_times (
162152
trip_id text NOT NULL,
163-
-- Check that casting to time interval works.
153+
-- Check that casting to time interval works
164154
arrival_time interval CHECK (arrival_time::interval = arrival_time::interval),
165155
departure_time interval CHECK (departure_time::interval = departure_time::interval),
166156
stop_id text,
@@ -183,7 +173,6 @@ CREATE TABLE trips (
183173
shape_id text,
184174
CONSTRAINT trips_pkey PRIMARY KEY (trip_id)
185175
);
186-
CREATE INDEX trips_trip_id ON trips (trip_id);
187176

188177
INSERT INTO exception_types (exception_type, description) VALUES
189178
(1, 'service has been added'),
@@ -218,8 +207,6 @@ COPY trips(route_id,service_id,trip_id,trip_headsign,direction_id,block_id,shape
218207
FROM '/home/gtfs_tutorial/trips.txt' DELIMITER ',' CSV HEADER;
219208
COPY agency(agency_id,agency_name,agency_url,agency_timezone,agency_lang,agency_phone)
220209
FROM '/home/gtfs_tutorial/agency.txt' DELIMITER ',' CSV HEADER;
221-
COPY route_types(route_type,description)
222-
FROM '/home/gtfs_tutorial/route_types.txt' DELIMITER ',' CSV HEADER;
223210
COPY routes(route_id,route_short_name,route_long_name,route_desc,route_type,route_url,
224211
route_color,route_text_color) FROM '/home/gtfs_tutorial/routes.txt' DELIMITER ','
225212
CSV HEADER;
@@ -233,12 +220,12 @@ CSV HEADER;
233220
<programlisting language="sql">
234221
INSERT INTO shape_geoms
235222
SELECT shape_id, ST_MakeLine(array_agg(
236-
ST_SetSRID(ST_MakePoint(shape_pt_lon, shape_pt_lat),4326) ORDER BY shape_pt_sequence))
223+
ST_Transform(ST_Point(shape_pt_lon, shape_pt_lat, 4326), 3857) ORDER BY shape_pt_sequence))
237224
FROM shapes
238225
GROUP BY shape_id;
239226

240227
UPDATE stops
241-
SET stop_geom = ST_SetSRID(ST_MakePoint(stop_lon, stop_lat),4326);
228+
SET stop_geom = ST_Transform(ST_Point(stop_lon, stop_lat, 4326), 3857);
242229
</programlisting>
243230
The visualization of the routes and stops in QGIS is given in <xref linkend="stib" />. In the figure, red lines correspond to the trajectories of vehicles, while orange points correspond to the location of stops.
244231
</para>
@@ -253,21 +240,21 @@ SET stop_geom = ST_SetSRID(ST_MakePoint(stop_lon, stop_lat),4326);
253240
<sect1>
254241
<title>Transforming GTFS Data for MobilityDB</title>
255242
<para>
256-
We start by creating a table that contains couples of <varname>service_id</varname> and <varname>date</varname> defining the dates at which a service is provided.
243+
We start by creating a table that contains pairs of <varname>service_id</varname> and <varname>date</varname> defining the dates on which a service is provided.
257244
<programlisting language="sql">
258245
DROP TABLE IF EXISTS service_dates;
259246
CREATE TABLE service_dates AS (
260247
SELECT service_id, date_trunc('day', d)::date AS date
261248
FROM calendar c, generate_series(start_date, end_date, '1 day'::interval) AS d
262249
WHERE (
263-
(monday = 1 AND extract(isodow FROM d) = 1) OR
264-
(tuesday = 1 AND extract(isodow FROM d) = 2) OR
265-
(wednesday = 1 AND extract(isodow FROM d) = 3) OR
266-
(thursday = 1 AND extract(isodow FROM d) = 4) OR
267-
(friday = 1 AND extract(isodow FROM d) = 5) OR
268-
(saturday = 1 AND extract(isodow FROM d) = 6) OR
269-
(sunday = 1 AND extract(isodow FROM d) = 7)
270-
)
250+
(monday = 1 AND extract(isodow FROM d) = 1) OR
251+
(tuesday = 1 AND extract(isodow FROM d) = 2) OR
252+
(wednesday = 1 AND extract(isodow FROM d) = 3) OR
253+
(thursday = 1 AND extract(isodow FROM d) = 4) OR
254+
(friday = 1 AND extract(isodow FROM d) = 5) OR
255+
(saturday = 1 AND extract(isodow FROM d) = 6) OR
256+
(sunday = 1 AND extract(isodow FROM d) = 7)
257+
)
271258
EXCEPT
272259
SELECT service_id, date
273260
FROM calendar_dates WHERE exception_type = 2
@@ -304,8 +291,8 @@ FROM trips t JOIN stop_times s ON t.trip_id = s.trip_id;
304291

305292
UPDATE trip_stops t
306293
SET perc = CASE
307-
WHEN stop_sequence = 1 then 0.0
308-
WHEN stop_sequence = no_stops then 1.0
294+
WHEN stop_sequence = 1 THEN 0.0
295+
WHEN stop_sequence = no_stops THEN 1.0
309296
ELSE ST_LineLocatePoint(g.shape_geom, s.stop_geom)
310297
END
311298
FROM shape_geoms g, stops s
@@ -359,7 +346,7 @@ SET seg_length = ST_Length(seg_geom), no_points = ST_NumPoints(seg_geom);
359346
</para>
360347

361348
<para>
362-
The geometry of a segment is a linestring containing multiple points. From the previous table we know at which time the trip arrived at the first point and at the last point of the segment. To determine at which time the trip arrived at the intermediate points of the segments, we create a table <varname>trip_points</varname> that contains all the points composing the geometry of a segment.
349+
The geometry of a segment is a linestring containing multiple points. From table <varname>trip_stops</varname> we know at which time the trip arrived at the first point and at the last point of the segment. To determine at which time the trip arrived at the intermediate points of the segments, we create a table <varname>trip_points</varname> that contains all the points composing the geometry of a segment.
363350
<programlisting language="sql">
364351
DROP TABLE IF EXISTS trip_points;
365352
CREATE TABLE trip_points (
@@ -377,29 +364,26 @@ INSERT INTO trip_points (trip_id, route_id, service_id, stop1_sequence,
377364
point_sequence, point_geom, point_arrival_time)
378365
WITH temp1 AS (
379366
SELECT trip_id, route_id, service_id, stop1_sequence, stop2_sequence,
380-
no_stops, stop1_arrival_time, stop2_arrival_time, seg_length,
381-
(dp).path[1] AS point_sequence, no_points, (dp).geom as point_geom
382-
FROM trip_segs, ST_DumpPoints(seg_geom) AS dp
383-
),
367+
no_stops, stop1_arrival_time, stop2_arrival_time, seg_length,
368+
(dp).path[1] AS point_sequence, no_points, (dp).geom as point_geom
369+
FROM trip_segs, ST_DumpPoints(seg_geom) AS dp ),
384370
temp2 AS (
385-
SELECT trip_id, route_id, service_id, stop1_sequence, stop1_arrival_time,
386-
stop2_arrival_time, seg_length, point_sequence, no_points, point_geom
387-
FROM temp1
388-
WHERE point_sequence &lt;&gt; no_points OR stop2_sequence = no_stops
389-
),
371+
SELECT trip_id, route_id, service_id, stop1_sequence, stop1_arrival_time,
372+
stop2_arrival_time, seg_length, point_sequence, no_points, point_geom
373+
FROM temp1
374+
WHERE point_sequence != no_points OR stop2_sequence = no_stops ),
390375
temp3 AS (
391-
SELECT trip_id, route_id, service_id, stop1_sequence, stop1_arrival_time,
392-
stop2_arrival_time, point_sequence, no_points, point_geom,
393-
ST_Length(ST_MakeLine(array_agg(point_geom) OVER w)) / seg_length AS perc
394-
FROM temp2 WINDOW w AS (PARTITION BY trip_id, service_id, stop1_sequence
395-
ORDER BY point_sequence)
396-
)
376+
SELECT trip_id, route_id, service_id, stop1_sequence, stop1_arrival_time,
377+
stop2_arrival_time, point_sequence, no_points, point_geom,
378+
ST_Length(ST_MakeLine(array_agg(point_geom) OVER w)) / seg_length AS perc
379+
FROM temp2 WINDOW w AS (PARTITION BY trip_id, service_id, stop1_sequence
380+
ORDER BY point_sequence) )
397381
SELECT trip_id, route_id, service_id, stop1_sequence, point_sequence, point_geom,
398-
CASE
399-
WHEN point_sequence = 1 then stop1_arrival_time
400-
WHEN point_sequence = no_points then stop2_arrival_time
401-
ELSE stop1_arrival_time + ((stop2_arrival_time - stop1_arrival_time) * perc)
402-
END AS point_arrival_time
382+
CASE
383+
WHEN point_sequence = 1 THEN stop1_arrival_time
384+
WHEN point_sequence = no_points THEN stop2_arrival_time
385+
ELSE stop1_arrival_time + ((stop2_arrival_time - stop1_arrival_time) * perc)
386+
END AS point_arrival_time
403387
FROM temp3;
404388
</programlisting>
405389
In the common table expression <varname>temp1</varname> we use the function <varname>ST_DumpPoints</varname> to obtain the points composing the geometry of a segment. Nevertheless, this table contains duplicate points, that is, the last point of a segment is equal to the first point of the next one. In the common table expression <varname>temp2</varname> we filter out the last point of a segment unless it is the last segment of the trip. In the common table expression <varname>temp3</varname> we compute in the attribute <varname>perc</varname> the relative position of a point within a trip segment with window functions. For this we use the function <varname>ST_MakeLine</varname> to construct the subsegment from the first point of the segment to the current one, determine the length of the subsegment with function <varname>ST_Length</varname> and divide this length by the overall segment length. Finally, in the outer query we use the computed percentage to determine the arrival time at that point.
@@ -441,14 +425,15 @@ CREATE TABLE trips_mdb (
441425
);
442426

443427
INSERT INTO trips_mdb(trip_id, service_id, route_id, date, trip)
444-
SELECT trip_id, service_id, route_id, date, tgeompoint_seq(array_agg(tgeompoint_inst(point_geom, t) ORDER BY T))
428+
SELECT trip_id, service_id, route_id, date, tgeompointSeq(array_agg(
429+
tgeompoint(point_geom, t) ORDER BY T))
445430
FROM trips_input
446431
GROUP BY trip_id, service_id, route_id, date;
447432

448433
INSERT INTO trips_mdb(trip_id, service_id, route_id, date, trip)
449434
SELECT trip_id, route_id, t.service_id, d.date,
450-
shift(trip, make_interval(days => d.date - t.date))
451-
FROM trips_mdb t JOIN service_dates d ON t.service_id = d.service_id AND t.date &lt;&gt; d.date;
435+
shiftTime(trip, make_interval(days => d.date - t.date))
436+
FROM trips_mdb t JOIN service_dates d ON t.service_id = d.service_id AND t.date != d.date;
452437
</programlisting>
453438
In the first <varname>INSERT</varname> statement we group the rows in the <varname>trips_input</varname> table by <varname>trip_id</varname> and <varname>date</varname> while keeping the <varname>route_id</varname> attribute, use the <varname>array_agg</varname> function to construct an array containing the temporal points composing the trip ordered by time, and compute the trip from this array using the function <varname>tgeompointSeq</varname>. As explained above, table <varname>trips_input</varname> only contains the first date of a trip. In the second <varname>INSERT</varname> statement we add the trips for all the other dates with the function <varname>shiftTime</varname>.
454439
</para>

0 commit comments

Comments
 (0)