Skip to content

Commit b95f899

Browse files
committed
remove PostgreSQL stuff, prepare for DuckDB [todo:rename,readme,etc]
1 parent 32511a8 commit b95f899

File tree

6 files changed

+28
-240
lines changed

6 files changed

+28
-240
lines changed

cli.js

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ const {
5757
if (flags.help) {
5858
process.stdout.write(`
5959
Usage:
60-
gtfs-to-sql [options] [--] <gtfs-file> ...
60+
import-gtfs-into-duckdb [options] [--] <path-to-duckdb> <gtfs-file> ...
6161
Options:
6262
--silent -s Don't show files being converted.
6363
--require-dependencies -d Require files that the specified GTFS files depend
@@ -73,14 +73,9 @@ Options:
7373
--routes-without-agency-id Don't require routes.txt items to have an agency_id.
7474
--stops-without-level-id Don't require stops.txt items to have a level_id.
7575
Default if levels.txt has not been provided.
76-
--stops-location-index Create a spatial index on stops.stop_loc for efficient
77-
queries by geolocation.
7876
--schema The schema to use for the database. Default: public
79-
--postgraphile Tweak generated SQL for PostGraphile usage.
80-
https://www.graphile.org/postgraphile/
8177
Examples:
82-
gtfs-to-sql some-gtfs/*.txt | psql -b # import into PostgreSQL
83-
gtfs-to-sql -u -- some-gtfs/*.txt | gzip >gtfs.sql # generate a gzipped SQL dump
78+
import-gtfs-into-duckdb some-gtfs.duckdb some-gtfs/*.txt
8479
8580
[1] https://developers.google.com/transit/gtfs/reference/extended-route-types
8681
[2] https://groups.google.com/g/gtfs-changes/c/keT5rTPS7Y0/m/71uMz2l6ke0J
@@ -94,10 +89,11 @@ if (flags.version) {
9489
}
9590

9691
const {basename, extname} = require('path')
97-
const {pipeline} = require('stream')
9892
const convertGtfsToSql = require('./index')
9993

100-
const files = args.map((file) => {
94+
const [pathToDb] = args
95+
96+
const files = args.slice(1).map((file) => {
10197
const name = basename(file, extname(file))
10298
return {name, file}
10399
})
@@ -115,12 +111,8 @@ const opt = {
115111
}
116112
opt.stopsWithoutLevelId = !flags['stops-without-level-id']
117113

118-
pipeline(
119-
convertGtfsToSql(files, opt),
120-
process.stdout,
121-
(err) => {
122-
if (!err) return;
123-
if (err.code !== 'EPIPE') console.error(err)
124-
process.exit(1)
125-
}
126-
)
114+
convertGtfsToSql(pathToDb, files, opt)
115+
.catch((err) => {
116+
console.error(err)
117+
process.exit(1)
118+
})

index.js

Lines changed: 14 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,16 @@
22

33
const debug = require('debug')('gtfs-via-postgres')
44
const sequencify = require('sequencify')
5-
const {inspect} = require('util')
5+
const {Database} = require('duckdb')
6+
const {promisify} = require('util')
67
const readCsv = require('gtfs-utils/read-csv')
7-
const {Stringifier} = require('csv-stringify')
88
const formatters = require('./lib')
99
const getDependencies = require('./lib/deps')
1010
const pkg = require('./package.json')
1111

12-
const convertGtfsToSql = async function* (files, opt = {}) {
12+
const convertGtfsToSql = async (pathToDb, files, opt = {}) => {
13+
debug('pathToDb', pathToDb)
14+
1315
opt = {
1416
silent: false,
1517
requireDependencies: false,
@@ -41,38 +43,6 @@ const convertGtfsToSql = async function* (files, opt = {}) {
4143
debug('deps', deps)
4244

4345
const tasks = { // file name -> [dep name]
44-
'is_bcp_47_code': {
45-
dep: [],
46-
},
47-
'is_timezone': {
48-
dep: [],
49-
},
50-
...(tripsWithoutShapeId ? {} : {
51-
'shape_exists': {
52-
dep: [...deps.shape_exists],
53-
},
54-
}),
55-
56-
// special handling of calendar/calendar_dates:
57-
// service_days relies on *both* calendar's & calendar_dates' tables to
58-
// be present, so we add mock tasks here. Each of these mock tasks get
59-
// replaced by a file-based one below if the file has been passed.
60-
'calendar': {
61-
dep: [],
62-
},
63-
'calendar_dates': {
64-
dep: [],
65-
},
66-
'service_days': {
67-
dep: ['calendar', 'calendar_dates'],
68-
},
69-
70-
// The arrivals_departures & connections views rely on frequencies' table
71-
// to be present, so we add a mock task here. It gets replaced by a
72-
// file-based one below if the file has been passed.
73-
'frequencies': {
74-
dep: [...deps.frequencies],
75-
},
7646
}
7747

7848
for (const file of files) {
@@ -100,96 +70,26 @@ const convertGtfsToSql = async function* (files, opt = {}) {
10070
sequencify(tasks, Object.keys(tasks), order)
10171
debug('order', order)
10272

103-
yield `\
104-
-- GTFS SQL dump generated by ${pkg.name} v${pkg.version}
105-
-- ${pkg.homepage}
106-
-- options:
107-
${inspect(opt, {compact: false}).split('\n').map(line => '-- ' + line).join('\n')}
108-
109-
\\set ON_ERROR_STOP True
110-
CREATE EXTENSION IF NOT EXISTS postgis;
111-
${opt.schema !== 'public' ? `CREATE SCHEMA IF NOT EXISTS "${opt.schema}";` : ''}
112-
BEGIN;
113-
114-
\n`
73+
const db = new Database(pathToDb)
74+
const dbRun = promisify(db.run)
11575

116-
const csv = new Stringifier({quoted: true})
76+
await dbRun('BEGIN TRANSACTION')
11777

11878
for (const name of order) {
11979
if (!silent) console.error(name)
12080
const task = tasks[name]
121-
yield `-- ${name}\n-----------------\n\n`
12281

123-
const {
124-
beforeAll,
125-
afterAll,
126-
} = formatters[name]
127-
128-
if ('string' === typeof beforeAll && beforeAll) {
129-
yield beforeAll
130-
} else if ('function' === typeof beforeAll) {
131-
yield beforeAll(opt)
132-
}
82+
const importData = formatters[name]
13383

13484
if (task.file) {
135-
const {formatRow} = formatters[name]
136-
let nrOfRows = 0
137-
for await (const rawRow of await readCsv(task.file)) {
138-
const row = formatRow(rawRow, opt)
139-
let formattedRow = null
140-
csv.api.__transform(row, (_formattedRow) => {
141-
formattedRow = _formattedRow
142-
})
143-
yield formattedRow
144-
nrOfRows++
145-
}
146-
147-
if (!silent) console.error(` processed ${nrOfRows} rows`)
148-
}
149-
150-
if ('string' === typeof afterAll && afterAll) {
151-
yield afterAll + ';\n'
152-
} else if ('function' === typeof afterAll) {
153-
yield afterAll(opt) + ';\n'
85+
const input = await readCsv(task.file)
86+
await importData(db, input, opt)
87+
} else {
88+
await importData(db, opt)
15489
}
15590
}
15691

157-
yield `\
158-
159-
${opt.postgraphile ? `\
160-
-- seal imported data
161-
-- todo:
162-
-- > Be careful with public schema.It already has a lot of default privileges that you maybe don't want... See documentation[1].
163-
-- > [1]: postgresql.org/docs/11/ddl-schemas.html#DDL-SCHEMAS-PRIV
164-
DO $$
165-
BEGIN
166-
-- https://stackoverflow.com/questions/8092086/create-postgresql-role-user-if-it-doesnt-exist#8099557
167-
IF EXISTS (
168-
SELECT FROM pg_catalog.pg_roles
169-
WHERE rolname = 'postgraphile'
170-
) THEN
171-
RAISE NOTICE 'Role "postgraphile" already exists, skipping creation.';
172-
ELSE
173-
CREATE ROLE postgraphile LOGIN PASSWORD 'todo'; -- todo: postgraphile password?
174-
END IF;
175-
END
176-
$$;
177-
DO $$
178-
DECLARE
179-
db TEXT := current_database();
180-
BEGIN
181-
EXECUTE format('GRANT ALL PRIVILEGES ON DATABASE %I TO %I', db, 'postgraphile');
182-
END
183-
$$;
184-
GRANT USAGE ON SCHEMA "${opt.schema}" TO postgraphile;
185-
-- https://stackoverflow.com/questions/760210/how-do-you-create-a-read-only-user-in-postgresql#comment50679407_762649
186-
REVOKE CREATE ON SCHEMA "${opt.schema}" FROM PUBLIC;
187-
GRANT SELECT ON ALL TABLES IN SCHEMA "${opt.schema}" TO postgraphile;
188-
-- ALTER DEFAULT PRIVILEGES IN SCHEMA "${opt.schema}" GRANT SELECT ON TABLES TO postgraphile;
189-
-- todo: set search_path? https://stackoverflow.com/questions/760210/how-do-you-create-a-read-only-user-in-postgresql#comment33535263_762649
190-
` : ''}
191-
192-
COMMIT;`
92+
await dbRun('COMMIT')
19393
}
19494

19595
module.exports = convertGtfsToSql

lib/agency.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
'use strict'
22

33
// https://developers.google.com/transit/gtfs/reference#agencytxt
4-
const beforeAll = (opt) => `\
4+
const importData = async (db, agency, opt) => {
5+
await dbRun(`\
56
CREATE TABLE "${opt.schema}".agency (
67
agency_id TEXT PRIMARY KEY,
78
agency_name TEXT NOT NULL,

lib/deps.js

Lines changed: 0 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -1,87 +1,7 @@
11
'use strict'
22

33
const getDependencies = (opt, files) => {
4-
const {
5-
tripsWithoutShapeId,
6-
routesWithoutAgencyId,
7-
stopsWithoutLevelId,
8-
} = opt
94
return {
10-
shape_exists: [
11-
'shapes',
12-
],
13-
agency: [
14-
'is_timezone',
15-
],
16-
stops: [
17-
'is_timezone',
18-
...(stopsWithoutLevelId ? [] : ['levels']),
19-
],
20-
transfers: [
21-
'stops',
22-
],
23-
stop_times: [
24-
'trips',
25-
'stops',
26-
'service_days',
27-
'frequencies',
28-
],
29-
routes: [
30-
...(routesWithoutAgencyId ? [] : ['agency']),
31-
],
32-
trips: [
33-
'routes',
34-
'service_days',
35-
...(tripsWithoutShapeId ? [] : ['shapes', 'shape_exists']),
36-
],
37-
frequencies: [
38-
'trips',
39-
],
40-
pathways: [
41-
'stops',
42-
],
43-
feed_info: [
44-
'is_bcp_47_code',
45-
],
46-
translations: [
47-
'is_bcp_47_code',
48-
// > table_name
49-
// > Defines the dataset table that contains the field to be translated. The following values are allowed:
50-
// > agency
51-
// > stops
52-
// > routes
53-
// > trips
54-
// > stop_times
55-
// > feed_info
56-
// > pathways
57-
// > levels
58-
// > attributions
59-
// https://developers.google.com/transit/gtfs/reference#translationstxt
60-
// todo: respect opt.*!
61-
// these are soft dependencies, they are not depended upon, they must only be imported first
62-
// todo: only specify dependencies here if the files are not in use
63-
'agency',
64-
'stops',
65-
'routes',
66-
'trips',
67-
...(files.includes('stop_times')
68-
? ['stop_times']
69-
: []
70-
),
71-
...(files.includes('feed_info')
72-
? ['feed_info']
73-
: []
74-
),
75-
...(files.includes('pathways')
76-
? ['pathways']
77-
: []
78-
),
79-
...(files.includes('levels')
80-
? ['levels']
81-
: []
82-
),
83-
// not supported yet: attributions
84-
],
855
}
866
}
877

lib/index.js

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,4 @@
11
'use strict'
22

33
module.exports = {
4-
is_bcp_47_code: require('./prerequisites').is_bcp_47_code,
5-
is_timezone: require('./prerequisites').is_timezone,
6-
shape_exists: require('./prerequisites').shape_exists,
7-
agency: require('./agency'),
8-
calendar: require('./calendar'),
9-
calendar_dates: require('./calendar_dates'),
10-
service_days: require('./service_days'),
11-
feed_info: require('./feed_info'),
12-
frequencies: require('./frequencies'),
13-
routes: require('./routes'),
14-
shapes: require('./shapes'),
15-
stop_times: require('./stop_times'),
16-
stops: require('./stops'),
17-
transfers: require('./transfers'),
18-
trips: require('./trips'),
19-
pathways: require('./pathways'),
20-
levels: require('./levels'),
21-
translations: require('./translations'),
224
}

package.json

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
{
2+
"private": true,
23
"name": "gtfs-via-postgres",
34
"description": "Process GTFS using PostgreSQL.",
45
"version": "4.5.3",
@@ -46,25 +47,17 @@
4647
"node": ">=16.17"
4748
},
4849
"dependencies": {
49-
"csv-stringify": "^6.2.0",
5050
"debug": "^4.3.3",
51+
"duckdb": "^0.7.1",
5152
"gtfs-utils": "^5.1.0",
5253
"sequencify": "0.0.7"
5354
},
5455
"devDependencies": {
55-
"@graphile-contrib/pg-simplify-inflector": "^6.1.0",
56-
"@graphile/postgis": "^0.2.0-0",
5756
"csv-parser": "^3.0.0",
5857
"eslint": "^8.33.0",
5958
"pkg": "^5.3.2",
60-
"postgraphile": "^4.12.11",
6159
"sample-gtfs-feed": "^0.11.0"
6260
},
63-
"peerDependencies": {
64-
"@graphile-contrib/pg-simplify-inflector": "^6.1.0",
65-
"@graphile/postgis": "^0.2.0-0",
66-
"postgraphile": "^4.12.11"
67-
},
6861
"scripts": {
6962
"test": "./test/index.sh",
7063
"lint": "eslint .",

0 commit comments

Comments (0)