gtfsc.py

changeset 1: f9788970fa46
child 2:     7378b802ddf8
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gtfsc.py	Wed Jul 29 23:45:53 2020 +0300
@@ -0,0 +1,184 @@
+#!/usr/bin/env python3
+import io
+import sys
+import sqlalchemy
+import sqlalchemy.orm
+from datamodel import *
+
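+# GTFS route_type codes mapped to human-readable mode names.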
+ROUTE_TYPES = {
+	'0': 'tram',
+	'1': 'subway',
+	'2': 'rail',
+	'3': 'bus',
+	'4': 'ferry',
+	'5': 'cable-tram',
+	'6': 'aerial-lift',
+	'7': 'funicular',
+	'11': 'trolleybus',
+	'12': 'monorail',
+}
+
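+# Read a CSV file object, strip the UTF-8 BOM and surrounding whitespace from
+# the header fields, and yield each data row as a dict keyed by those headers.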
+def read_csv(file):
+	import csv
+	reader = csv.reader(file)
+	keys = next(reader)
+	for i in range(len(keys)):
+		keys[i] = keys[i].replace('\ufeff', '').strip()
+	for row in reader:
+		yield dict(zip(keys, row))
+
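+# Yield (route_id, GtfsRoute) pairs for every row of routes.txt.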
+def load_gtfs_routes(gtfs_zip):
+	with gtfs_zip.open('routes.txt') as file:
+		for row in read_csv(map(bytes.decode, file)):
+			route = GtfsRoute(
+				id = row['route_id'],
+				reference = row['route_short_name'],
+				description = row['route_long_name'],
+				type = int(row['route_type']),
+			)
+			yield route.id, route
+
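+# Build one GtfsShape per shape_id from shapes.txt: points are accumulated into
+# a space-separated "lat lon" string and length is the largest shape_dist_traveled seen.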
+def load_shapes(gtfs_zip):
+	shapes = dict()
+	with gtfs_zip.open('shapes.txt') as file:
+		for row in read_csv(map(bytes.decode, file)):
+			shape_id = row['shape_id']
+			if shape_id not in shapes:
+				shapes[shape_id] = GtfsShape(
+					id = shape_id,
+					shape_coordinates = '',
+					length = 0,
+				)
+			shape = shapes[shape_id]
+			if len(shape.shape_coordinates) > 0:
+				shape.shape_coordinates += ' '
+			shape.shape_coordinates += str.format(
+				'{shape_pt_lat} {shape_pt_lon}',
+				**row,
+			)
+			shape.length = max(shape.length, float(row.get('shape_dist_traveled') or 0))
+	return shapes.values()
+
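+# Length of a trip, taken from its shape and scaled by the profile's
+# metrics/shape-modifier setting; trips without a shape get length 0.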
+def trip_length(trip, *, shapes, profile):
+	if trip.shape_id:
+		return shapes[trip.shape_id].length * float(profile['metrics']['shape-modifier'])
+	else:
+		return 0
+
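+# Yield a GtfsService the first time each service_id appears in trips.txt,
+# followed by a GtfsTrip for every row.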
+def load_trips(gtfs_zip):
+	services = set()
+	with gtfs_zip.open('trips.txt') as file:
+		for row in read_csv(map(bytes.decode, file)):
+			if row['service_id'] not in services:
+				set.add(services, row['service_id'])
+				yield GtfsService(id = row['service_id'])
+			yield GtfsTrip(
+				id = row['trip_id'],
+				route_id = row['route_id'],
+				service = row['service_id'],
+				shape_id = dict.get(row, 'shape_id')
+			)
+
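+# Yield a GtfsStop with its name and coordinates for every row of stops.txt.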
+def load_stops(gtfs_zip):
+	with gtfs_zip.open('stops.txt') as file:
+		for row in read_csv(map(bytes.decode, file)):
+			lat = float(row['stop_lat'])
+			lon = float(row['stop_lon'])
+			yield GtfsStop(
+				stop_id = row['stop_id'],
+				stop_name = row['stop_name'],
+				stop_latitude = lat,
+				stop_longitude = lon,
+			)
+
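+# Assign every stop in the database to a region, and flag it when the region
+# is classified as major, using RegionTester lookups on the stop coordinates.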
+def gtfs_stop_spatial_testing(session, regions):
+	print('Finding out in which regions bus stops are...')
+	from compute_regions import RegionTester
+	regiontester = RegionTester(regions)
+	for bus_stop in session.query(GtfsStop):
+		classification = regiontester(
+			latitude = bus_stop.stop_latitude,
+			longitude = bus_stop.stop_longitude,
+		)
+		if classification:
+			bus_stop.stop_region = classification.region
+			bus_stop.stop_region_major = classification.region_class == 'major'
+
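+# Print a 'Loading <what>s...' progress line around a loader call and report how
+# many items it produced.  (Currently unused: load_gtfs below prints directly.)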
+def load_with_loading_text(fn, what, device):
+	print(
+		str.format('Loading {}s... ', what),
+		file = device,
+		end = '',
+		flush = True,
+	)
+	result = fn()
+	print(
+		str.format(
+			'{n} {what}s',
+			n = len(result if type(result) is not tuple else result[0]),
+			what = what,
+		),
+		file = device,
+	)
+	return result
+
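+# Load routes, stops, shapes, trips and services from a GTFS zip archive into the session.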
+def load_gtfs(
+	gtfs_zip_path,
+	*,
+	profile,
+	session,
+	device = sys.stderr
+):
+	from zipfile import ZipFile
+	with ZipFile(gtfs_zip_path) as gtfs_zip:
+		print('Loading routes...', file = device)
+		for route_id, route in load_gtfs_routes(gtfs_zip):
+			session.add(route)
+		print('Loading stops...', file = device)
+		for stop in load_stops(gtfs_zip):
+			session.add(stop)
+		print('Loading shapes...', file = device)
+		for shape in load_shapes(gtfs_zip):
+			session.add(shape)
+		print('Loading trips...', file = device)
+		for trip_or_service in load_trips(gtfs_zip):
+			session.add(trip_or_service)
+
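+# Treat any non-empty value other than 'no' as yes.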
+def parse_yesno(value):
+	return value and value != 'no'
+
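+# Turn parsed regions into GtfsRegion rows, collecting the name, short_name and
+# internal_name variants in every language (default, :sv, :en, :ja).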
+def regions_to_db(regions):
+	from itertools import product
+	for region in regions.values():
+		names = dict()
+		for prefix, language in product(
+			['', 'short_', 'internal_'],
+			['', ':sv', ':en', ':ja'],
+		):
+			key = 'region_' + prefix + 'name' + str.replace(language, ':', '_')
+			value = dict.get(region, prefix + 'name' + language)
+			names[key] = value
+		yield GtfsRegion(
+			**names,
+			municipality = dict.get(region, 'municipality'),
+			external = parse_yesno(dict.get(region, 'external')),
+		)
+
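+# Command-line entry point: read the föli.ini profile, create the gtfs.db SQLite
+# database, load regions from föli.osm and the GTFS feed from gtfs.zip, then
+# assign stops to regions and commit.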
+if __name__ == '__main__':
+	from configparser import ConfigParser
+	from regions import parse_regions
+	profile = ConfigParser()
+	profile.read('föli.ini')
+	engine = sqlalchemy.create_engine('sqlite:///gtfs.db')
+	GtfsBase.metadata.create_all(engine)
+	session = sqlalchemy.orm.sessionmaker(bind = engine)()
+	regions = parse_regions('föli.osm')
+	for region in regions_to_db(regions):
+		session.add(region)
+	session.commit()
+	load_gtfs('gtfs.zip', profile = profile, session = session)
+	gtfs_stop_spatial_testing(session = session, regions = regions)
+	print('Committing to database...')
+	session.commit()
