itienary_processing.py

Fri, 05 Feb 2021 12:16:29 +0200

author
Teemu Piippo <teemu@hecknology.net>
date
Fri, 05 Feb 2021 12:16:29 +0200
changeset 4
ac067a42b00f
parent 3
10ce28475e9c
permissions
-rwxr-xr-x

update

#!/usr/bin/env python3
import sqlalchemy
import sqlalchemy.orm
import multiprocessing
from regions import parse_regions
from datamodel import *
# Module-level setup shared by every function below.
# The engine points at the SQLite database file gtfs.db.
engine = sqlalchemy.create_engine('sqlite:///gtfs.db')
# Make sure the tables described by GtfsBase's metadata exist before querying.
# NOTE(review): GtfsBase presumably comes from the star import of datamodel.
GtfsBase.metadata.create_all(engine)
# A single session bound to the engine; used by the queries and the commit below.
session = sqlalchemy.orm.sessionmaker(bind = engine)()
# Region data parsed from the OSM file; passed to destinations_list() in
# get_destination().
regions = parse_regions('föli.osm')

def filter_itinerary(raw_itinerary):
	"""
	Lazily yield the regions of raw_itinerary, keeping only the first
	occurrence of each one and dropping falsy entries (None, '', ...).

	The relative order of the surviving entries is preserved.
	"""
	seen = set()
	for candidate in raw_itinerary:
		# Skip missing regions and ones that were already emitted.
		if not candidate or candidate in seen:
			continue
		seen.add(candidate)
		yield candidate

def get_stoptimes(session):
	"""
	Yield a batch of at most 1000 GtfsStopTime rows that have no
	destination set yet, queried through the given session.
	"""
	# `== None` is intentional: SQLAlchemy overloads the comparison on the
	# column to build an IS NULL condition, so `is None` would not work here.
	pending = session.query(GtfsStopTime).filter(GtfsStopTime.destination == None)
	for stoptime in pending.limit(1000):
		yield stoptime

def length_left(stoptime):
	"""
	Return how much of the trip's shape is still ahead of this stop time:
	the length of the trip's shape minus the distance already traveled.
	"""
	total_length = stoptime.trip.shape.length
	already_traveled = stoptime.shape_distance_traveled
	return total_length - already_traveled

# Bookkeeping for the batch loop at the bottom of the script.
from datetime import datetime, timedelta
# Timing accumulators; in the visible code they are only referenced by the
# commented-out statistics print inside the main loop.
time_in_db = timedelta(0)
time_in_process = timedelta(0)
# Number of stop times without a destination at startup; used as the
# denominator of the progress percentage.
amount = session.query(GtfsStopTime).filter(GtfsStopTime.destination == None).count()
# Number of stop times processed so far.
k = 0
# Loop condition of the main loop: start only if there is something to do.
got_stoptimes = amount != 0
# NOTE(review): Thread is not used anywhere in the visible code.
from threading import Thread

def get_filtered_itinerary(stoptime):
	"""
	Return the list of regions that the trip of the given stop time still
	passes through after this stop.

	Only stops later on the same trip (greater stop_sequence) whose
	stop_region_major equals 1 are considered. The regions are passed
	through filter_itinerary(), so missing regions are dropped and each
	region appears once, at its first occurrence.

	Uses the module-level session for the query.
	"""
	upcoming_stoptimes = (
		session.query(GtfsStopTime)
		.filter(GtfsStopTime.trip_id == stoptime.trip_id)
		.filter(GtfsStopTime.stop_sequence > stoptime.stop_sequence)
		# Without an explicit ORDER BY the database may return the rows in
		# any order, but the itinerary is only meaningful in travel order.
		.order_by(GtfsStopTime.stop_sequence)
	)
	return list(filter_itinerary(
		entry.stop.stop_region
		for entry in upcoming_stoptimes
		if entry.stop.stop_region_major == 1
	))

def get_destination(stoptime, itinerary):
	"""
	Build the destination string for a stop time.

	Calls busroute.destinations_list() with the given itinerary, the
	remaining trip length (as a float) and the module-level regions, and
	joins the resulting entries with ';'.
	"""
	# Imported here rather than at module level, as in the original script.
	from busroute import destinations_list
	remaining_length = float(length_left(stoptime))
	destination_names = destinations_list(
		itinerary = itinerary,
		trip_length = remaining_length,
		regions = regions,
	)
	return ';'.join(destination_names)

# Main loop: repeatedly fetch a batch of stop times that have no destination,
# compute their itineraries in a pool of worker processes, store the
# destinations and commit. Stops after the first batch that comes back empty.
while got_stoptimes:
	if k > 0:
		# Progress relative to the number of pending rows counted at startup.
		print('%.2f%% done' % (k * 100 / amount))
	stoptimes = list(get_stoptimes(session))
	# Itineraries are computed by 12 worker processes; map() returns them in
	# the same order as the input stop times.
	with multiprocessing.Pool(12) as pool:
		itineraries = pool.map(get_filtered_itinerary, stoptimes)
	batch = list(zip(stoptimes, itineraries))
	for stoptime, itinerary in batch:
		stoptime.destination = get_destination(stoptime, itinerary)
	k += len(batch)
	# An empty batch means nothing is left to process.
	got_stoptimes = bool(batch)
	session.commit()

mercurial