# HG changeset patch # User Teemu Piippo # Date 1604580770 -7200 # Node ID 10ce28475e9c04c5cac09091e75dff3f354b7277 # Parent 7378b802ddf88d9ad1686c037e73fb8d47d24faf things diff -r 7378b802ddf8 -r 10ce28475e9c .hgignore --- a/.hgignore Thu Jul 30 21:52:31 2020 +0300 +++ b/.hgignore Thu Nov 05 14:52:50 2020 +0200 @@ -2,3 +2,4 @@ gtfs.zip __pycache__ *.db +gtfs diff -r 7378b802ddf8 -r 10ce28475e9c föli.osm --- a/föli.osm Thu Jul 30 21:52:31 2020 +0300 +++ b/föli.osm Thu Nov 05 14:52:50 2020 +0200 @@ -2223,8 +2223,6 @@ - - @@ -3058,6 +3056,7 @@ + @@ -6078,7 +6077,7 @@ - + @@ -6325,6 +6324,7 @@ + diff -r 7378b802ddf8 -r 10ce28475e9c itienary_processing.py --- a/itienary_processing.py Thu Jul 30 21:52:31 2020 +0300 +++ b/itienary_processing.py Thu Nov 05 14:52:50 2020 +0200 @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import sqlalchemy import sqlalchemy.orm +import multiprocessing from regions import parse_regions from datamodel import * engine = sqlalchemy.create_engine('sqlite:///gtfs.db') @@ -24,27 +25,42 @@ def length_left(stoptime): return stoptime.trip.shape.length - stoptime.shape_distance_traveled +from datetime import datetime, timedelta +time_in_db = timedelta(0) +time_in_process = timedelta(0) amount = session.query(GtfsStopTime).filter(GtfsStopTime.destination == None).count() k = 0 got_stoptimes = amount != 0 +from threading import Thread + +def get_filtered_itinerary(stoptime): + return list(filter_itinerary( + entry.stop.stop_region + for entry in session.query(GtfsStopTime) + .filter(GtfsStopTime.trip_id == stoptime.trip_id) + .filter(GtfsStopTime.stop_sequence > stoptime.stop_sequence) + )) + +def get_destination(stoptime, itinerary): + from busroute import destinations_list + dests = destinations_list( + itinerary = itinerary, + trip_length = float(length_left(stoptime)), + regions = regions, + ) + return '-'.join(dests) while got_stoptimes: - print('%.2f%%' % (k * 100 / amount)) + if k > 0: + print('%.2f%% done' % (k * 100 / amount)) + #print('%s spent in query, %s spent in processing (%.1f%% of time spent processing)' % (time_in_db, time_in_process, (time_in_process / (time_in_db + time_in_process) * 100))) got_stoptimes = False - for stoptime in get_stoptimes(session): + stoptimes = list(get_stoptimes(session)) + with multiprocessing.Pool(12) as p: + itineraries = list(p.map(get_filtered_itinerary, stoptimes)) + for stoptime, itinerary in zip(stoptimes, itineraries): got_stoptimes = True + destination = get_destination(stoptime, itinerary) + stoptime.destination = destination k += 1 - itinerary = list(filter_itinerary( - entry.stop.stop_region - for entry in session.query(GtfsStopTime) - .filter(GtfsStopTime.trip_id == stoptime.trip_id) - .filter(GtfsStopTime.stop_sequence > stoptime.stop_sequence) - )) - from busroute import destinations_list - dests = destinations_list( - itinerary = itinerary, - trip_length = float(length_left(stoptime)), - regions = regions, - ) - stoptime.destination = '-'.join(dests) session.commit() diff -r 7378b802ddf8 -r 10ce28475e9c regiontest.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/regiontest.py Thu Nov 05 14:52:50 2020 +0200 @@ -0,0 +1,11 @@ + + from collections import defaultdict + region_names_per_ref = defaultdict(set) + for ref, region in dict.items(regions): + set.add(region_names_per_ref[ref], region['name:fi']) + problem_regions = dict(filter(lambda k: len(region_names_per_ref[k]) > 1, region_names_per_ref)) + if problem_regions: + from sys import stderr, exit + print('Found problems with regions', file = stderr) + print(problem_regions, file = stderr) + exit(1)