Thu, 05 Nov 2020 14:52:50 +0200
things
2 | 1 | #!/usr/bin/env python3 |
2 | import sqlalchemy | |
3 | import sqlalchemy.orm | |
3 | 4 | import multiprocessing |
2 | 5 | from regions import parse_regions |
6 | from datamodel import * | |
# Open the local SQLite GTFS database and make sure every table declared on
# GtfsBase exists before any query is issued.
engine = sqlalchemy.create_engine('sqlite:///gtfs.db')
GtfsBase.metadata.create_all(engine)

# One module-wide session, used by all helpers below.
session_factory = sqlalchemy.orm.sessionmaker(bind=engine)
session = session_factory()

# Region definitions parsed from the OSM extract; handed to
# destinations_list() when a destination string is built.
regions = parse_regions('föli.osm')
11 | ||
def filter_itinerary(raw_itinerary):
    """Yield each truthy region of *raw_itinerary* once, in first-seen order.

    Falsy entries (``None``, empty strings, ...) are skipped, and any region
    that has already been yielded is suppressed on later occurrences.
    """
    seen = set()
    for candidate in raw_itinerary:
        # Skip stops without a region and regions we already passed through.
        if not candidate or candidate in seen:
            continue
        yield candidate
        seen.add(candidate)
18 | ||
def get_stoptimes(session, limit=1000):
    """Yield one batch of stop times that have no destination yet.

    Args:
        session: SQLAlchemy session used to run the query.
        limit: Maximum number of rows in the batch. Defaults to 1000, the
            batch size that used to be hard-coded here.

    Yields:
        GtfsStopTime rows whose ``destination`` is NULL, at most *limit*.
    """
    yield from (
        session
        .query(GtfsStopTime)
        # `== None` is intentional: SQLAlchemy renders it as `IS NULL`.
        .filter(GtfsStopTime.destination == None)  # noqa: E711
        .limit(limit)
    )
24 | ||
def length_left(stoptime):
    """Return how much of the trip's shape lies beyond *stoptime*.

    Computed as the length of the trip's shape minus the distance already
    travelled along that shape at this stop time.
    """
    shape_length = stoptime.trip.shape.length
    travelled = stoptime.shape_distance_traveled
    return shape_length - travelled
27 | ||
3 | 28 | from datetime import datetime, timedelta |
# Timing accumulators; nothing below currently reports them.
time_in_db = timedelta()
time_in_process = timedelta()

# Number of stop times that still lack a destination when the script starts;
# used as the denominator of the progress percentage.
amount = (
    session.query(GtfsStopTime)
    # `== None` is the SQLAlchemy spelling of `IS NULL`.
    .filter(GtfsStopTime.destination == None)  # noqa: E711
    .count()
)

# k counts stop times processed so far; the main loop only starts when there
# is at least one pending stop time.
k = 0
got_stoptimes = bool(amount)
3 | 34 | from threading import Thread |
35 | ||
def get_filtered_itinerary(stoptime):
    """Return the de-duplicated regions visited after *stoptime* on its trip.

    Queries every stop time of the same trip with a higher ``stop_sequence``
    and collects the region of each stop, dropping empty regions and repeats
    via ``filter_itinerary``.

    The rows are explicitly ordered by ``stop_sequence``: without an
    ORDER BY the database may return them in any order, and both the
    first-occurrence de-duplication and the resulting itinerary depend on
    travel order.

    NOTE(review): this uses the module-level ``session`` and is invoked
    through ``multiprocessing.Pool.map`` -- confirm that sharing the session
    with worker processes is safe for the configured engine.

    Args:
        stoptime: The stop time whose remaining itinerary is wanted.

    Returns:
        A list of regions in the order they are reached.
    """
    upcoming = (
        session.query(GtfsStopTime)
        .filter(GtfsStopTime.trip_id == stoptime.trip_id)
        .filter(GtfsStopTime.stop_sequence > stoptime.stop_sequence)
        .order_by(GtfsStopTime.stop_sequence)
    )
    return list(filter_itinerary(
        entry.stop.stop_region
        for entry in upcoming
    ))
43 | ||
def get_destination(stoptime, itinerary):
    """Build the destination string for *stoptime*.

    Passes the itinerary, the remaining trip length (as a float) and the
    module-level ``regions`` to ``busroute.destinations_list`` and joins the
    returned entries with ``'-'``.

    Args:
        stoptime: Stop time the destination is computed for.
        itinerary: Regions still ahead of this stop time.

    Returns:
        The destinations joined into a single ``'-'``-separated string.
    """
    # Imported at call time, as in the original layout of this script.
    from busroute import destinations_list

    remaining_length = float(length_left(stoptime))
    destination_parts = destinations_list(
        itinerary=itinerary,
        trip_length=remaining_length,
        regions=regions,
    )
    return '-'.join(destination_parts)
2 | 52 | |
# Process pending stop times batch by batch until a batch comes back empty.
while got_stoptimes:
    if k > 0:
        print('%.2f%% done' % (k * 100 / amount))

    stoptimes = list(get_stoptimes(session))
    # Keep looping only while the query still returns pending rows.
    got_stoptimes = len(stoptimes) > 0

    # The itineraries are computed in parallel; the pool lives for one batch
    # only and is torn down before the results are written back.
    with multiprocessing.Pool(12) as p:
        itineraries = p.map(get_filtered_itinerary, stoptimes)

    for stoptime, itinerary in zip(stoptimes, itineraries):
        stoptime.destination = get_destination(stoptime, itinerary)
        k += 1

    # Persist the batch so these rows no longer match the pending query.
    session.commit()