Fri, 05 Feb 2021 12:16:29 +0200
update
#!/usr/bin/env python3
import multiprocessing

import sqlalchemy
import sqlalchemy.orm

from regions import parse_regions
from datamodel import *
# Open the local SQLite GTFS database and create any table declared on
# GtfsBase that does not exist yet.
engine = sqlalchemy.create_engine('sqlite:///gtfs.db')
GtfsBase.metadata.create_all(engine)
# Module-level session used by the helpers and the batch loop below.
session = sqlalchemy.orm.sessionmaker(bind=engine)()
# Region data parsed once at start-up and handed to destinations_list().
regions = parse_regions('föli.osm')

def filter_itinerary(raw_itinerary):
    """Yield each truthy entry of *raw_itinerary* once, in first-seen order.

    Falsy entries (``None``, empty strings, ...) are skipped and an entry
    that has already been yielded is never repeated.  Entries must be
    hashable because they are tracked in a set.
    """
    encountered = set()
    for region in raw_itinerary:
        if region and region not in encountered:
            encountered.add(region)
            yield region

def get_stoptimes(session):
    """Yield at most 1000 GtfsStopTime rows whose destination is still NULL.

    The query is re-issued on every call, so rows that have since been given
    a destination no longer match and are not returned again.
    """
    pending = (
        session.query(GtfsStopTime)
        # .is_(None) renders IS NULL explicitly instead of relying on the
        # overloaded `== None` comparison.
        .filter(GtfsStopTime.destination.is_(None))
        .limit(1000)
    )
    yield from pending

def length_left(stoptime):
    """Return the shape length remaining after *stoptime*.

    Computed as the full length of the trip's shape minus the distance
    already travelled along it at this stop time.
    """
    return stoptime.trip.shape.length - stoptime.shape_distance_traveled

from datetime import datetime, timedelta

# Timing accumulators; only the commented-out timing report in the batch
# loop refers to them.
time_in_db = timedelta(0)
time_in_process = timedelta(0)
# Number of stop times without a destination at start-up; used as the
# denominator of the progress percentage.
amount = (
    session.query(GtfsStopTime)
    .filter(GtfsStopTime.destination.is_(None))
    .count()
)
# Count of stop times processed so far.
k = 0
# Condition of the batch loop: nothing to do when no row is pending.
got_stoptimes = amount != 0
from threading import Thread

def get_filtered_itinerary(stoptime):
    """Return the de-duplicated regions visited after *stoptime* on its trip.

    Looks up the stop times of the same trip with a higher stop_sequence,
    keeps those whose stop has ``stop_region_major == 1`` and returns their
    ``stop_region`` values with falsy and repeated entries removed (see
    filter_itinerary).  Uses the module-level ``session``.
    """
    later_entries = (
        session.query(GtfsStopTime)
        .filter(GtfsStopTime.trip_id == stoptime.trip_id)
        .filter(GtfsStopTime.stop_sequence > stoptime.stop_sequence)
        # Without an explicit ordering the database may return rows in any
        # order, which would scramble the order of the itinerary.
        .order_by(GtfsStopTime.stop_sequence)
    )
    return list(filter_itinerary(
        entry.stop.stop_region
        for entry in later_entries
        if entry.stop.stop_region_major == 1
    ))

def get_destination(stoptime, itinerary):
    """Return the destinations for *stoptime* as one ';'-separated string.

    Passes the itinerary, the remaining trip length (as a float) and the
    module-level ``regions`` to busroute.destinations_list and joins the
    result with ';'.
    """
    # Imported at call time, as in the original script.
    from busroute import destinations_list
    dests = destinations_list(
        itinerary=itinerary,
        trip_length=float(length_left(stoptime)),
        regions=regions,
    )
    return ';'.join(dests)

# Process pending stop times in batches until none are left.
# NOTE(review): the worker processes use the module-level `session` they
# inherit from this process; confirm that sharing the SQLite connection this
# way is acceptable, or give each worker its own engine/session.
while got_stoptimes:
    if k > 0:
        print('%.2f%% done' % (k * 100 / amount))
        #print('%s spent in query, %s spent in processing (%.1f%% of time spent processing)' % (time_in_db, time_in_process, (time_in_process / (time_in_db + time_in_process) * 100)))
    stoptimes = list(get_stoptimes(session))
    # An empty batch means every stop time has a destination: stop without
    # spinning up a worker pool for nothing.
    got_stoptimes = bool(stoptimes)
    if not got_stoptimes:
        break
    # The itinerary lookups run in parallel; Pool.map returns the results as
    # a list in the same order as `stoptimes`.
    with multiprocessing.Pool(12) as p:
        itineraries = p.map(get_filtered_itinerary, stoptimes)
    for stoptime, itinerary in zip(stoptimes, itineraries):
        stoptime.destination = get_destination(stoptime, itinerary)
        k += 1
    # Assumed to run once per batch (indentation was lost in this copy of
    # the source) so the next query no longer returns the rows just updated.
    session.commit()