buses.py

Thu, 30 Jul 2020 21:52:31 +0300

author
Teemu Piippo <teemu@hecknology.net>
date
Thu, 30 Jul 2020 21:52:31 +0300
changeset 2
7378b802ddf8
parent 1
f9788970fa46
permissions
-rw-r--r--

destination processing

1
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
1
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
2 def old_load_gtfs(gtfs_zip_path):
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
3 global viimeinen_käyttöpäivä
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
4 from zipfile import ZipFile
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
5 with ZipFile(gtfs_zip_path) as gtfs_zip:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
6 with gtfs_zip.open('trips.txt') as file:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
7 for row in read_csv(map(bytes.decode, file)):
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
8 if row['service_id'] not in services:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
9 services[row['service_id']] = BusService(row['service_id'])
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
10 route = routes_per_id[row['route_id']]
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
11 trip = GtfsTrip(
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
12 reference = row['trip_id'],
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
13 route = route,
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
14 service = services[row['service_id']],
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
15 length = shape_distances.get(row.get('shape_id'), 1) * float(profile['metrics']['shape-modifier']),
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
16 block_id = row.get('block_id') or row['service_id'],
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
17 shape = row.get('shape_id')
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
18 )
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
19 route.trips.add(trip)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
20 if trip.name in all_trips:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
21 print('Trip %s already exists' % trip.name)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
22 else:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
23 all_trips[trip.name] = trip
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
24 print('%d trips' % len(all_trips), file = stderr)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
25
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
26 def read_date(teksti):
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
27 return date(int(teksti[:4]), int(teksti[4:6]), int(teksti[6:]))
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
28
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
29 def read_time(teksti):
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
30 hour, minute, second = map(int, teksti.split(':'))
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
31 return timedelta(hours = hour, minutes = minute, seconds = second)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
32
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
33 print('Loading dates... ', file = stderr, flush = True)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
34 viimeinen_käyttöpäivä = date.today()
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
35
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
36 def date_range(start_date, end_date, *, include_end = False):
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
37 ''' Generates date from start_date to end_date. If include_end is True, then end_date will be yielded. '''
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
38 current_date = start_date
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
39 while current_date < end_date:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
40 yield current_date
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
41 current_date += timedelta(1)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
42 if include_end:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
43 yield end_date
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
44
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
45 def add_day_to_service(service_name, day):
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
46 try:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
47 service = services[service_name]
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
48 except KeyError:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
49 return
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
50 else:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
51 service.dates.add(day)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
52 if day not in services_for_day:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
53 services_for_day[day] = set()
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
54 services_for_day[day].add(service)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
55 global viimeinen_käyttöpäivä
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
56 viimeinen_käyttöpäivä = max(day, viimeinen_käyttöpäivä)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
57
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
58 def filter_day(row, day):
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
59 day_names = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
60 return int(row[day_names[day.isoweekday() - 1]])
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
61
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
62 with gtfs_zip.open('calendar.txt') as file:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
63 for row in read_csv(map(bytes.decode, file)):
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
64 for day in date_range(read_date(row['start_date']), read_date(row['end_date']), include_end = True):
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
65 if filter_day(row, day):
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
66 add_day_to_service(service_name = row['service_id'], day = day)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
67
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
68 with gtfs_zip.open('calendar_dates.txt') as file:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
69 for row in read_csv(map(bytes.decode, file)):
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
70 add_day_to_service(service_name = row['service_id'], day = read_date(row['date']))
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
71
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
72 def services_available_at(day):
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
73 for service in services.values():
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
74 if day in service.dates:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
75 yield service
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
76
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
77 print('Loading stops... ', file = stderr, end = '', flush = True)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
78 with gtfs_zip.open('stops.txt') as file:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
79 for row in read_csv(map(bytes.decode, file)):
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
80 location = Location(float(row['stop_lat']), float(row['stop_lon']))
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
81 stop = BusStop(
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
82 reference = row['stop_id'],
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
83 name = row['stop_name'],
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
84 location = location,
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
85 code = row.get('stop_code', row['stop_id']),
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
86 )
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
87 bus_stops[stop.reference] = stop
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
88 if profile['regions']['use-regions']:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
89 with open('regions-per-stop.json') as file:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
90 for stop_reference, region in json.load(file).items():
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
91 try:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
92 bus_stops[stop_reference].region = region
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
93 except KeyError:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
94 pass
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
95 for bus_stop in bus_stops.values():
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
96 if not hasattr(bus_stop, 'region'):
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
97 bus_stop.region = None
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
98 print('%d stops' % len(bus_stops), file = stderr)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
99
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
100 from collections import defaultdict
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
101 bus_stops_by_name = defaultdict(set)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
102 for bus_stop in bus_stops.values():
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
103 bus_stops_by_name[bus_stop.name].add(bus_stop)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
104 bus_stops_by_name = dict(bus_stops_by_name)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
105
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
106 # ryhmittele bus_stops nimen mukaan
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
107 global all_clusters
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
108 all_clusters = []
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
109 def cluster_gtfs_stops():
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
110 sorted_gtfs_stops = sorted(bus_stops.values(), key = lambda bus_stop: bus_stop.name)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
111 for bus_stop in sorted_gtfs_stops:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
112 if not bus_stop.cluster:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
113 stops_to_cluster = {bus_stop}
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
114 # etsi pysäkin samannimiset vastaparit
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
115 for pair_candidate in bus_stops_by_name[bus_stop.name]:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
116 distance = pair_candidate.location.distance(bus_stop.location)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
117 if pair_candidate is not bus_stop and distance <= 0.4:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
118 stops_to_cluster.add(pair_candidate)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
119 for stop_to_cluster in stops_to_cluster:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
120 if stop_to_cluster.cluster:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
121 cluster = stop_to_cluster.cluster
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
122 break
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
123 else:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
124 cluster = BusStopCluster()
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
125 all_clusters.append(cluster)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
126 for stop_to_cluster in stops_to_cluster:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
127 if not stop_to_cluster.cluster:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
128 cluster.add_stop(stop_to_cluster)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
129 # Merkitse muistiin pysäkkien vastaparit käyttäen hyväksi tämänhetkistä ryhmittelytietoa
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
130 for bus_stop in bus_stops.values():
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
131 if bus_stop.cluster:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
132 bus_stop.pairs = bus_stop.cluster.stops - {bus_stop}
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
133 # Ryhmitä ne bus_stops, joilla ei ollut omaa vastaparia, muiden pysäkkien kanssa
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
134 for bus_stop in sorted_gtfs_stops:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
135 if len(bus_stop.cluster.stops) == 1:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
136 possibilities = set()
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
137 for cluster in all_clusters:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
138 if cluster is not bus_stop.cluster:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
139 distance = cluster.center.distance(bus_stop.location)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
140 if distance <= 0.4:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
141 possibilities.add((distance, cluster))
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
142 if possibilities:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
143 best = min(possibilities)[1]
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
144 all_clusters.remove(bus_stop.cluster)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
145 best.merge(bus_stop.cluster)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
146
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
147 def shared_elements_in_n_sets(sets):
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
148 from itertools import combinations
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
149 result = set()
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
150 for pair in combinations(sets, 2):
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
151 result |= pair[0] & pair[1]
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
152 return result
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
153
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
154 def name_clusters():
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
155 from collections import defaultdict
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
156 clusters_per_name = defaultdict(set)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
157 for cluster in all_clusters:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
158 name_representing_stop = min((len(stop.reference), stop.reference, stop) for stop in cluster.stops)[2]
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
159 clusters_per_name[name_representing_stop.name].add(cluster)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
160 for name, clusters in clusters_per_name.items():
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
161 if len(clusters) == 1:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
162 # Simple case: this cluster is the only one that wants this name.
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
163 next(iter(clusters)).name = name
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
164 else:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
165 if profile['regions']['use-regions']:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
166 # Find out if all clusters are in different areas
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
167 common_regions = shared_elements_in_n_sets({stop.region for stop in cluster.stops} for cluster in clusters)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
168 # Proposal: cluster -> the areas unique to the cluster
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
169 proposal = {
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
170 cluster: {stop.region for stop in cluster.stops} - common_regions - {None}
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
171 for cluster in clusters
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
172 }
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
173 # If at most one cluster is without its own unique region, name the others by region and this one without any.
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
174 if sum([1 for unique_areas in proposal.values() if not unique_areas]) <= 1:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
175 for cluster, unique_areas in proposal.items():
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
176 individual_cluster_name = name
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
177 if unique_areas:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
178 individual_cluster_name += ' (' + min(unique_areas) + ')'
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
179 cluster.name = individual_cluster_name
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
180 break
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
181 # If all else fails, just number them.
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
182 for n, (_, cluster) in enumerate(sorted(
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
183 min((stop.reference.lower(), cluster) for stop in cluster.stops)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
184 for cluster in clusters
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
185 ), 1):
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
186 individual_cluster_name = name + '-' + str(n)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
187 cluster.name = individual_cluster_name
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
188
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
189 print('Clustering bus stops...')
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
190 cluster_gtfs_stops()
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
191 name_clusters()
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
192 for cluster in all_clusters:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
193 if cluster.url_name in clusters_by_name:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
194 print('Warning: Clusters %r and %r share the same URL name: %r' % (cluster.name, clusters_by_name[cluster.url_name].name, cluster.url_name))
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
195 else:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
196 clusters_by_name[cluster.url_name] = cluster
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
197 print('Loading schedules... ', end = '', flush = True, file = stderr)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
198 with gtfs_zip.open('stop_times.txt') as file:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
199 row_count = sum(line.count(b'\n') for line in file)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
200 with gtfs_zip.open('stop_times.txt') as file:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
201 progress = 0
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
202 for row in read_csv(map(bytes.decode, file)):
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
203 if int(row.get('pickup_type', '') or '0') and int(row.get('drop_off_type', '') or '0'):
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
204 continue
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
205 trip = all_trips[transform_trip_reference(row['trip_id'])]
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
206 arrival_time = read_time(row['arrival_time'])
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
207 departure_time = read_time(row['departure_time'])
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
208 stop = bus_stops[row['stop_id']]
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
209 traveled_distance = float(row.get('shape_dist_traveled', 1)) * float(profile['metrics']['shape-modifier'])
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
210 visitnumber = len(trip.schedule) + 1
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
211 trip.schedule.append(BusHalt(arrival_time, departure_time, stop, trip, traveled_distance, visitnumber))
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
212 stop.involved_trips.add(trip)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
213 progress += 1
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
214 if progress % 1000 == 0:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
215 print('\rLoading schedules... %.1f%%' % (progress * 100 / row_count), end = ' ', file = stderr)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
216 print('\rLoading schedules... complete', file = stderr)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
217 for trip in all_trips.values():
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
218 from busroute import simplify_name
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
219 schedule = trip.concise_schedule()
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
220 try:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
221 trip.from_place = simplify_name(schedule[0])
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
222 trip.to_place = simplify_name(schedule[-1])
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
223 except IndexError:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
224 trip.from_place = ''
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
225 trip.to_place = ''
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
226 for route in routes.values():
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
227 from collections import Counter
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
228 from busroute import simplify_name
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
229 tally = Counter()
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
230 for trip in route.trips:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
231 schedule = trip.concise_schedule()
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
232 places = set(schedule)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
233 do_add = True
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
234 assert type(schedule) is list
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
235 for candidate in tally:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
236 if places.issubset(set(candidate)):
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
237 do_add = False
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
238 tally.update({tuple(candidate)})
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
239 if do_add:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
240 tally.update({tuple(schedule)})
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
241 try:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
242 most_common_route = tally.most_common(1)[0][0]
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
243 route.description = simplify_name(most_common_route[0]) + ' - ' + simplify_name(most_common_route[-1])
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
244 except:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
245 route.description = ''
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
246 route.trips = sorted(route.trips, key = lambda trip: trip.schedule and trip.schedule[0].departure_time or timedelta())
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
247 if 'compatibility' in profile and profile['compatibility'].get('fix-destination-times', False):
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
248 # There seems to be something strange going on in Föli's gtfs data.
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
249 # It seems that sometimes the arrival time of the last stop is
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
250 # completely off, so try estimate when the bus will really arrive
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
251 # there based on the last leg distance.
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
252 # I noticed this for bus 220's arrival time at Mylly several years
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
253 # ago. Possibly this has been fixed in the data by now?
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
254 for trip in all_trips.values():
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
255 if len(trip.schedule) >= 2:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
256 bus_speed_coefficient = 750 # meters per minute
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
257 last_leg_distance = trip.schedule[-1].traveled_distance - trip.schedule[-2].traveled_distance
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
258 trip.schedule[-1].arrival_time = trip.schedule[-2].departure_time + timedelta(minutes = last_leg_distance / bus_speed_coefficient)
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
259 # Add services to all bus stops
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
260 for route in routes.values():
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
261 for trip in route.trips:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
262 for halt in trip.schedule:
f9788970fa46 begin work on bus compiler
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
263 halt.stop.services.add(route.service)

mercurial