Fri, 05 Feb 2021 12:16:29 +0200
update
1 | 1 | |
2 | def old_load_gtfs(gtfs_zip_path): | |
3 | global viimeinen_käyttöpäivä | |
4 | from zipfile import ZipFile | |
5 | with ZipFile(gtfs_zip_path) as gtfs_zip: | |
# Read trips.txt: build one GtfsTrip per row and attach it to its route.
with gtfs_zip.open('trips.txt') as trips_file:
    for row in read_csv(map(bytes.decode, trips_file)):
        service_id = row['service_id']
        # First time this service id is seen: create its BusService.
        if service_id not in services:
            services[service_id] = BusService(service_id)
        route = routes_per_id[row['route_id']]
        shape_id = row.get('shape_id')
        trip = GtfsTrip(
            reference = row['trip_id'],
            route = route,
            service = services[service_id],
            # Shape length (1 when the shape is not in shape_distances),
            # scaled by the profile's shape modifier.
            length = shape_distances.get(shape_id, 1) * float(profile['metrics']['shape-modifier']),
            # Rows with a missing or empty block_id use the service id instead.
            block_id = row.get('block_id') or service_id,
            shape = shape_id,
        )
        route.trips.add(trip)
        # Only the first trip with a given name is indexed; later ones are reported.
        if trip.name not in all_trips:
            all_trips[trip.name] = trip
        else:
            print('Trip %s already exists' % trip.name)
print('%d trips' % len(all_trips), file = stderr)
25 | ||
def read_date(teksti):
    ''' Parses a YYYYMMDD string into a date. '''
    year, month, day = teksti[:4], teksti[4:6], teksti[6:]
    return date(int(year), int(month), int(day))
28 | ||
def read_time(teksti):
    ''' Parses an H:M:S string into a timedelta. Hours are not limited to 0-23. '''
    fields = [int(field) for field in teksti.split(':')]
    hour, minute, second = fields
    return timedelta(hours = hour, minutes = minute, seconds = second)
32 | ||
print('Loading dates... ', file = stderr)
stderr.flush()
# Start from today; add_day_to_service raises this to the latest service day it sees.
viimeinen_käyttöpäivä = date.today()
35 | ||
def date_range(start_date, end_date, *, include_end = False):
    '''
    Generates consecutive days from start_date up to end_date.

    start_date is yielded first and end_date itself is excluded, unless
    include_end is True, in which case end_date is yielded last. Nothing is
    yielded when start_date is later than end_date, even with include_end.
    '''
    one_day = timedelta(days = 1)
    current_date = start_date
    while current_date < end_date:
        yield current_date
        current_date += one_day
    # Only close the range with end_date when the range is not reversed;
    # otherwise an empty range would still produce one date.
    if include_end and start_date <= end_date:
        yield end_date
44 | ||
def add_day_to_service(service_name, day):
    '''
    Marks the named service as running on the given day.

    Unknown service names are ignored. Also records the service under
    services_for_day[day] and advances viimeinen_käyttöpäivä if day is later.
    '''
    global viimeinen_käyttöpäivä
    try:
        service = services[service_name]
    except KeyError:
        # No such service was registered: nothing to do.
        return
    service.dates.add(day)
    services_for_day.setdefault(day, set()).add(service)
    viimeinen_käyttöpäivä = max(day, viimeinen_käyttöpäivä)
57 | ||
def filter_day(row, day):
    ''' Returns the integer value of the row's weekday column that matches the given day. '''
    weekday_columns = ('monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday')
    # weekday() is 0 for Monday, the same index as isoweekday() - 1.
    column = weekday_columns[day.weekday()]
    return int(row[column])
61 | ||
# calendar.txt: expand each weekly pattern into the concrete days it covers.
with gtfs_zip.open('calendar.txt') as file:
    for row in read_csv(map(bytes.decode, file)):
        for day in date_range(read_date(row['start_date']), read_date(row['end_date']), include_end = True):
            if filter_day(row, day):
                add_day_to_service(service_name = row['service_id'], day = day)

# calendar_dates.txt: per-date exceptions, applied after the weekly patterns.
# GTFS exception_type 1 adds service on the date and 2 removes it.
with gtfs_zip.open('calendar_dates.txt') as file:
    for row in read_csv(map(bytes.decode, file)):
        day = read_date(row['date'])
        # A missing or empty exception_type is treated as an addition.
        exception_type = (row.get('exception_type') or '1').strip()
        if exception_type == '2':
            if row['service_id'] in services:
                service = services[row['service_id']]
                # NOTE(review): assumes service.dates is a set (it is filled with .add()) — confirm.
                service.dates.discard(day)
                if day in services_for_day:
                    services_for_day[day].discard(service)
        else:
            add_day_to_service(service_name = row['service_id'], day = day)
71 | ||
def services_available_at(day):
    ''' Generates every known service whose dates contain the given day. '''
    yield from (service for service in services.values() if day in service.dates)
76 | ||
print('Loading stops... ', file = stderr, end = '', flush = True)
# Read stops.txt: one BusStop per row, indexed by its reference.
with gtfs_zip.open('stops.txt') as stops_file:
    for row in read_csv(map(bytes.decode, stops_file)):
        latitude = float(row['stop_lat'])
        longitude = float(row['stop_lon'])
        stop = BusStop(
            reference = row['stop_id'],
            name = row['stop_name'],
            location = Location(latitude, longitude),
            # Use the stop id as the code when there is no stop_code column.
            code = row.get('stop_code', row['stop_id']),
        )
        bus_stops[stop.reference] = stop
# Optionally assign regions from a side file; entries for unknown stops are skipped.
if profile['regions']['use-regions']:
    with open('regions-per-stop.json') as regions_file:
        regions_per_stop = json.load(regions_file)
        for stop_reference, region in regions_per_stop.items():
            try:
                bus_stops[stop_reference].region = region
            except KeyError:
                continue
# Make sure every stop has a region attribute, even if it is only None.
for bus_stop in bus_stops.values():
    if not hasattr(bus_stop, 'region'):
        bus_stop.region = None
print('%d stops' % len(bus_stops), file = stderr)
99 | ||
from collections import defaultdict
# Group the bus stops by name: name -> set of stops carrying that name.
stops_grouped_by_name = defaultdict(set)
for bus_stop in bus_stops.values():
    stops_grouped_by_name[bus_stop.name].add(bus_stop)
# Freeze into a plain dict so that looking up an unknown name raises KeyError.
bus_stops_by_name = dict(stops_grouped_by_name)

# Clusters are rebuilt from scratch in a module-level list.
global all_clusters
all_clusters = []
def cluster_gtfs_stops():
    '''
    Groups bus stops into clusters, appending new clusters to all_clusters.

    First, same-named stops within the distance threshold of each other are
    put in one cluster. Then each stop records the other members of its
    cluster as its pairs. Finally, every cluster that still holds a single
    stop is merged into the nearest other cluster within the threshold.
    '''
    # NOTE(review): unit of the threshold depends on Location.distance (presumably km) — confirm.
    max_distance = 0.4
    sorted_gtfs_stops = sorted(bus_stops.values(), key = lambda bus_stop: bus_stop.name)
    for bus_stop in sorted_gtfs_stops:
        if bus_stop.cluster:
            continue
        stops_to_cluster = {bus_stop}
        # Find the stop's same-named counterparts nearby.
        for pair_candidate in bus_stops_by_name[bus_stop.name]:
            distance = pair_candidate.location.distance(bus_stop.location)
            if pair_candidate is not bus_stop and distance <= max_distance:
                stops_to_cluster.add(pair_candidate)
        # Reuse a cluster that one of the stops already belongs to; create one otherwise.
        for stop_to_cluster in stops_to_cluster:
            if stop_to_cluster.cluster:
                cluster = stop_to_cluster.cluster
                break
        else:
            cluster = BusStopCluster()
            all_clusters.append(cluster)
        for stop_to_cluster in stops_to_cluster:
            if not stop_to_cluster.cluster:
                cluster.add_stop(stop_to_cluster)
    # Record each stop's counterparts using the clustering as it stands now.
    for bus_stop in bus_stops.values():
        if bus_stop.cluster:
            bus_stop.pairs = bus_stop.cluster.stops - {bus_stop}
    # Merge the stops that did not get a counterpart of their own into a nearby cluster.
    for bus_stop in sorted_gtfs_stops:
        if len(bus_stop.cluster.stops) != 1:
            continue
        nearest = None
        nearest_distance = None
        for cluster in all_clusters:
            if cluster is bus_stop.cluster:
                continue
            distance = cluster.center.distance(bus_stop.location)
            # Strict comparison: on equal distances the earliest cluster in
            # all_clusters wins, so cluster objects are never compared.
            if distance <= max_distance and (nearest is None or distance < nearest_distance):
                nearest = cluster
                nearest_distance = distance
        if nearest is not None:
            all_clusters.remove(bus_stop.cluster)
            nearest.merge(bus_stop.cluster)
146 | ||
def shared_elements_in_n_sets(sets):
    '''
    Returns the set of elements that occur in at least two of the given sets.

    sets may be any iterable (including a generator) of iterables of hashable
    elements. The result is empty when fewer than two sets are given.
    '''
    from collections import Counter
    # Count in how many of the inputs each element appears. set() removes
    # duplicates within a single input so that it is counted only once.
    occurrences = Counter()
    for members in sets:
        occurrences.update(set(members))
    return {element for element, count in occurrences.items() if count >= 2}
153 | ||
def name_clusters():
    '''
    Gives every cluster in all_clusters a name.

    Each cluster proposes the name of its stop with the shortest (then
    smallest) reference. A proposal made by a single cluster is used as is.
    When several clusters propose the same name and regions are enabled in
    the profile, the clusters are told apart by a region unique to each;
    if that does not work, they are numbered instead.
    '''
    from collections import defaultdict
    clusters_per_name = defaultdict(set)
    for cluster in all_clusters:
        name_representing_stop = min(
            cluster.stops,
            key = lambda stop: (len(stop.reference), stop.reference),
        )
        clusters_per_name[name_representing_stop.name].add(cluster)
    for name, clusters in clusters_per_name.items():
        if len(clusters) == 1:
            # Simple case: this cluster is the only one that wants this name.
            next(iter(clusters)).name = name
            continue
        if profile['regions']['use-regions']:
            # Regions that appear in more than one of the clusters cannot tell them apart.
            common_regions = shared_elements_in_n_sets({stop.region for stop in cluster.stops} for cluster in clusters)
            # Proposal: cluster -> the regions unique to the cluster
            proposal = {
                cluster: {stop.region for stop in cluster.stops} - common_regions - {None}
                for cluster in clusters
            }
            # If at most one cluster is without its own unique region, name the others by region and this one without any.
            if sum(1 for unique_areas in proposal.values() if not unique_areas) <= 1:
                for cluster, unique_areas in proposal.items():
                    individual_cluster_name = name
                    if unique_areas:
                        individual_cluster_name += ' (' + min(unique_areas) + ')'
                    cluster.name = individual_cluster_name
                # This name is settled; carry on with the next name rather
                # than abandoning the remaining ones.
                continue
        # If all else fails, just number them, ordered by each cluster's smallest lower-cased stop reference.
        numbered_clusters = sorted(
            clusters,
            key = lambda cluster: min(stop.reference.lower() for stop in cluster.stops),
        )
        for n, cluster in enumerate(numbered_clusters, 1):
            cluster.name = name + '-' + str(n)
188 | ||
print('Clustering bus stops...')
cluster_gtfs_stops()
name_clusters()
# Index the clusters by URL name; the first cluster to claim a name keeps it.
for cluster in all_clusters:
    url_name = cluster.url_name
    if url_name not in clusters_by_name:
        clusters_by_name[url_name] = cluster
        continue
    print('Warning: Clusters %r and %r share the same URL name: %r' % (cluster.name, clusters_by_name[url_name].name, url_name))
print('Loading schedules... ', end = '', flush = True, file = stderr)
# First pass: count the newlines so that progress can be shown as a percentage.
with gtfs_zip.open('stop_times.txt') as file:
    row_count = sum(line.count(b'\n') for line in file)
# The modifier does not change between rows, so convert it once.
shape_modifier = float(profile['metrics']['shape-modifier'])
# Second pass: append a BusHalt to the trip's schedule for every row.
with gtfs_zip.open('stop_times.txt') as file:
    progress = 0
    for row in read_csv(map(bytes.decode, file)):
        # Skip rows where both pickup_type and drop_off_type are non-zero.
        if int(row.get('pickup_type', '') or '0') and int(row.get('drop_off_type', '') or '0'):
            continue
        trip = all_trips[transform_trip_reference(row['trip_id'])]
        arrival_time = read_time(row['arrival_time'])
        departure_time = read_time(row['departure_time'])
        stop = bus_stops[row['stop_id']]
        # shape_dist_traveled is optional: treat an empty field like a missing
        # column (distance 1) instead of failing on float('').
        raw_distance = (row.get('shape_dist_traveled') or '').strip()
        traveled_distance = float(raw_distance or 1) * shape_modifier
        visitnumber = len(trip.schedule) + 1
        trip.schedule.append(BusHalt(arrival_time, departure_time, stop, trip, traveled_distance, visitnumber))
        stop.involved_trips.add(trip)
        progress += 1
        if progress % 1000 == 0:
            print('\rLoading schedules... %.1f%%' % (progress * 100 / row_count), end = ' ', file = stderr)
print('\rLoading schedules... complete', file = stderr)
# Give every trip a simplified origin and destination place name.
for trip in all_trips.values():
    from busroute import simplify_name
    schedule = trip.concise_schedule()
    try:
        origin = simplify_name(schedule[0])
        destination = simplify_name(schedule[-1])
    except IndexError:
        # Empty schedule: there are no places to name.
        origin = destination = ''
    trip.from_place = origin
    trip.to_place = destination
from collections import Counter
from busroute import simplify_name
# Describe each route by the end points of its most common itinerary and
# order its trips by first departure time.
for route in routes.values():
    tally = Counter()
    for trip in route.trips:
        schedule = trip.concise_schedule()
        places = set(schedule)
        do_add = True
        assert type(schedule) is list
        # A trip whose places are all contained in an itinerary already seen
        # counts as one more occurrence of that itinerary.
        for candidate in tally:
            if places.issubset(set(candidate)):
                do_add = False
                tally[candidate] += 1
        if do_add:
            tally.update({tuple(schedule)})
    try:
        most_common_route = tally.most_common(1)[0][0]
        route.description = simplify_name(most_common_route[0]) + ' - ' + simplify_name(most_common_route[-1])
    except IndexError:
        # No trips at all, or the most common itinerary has no places.
        route.description = ''
    # route.trips becomes a list; trips without a schedule sort as departing at 0:00.
    route.trips = sorted(route.trips, key = lambda trip: trip.schedule and trip.schedule[0].departure_time or timedelta())
if 'compatibility' in profile and profile['compatibility'].get('fix-destination-times', False):
    # There seems to be something strange going on in Föli's gtfs data.
    # It seems that sometimes the arrival time of the last stop is
    # completely off, so try estimate when the bus will really arrive
    # there based on the last leg distance.
    # I noticed this for bus 220's arrival time at Mylly several years
    # ago. Possibly this has been fixed in the data by now?
    for trip in all_trips.values():
        if len(trip.schedule) < 2:
            # A last leg needs at least two halts.
            continue
        bus_speed_coefficient = 750 # meters per minute
        final_halt = trip.schedule[-1]
        previous_halt = trip.schedule[-2]
        last_leg_distance = final_halt.traveled_distance - previous_halt.traveled_distance
        last_leg_duration = timedelta(minutes = last_leg_distance / bus_speed_coefficient)
        final_halt.arrival_time = previous_halt.departure_time + last_leg_duration
# Add services to all bus stops: every stop a route halts at records that route's service.
for route in routes.values():
    route_halts = (halt for trip in route.trips for halt in trip.schedule)
    for halt in route_halts:
        halt.stop.services.add(route.service)
262 | for halt in trip.schedule: | |
263 | halt.stop.services.add(route.service) |