# Module-level registries. Only the uses visible in this part of the file are described.
all_clusters = set()  # stop clusters; iterated when clusters are named and indexed by URL name
viimeinen_käyttöpäivä = None  # Finnish: "last day of use"; load_buses() rebinds it via `global`
clusters_by_name = {}  # cluster.url_name -> cluster
services_for_day = {}  # NOTE(review): presumably day -> services; its use is not visible here
175 |
175 |
176 def load_buses(gtfs_zip_path): |
176 def load_buses(gtfs_zip_path, regions): |
177 global viimeinen_käyttöpäivä |
177 global viimeinen_käyttöpäivä |
178 from zipfile import ZipFile |
178 from zipfile import ZipFile |
179 with ZipFile(gtfs_zip_path) as gtfs_zip: |
179 with ZipFile(gtfs_zip_path) as gtfs_zip: |
180 print('Ladataan linjat... ', file = stderr, end = '', flush = True) |
180 print('Loading routes... ', file = stderr, end = '', flush = True) |
181 with gtfs_zip.open('routes.txt') as file: |
181 with gtfs_zip.open('routes.txt') as file: |
182 for row in read_csv(map(bytes.decode, file)): |
182 for row in read_csv(map(bytes.decode, file)): |
183 route = BusRoute(row) |
183 route = BusRoute(row) |
184 routes[route.reference] = route |
184 routes[route.reference] = route |
185 routes_per_id[route.id] = route |
185 routes_per_id[route.id] = route |
186 print('%d linjaa' % len(routes), file = stderr) |
186 print('%d routes' % len(routes), file = stderr) |
187 |
187 print('Loading trips... ', file = stderr, end = '', flush = True) |
188 print('Ladataan ajovuorot... ', file = stderr, end = '', flush = True) |
|
189 |
|
190 shape_distances = {} |
188 shape_distances = {} |
191 try: |
189 try: |
192 with gtfs_zip.open('shapes.txt') as file: |
190 with gtfs_zip.open('shapes.txt') as file: |
193 for row in read_csv(map(bytes.decode, file)): |
191 for row in read_csv(map(bytes.decode, file)): |
194 shape_distances[row['shape_id']] = max(shape_distances.get(row['shape_id'], 0), float(row['shape_dist_traveled'])) |
192 shape_distances[row['shape_id']] = max(shape_distances.get(row['shape_id'], 0), float(row['shape_dist_traveled'])) |
210 route.trips.add(trip) |
208 route.trips.add(trip) |
211 if trip.name in all_trips: |
209 if trip.name in all_trips: |
212 print('Trip %s already exists' % trip.name) |
210 print('Trip %s already exists' % trip.name) |
213 else: |
211 else: |
214 all_trips[trip.name] = trip |
212 all_trips[trip.name] = trip |
215 print('%d ajoa' % len(all_trips), file = stderr) |
213 print('%d trips' % len(all_trips), file = stderr) |
216 |
214 |
def read_date(teksti):
    """Parse a ``YYYYMMDD`` date string into a ``datetime.date``.

    The first four characters are the year, the next two the month and the
    remainder the day. Surrounding whitespace is removed first, because the
    slices are positional and a stray leading space would otherwise shift
    every field.

    Raises ValueError if a field is not an integer or the result is not a
    valid calendar date.
    """
    teksti = teksti.strip()
    return date(int(teksti[:4]), int(teksti[4:6]), int(teksti[6:]))
219 |
217 |
def read_time(teksti):
    """Parse an ``H:MM:SS`` string into a ``datetime.timedelta``.

    The result is a duration rather than a clock time, so hour values of 24
    or more are preserved as-is instead of being rejected.

    Raises ValueError if the text does not consist of exactly three
    colon-separated integers.
    """
    hour, minute, second = map(int, teksti.split(':'))
    return timedelta(hours = hour, minutes = minute, seconds = second)
223 |
221 |
224 print('Ladataan päiväykset... ', file = stderr, flush = True) |
222 print('Ladataan päiväykset... ', file = stderr, flush = True) |
225 |
223 |
226 viimeinen_käyttöpäivä = date.today() |
224 viimeinen_käyttöpäivä = date.today() |
227 |
225 |
def services_available_at(day):
    """Yield each service in ``services`` whose ``dates`` contains ``day``.

    This is a generator: ``services`` is only read once iteration starts.
    """
    for service in services.values():
        if day in service.dates:
            yield service
268 |
266 |
269 print('Ladataan pysäkit... ', file = stderr, end = '', flush = True) |
267 print('Loading stops... ', file = stderr, end = '', flush = True) |
270 with gtfs_zip.open('stops.txt') as file: |
268 with gtfs_zip.open('stops.txt') as file: |
271 for row in read_csv(map(bytes.decode, file)): |
269 for row in read_csv(map(bytes.decode, file)): |
272 location = Sijainti(float(row['stop_lat']), float(row['stop_lon'])) |
270 location = Location(float(row['stop_lat']), float(row['stop_lon'])) |
273 stop = BusStop( |
271 stop = BusStop( |
274 reference = row['stop_id'], |
272 reference = row['stop_id'], |
275 name = row['stop_name'], |
273 name = row['stop_name'], |
276 location = location, |
274 location = location, |
277 code = row.get('stop_code', row['stop_id']), |
275 code = row.get('stop_code', row['stop_id']), |
285 except KeyError: |
283 except KeyError: |
286 pass |
284 pass |
287 for bus_stop in bus_stops.values(): |
285 for bus_stop in bus_stops.values(): |
288 if not hasattr(bus_stop, 'region'): |
286 if not hasattr(bus_stop, 'region'): |
289 bus_stop.region = None |
287 bus_stop.region = None |
290 print('%d pysäkkiä' % len(bus_stops), file = stderr) |
288 print('%d stops' % len(bus_stops), file = stderr) |
291 |
|
292 |
289 |
293 class BusStopCluster: |
290 class BusStopCluster: |
def __init__(self):
    """Create an empty cluster that has no name yet."""
    self.stops = set()  # member stops
    self.cached_center = None  # memoised center point; None means "not computed"
    self.name = None  # display name, assigned after clustering
@property
def url_name(self):
    """The cluster name in URL form: lower-cased, parentheses removed, spaces as hyphens.

    Raises AttributeError while ``name`` is still None.
    """
    return self.name.lower().replace('(', '').replace(')', '').replace(' ', '-')
def add_stop(self, stop):
    """Add ``stop`` to this cluster and point ``stop.cluster`` back at it.

    The stop must not already belong to a cluster; this is enforced with an
    ``assert`` and is therefore not checked when Python runs with ``-O``.
    """
    assert not stop.cluster
    stop.cluster = self
    self.stops.add(stop)
    # Membership changed, so any memoised center point is out of date.
    self.cached_center = None
@property
def center(self):
    """Center point of the cluster: the per-coordinate median of its stops' locations.

    The point is built with the same type as the stops' ``location`` objects
    and memoised in ``cached_center`` until the membership changes.

    Raises ValueError if the cluster has no stops.
    """
    # Compare against None explicitly: a point object that happens to be
    # falsy must still count as a valid cached value.
    if self.cached_center is None:
        if not self.stops:
            raise ValueError('an empty cluster has no center point')
        from statistics import median
        pointtype = type(next(iter(self.stops)).location)
        self.cached_center = pointtype(
            median(stop.location.x for stop in self.stops),
            median(stop.location.y for stop in self.stops),
        )
    return self.cached_center
def merge(self, other):
    """Move every stop of ``other`` into this cluster, leaving ``other`` empty.

    Each moved stop's ``cluster`` attribute is re-pointed at this cluster.
    Merging a cluster into itself is a no-op.
    """
    if other is self:
        # Without this guard the final reset below would empty this cluster.
        return
    for bus_stop in other.stops:
        bus_stop.cluster = self
    self.stops |= other.stops
    other.stops = set()
    other.cached_center = None
    # This cluster's membership changed as well, so its memoised center is stale.
    self.cached_center = None
325 def schedule(self, *, max_amount = 50): |
322 def schedule(self, *, max_amount = 50): |
326 result = [] |
323 result = [] |
327 for stop in self.stops: |
324 for stop in self.stops: |
328 result += stop.schedule(max_amount = max_amount) |
325 result += stop.schedule(max_amount = max_amount) |
329 result.sort(key = lambda schedule_entry: schedule_entry['time']) |
326 result.sort(key = lambda schedule_entry: schedule_entry['time']) |
343 for bus_stop in sorted_bus_stops: |
340 for bus_stop in sorted_bus_stops: |
344 if not bus_stop.cluster: |
341 if not bus_stop.cluster: |
345 stops_to_cluster = {bus_stop} |
342 stops_to_cluster = {bus_stop} |
346 # etsi pysäkin samannimiset vastaparit |
343 # etsi pysäkin samannimiset vastaparit |
347 for pair_candidate in bus_stops_by_name[bus_stop.name]: |
344 for pair_candidate in bus_stops_by_name[bus_stop.name]: |
348 distance = pair_candidate.location.etäisyys(bus_stop.location) |
345 distance = pair_candidate.location.distance(bus_stop.location) |
349 if pair_candidate is not bus_stop and distance <= 0.4: |
346 if pair_candidate is not bus_stop and distance <= 0.4: |
350 stops_to_cluster.add(pair_candidate) |
347 stops_to_cluster.add(pair_candidate) |
351 for stop_to_cluster in stops_to_cluster: |
348 for stop_to_cluster in stops_to_cluster: |
352 if stop_to_cluster.cluster: |
349 if stop_to_cluster.cluster: |
353 cluster = stop_to_cluster.cluster |
350 cluster = stop_to_cluster.cluster |
383 result |= pair[0] & pair[1] |
380 result |= pair[0] & pair[1] |
384 return result |
381 return result |
385 |
382 |
def name_clusters():
    """Give every cluster in ``all_clusters`` a ``name``.

    Each cluster first claims the name of its representative stop: the stop
    with the shortest ``reference``, ties broken by the reference text. Then,
    per claimed name:

    * a single claimant simply receives the name;
    * with several claimants and ``profile['regions']['use-regions']`` set,
      clusters are told apart by a region suffix, ``"name (region)"``,
      provided at most one of them lacks a region of its own (that one keeps
      the bare name);
    * otherwise the claimants are numbered ``"name-1"``, ``"name-2"``, ...
      in order of their smallest lower-cased stop reference.

    Raises ValueError if a cluster has no stops.
    """
    from collections import defaultdict

    def regions_of(cluster):
        # Regions of the cluster's stops; may include None for stops without one.
        return {stop.region for stop in cluster.stops}

    clusters_per_name = defaultdict(set)
    for cluster in all_clusters:
        # Select by key so that ties never fall back to comparing stop objects.
        name_representing_stop = min(
            cluster.stops,
            key = lambda stop: (len(stop.reference), stop.reference),
        )
        clusters_per_name[name_representing_stop.name].add(cluster)

    for name, clusters in clusters_per_name.items():
        if len(clusters) == 1:
            # Simple case: this cluster is the only one that wants this name.
            next(iter(clusters)).name = name
            continue

        if profile['regions']['use-regions']:
            # Regions that more than one claimant has cannot tell them apart.
            # NOTE(review): assumes shared_elements_in_n_sets returns the
            # elements found in at least two of the given sets - confirm.
            common_regions = shared_elements_in_n_sets(regions_of(cluster) for cluster in clusters)
            # Proposal: cluster -> the regions unique to that cluster.
            proposal = {
                cluster: regions_of(cluster) - common_regions - {None}
                for cluster in clusters
            }
            # If at most one cluster is without its own unique region, name
            # the others by region and leave that one without a suffix.
            if sum(1 for unique_areas in proposal.values() if not unique_areas) <= 1:
                for cluster, unique_areas in proposal.items():
                    individual_cluster_name = name
                    if unique_areas:
                        individual_cluster_name += ' (' + min(unique_areas) + ')'
                    cluster.name = individual_cluster_name
                # `continue`, not `break`: the remaining names still have to
                # be processed, and the numbering below must not overwrite
                # the names that were just assigned.
                continue

        # If all else fails, just number them.
        ordered_clusters = sorted(
            clusters,
            key = lambda cluster: min(stop.reference.lower() for stop in cluster.stops),
        )
        for n, cluster in enumerate(ordered_clusters, 1):
            cluster.name = name + '-' + str(n)
423 |
417 |
424 print('Ryhmitellään pysäkit...') |
418 print('Clustering bus stops...') |
425 cluster_bus_stops() |
419 cluster_bus_stops() |
426 name_clusters() |
420 name_clusters() |
427 |
421 |
428 for cluster in all_clusters: |
422 for cluster in all_clusters: |
429 if cluster.url_name in clusters_by_name: |
423 if cluster.url_name in clusters_by_name: |
430 print('Warning: Clusters %r and %r share the same URL name: %r' % (cluster.name, clusters_by_name[cluster.url_name].name, cluster.url_name)) |
424 print('Warning: Clusters %r and %r share the same URL name: %r' % (cluster.name, clusters_by_name[cluster.url_name].name, cluster.url_name)) |
431 else: |
425 else: |
432 clusters_by_name[cluster.url_name] = cluster |
426 clusters_by_name[cluster.url_name] = cluster |
433 |
427 |
434 print('Ladataan aikataulut... ', end = '', flush = True, file = stderr) |
428 print('Loading schedules... ', end = '', flush = True, file = stderr) |
435 with gtfs_zip.open('stop_times.txt') as file: |
429 with gtfs_zip.open('stop_times.txt') as file: |
436 row_count = sum(line.count(b'\n') for line in file) |
430 row_count = sum(line.count(b'\n') for line in file) |
437 with gtfs_zip.open('stop_times.txt') as file: |
431 with gtfs_zip.open('stop_times.txt') as file: |
438 progress = 0 |
432 progress = 0 |
439 for row in read_csv(map(bytes.decode, file)): |
433 for row in read_csv(map(bytes.decode, file)): |
446 traveled_distance = float(row.get('shape_dist_traveled', 1)) * float(profile['metrics']['shape-modifier']) |
440 traveled_distance = float(row.get('shape_dist_traveled', 1)) * float(profile['metrics']['shape-modifier']) |
447 trip.schedule.append(BusHalt(arrival_time, departure_time, stop, trip, traveled_distance)) |
441 trip.schedule.append(BusHalt(arrival_time, departure_time, stop, trip, traveled_distance)) |
448 stop.involved_trips.add(trip) |
442 stop.involved_trips.add(trip) |
449 progress += 1 |
443 progress += 1 |
450 if progress % 1000 == 0: |
444 if progress % 1000 == 0: |
451 print('\rLadataan aikataulut... %.1f%%' % (progress * 100 / row_count), end = ' ', file = stderr) |
445 print('\rLoading schedules... %.1f%%' % (progress * 100 / row_count), end = ' ', file = stderr) |
452 print('\rLadataan aikataulut... ladattu', file = stderr) |
446 print('\rLoading schedules... complete', file = stderr) |
453 |
447 |
454 for trip in all_trips.values(): |
448 for trip in all_trips.values(): |
455 from busroute import simplify_name |
449 from busroute import simplify_name |
456 schedule = trip.concise_schedule() |
450 schedule = trip.concise_schedule() |
457 try: |
451 try: |