buses.py

changeset 88
3b86597c5a88
parent 81
d389cdabda00
child 90
36efdea68d03
equal deleted inserted replaced
87:9139a94e540c 88:3b86597c5a88
171 all_clusters = set() 171 all_clusters = set()
172 viimeinen_käyttöpäivä = None 172 viimeinen_käyttöpäivä = None
173 clusters_by_name = {} 173 clusters_by_name = {}
174 services_for_day = {} 174 services_for_day = {}
175 175
176 def load_buses(gtfs_zip_path): 176 def load_buses(gtfs_zip_path, regions):
177 global viimeinen_käyttöpäivä 177 global viimeinen_käyttöpäivä
178 from zipfile import ZipFile 178 from zipfile import ZipFile
179 with ZipFile(gtfs_zip_path) as gtfs_zip: 179 with ZipFile(gtfs_zip_path) as gtfs_zip:
180 print('Ladataan linjat... ', file = stderr, end = '', flush = True) 180 print('Loading routes... ', file = stderr, end = '', flush = True)
181 with gtfs_zip.open('routes.txt') as file: 181 with gtfs_zip.open('routes.txt') as file:
182 for row in read_csv(map(bytes.decode, file)): 182 for row in read_csv(map(bytes.decode, file)):
183 route = BusRoute(row) 183 route = BusRoute(row)
184 routes[route.reference] = route 184 routes[route.reference] = route
185 routes_per_id[route.id] = route 185 routes_per_id[route.id] = route
186 print('%d linjaa' % len(routes), file = stderr) 186 print('%d routes' % len(routes), file = stderr)
187 187 print('Loading trips... ', file = stderr, end = '', flush = True)
188 print('Ladataan ajovuorot... ', file = stderr, end = '', flush = True)
189
190 shape_distances = {} 188 shape_distances = {}
191 try: 189 try:
192 with gtfs_zip.open('shapes.txt') as file: 190 with gtfs_zip.open('shapes.txt') as file:
193 for row in read_csv(map(bytes.decode, file)): 191 for row in read_csv(map(bytes.decode, file)):
194 shape_distances[row['shape_id']] = max(shape_distances.get(row['shape_id'], 0), float(row['shape_dist_traveled'])) 192 shape_distances[row['shape_id']] = max(shape_distances.get(row['shape_id'], 0), float(row['shape_dist_traveled']))
210 route.trips.add(trip) 208 route.trips.add(trip)
211 if trip.name in all_trips: 209 if trip.name in all_trips:
212 print('Trip %s already exists' % trip.name) 210 print('Trip %s already exists' % trip.name)
213 else: 211 else:
214 all_trips[trip.name] = trip 212 all_trips[trip.name] = trip
215 print('%d ajoa' % len(all_trips), file = stderr) 213 print('%d trips' % len(all_trips), file = stderr)
216 214
217 def read_date(teksti): 215 def read_date(teksti):
218 return date(int(teksti[:4]), int(teksti[4:6]), int(teksti[6:])) 216 return date(int(teksti[:4]), int(teksti[4:6]), int(teksti[6:]))
219 217
220 def read_time(teksti): 218 def read_time(teksti):
221 tunti, minuutti, sekunti = map(int, teksti.split(':')) 219 hour, minute, second = map(int, teksti.split(':'))
222 return timedelta(hours = tunti, minutes = minuutti, seconds = sekunti) 220 return timedelta(hours = hour, minutes = minute, seconds = second)
223 221
224 print('Ladataan päiväykset... ', file = stderr, flush = True) 222 print('Ladataan päiväykset... ', file = stderr, flush = True)
225 223
226 viimeinen_käyttöpäivä = date.today() 224 viimeinen_käyttöpäivä = date.today()
227 225
264 def services_available_at(day): 262 def services_available_at(day):
265 for service in services.values(): 263 for service in services.values():
266 if day in service.dates: 264 if day in service.dates:
267 yield service 265 yield service
268 266
269 print('Ladataan pysäkit... ', file = stderr, end = '', flush = True) 267 print('Loading stops... ', file = stderr, end = '', flush = True)
270 with gtfs_zip.open('stops.txt') as file: 268 with gtfs_zip.open('stops.txt') as file:
271 for row in read_csv(map(bytes.decode, file)): 269 for row in read_csv(map(bytes.decode, file)):
272 location = Sijainti(float(row['stop_lat']), float(row['stop_lon'])) 270 location = Location(float(row['stop_lat']), float(row['stop_lon']))
273 stop = BusStop( 271 stop = BusStop(
274 reference = row['stop_id'], 272 reference = row['stop_id'],
275 name = row['stop_name'], 273 name = row['stop_name'],
276 location = location, 274 location = location,
277 code = row.get('stop_code', row['stop_id']), 275 code = row.get('stop_code', row['stop_id']),
285 except KeyError: 283 except KeyError:
286 pass 284 pass
287 for bus_stop in bus_stops.values(): 285 for bus_stop in bus_stops.values():
288 if not hasattr(bus_stop, 'region'): 286 if not hasattr(bus_stop, 'region'):
289 bus_stop.region = None 287 bus_stop.region = None
290 print('%d pysäkkiä' % len(bus_stops), file = stderr) 288 print('%d stops' % len(bus_stops), file = stderr)
291
292 289
293 class BusStopCluster: 290 class BusStopCluster:
294 def __init__(self): 291 def __init__(self):
295 self.stops = set() 292 self.stops = set()
296 self._center = None 293 self.cached_center = None
297 self.name = None 294 self.name = None
298 @property 295 @property
299 def url_name(self): 296 def url_name(self):
300 return self.name.lower().replace('(', '').replace(')', '').replace(' ', '-') 297 return self.name.lower().replace('(', '').replace(')', '').replace(' ', '-')
301 def add_stop(self, stop): 298 def add_stop(self, stop):
302 assert not stop.cluster 299 assert not stop.cluster
303 stop.cluster = self 300 stop.cluster = self
304 self.stops.add(stop) 301 self.stops.add(stop)
305 self._center = None 302 self.cached_center = None
306 @property 303 @property
307 def center(self): 304 def center(self):
308 if not self._center: 305 if not self.cached_center:
309 if self.stops: 306 if self.stops:
310 from statistics import median 307 from statistics import median
311 pointtype = type(next(iter(self.stops)).location) 308 pointtype = type(next(iter(self.stops)).location)
312 self._center = pointtype( 309 self.cached_center = pointtype(
313 median(stop.location.x for stop in self.stops), 310 median(stop.location.x for stop in self.stops),
314 median(stop.location.y for stop in self.stops), 311 median(stop.location.y for stop in self.stops),
315 ) 312 )
316 else: 313 else:
317 raise ValueError('an empty cluster has no center point') 314 raise ValueError('an empty cluster has no center point')
318 return self._center 315 return self.cached_center
319 def merge(self, other): 316 def merge(self, other):
320 for bus_stop in other.stops: 317 for bus_stop in other.stops:
321 bus_stop.cluster = self 318 bus_stop.cluster = self
322 self.stops |= other.stops 319 self.stops |= other.stops
323 other.stops = set() 320 other.stops = set()
324 other._center = None 321 other.cached_center = None
325 def schedule(self, *, max_amount = 50): 322 def schedule(self, *, max_amount = 50):
326 result = [] 323 result = []
327 for stop in self.stops: 324 for stop in self.stops:
328 result += stop.schedule(max_amount = max_amount) 325 result += stop.schedule(max_amount = max_amount)
329 result.sort(key = lambda schedule_entry: schedule_entry['time']) 326 result.sort(key = lambda schedule_entry: schedule_entry['time'])
343 for bus_stop in sorted_bus_stops: 340 for bus_stop in sorted_bus_stops:
344 if not bus_stop.cluster: 341 if not bus_stop.cluster:
345 stops_to_cluster = {bus_stop} 342 stops_to_cluster = {bus_stop}
346 # etsi pysäkin samannimiset vastaparit 343 # etsi pysäkin samannimiset vastaparit
347 for pair_candidate in bus_stops_by_name[bus_stop.name]: 344 for pair_candidate in bus_stops_by_name[bus_stop.name]:
348 distance = pair_candidate.location.etäisyys(bus_stop.location) 345 distance = pair_candidate.location.distance(bus_stop.location)
349 if pair_candidate is not bus_stop and distance <= 0.4: 346 if pair_candidate is not bus_stop and distance <= 0.4:
350 stops_to_cluster.add(pair_candidate) 347 stops_to_cluster.add(pair_candidate)
351 for stop_to_cluster in stops_to_cluster: 348 for stop_to_cluster in stops_to_cluster:
352 if stop_to_cluster.cluster: 349 if stop_to_cluster.cluster:
353 cluster = stop_to_cluster.cluster 350 cluster = stop_to_cluster.cluster
366 for bus_stop in sorted_bus_stops: 363 for bus_stop in sorted_bus_stops:
367 if len(bus_stop.cluster.stops) == 1: 364 if len(bus_stop.cluster.stops) == 1:
368 possibilities = set() 365 possibilities = set()
369 for cluster in all_clusters: 366 for cluster in all_clusters:
370 if cluster is not bus_stop.cluster: 367 if cluster is not bus_stop.cluster:
371 distance = cluster.center.etäisyys(bus_stop.location) 368 distance = cluster.center.distance(bus_stop.location)
372 if distance <= 0.4: 369 if distance <= 0.4:
373 possibilities.add((distance, cluster)) 370 possibilities.add((distance, cluster))
374 if possibilities: 371 if possibilities:
375 best = min(possibilities)[1] 372 best = min(possibilities)[1]
376 all_clusters.remove(bus_stop.cluster) 373 all_clusters.remove(bus_stop.cluster)
383 result |= pair[0] & pair[1] 380 result |= pair[0] & pair[1]
384 return result 381 return result
385 382
386 def name_clusters(): 383 def name_clusters():
387 from collections import defaultdict 384 from collections import defaultdict
388 from pprint import pprint
389 clusters_per_name = defaultdict(set) 385 clusters_per_name = defaultdict(set)
390 for cluster in all_clusters: 386 for cluster in all_clusters:
391 name_representing_stop = min((len(stop.reference), stop.reference, stop) for stop in cluster.stops)[2] 387 name_representing_stop = min((len(stop.reference), stop.reference, stop) for stop in cluster.stops)[2]
392 clusters_per_name[name_representing_stop.name].add(cluster) 388 clusters_per_name[name_representing_stop.name].add(cluster)
393 for name, clusters in clusters_per_name.items(): 389 for name, clusters in clusters_per_name.items():
394 if len(clusters) == 1: 390 if len(clusters) == 1:
395 # Ryhmä on ainoa jolla on varaus tälle nimelle. Sen kuin vaan. 391 # Simple case: this cluster is the only one that wants this name.
396 next(iter(clusters)).name = name 392 next(iter(clusters)).name = name
397 else: 393 else:
398 if profile['regions']['use-regions']: 394 if profile['regions']['use-regions']:
399 # Olisiko kaikki klusterit eri alueilla? 395 # Find out if all clusters are in different areas
400 common_regions = shared_elements_in_n_sets({stop.region for stop in cluster.stops} for cluster in clusters) 396 common_regions = shared_elements_in_n_sets({stop.region for stop in cluster.stops} for cluster in clusters)
401 # Esitys: ryhmä -> ne alueet jotka ovat tälle ryhmälle ainutlaatuisia 397 # Proposal: cluster -> the areas unique to the cluster
402 proposal = { 398 proposal = {
403 cluster: {stop.region for stop in cluster.stops} - common_regions - {None} 399 cluster: {stop.region for stop in cluster.stops} - common_regions - {None}
404 for cluster in clusters 400 for cluster in clusters
405 } 401 }
406 # Jos enintään yksi klusteri tässä esityksessä on kokonaan ilman omaa aluetta, jolla se voisi eritellä, 402 # If at most one cluster is without its own unique region, name the others by region and this one without any.
407 # niin nimetään klusterit näiden alueiden mukaan.
408 # Se klusteri jolla ei ole omaa aluetta (jos on) jätetään ilman aluepäätettä.
409 if sum([1 for unique_areas in proposal.values() if not unique_areas]) <= 1: 403 if sum([1 for unique_areas in proposal.values() if not unique_areas]) <= 1:
410 for cluster, unique_areas in proposal.items(): 404 for cluster, unique_areas in proposal.items():
411 individual_cluster_name = name 405 individual_cluster_name = name
412 if unique_areas: 406 if unique_areas:
413 individual_cluster_name += ' (' + min(unique_areas) + ')' 407 individual_cluster_name += ' (' + min(unique_areas) + ')'
414 cluster.name = individual_cluster_name 408 cluster.name = individual_cluster_name
415 break 409 break
416 # Typerä reunatapaus. Indeksoidaan numeroin... 410 # If all else fails, just number them.
417 for n, (_, cluster) in enumerate(sorted( 411 for n, (_, cluster) in enumerate(sorted(
418 min((stop.reference.lower(), cluster) for stop in cluster.stops) 412 min((stop.reference.lower(), cluster) for stop in cluster.stops)
419 for cluster in clusters 413 for cluster in clusters
420 ), 1): 414 ), 1):
421 individual_cluster_name = name + '-' + str(n) 415 individual_cluster_name = name + '-' + str(n)
422 cluster.name = individual_cluster_name 416 cluster.name = individual_cluster_name
423 417
424 print('Ryhmitellään pysäkit...') 418 print('Clustering bus stops...')
425 cluster_bus_stops() 419 cluster_bus_stops()
426 name_clusters() 420 name_clusters()
427 421
428 for cluster in all_clusters: 422 for cluster in all_clusters:
429 if cluster.url_name in clusters_by_name: 423 if cluster.url_name in clusters_by_name:
430 print('Warning: Clusters %r and %r share the same URL name: %r' % (cluster.name, clusters_by_name[cluster.url_name].name, cluster.url_name)) 424 print('Warning: Clusters %r and %r share the same URL name: %r' % (cluster.name, clusters_by_name[cluster.url_name].name, cluster.url_name))
431 else: 425 else:
432 clusters_by_name[cluster.url_name] = cluster 426 clusters_by_name[cluster.url_name] = cluster
433 427
434 print('Ladataan aikataulut... ', end = '', flush = True, file = stderr) 428 print('Loading schedules... ', end = '', flush = True, file = stderr)
435 with gtfs_zip.open('stop_times.txt') as file: 429 with gtfs_zip.open('stop_times.txt') as file:
436 row_count = sum(line.count(b'\n') for line in file) 430 row_count = sum(line.count(b'\n') for line in file)
437 with gtfs_zip.open('stop_times.txt') as file: 431 with gtfs_zip.open('stop_times.txt') as file:
438 progress = 0 432 progress = 0
439 for row in read_csv(map(bytes.decode, file)): 433 for row in read_csv(map(bytes.decode, file)):
446 traveled_distance = float(row.get('shape_dist_traveled', 1)) * float(profile['metrics']['shape-modifier']) 440 traveled_distance = float(row.get('shape_dist_traveled', 1)) * float(profile['metrics']['shape-modifier'])
447 trip.schedule.append(BusHalt(arrival_time, departure_time, stop, trip, traveled_distance)) 441 trip.schedule.append(BusHalt(arrival_time, departure_time, stop, trip, traveled_distance))
448 stop.involved_trips.add(trip) 442 stop.involved_trips.add(trip)
449 progress += 1 443 progress += 1
450 if progress % 1000 == 0: 444 if progress % 1000 == 0:
451 print('\rLadataan aikataulut... %.1f%%' % (progress * 100 / row_count), end = ' ', file = stderr) 445 print('\rLoading schedules... %.1f%%' % (progress * 100 / row_count), end = ' ', file = stderr)
452 print('\rLadataan aikataulut... ladattu', file = stderr) 446 print('\rLoading schedules... complete', file = stderr)
453 447
454 for trip in all_trips.values(): 448 for trip in all_trips.values():
455 from busroute import simplify_name 449 from busroute import simplify_name
456 schedule = trip.concise_schedule() 450 schedule = trip.concise_schedule()
457 try: 451 try:
500 trips_by_vehicle_info[(trip.block_id, trip.schedule[0].arrival_time)] = trip 494 trips_by_vehicle_info[(trip.block_id, trip.schedule[0].arrival_time)] = trip
501 495
502 if __name__ == '__main__': 496 if __name__ == '__main__':
503 profile.read('profiles/föli.ini') 497 profile.read('profiles/föli.ini')
504 load_buses('gtfs.zip') 498 load_buses('gtfs.zip')
499 import busroute
500 from regions import parse_regions
501 busroute.regions = parse_regions('föli.osm')

mercurial