buses.py

changeset 71
d2e19670b772
parent 42
0b53b7c70875
child 72
65408ed066b3
equal deleted inserted replaced
70:c2fc50748efd 71:d2e19670b772
8 8
def transform_trip_reference(reference):
    """Turn a raw trip reference into the name a trip is registered under.

    This is currently the identity mapping: the reference is handed back
    unchanged.
    """
    return reference
11 11
12 class BusTrip: 12 class BusTrip:
def __init__(self, reference, route, service, length, block_id = None):
    """Create a trip with an empty schedule.

    reference -- raw trip identifier; also used to derive ``self.name``
    route     -- the route object this trip belongs to
    service   -- the service object the trip runs under
    length    -- total length of the trip
    block_id  -- vehicle block identifier. Optional (defaults to None) so
                 that callers written before this attribute existed, and
                 feeds without block information, keep working.
    """
    self.reference = reference
    self.route = route
    self.service = service
    self.block_id = block_id
    self.length = length
    # Halts are appended later, in the order they are loaded.
    self.schedule = []
    self.name = transform_trip_reference(reference)
def __repr__(self):
    """Show the trip as the ``all_trips[...]`` expression for its name."""
    trip_name = self.name
    return 'all_trips[%r]' % trip_name
27 try: 27 try:
28 return self.service in services_for_day[day] 28 return self.service in services_for_day[day]
29 except KeyError: 29 except KeyError:
30 return False 30 return False
def concise_schedule(self, starting_stop = None):
    """Summarise the trip's route as a list of display names.

    starting_stop -- optional halt from ``self.schedule``. When given and
        present in the schedule, only that halt and the ones after it are
        considered; otherwise the whole schedule is used.

    With ``profile['regions']['use-regions']`` enabled, the result lists the
    regions of the considered stops, each once, in order of first
    appearance; stops without a region are skipped. With regions disabled,
    the result is the name of every considered stop.
    """
    schedule = self.schedule
    if starting_stop and starting_stop in schedule:
        # Slicing builds a new list, so self.schedule itself is untouched.
        schedule = schedule[schedule.index(starting_stop):]

    if not profile['regions']['use-regions']:
        # starting_stop must be honoured here too: sign() asks for the route
        # still ahead of a halt and pairs it with the remaining trip length.
        return [halt.stop.name for halt in schedule]

    used_areas = set()
    result = []
    for halt in schedule:
        region = halt.stop.region
        if region and region not in used_areas:
            used_areas.add(region)
            result.append(region)
    return result
45 48
46 class BusRoute: 49 class BusRoute:
47 def __init__(self, entry): 50 def __init__(self, entry):
48 self.id = entry['route_id'] 51 self.id = entry['route_id']
49 self.reference = entry['route_short_name'] 52 self.reference = entry['route_short_name']
128 self.arrival_time, self.departure_time, self.stop, self.trip = arrival_time, departure_time, \ 131 self.arrival_time, self.departure_time, self.stop, self.trip = arrival_time, departure_time, \
129 stop, trip 132 stop, trip
130 self.traveled_distance = traveled_distance 133 self.traveled_distance = traveled_distance
@property
def is_arrival(self):
    """Tell whether this halt counts as an arrival on its trip.

    With ``profile['regions']['use-regions']`` disabled, only the last halt
    of the trip's schedule qualifies. With regions enabled, a halt qualifies
    when its stop has a region and every later halt of the trip is in that
    same region; that answer is computed once and stored on the instance as
    ``cachedIsArrival``.
    """
    if not profile['regions']['use-regions']:
        return self == self.trip.schedule[-1]

    if hasattr(self, 'cachedIsArrival'):
        return self.cachedIsArrival

    own_region = self.stop.region
    if not own_region:
        self.cachedIsArrival = False
        return self.cachedIsArrival

    later_halts = iter(self.trip.schedule)
    # Consume the schedule up to and including this halt; what is left in
    # the iterator are the halts that follow it.
    while next(later_halts) is not self:
        pass
    self.cachedIsArrival = not any(
        halt.stop.region != own_region for halt in later_halts
    )
    return self.cachedIsArrival
def __repr__(self):
    """Show the halt as a ``BusHalt(...)`` call with its times, stop and trip."""
    shown_fields = (self.arrival_time, self.departure_time, self.stop, self.trip)
    return 'BusHalt(%r, %r, %r, %r)' % shown_fields
def sign(self, long = False):
    """Reduce the rest of the trip, seen from this halt, with ``reduce_schedule``.

    The route handed over is the trip's concise schedule starting at this
    halt, and the length is the trip length minus the distance already
    travelled at this halt. ``long`` is passed through unchanged.
    NOTE(review): presumably the result is the text for a destination sign;
    confirm against busroute.reduce_schedule.
    """
    # Imported here rather than at module level, as in the original.
    from busroute import reduce_schedule
    remaining_route = self.trip.concise_schedule(self)
    remaining_length = self.trip.length - self.traveled_distance
    return reduce_schedule(
        route = remaining_route,
        trip_length = remaining_length,
        long = long,
    )
150 163
151 routes = {} 164 routes = {}
152 routes_per_id = {} 165 routes_per_id = {}
153 all_trips = {} 166 all_trips = {}
154 services = {} 167 services = {}
156 all_clusters = set() 169 all_clusters = set()
157 viimeinen_käyttöpäivä = None 170 viimeinen_käyttöpäivä = None
158 clusters_by_name = {} 171 clusters_by_name = {}
159 services_for_day = {} 172 services_for_day = {}
160 173
161 def load_buses(gtfs_zip_path, profile): 174 def load_buses(gtfs_zip_path):
162 global viimeinen_käyttöpäivä 175 global viimeinen_käyttöpäivä
163 from zipfile import ZipFile 176 from zipfile import ZipFile
164 with ZipFile(gtfs_zip_path) as gtfs_zip: 177 with ZipFile(gtfs_zip_path) as gtfs_zip:
165 print('Ladataan linjat... ', file = stderr, end = '', flush = True) 178 print('Ladataan linjat... ', file = stderr, end = '', flush = True)
166 with gtfs_zip.open('routes.txt') as file: 179 with gtfs_zip.open('routes.txt') as file:
171 print('%d linjaa' % len(routes), file = stderr) 184 print('%d linjaa' % len(routes), file = stderr)
172 185
173 print('Ladataan ajovuorot... ', file = stderr, end = '', flush = True) 186 print('Ladataan ajovuorot... ', file = stderr, end = '', flush = True)
174 187
175 shape_distances = {} 188 shape_distances = {}
176 with gtfs_zip.open('shapes.txt') as file: 189 try:
177 for row in read_csv(map(bytes.decode, file)): 190 with gtfs_zip.open('shapes.txt') as file:
178 shape_distances[row['shape_id']] = max(shape_distances.get(row['shape_id'], 0), float(row['shape_dist_traveled'])) 191 for row in read_csv(map(bytes.decode, file)):
192 shape_distances[row['shape_id']] = max(shape_distances.get(row['shape_id'], 0), float(row['shape_dist_traveled']))
193 except KeyError:
194 pass
179 195
180 with gtfs_zip.open('trips.txt') as file: 196 with gtfs_zip.open('trips.txt') as file:
181 for row in read_csv(map(bytes.decode, file)): 197 for row in read_csv(map(bytes.decode, file)):
182 if row['service_id'] not in services: 198 if row['service_id'] not in services:
183 services[row['service_id']] = BusService(row['service_id']) 199 services[row['service_id']] = BusService(row['service_id'])
184 route = routes_per_id[row['route_id']] 200 route = routes_per_id[row['route_id']]
185 trip = BusTrip( 201 trip = BusTrip(
186 reference = row['trip_id'], 202 reference = row['trip_id'],
187 route = route, 203 route = route,
188 service = services[row['service_id']], 204 service = services[row['service_id']],
189 length = shape_distances[row['shape_id']] * float(profile['metrics']['shape-modifier']) 205 length = shape_distances.get(row.get('shape_id'), 1) * float(profile['metrics']['shape-modifier']),
206 block_id = row['block_id'],
190 ) 207 )
191 route.trips.add(trip) 208 route.trips.add(trip)
192 assert trip.name not in all_trips 209 if trip.name in all_trips:
193 all_trips[trip.name] = trip 210 print('Trip %s already exists' % trip.name)
211 else:
212 all_trips[trip.name] = trip
194 print('%d ajoa' % len(all_trips), file = stderr) 213 print('%d ajoa' % len(all_trips), file = stderr)
195 214
def read_date(teksti):
    """Parse a ``YYYYMMDD`` string into a ``date``.

    The first four characters are the year, the next two the month and
    everything after that the day.
    """
    year, month, day = teksti[:4], teksti[4:6], teksti[6:]
    return date(int(year), int(month), int(day))
198 217
251 location = Sijainti(float(row['stop_lat']), float(row['stop_lon'])) 270 location = Sijainti(float(row['stop_lat']), float(row['stop_lon']))
252 stop = BusStop( 271 stop = BusStop(
253 reference = row['stop_id'], 272 reference = row['stop_id'],
254 name = row['stop_name'], 273 name = row['stop_name'],
255 location = location, 274 location = location,
256 code = row['stop_code'], 275 code = row.get('stop_code', row['stop_id']),
257 ) 276 )
258 bus_stops[stop.reference] = stop 277 bus_stops[stop.reference] = stop
259 with open('regions-per-stop.json') as file: 278 if profile['regions']['use-regions']:
260 for stop_reference, region in json.load(file).items(): 279 with open('regions-per-stop.json') as file:
261 bus_stops[stop_reference].region = region 280 for stop_reference, region in json.load(file).items():
281 try:
282 bus_stops[stop_reference].region = region
283 except KeyError:
284 pass
262 print('%d pysäkkiä' % len(bus_stops), file = stderr) 285 print('%d pysäkkiä' % len(bus_stops), file = stderr)
263 286
264 287
265 class BusStopCluster: 288 class BusStopCluster:
266 def __init__(self): 289 def __init__(self):
365 for name, clusters in clusters_per_name.items(): 388 for name, clusters in clusters_per_name.items():
366 if len(clusters) == 1: 389 if len(clusters) == 1:
367 # Ryhmä on ainoa jolla on varaus tälle nimelle. Sen kuin vaan. 390 # Ryhmä on ainoa jolla on varaus tälle nimelle. Sen kuin vaan.
368 next(iter(clusters)).name = name 391 next(iter(clusters)).name = name
369 else: 392 else:
370 # Olisiko kaikki klusterit eri alueilla? 393 if profile['regions']['use-regions']:
371 common_regions = shared_elements_in_n_sets({stop.region for stop in cluster.stops} for cluster in clusters) 394 # Olisiko kaikki klusterit eri alueilla?
372 # Esitys: ryhmä -> ne alueet jotka ovat tälle ryhmälle ainutlaatuisia 395 common_regions = shared_elements_in_n_sets({stop.region for stop in cluster.stops} for cluster in clusters)
373 proposal = { 396 # Esitys: ryhmä -> ne alueet jotka ovat tälle ryhmälle ainutlaatuisia
374 cluster: {stop.region for stop in cluster.stops} - common_regions - {None} 397 proposal = {
398 cluster: {stop.region for stop in cluster.stops} - common_regions - {None}
399 for cluster in clusters
400 }
401 # Jos enintään yksi klusteri tässä esityksessä on kokonaan ilman omaa aluetta, jolla se voisi eritellä,
402 # niin nimetään klusterit näiden alueiden mukaan.
403 # Se klusteri jolla ei ole omaa aluetta (jos on) jätetään ilman aluepäätettä.
404 if sum([1 for unique_areas in proposal.values() if not unique_areas]) <= 1:
405 for cluster, unique_areas in proposal.items():
406 individual_cluster_name = name
407 if unique_areas:
408 individual_cluster_name += ' (' + min(unique_areas) + ')'
409 cluster.name = individual_cluster_name
410 break
411 # Typerä reunatapaus. Indeksoidaan numeroin...
412 for n, (_, cluster) in enumerate(sorted(
413 min((stop.reference.lower(), cluster) for stop in cluster.stops)
375 for cluster in clusters 414 for cluster in clusters
376 } 415 ), 1):
377 # Jos enintään yksi klusteri tässä esityksessä on kokonaan ilman omaa aluetta, jolla se voisi eritellä, 416 individual_cluster_name = name + '-' + str(n)
378 # niin nimetään klusterit näiden alueiden mukaan. 417 cluster.name = individual_cluster_name
379 # Se klusteri jolla ei ole omaa aluetta (jos on) jätetään ilman aluepäätettä.
380 if sum([1 for unique_areas in proposal.values() if not unique_areas]) <= 1:
381 for cluster, unique_areas in proposal.items():
382 individual_cluster_name = name
383 if unique_areas:
384 individual_cluster_name += ' (' + min(unique_areas) + ')'
385 cluster.name = individual_cluster_name
386 else:
387 # Typerä reunatapaus. Indeksoidaan numeroin...
388 for n, (_, cluster) in enumerate(sorted(
389 min((stop.reference.lower(), cluster) for stop in cluster.stops)
390 for cluster in clusters
391 ), 1):
392 individual_cluster_name = name + '-' + str(n)
393 cluster.name = individual_cluster_name
394 418
395 print('Ryhmitellään pysäkit...') 419 print('Ryhmitellään pysäkit...')
396 cluster_bus_stops() 420 cluster_bus_stops()
397 name_clusters() 421 name_clusters()
398 422
406 with gtfs_zip.open('stop_times.txt') as file: 430 with gtfs_zip.open('stop_times.txt') as file:
407 row_count = sum(line.count(b'\n') for line in file) 431 row_count = sum(line.count(b'\n') for line in file)
408 with gtfs_zip.open('stop_times.txt') as file: 432 with gtfs_zip.open('stop_times.txt') as file:
409 progress = 0 433 progress = 0
410 for row in read_csv(map(bytes.decode, file)): 434 for row in read_csv(map(bytes.decode, file)):
435 if int(row.get('pickup_type', 0)) and int(row.get('drop_off_type', 0)):
436 continue
411 trip = all_trips[transform_trip_reference(row['trip_id'])] 437 trip = all_trips[transform_trip_reference(row['trip_id'])]
412 arrival_time = read_time(row['arrival_time']) 438 arrival_time = read_time(row['arrival_time'])
413 departure_time = read_time(row['departure_time']) 439 departure_time = read_time(row['departure_time'])
414 stop = bus_stops[row['stop_id']] 440 stop = bus_stops[row['stop_id']]
415 traveled_distance = float(row['shape_dist_traveled']) * float(profile['metrics']['shape-modifier']) 441 traveled_distance = float(row.get('shape_dist_traveled', 1)) * float(profile['metrics']['shape-modifier'])
416 trip.schedule.append(BusHalt(arrival_time, departure_time, stop, trip, traveled_distance)) 442 trip.schedule.append(BusHalt(arrival_time, departure_time, stop, trip, traveled_distance))
417 stop.involved_trips.add(trip) 443 stop.involved_trips.add(trip)
418 progress += 1 444 progress += 1
419 if progress % 1000 == 0: 445 if progress % 1000 == 0:
420 print('\rLadataan aikataulut... %.1f%%' % (progress * 100 / row_count), end = ' ', file = stderr) 446 print('\rLadataan aikataulut... %.1f%%' % (progress * 100 / row_count), end = ' ', file = stderr)
448 try: 474 try:
449 most_common_route = tally.most_common(1)[0][0] 475 most_common_route = tally.most_common(1)[0][0]
450 route.description = simplify_name(most_common_route[0]) + ' - ' + simplify_name(most_common_route[-1]) 476 route.description = simplify_name(most_common_route[0]) + ' - ' + simplify_name(most_common_route[-1])
451 except: 477 except:
452 route.description = '' 478 route.description = ''
453 route.trips = sorted(route.trips, key = lambda trip: trip.schedule[0].departure_time) 479 route.trips = sorted(route.trips, key = lambda trip: trip.schedule and trip.schedule[0].departure_time or timedelta())
454 480
455 # Fölin datassa on jotain tosi kummaa. Ilmeisesti ajovuoron viimeisen pysähdyksen saapumisaika on ihan täysin 481 # Fölin datassa on jotain tosi kummaa. Ilmeisesti ajovuoron viimeisen pysähdyksen saapumisaika on ihan täysin
456 # väärin. Arvaan että se on seuraavan lähdön aika, mutta joka tapauksessa se on väärin. 482 # väärin. Arvaan että se on seuraavan lähdön aika, mutta joka tapauksessa se on väärin.
457 # Arvataan mikä se todellinen saapumisaika on. Se ei voi mennä kauhean paljon pahemmin vikaan kuin alkuperäinen 483 # Arvataan mikä se todellinen saapumisaika on. Se ei voi mennä kauhean paljon pahemmin vikaan kuin alkuperäinen
458 # väärin oleva data. 484 # väärin oleva data.
459 for trip in all_trips.values(): 485 for trip in all_trips.values():
460 bus_speed_coefficient = 750 # metriä minuutissa 486 if len(trip.schedule) >= 2:
461 last_leg_distance = trip.schedule[-1].traveled_distance - trip.schedule[-2].traveled_distance 487 bus_speed_coefficient = 750 # metriä minuutissa
462 trip.schedule[-1].arrival_time = trip.schedule[-2].departure_time + timedelta(minutes = last_leg_distance / bus_speed_coefficient) 488 last_leg_distance = trip.schedule[-1].traveled_distance - trip.schedule[-2].traveled_distance
489 trip.schedule[-1].arrival_time = trip.schedule[-2].departure_time + timedelta(minutes = last_leg_distance / bus_speed_coefficient)
490
# Index trips by (block id, arrival time at the trip's first halt).
global trips_by_vehicle_info
trips_by_vehicle_info = {}
for trip in all_trips.values():
    # A trip can end up with no halts at all (no stop_times rows, or every
    # row skipped); it has no first halt to key on, so leave it out of the
    # index instead of failing with IndexError.
    if trip.schedule:
        trips_by_vehicle_info[(trip.block_id, trip.schedule[0].arrival_time)] = trip
463 495
if __name__ == '__main__':
    # Stand-alone run: read the Föli profile, then load the GTFS archive.
    # NOTE(review): `profile` must already exist at module level here; its
    # definition is outside the visible part of this changeset.
    profile.read('profiles/föli.ini')
    load_buses('gtfs.zip')

mercurial