Patch for buses.py: index trips per route and per stop, derive display names.

What the hunks do:
  * The class whose repr is 'routes[...]' gets a `trips` set; each newly
    created trip is added to `route.trips` while trips are loaded.
  * The class whose repr is 'bus_stops[...]' gets an `involved_trips` set;
    a trip is added to a stop's set whenever a halt at that stop is loaded.
  * The schedule lookup that filters trips with `is_served_at(date)` now
    iterates the stop's `involved_trips` instead of `all_trips.values()`.
  * After loading, every trip gets `from_place` / `to_place`, and every
    route gets a `description` built from its most frequently tallied
    itinerary and has `trips` replaced by a list sorted by first departure.

Changes relative to the patch as received:
  * The `from ... import ...` statements run once before the loops instead
    of inside every loop iteration.
  * The bare `except:` around the route description is narrowed to
    `except IndexError:`, matching the handler used for trip endpoints.
  * `tally.update({tuple(candidate)})` is written as `tally[candidate] += 1`;
    the keys of `tally` are only ever inserted as tuples.
  * English comments were added to the new code, so the last hunk header
    changed from +371,40 to +371,48.

NOTE(review): the patch had been collapsed onto two lines. Line breaks were
restored from the hunk headers (all counts check out), but the indentation
(4 spaces per level assumed), the nesting depth of context lines and the
position of the blank context line closing the first hunk are
reconstructions. The final context `print` call was split inside its string
literal and has been rejoined. Verify the context lines against buses.py
before applying.

--- a/buses.py Wed Jun 21 18:24:34 2017 +0300
+++ b/buses.py Wed Jun 21 18:25:09 2017 +0300
@@ -47,6 +47,7 @@
     def __init__(self, entry):
         self.id = entry['route_id']
         self.reference = entry['route_short_name']
+        self.trips = set()  # filled while trips load; later replaced by a sorted list
     def __repr__(self):
         return 'routes[%r]' % self.reference
 
@@ -62,6 +63,7 @@
         self.reference, self.name, self.location = reference, name, location
         self.cluster = None
         self.pairs = set() # samannimiset lähellä olevat pysäkit
+        self.involved_trips = set()  # trips with a halt at this stop; filled while schedules load
     def __repr__(self):
         return 'bus_stops[%r]' % self.reference
     def schedule(self, max_amount = 50):
@@ -100,7 +102,7 @@
             raise ValueError('tried to retrieve schedule for date %s which is outside schedule data' % date)
         result = []
         # Jokaiselle ajovuorolle,
-        for trip in all_trips.values():
+        for trip in self.involved_trips:
             # jos tämä ajovuoro ajetaan tänä päivänä
             if trip.is_served_at(date):
                 # ja jos tämä trip pysähtyy tällä pysäkillä, ei kuitenkaan saapuen
@@ -175,6 +177,7 @@
             service = services[row['service_id']],
             length = shape_distances[row['shape_id']]
         )
+        route.trips.add(trip)
         assert trip.name not in all_trips
         all_trips[trip.name] = trip
 print('%d ajoa' % len(all_trips), file = stderr)
@@ -368,7 +371,48 @@
         stop = bus_stops[row['stop_id']]
         traveled_distance = float(row['shape_dist_traveled'])
         trip.schedule.append(BusHalt(arrival_time, departure_time, stop, trip, traveled_distance))
+        stop.involved_trips.add(trip)
         progress += 1
         if progress % 1000 == 0:
             print('\rLadataan aikataulut... %.1f%%' % (progress * 100 / row_count), end = ' ', file = stderr)
 print('\rLadataan aikataulut... ladattu', file = stderr)
+
+# Imported once here instead of on every loop iteration.
+from collections import Counter
+from busroute import simplify_name
+
+# Label every trip with the simplified names of its first and last entry.
+for trip in all_trips.values():
+    schedule = trip.concise_schedule()
+    try:
+        trip.from_place = simplify_name(schedule[0])
+        trip.to_place = simplify_name(schedule[-1])
+    except IndexError:
+        # Empty concise schedule: there are no endpoints to name.
+        trip.from_place = ''
+        trip.to_place = ''
+
+# Describe each route by its most frequently tallied itinerary ("first - last").
+for route in routes.values():
+    tally = Counter()
+    for trip in route.trips:
+        schedule = trip.concise_schedule()
+        places = set(schedule)
+        do_add = True
+        assert type(schedule) is list
+        # A trip whose entries are all contained in an already-seen itinerary
+        # counts as a vote for that itinerary instead of becoming a new one.
+        for candidate in tally:
+            if places.issubset(set(candidate)):
+                do_add = False
+                tally[candidate] += 1
+        if do_add:
+            tally.update({tuple(schedule)})
+    try:
+        most_common_route = tally.most_common(1)[0][0]
+        route.description = simplify_name(most_common_route[0]) + ' - ' + simplify_name(most_common_route[-1])
+    except IndexError:
+        # No trips on this route, or the winning itinerary is empty.
+        route.description = ''
+    # NOTE(review): raises IndexError if any trip has an empty schedule.
+    route.trips = sorted(route.trips, key = lambda trip: trip.schedule[0].departure_time)