--- a/buses.py Sat Oct 07 01:52:15 2017 +0300 +++ b/buses.py Thu Oct 26 16:54:29 2017 +0300 @@ -10,8 +10,8 @@ return reference class BusTrip: - def __init__(self, reference, route, service, length): - self.reference, self.route, self.service = reference, route, service + def __init__(self, reference, route, service, length, block_id): + self.reference, self.route, self.service, self.block_id = reference, route, service, block_id self.length = length self.schedule = [] self.name = transform_trip_reference(reference) @@ -29,19 +29,22 @@ except KeyError: return False def concise_schedule(self, starting_stop = None): - if starting_stop and starting_stop in self.schedule: - schedule = copy(self.schedule) - schedule = schedule[schedule.index(starting_stop):] + if profile['regions']['use-regions']: + if starting_stop and starting_stop in self.schedule: + schedule = copy(self.schedule) + schedule = schedule[schedule.index(starting_stop):] + else: + schedule = self.schedule + used_areas = set() + result = [] + for halt in schedule: + stop = halt.stop + if stop.region and stop.region not in used_areas: + used_areas.add(stop.region) + result.append(stop.region) + return result else: - schedule = self.schedule - used_areas = set() - result = [] - for halt in schedule: - stop = halt.stop - if stop.region and stop.region not in used_areas: - used_areas.add(stop.region) - result.append(stop.region) - return result + return [halt.stop.name for halt in self.schedule] class BusRoute: def __init__(self, entry): @@ -130,23 +133,33 @@ self.traveled_distance = traveled_distance @property def is_arrival(self): - if not hasattr(self, 'cachedIsArrival'): - if self.stop.region: - iterator = iter(self.trip.schedule) - stop = next(iterator) - while stop is not self: + if profile['regions']['use-regions']: + if not hasattr(self, 'cachedIsArrival'): + if self.stop.region: + iterator = iter(self.trip.schedule) stop = next(iterator) - for stop in iterator: - if stop.stop.region != self.stop.region: - self.cachedIsArrival = False - break + while stop is not self: + stop = next(iterator) + for stop in iterator: + if stop.stop.region != self.stop.region: + self.cachedIsArrival = False + break + else: + self.cachedIsArrival = True else: - self.cachedIsArrival = True - else: - self.cachedIsArrival = False - return self.cachedIsArrival + self.cachedIsArrival = False + return self.cachedIsArrival + else: + return self == self.trip.schedule[-1] def __repr__(self): return 'BusHalt(%r, %r, %r, %r)' % (self.arrival_time, self.departure_time, self.stop, self.trip) + def sign(self, long = False): + from busroute import reduce_schedule + return reduce_schedule( + route = self.trip.concise_schedule(self), + trip_length = self.trip.length - self.traveled_distance, + long = long, + ) routes = {} routes_per_id = {} @@ -158,7 +171,7 @@ clusters_by_name = {} services_for_day = {} -def load_buses(gtfs_zip_path, profile): +def load_buses(gtfs_zip_path): global viimeinen_käyttöpäivä from zipfile import ZipFile with ZipFile(gtfs_zip_path) as gtfs_zip: @@ -173,9 +186,12 @@ print('Ladataan ajovuorot... ', file = stderr, end = '', flush = True) shape_distances = {} - with gtfs_zip.open('shapes.txt') as file: - for row in read_csv(map(bytes.decode, file)): - shape_distances[row['shape_id']] = max(shape_distances.get(row['shape_id'], 0), float(row['shape_dist_traveled'])) + try: + with gtfs_zip.open('shapes.txt') as file: + for row in read_csv(map(bytes.decode, file)): + shape_distances[row['shape_id']] = max(shape_distances.get(row['shape_id'], 0), float(row['shape_dist_traveled'])) + except KeyError: + pass with gtfs_zip.open('trips.txt') as file: for row in read_csv(map(bytes.decode, file)): @@ -186,11 +202,14 @@ reference = row['trip_id'], route = route, service = services[row['service_id']], - length = shape_distances[row['shape_id']] * float(profile['metrics']['shape-modifier']) + length = shape_distances.get(row.get('shape_id'), 1) * float(profile['metrics']['shape-modifier']), + block_id = row['block_id'], ) route.trips.add(trip) - assert trip.name not in all_trips - all_trips[trip.name] = trip + if trip.name in all_trips: + print('Trip %s already exists' % trip.name) + else: + all_trips[trip.name] = trip print('%d ajoa' % len(all_trips), file = stderr) def read_date(teksti): @@ -253,12 +272,16 @@ reference = row['stop_id'], name = row['stop_name'], location = location, - code = row['stop_code'], + code = row.get('stop_code', row['stop_id']), ) bus_stops[stop.reference] = stop - with open('regions-per-stop.json') as file: - for stop_reference, region in json.load(file).items(): - bus_stops[stop_reference].region = region + if profile['regions']['use-regions']: + with open('regions-per-stop.json') as file: + for stop_reference, region in json.load(file).items(): + try: + bus_stops[stop_reference].region = region + except KeyError: + pass print('%d pysäkkiä' % len(bus_stops), file = stderr) @@ -367,30 +390,31 @@ # Ryhmä on ainoa jolla on varaus tälle nimelle. Sen kuin vaan. next(iter(clusters)).name = name else: - # Olisiko kaikki klusterit eri alueilla? - common_regions = shared_elements_in_n_sets({stop.region for stop in cluster.stops} for cluster in clusters) - # Esitys: ryhmä -> ne alueet jotka ovat tälle ryhmälle ainutlaatuisia - proposal = { - cluster: {stop.region for stop in cluster.stops} - common_regions - {None} + if profile['regions']['use-regions']: + # Olisiko kaikki klusterit eri alueilla? + common_regions = shared_elements_in_n_sets({stop.region for stop in cluster.stops} for cluster in clusters) + # Esitys: ryhmä -> ne alueet jotka ovat tälle ryhmälle ainutlaatuisia + proposal = { + cluster: {stop.region for stop in cluster.stops} - common_regions - {None} + for cluster in clusters + } + # Jos enintään yksi klusteri tässä esityksessä on kokonaan ilman omaa aluetta, jolla se voisi eritellä, + # niin nimetään klusterit näiden alueiden mukaan. + # Se klusteri jolla ei ole omaa aluetta (jos on) jätetään ilman aluepäätettä. + if sum([1 for unique_areas in proposal.values() if not unique_areas]) <= 1: + for cluster, unique_areas in proposal.items(): + individual_cluster_name = name + if unique_areas: + individual_cluster_name += ' (' + min(unique_areas) + ')' + cluster.name = individual_cluster_name + break + # Typerä reunatapaus. Indeksoidaan numeroin... + for n, (_, cluster) in enumerate(sorted( + min((stop.reference.lower(), cluster) for stop in cluster.stops) for cluster in clusters - } - # Jos enintään yksi klusteri tässä esityksessä on kokonaan ilman omaa aluetta, jolla se voisi eritellä, - # niin nimetään klusterit näiden alueiden mukaan. - # Se klusteri jolla ei ole omaa aluetta (jos on) jätetään ilman aluepäätettä. - if sum([1 for unique_areas in proposal.values() if not unique_areas]) <= 1: - for cluster, unique_areas in proposal.items(): - individual_cluster_name = name - if unique_areas: - individual_cluster_name += ' (' + min(unique_areas) + ')' - cluster.name = individual_cluster_name - else: - # Typerä reunatapaus. Indeksoidaan numeroin... - for n, (_, cluster) in enumerate(sorted( - min((stop.reference.lower(), cluster) for stop in cluster.stops) - for cluster in clusters - ), 1): - individual_cluster_name = name + '-' + str(n) - cluster.name = individual_cluster_name + ), 1): + individual_cluster_name = name + '-' + str(n) + cluster.name = individual_cluster_name print('Ryhmitellään pysäkit...') cluster_bus_stops() @@ -408,11 +432,13 @@ with gtfs_zip.open('stop_times.txt') as file: progress = 0 for row in read_csv(map(bytes.decode, file)): + if int(row.get('pickup_type', 0)) and int(row.get('drop_off_type', 0)): + continue trip = all_trips[transform_trip_reference(row['trip_id'])] arrival_time = read_time(row['arrival_time']) departure_time = read_time(row['departure_time']) stop = bus_stops[row['stop_id']] - traveled_distance = float(row['shape_dist_traveled']) * float(profile['metrics']['shape-modifier']) + traveled_distance = float(row.get('shape_dist_traveled', 1)) * float(profile['metrics']['shape-modifier']) trip.schedule.append(BusHalt(arrival_time, departure_time, stop, trip, traveled_distance)) stop.involved_trips.add(trip) progress += 1 @@ -450,19 +476,23 @@ route.description = simplify_name(most_common_route[0]) + ' - ' + simplify_name(most_common_route[-1]) except: route.description = '' - route.trips = sorted(route.trips, key = lambda trip: trip.schedule[0].departure_time) + route.trips = sorted(route.trips, key = lambda trip: trip.schedule and trip.schedule[0].departure_time or timedelta()) # Fölin datassa on jotain tosi kummaa. Ilmeisesti ajovuoron viimeisen pysähdyksen saapumisaika on ihan täysin # väärin. Arvaan että se on seuraavan lähdön aika, mutta joka tapauksessa se on väärin. # Arvataan mikä se todellinen saapumisaika on. Se ei voi mennä kauhean paljon pahemmin vikaan kuin alkuperäinen # väärin oleva data. for trip in all_trips.values(): - bus_speed_coefficient = 750 # metriä minuutissa - last_leg_distance = trip.schedule[-1].traveled_distance - trip.schedule[-2].traveled_distance - trip.schedule[-1].arrival_time = trip.schedule[-2].departure_time + timedelta(minutes = last_leg_distance / bus_speed_coefficient) + if len(trip.schedule) >= 2: + bus_speed_coefficient = 750 # metriä minuutissa + last_leg_distance = trip.schedule[-1].traveled_distance - trip.schedule[-2].traveled_distance + trip.schedule[-1].arrival_time = trip.schedule[-2].departure_time + timedelta(minutes = last_leg_distance / bus_speed_coefficient) + + global trips_by_vehicle_info + trips_by_vehicle_info = {} + for trip in all_trips.values(): + trips_by_vehicle_info[(trip.block_id, trip.schedule[0].arrival_time)] = trip if __name__ == '__main__': - from configparser import ConfigParser - profile = ConfigParser() profile.read('profiles/föli.ini') - load_buses('gtfs.zip', profile) + load_buses('gtfs.zip')