Wed, 26 Sep 2018 13:12:11 +0300
refactor
#!/usr/bin/env python3
'''
In-memory model of a GTFS timetable feed: routes, trips, services, stops,
halts and stop clusters, plus `load_buses` which populates the module-level
registries from a GTFS zip file.
'''
import enum
import json
from sys import stderr
from datetime import date, time, datetime, timedelta
from copy import copy
from misc import *
from geometry import *


def transform_trip_reference(reference):
    ''' Maps a GTFS trip id to the key used in `all_trips` (currently the identity). '''
    return reference


class BusTrip:
    ''' One trip of a route: an ordered list of halts run under a given service. '''

    def __init__(self, reference, route, service, length, block_id, shape):
        self.reference = reference
        self.route = route
        self.service = service
        self.block_id = block_id
        self.length = length
        # BusHalt objects, appended in the order they are read from stop_times.txt.
        self.schedule = []
        self.name = transform_trip_reference(reference)
        self.shape = str(shape)

    def __repr__(self):
        return 'all_trips[%r]' % self.name

    def contains_stop(self, stop):
        ''' Returns the first halt of this trip at `stop`, or None if there is none. '''
        for halt in self.schedule:
            if halt.stop is stop:
                return halt
        return None

    def is_served_at(self, day):
        ''' Tells whether this trip's service runs on `day`. '''
        try:
            return self.service in services_for_day[day]
        except KeyError:
            # No service at all is registered for that day.
            return False

    def concise_schedule(self, starting_stop=None):
        '''
        Returns the trip as a list of place names.

        With regions enabled in the profile the result lists each region once,
        in order of first appearance; otherwise it lists the stop names.
        If `starting_stop` (a halt of this trip) is given, only the part of the
        trip from that halt onwards is considered.
        '''
        schedule = self.schedule
        if starting_stop:
            try:
                schedule = schedule[schedule.index(starting_stop):]
            except ValueError:
                # Not a halt of this trip: fall back to the whole schedule.
                pass
        if profile['regions']['use-regions']:
            used_areas = set()
            result = []
            for halt in schedule:
                region = halt.stop.region
                if region and region not in used_areas:
                    used_areas.add(region)
                    result.append(region)
            return result
        return [halt.stop.name for halt in schedule]


class BusRoute:
    ''' A route, built from one row of routes.txt. '''

    def __init__(self, entry):
        self.id = entry['route_id']
        self.reference = entry['route_short_name']
        self.trips = set()
        self.service = None

    def __repr__(self):
        return 'routes[%r]' % self.reference


class BusService:
    ''' A service id together with the set of dates it runs on. '''

    def __init__(self, reference):
        self.reference = reference
        self.dates = set()

    def __repr__(self):
        return 'services[%r]' % self.reference


class BusStop:
    ''' A single stop, with the trips that halt at it. '''

    def __init__(self, reference, name, location, code=None):
        self.reference, self.name, self.location = reference, name, location
        self.code = code or reference
        self.cluster = None
        self.pairs = set()  # nearby stops with the same name
        self.involved_trips = set()
        self.services = set()

    def __repr__(self):
        return 'bus_stops[%r]' % self.reference

    def schedule(self, *, max_amount=50, arrivals=False):
        '''
        Collects up to `max_amount` upcoming halts at this stop.

        Returns a list of the entries produced by `schedule_for_day`, gathered
        day by day starting from yesterday. If the calendar runs out before
        enough halts are found, the list is shorter than `max_amount`,
        possibly empty.
        '''
        result = []
        # Start one day back so that night services still running are included.
        day = today() - timedelta(days=1)
        while len(result) < max_amount:
            try:
                result += self.schedule_for_day(day, arrivals=arrivals)
            except ValueError:
                # Ran past the end of the schedule calendar: stop searching.
                break
            day += timedelta(1)
        return result[:max_amount]

    def schedule_for_day(self, date, *, arrivals=False, allow_gone=False):
        '''
        Returns this stop's halts on the given day, sorted by time.

        Each entry is a dict with the keys 'date', 'offset', 'time', 'trip'
        and 'stop' (the latter being the BusHalt). The final halt of a trip is
        never listed; halts flagged as arrivals are listed only when `arrivals`
        is set, and halts more than a minute in the past only when
        `allow_gone` is set.

        Raises ValueError if `date` is after the last day covered by the data.
        Returning an empty list instead would make `schedule` search forever.
        '''
        if date > viimeinen_käyttöpäivä:
            raise ValueError('tried to retrieve schedule for date %s which is outside schedule data' % date)
        result = []
        for trip in self.involved_trips:
            if not trip.is_served_at(date):
                continue
            halt = trip.contains_stop(self)
            if not halt or halt is trip.schedule[-1]:
                continue
            if not arrivals and halt.is_arrival:
                continue
            stop_time = datetime.combine(date, time()) + halt.departure_time
            if allow_gone or (stop_time + timedelta(minutes=1) >= now()):
                result.append({
                    'date': date,
                    'offset': halt.departure_time,
                    'time': stop_time,
                    'trip': trip,
                    'stop': halt,
                })
        result.sort(key=lambda schedule_entry: schedule_entry['time'])
        return result

    @property
    def typename(self):
        ''' Kind of stop, derived from the set of services that use it. '''
        if self.services == {'train'}:
            return 'train-station'
        if self.services == {'tram'}:
            return 'tram-stop'
        if self.services == {'ferry'}:
            return 'ferry-terminal'
        return 'bus-stop'


class BusHalt:
    ''' One scheduled halt of a trip at a stop. Times are offsets from midnight. '''

    def __init__(self, arrival_time, departure_time, stop, trip, traveled_distance):
        self.arrival_time = arrival_time
        self.departure_time = departure_time
        self.stop = stop
        self.trip = trip
        self.traveled_distance = traveled_distance

    @property
    def is_arrival(self):
        '''
        Tells whether the trip is only arriving at this halt.

        With regions enabled, a halt counts as an arrival when its stop has a
        region and every later halt of the trip is in that same region. The
        answer is cached on the instance. Without regions, only the final halt
        of the trip is an arrival.
        '''
        if not profile['regions']['use-regions']:
            return self == self.trip.schedule[-1]
        if not hasattr(self, 'cachedIsArrival'):
            if self.stop.region:
                # Advance the iterator just past this halt...
                remaining = iter(self.trip.schedule)
                halt = next(remaining)
                while halt is not self:
                    halt = next(remaining)
                # ...and see whether any later halt leaves the region.
                for halt in remaining:
                    if halt.stop.region != self.stop.region:
                        self.cachedIsArrival = False
                        break
                else:
                    self.cachedIsArrival = True
            else:
                self.cachedIsArrival = False
        return self.cachedIsArrival

    def departure_datetime(self, date):
        ''' Returns the departure as a datetime on the given day. '''
        import datetime
        return datetime.datetime.combine(date, datetime.time()) + self.departure_time

    def arrival_datetime(self, date):
        ''' Returns the arrival as a datetime on the given day. '''
        import datetime
        return datetime.datetime.combine(date, datetime.time()) + self.arrival_time

    def __repr__(self):
        return 'BusHalt(%r, %r, %r, %r)' % (self.arrival_time, self.departure_time, self.stop, self.trip)

    def sign(self, long=False):
        ''' Returns the destination sign for the rest of the trip, as built by busroute.reduce_schedule. '''
        from busroute import reduce_schedule
        return reduce_schedule(
            route=self.trip.concise_schedule(self),
            trip_length=self.trip.length - self.traveled_distance,
            long=long,
        )


class BusStopCluster:
    ''' A group of stops that are presented together. '''

    def __init__(self):
        self.stops = set()
        self.cached_center = None
        self.name = None

    @property
    def url_name(self):
        ''' The cluster name lowercased, without parentheses and with spaces turned into dashes. '''
        return self.name.lower().replace('(', '').replace(')', '').replace(' ', '-')

    def add_stop(self, stop):
        ''' Adds a stop that does not yet belong to any cluster. '''
        assert not stop.cluster
        stop.cluster = self
        self.stops.add(stop)
        self.cached_center = None

    @property
    def center(self):
        '''
        The per-coordinate median of the stop locations, cached until the
        cluster changes. Raises ValueError for an empty cluster.
        '''
        if self.cached_center is None:
            if not self.stops:
                raise ValueError('an empty cluster has no center point')
            from statistics import median
            pointtype = type(next(iter(self.stops)).location)
            self.cached_center = pointtype(
                median(stop.location.x for stop in self.stops),
                median(stop.location.y for stop in self.stops),
            )
        return self.cached_center

    def merge(self, other):
        ''' Moves all stops of `other` into this cluster, leaving `other` empty. '''
        for bus_stop in other.stops:
            bus_stop.cluster = self
        self.stops |= other.stops
        # This cluster gained stops, so its cached centre is no longer valid.
        self.cached_center = None
        other.stops = set()
        other.cached_center = None

    def schedule(self, *, max_amount=50):
        ''' Returns the `max_amount` earliest upcoming halts over all stops of the cluster. '''
        result = []
        for stop in self.stops:
            result += stop.schedule(max_amount=max_amount)
        result.sort(key=lambda schedule_entry: schedule_entry['time'])
        return result[:max_amount]

    def __lt__(self, other):
        # Order by name when both clusters have distinct names; otherwise fall
        # back to object identity so that the ordering is still consistent
        # (needed when clusters end up as tie-breakers in compared tuples).
        if self.name and other.name and self.name != other.name:
            return self.name < other.name
        return id(self) < id(other)


class CustomBusStopCluster(BusStopCluster):
    ''' A cluster with a caller-supplied name and set of stops. '''

    def __init__(self, *, name, stops):
        super().__init__()
        self.name = name
        self.stops = stops

    def add_stop(self, stop):
        # NOTE(review): this returns the NotImplemented singleton rather than
        # raising NotImplementedError, so a call is silently ignored. Kept as
        # is because callers may rely on it not raising.
        return NotImplemented

    @property
    def url_name(self):
        from urllib.parse import quote
        return 'custom?stops=' + ';'.join(stop.code for stop in self.stops) + '&name=' + quote(self.name)


from collections import defaultdict

# Module-level registries, filled in by load_buses().
routes = {}
routes_per_id = {}
all_trips = {}
services = {}
bus_stops = {}
all_clusters = set()
viimeinen_käyttöpäivä = None  # last day covered by the schedule data
clusters_by_name = {}
services_for_day = {}
shapes = defaultdict(list)
trips_by_vehicle_info = {}


def load_buses(gtfs_zip_path):
    '''
    Loads a GTFS feed from the zip file at `gtfs_zip_path` into the
    module-level registries (routes, trips, services, stops, clusters,
    schedules). Progress is reported on stderr.
    '''
    global viimeinen_käyttöpäivä, all_clusters, trips_by_vehicle_info
    from zipfile import ZipFile
    from collections import Counter
    import re

    def read_date(teksti):
        ''' Parses a YYYYMMDD string into a date. '''
        return date(int(teksti[:4]), int(teksti[4:6]), int(teksti[6:]))

    def read_time(teksti):
        ''' Parses H:M:S into a timedelta; hours may exceed 23. '''
        hour, minute, second = map(int, teksti.split(':'))
        return timedelta(hours=hour, minutes=minute, seconds=second)

    def date_range(start_date, end_date, *, include_end=False):
        '''
        Generates date from start_date to end_date. If include_end is True, then end_date will be yielded.
        '''
        current_date = start_date
        while current_date < end_date:
            yield current_date
            current_date += timedelta(1)
        if include_end:
            yield end_date

    def add_day_to_service(service_name, day):
        ''' Marks the service as running on `day`; unknown services are ignored. '''
        global viimeinen_käyttöpäivä
        try:
            service = services[service_name]
        except KeyError:
            return
        service.dates.add(day)
        services_for_day.setdefault(day, set()).add(service)
        viimeinen_käyttöpäivä = max(day, viimeinen_käyttöpäivä)

    def remove_day_from_service(service_name, day):
        ''' Marks the service as not running on `day`; unknown services are ignored. '''
        service = services.get(service_name)
        if service is None:
            return
        service.dates.discard(day)
        if day in services_for_day:
            services_for_day[day].discard(service)

    def filter_day(row, day):
        ''' Returns the calendar.txt weekday flag of `row` for the weekday of `day`. '''
        day_names = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
        return int(row[day_names[day.isoweekday() - 1]])

    def services_available_at(day):
        ''' Yields the services that run on `day`. '''
        for service in services.values():
            if day in service.dates:
                yield service

    def cluster_bus_stops():
        ''' Groups stops into clusters, appending the clusters to all_clusters. '''
        # Distance threshold, in whatever unit Location.distance returns.
        max_distance = 0.4
        sorted_bus_stops = sorted(bus_stops.values(), key=lambda bus_stop: bus_stop.name)
        for bus_stop in sorted_bus_stops:
            if bus_stop.cluster:
                continue
            stops_to_cluster = {bus_stop}
            # Find the nearby counterparts of the stop that share its name.
            for pair_candidate in bus_stops_by_name[bus_stop.name]:
                distance = pair_candidate.location.distance(bus_stop.location)
                if pair_candidate is not bus_stop and distance <= max_distance:
                    stops_to_cluster.add(pair_candidate)
            # Reuse a cluster one of them already belongs to, or start a new one.
            for stop_to_cluster in stops_to_cluster:
                if stop_to_cluster.cluster:
                    cluster = stop_to_cluster.cluster
                    break
            else:
                cluster = BusStopCluster()
                all_clusters.append(cluster)
            for stop_to_cluster in stops_to_cluster:
                if not stop_to_cluster.cluster:
                    cluster.add_stop(stop_to_cluster)
        # Record each stop's counterparts using the grouping so far.
        for bus_stop in bus_stops.values():
            if bus_stop.cluster:
                bus_stop.pairs = bus_stop.cluster.stops - {bus_stop}
        # Merge stops that got no counterpart into the nearest other cluster.
        for bus_stop in sorted_bus_stops:
            if len(bus_stop.cluster.stops) != 1:
                continue
            possibilities = set()
            for cluster in all_clusters:
                if cluster is not bus_stop.cluster:
                    distance = cluster.center.distance(bus_stop.location)
                    if distance <= max_distance:
                        possibilities.add((distance, cluster))
            if possibilities:
                best = min(possibilities)[1]
                all_clusters.remove(bus_stop.cluster)
                best.merge(bus_stop.cluster)

    def shared_elements_in_n_sets(sets):
        ''' Returns the elements that occur in at least two of the given sets. '''
        from itertools import combinations
        result = set()
        for first, second in combinations(sets, 2):
            result |= first & second
        return result

    def name_clusters():
        ''' Gives every cluster in all_clusters a name. '''
        clusters_per_name = defaultdict(set)
        for cluster in all_clusters:
            # The stop with the shortest (then smallest) reference names the cluster.
            name_representing_stop = min(cluster.stops, key=lambda stop: (len(stop.reference), stop.reference))
            clusters_per_name[name_representing_stop.name].add(cluster)
        for name, clusters in clusters_per_name.items():
            if len(clusters) == 1:
                # Simple case: this cluster is the only one that wants this name.
                next(iter(clusters)).name = name
                continue
            if profile['regions']['use-regions']:
                # Find out if all clusters are in different areas
                common_regions = shared_elements_in_n_sets({stop.region for stop in cluster.stops} for cluster in clusters)
                # Proposal: cluster -> the areas unique to the cluster
                proposal = {
                    cluster: {stop.region for stop in cluster.stops} - common_regions - {None}
                    for cluster in clusters
                }
                # If at most one cluster is without its own unique region, name the others by region and this one without any.
                if sum(1 for unique_areas in proposal.values() if not unique_areas) <= 1:
                    for cluster, unique_areas in proposal.items():
                        individual_cluster_name = name
                        if unique_areas:
                            individual_cluster_name += ' (' + min(unique_areas) + ')'
                        cluster.name = individual_cluster_name
                    # This group is named; go on to the next name instead of
                    # falling through to the numbering below.
                    continue
            # If all else fails, just number them.
            numbered = sorted(
                min((stop.reference.lower(), cluster) for stop in cluster.stops)
                for cluster in clusters
            )
            for n, (_, cluster) in enumerate(numbered, 1):
                cluster.name = name + '-' + str(n)

    with ZipFile(gtfs_zip_path) as gtfs_zip:
        print('Loading routes... ', file=stderr, end='', flush=True)
        with gtfs_zip.open('routes.txt') as file:
            for row in read_csv(map(bytes.decode, file)):
                route = BusRoute(row)
                routes[route.reference] = route
                routes_per_id[route.id] = route
        print('%d routes' % len(routes), file=stderr)

        # Add services
        service_patterns = {}
        if 'service-patterns' in profile:
            for service_type, regexps in profile['service-patterns'].items():
                service_patterns[service_type] = {re.compile(regexp) for regexp in regexps.split('@')}
        if 'services' in profile and profile['services'].get('default-service'):
            print('Tagging services...', end='')
            for route in routes.values():
                for service_type, regexps in service_patterns.items():
                    if any(regexp.match(route.reference) for regexp in regexps):
                        route.service = service_type
                        break
                else:
                    # No pattern matched this route.
                    route.service = profile['services']['default-service']
            print('')

        print('Loading trips... ', file=stderr, end='', flush=True)
        shape_distances = {}
        try:
            with gtfs_zip.open('shapes.txt') as file:
                for row in read_csv(map(bytes.decode, file)):
                    shape_id = row['shape_id']
                    shapes[shape_id].append((row['shape_pt_lat'], row['shape_pt_lon']))
                    shape_distances[shape_id] = max(
                        shape_distances.get(shape_id, 0),
                        float(row['shape_dist_traveled'] or 0),
                    )
        except KeyError:
            # Best effort: ZipFile.open raises KeyError when shapes.txt is
            # missing, and so does a row lacking one of the columns above.
            pass
        with gtfs_zip.open('trips.txt') as file:
            for row in read_csv(map(bytes.decode, file)):
                if row['service_id'] not in services:
                    services[row['service_id']] = BusService(row['service_id'])
                route = routes_per_id[row['route_id']]
                trip = BusTrip(
                    reference=row['trip_id'],
                    route=route,
                    service=services[row['service_id']],
                    length=shape_distances.get(row.get('shape_id'), 1) * float(profile['metrics']['shape-modifier']),
                    block_id=row.get('block_id') or row['service_id'],
                    shape=row.get('shape_id'),
                )
                route.trips.add(trip)
                if trip.name in all_trips:
                    print('Trip %s already exists' % trip.name)
                else:
                    all_trips[trip.name] = trip
        print('%d trips' % len(all_trips), file=stderr)

        print('Loading dates... ', file=stderr, flush=True)
        viimeinen_käyttöpäivä = date.today()
        with gtfs_zip.open('calendar.txt') as file:
            for row in read_csv(map(bytes.decode, file)):
                for day in date_range(read_date(row['start_date']), read_date(row['end_date']), include_end=True):
                    if filter_day(row, day):
                        add_day_to_service(service_name=row['service_id'], day=day)
        with gtfs_zip.open('calendar_dates.txt') as file:
            for row in read_csv(map(bytes.decode, file)):
                day = read_date(row['date'])
                # GTFS exception_type: 1 adds the service for the date, 2 removes it.
                # A missing or empty value is treated as an addition.
                if (row.get('exception_type') or '1').strip() == '2':
                    remove_day_from_service(service_name=row['service_id'], day=day)
                else:
                    add_day_to_service(service_name=row['service_id'], day=day)

        print('Loading stops... ', file=stderr, end='', flush=True)
        with gtfs_zip.open('stops.txt') as file:
            for row in read_csv(map(bytes.decode, file)):
                location = Location(float(row['stop_lat']), float(row['stop_lon']))
                stop = BusStop(
                    reference=row['stop_id'],
                    name=row['stop_name'],
                    location=location,
                    code=row.get('stop_code', row['stop_id']),
                )
                bus_stops[stop.reference] = stop
        if profile['regions']['use-regions']:
            with open('regions-per-stop.json', encoding='utf-8') as file:
                for stop_reference, region in json.load(file).items():
                    try:
                        bus_stops[stop_reference].region = region
                    except KeyError:
                        # The region file mentions a stop that is not in the feed.
                        pass
            for bus_stop in bus_stops.values():
                if not hasattr(bus_stop, 'region'):
                    bus_stop.region = None
        print('%d stops' % len(bus_stops), file=stderr)

        # Group the stops by name.
        bus_stops_by_name = defaultdict(set)
        for bus_stop in bus_stops.values():
            bus_stops_by_name[bus_stop.name].add(bus_stop)
        bus_stops_by_name = dict(bus_stops_by_name)

        all_clusters = []
        print('Clustering bus stops...')
        cluster_bus_stops()
        name_clusters()
        for cluster in all_clusters:
            if cluster.url_name in clusters_by_name:
                print('Warning: Clusters %r and %r share the same URL name: %r' % (cluster.name, clusters_by_name[cluster.url_name].name, cluster.url_name))
            else:
                clusters_by_name[cluster.url_name] = cluster

        print('Loading schedules... ', end='', flush=True, file=stderr)
        # First pass only counts lines, for the progress percentage.
        with gtfs_zip.open('stop_times.txt') as file:
            row_count = sum(line.count(b'\n') for line in file)
        shape_modifier = float(profile['metrics']['shape-modifier'])
        with gtfs_zip.open('stop_times.txt') as file:
            progress = 0
            for row in read_csv(map(bytes.decode, file)):
                # Skip halts where both pickup and drop-off types are non-zero.
                if int(row.get('pickup_type', '') or '0') and int(row.get('drop_off_type', '') or '0'):
                    continue
                trip = all_trips[transform_trip_reference(row['trip_id'])]
                arrival_time = read_time(row['arrival_time'])
                departure_time = read_time(row['departure_time'])
                stop = bus_stops[row['stop_id']]
                # A missing or empty distance falls back to 1.
                traveled_distance = float(row.get('shape_dist_traveled') or 1) * shape_modifier
                # NOTE(review): halts are appended in file order; this assumes
                # stop_times.txt lists each trip's rows in travel order.
                trip.schedule.append(BusHalt(arrival_time, departure_time, stop, trip, traveled_distance))
                stop.involved_trips.add(trip)
                progress += 1
                if progress % 1000 == 0:
                    print('\rLoading schedules... %.1f%%' % (progress * 100 / row_count), end=' ', file=stderr)
        print('\rLoading schedules... complete', file=stderr)

    from busroute import simplify_name
    for trip in all_trips.values():
        schedule = trip.concise_schedule()
        try:
            trip.from_place = simplify_name(schedule[0])
            trip.to_place = simplify_name(schedule[-1])
        except IndexError:
            # The trip has no halts (or none with a region).
            trip.from_place = ''
            trip.to_place = ''

    for route in routes.values():
        # Tally the distinct trip patterns of the route. A trip whose places
        # are all covered by an already seen pattern counts towards that
        # pattern instead of adding a new one.
        tally = Counter()
        for trip in route.trips:
            schedule = trip.concise_schedule()
            places = set(schedule)
            do_add = True
            for candidate in tally:
                if places.issubset(candidate):
                    do_add = False
                    tally[candidate] += 1
            if do_add:
                tally[tuple(schedule)] += 1
        try:
            most_common_route = tally.most_common(1)[0][0]
            route.description = simplify_name(most_common_route[0]) + ' - ' + simplify_name(most_common_route[-1])
        except IndexError:
            # The route has no trips, or its most common pattern is empty.
            route.description = ''
        route.trips = sorted(
            route.trips,
            key=lambda trip: trip.schedule[0].departure_time if trip.schedule else timedelta(),
        )

    if 'compatibility' in profile and profile['compatibility'].get('fix-destination-times', False):
        # There is something really odd in Föli's data: the arrival time of a
        # trip's final halt appears to be completely wrong. The guess is that
        # it is the time of the next departure, but either way it is wrong.
        # So estimate the real arrival time instead; it can hardly be further
        # off than the original data.
        for trip in all_trips.values():
            if len(trip.schedule) >= 2:
                bus_speed_coefficient = 750  # metres per minute
                last_leg_distance = trip.schedule[-1].traveled_distance - trip.schedule[-2].traveled_distance
                trip.schedule[-1].arrival_time = trip.schedule[-2].departure_time + timedelta(minutes=last_leg_distance / bus_speed_coefficient)

    trips_by_vehicle_info = {}
    for trip in all_trips.values():
        # Trips without any halts have no start time to index by.
        if trip.schedule:
            trips_by_vehicle_info[(trip.block_id, trip.schedule[0].arrival_time)] = trip

    # Add services to all bus stops
    for route in routes.values():
        for trip in route.trips:
            for halt in trip.schedule:
                halt.stop.services.add(route.service)


if __name__ == '__main__':
    profile.read('profiles/föli.ini')
    load_buses('gtfs.zip')
    import busroute
    from regions import parse_regions
    busroute.regions = parse_regions('föli.osm')