--- a/buses.py Thu Mar 29 23:55:36 2018 +0300 +++ b/buses.py Fri Apr 13 17:32:40 2018 +0300 @@ -173,20 +173,18 @@ clusters_by_name = {} services_for_day = {} -def load_buses(gtfs_zip_path): +def load_buses(gtfs_zip_path, regions): global viimeinen_käyttöpäivä from zipfile import ZipFile with ZipFile(gtfs_zip_path) as gtfs_zip: - print('Ladataan linjat... ', file = stderr, end = '', flush = True) + print('Loading routes... ', file = stderr, end = '', flush = True) with gtfs_zip.open('routes.txt') as file: for row in read_csv(map(bytes.decode, file)): route = BusRoute(row) routes[route.reference] = route routes_per_id[route.id] = route - print('%d linjaa' % len(routes), file = stderr) - - print('Ladataan ajovuorot... ', file = stderr, end = '', flush = True) - + print('%d routes' % len(routes), file = stderr) + print('Loading trips... ', file = stderr, end = '', flush = True) shape_distances = {} try: with gtfs_zip.open('shapes.txt') as file: @@ -212,14 +210,14 @@ print('Trip %s already exists' % trip.name) else: all_trips[trip.name] = trip - print('%d ajoa' % len(all_trips), file = stderr) + print('%d trips' % len(all_trips), file = stderr) def read_date(teksti): return date(int(teksti[:4]), int(teksti[4:6]), int(teksti[6:])) def read_time(teksti): - tunti, minuutti, sekunti = map(int, teksti.split(':')) - return timedelta(hours = tunti, minutes = minuutti, seconds = sekunti) + hour, minute, second = map(int, teksti.split(':')) + return timedelta(hours = hour, minutes = minute, seconds = second) print('Ladataan päiväykset... ', file = stderr, flush = True) @@ -266,10 +264,10 @@ if day in service.dates: yield service - print('Ladataan pysäkit... ', file = stderr, end = '', flush = True) + print('Loading stops... ', file = stderr, end = '', flush = True) with gtfs_zip.open('stops.txt') as file: for row in read_csv(map(bytes.decode, file)): - location = Sijainti(float(row['stop_lat']), float(row['stop_lon'])) + location = Location(float(row['stop_lat']), float(row['stop_lon'])) stop = BusStop( reference = row['stop_id'], name = row['stop_name'], @@ -287,13 +285,12 @@ for bus_stop in bus_stops.values(): if not hasattr(bus_stop, 'region'): bus_stop.region = None - print('%d pysäkkiä' % len(bus_stops), file = stderr) - + print('%d stops' % len(bus_stops), file = stderr) class BusStopCluster: def __init__(self): self.stops = set() - self._center = None + self.cached_center = None self.name = None @property def url_name(self): @@ -302,26 +299,26 @@ assert not stop.cluster stop.cluster = self self.stops.add(stop) - self._center = None + self.cached_center = None @property def center(self): - if not self._center: + if not self.cached_center: if self.stops: from statistics import median pointtype = type(next(iter(self.stops)).location) - self._center = pointtype( + self.cached_center = pointtype( median(stop.location.x for stop in self.stops), median(stop.location.y for stop in self.stops), ) else: raise ValueError('an empty cluster has no center point') - return self._center + return self.cached_center def merge(self, other): for bus_stop in other.stops: bus_stop.cluster = self self.stops |= other.stops other.stops = set() - other._center = None + other.cached_center = None def schedule(self, *, max_amount = 50): result = [] for stop in self.stops: @@ -345,7 +342,7 @@ stops_to_cluster = {bus_stop} # etsi pysäkin samannimiset vastaparit for pair_candidate in bus_stops_by_name[bus_stop.name]: - distance = pair_candidate.location.etäisyys(bus_stop.location) + distance = pair_candidate.location.distance(bus_stop.location) if pair_candidate is not bus_stop and distance <= 0.4: stops_to_cluster.add(pair_candidate) for stop_to_cluster in stops_to_cluster: @@ -368,7 +365,7 @@ possibilities = set() for cluster in all_clusters: if cluster is not bus_stop.cluster: - distance = cluster.center.etäisyys(bus_stop.location) + distance = cluster.center.distance(bus_stop.location) if distance <= 0.4: possibilities.add((distance, cluster)) if possibilities: @@ -385,27 +382,24 @@ def name_clusters(): from collections import defaultdict - from pprint import pprint clusters_per_name = defaultdict(set) for cluster in all_clusters: name_representing_stop = min((len(stop.reference), stop.reference, stop) for stop in cluster.stops)[2] clusters_per_name[name_representing_stop.name].add(cluster) for name, clusters in clusters_per_name.items(): if len(clusters) == 1: - # Ryhmä on ainoa jolla on varaus tälle nimelle. Sen kuin vaan. + # Simple case: this cluster is the only one that wants this name. next(iter(clusters)).name = name else: if profile['regions']['use-regions']: - # Olisiko kaikki klusterit eri alueilla? + # Find out if all clusters are in different areas common_regions = shared_elements_in_n_sets({stop.region for stop in cluster.stops} for cluster in clusters) - # Esitys: ryhmä -> ne alueet jotka ovat tälle ryhmälle ainutlaatuisia + # Proposal: cluster -> the areas unique to the cluster proposal = { cluster: {stop.region for stop in cluster.stops} - common_regions - {None} for cluster in clusters } - # Jos enintään yksi klusteri tässä esityksessä on kokonaan ilman omaa aluetta, jolla se voisi eritellä, - # niin nimetään klusterit näiden alueiden mukaan. - # Se klusteri jolla ei ole omaa aluetta (jos on) jätetään ilman aluepäätettä. + # If at most one cluster is without its own unique region, name the others by region and this one without any. if sum([1 for unique_areas in proposal.values() if not unique_areas]) <= 1: for cluster, unique_areas in proposal.items(): individual_cluster_name = name @@ -413,7 +407,7 @@ individual_cluster_name += ' (' + min(unique_areas) + ')' cluster.name = individual_cluster_name break - # Typerä reunatapaus. Indeksoidaan numeroin... + # If all else fails, just number them. for n, (_, cluster) in enumerate(sorted( min((stop.reference.lower(), cluster) for stop in cluster.stops) for cluster in clusters @@ -421,7 +415,7 @@ individual_cluster_name = name + '-' + str(n) cluster.name = individual_cluster_name - print('Ryhmitellään pysäkit...') + print('Clustering bus stops...') cluster_bus_stops() name_clusters() @@ -431,7 +425,7 @@ else: clusters_by_name[cluster.url_name] = cluster - print('Ladataan aikataulut... ', end = '', flush = True, file = stderr) + print('Loading schedules... ', end = '', flush = True, file = stderr) with gtfs_zip.open('stop_times.txt') as file: row_count = sum(line.count(b'\n') for line in file) with gtfs_zip.open('stop_times.txt') as file: @@ -448,8 +442,8 @@ stop.involved_trips.add(trip) progress += 1 if progress % 1000 == 0: - print('\rLadataan aikataulut... %.1f%%' % (progress * 100 / row_count), end = ' ', file = stderr) - print('\rLadataan aikataulut... ladattu', file = stderr) + print('\rLoading schedules... %.1f%%' % (progress * 100 / row_count), end = ' ', file = stderr) + print('\rLoading schedules... complete', file = stderr) for trip in all_trips.values(): from busroute import simplify_name @@ -502,3 +496,6 @@ if __name__ == '__main__': profile.read('profiles/föli.ini') load_buses('gtfs.zip') + import busroute + from regions import parse_regions + busroute.regions = parse_regions('föli.osm')