diff -r 659ab465152e -r f9788970fa46 buses.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/buses.py	Wed Jul 29 23:45:53 2020 +0300
@@ -0,0 +1,263 @@
+
+def old_load_gtfs(gtfs_zip_path):
+    global viimeinen_käyttöpäivä
+    from zipfile import ZipFile
+    with ZipFile(gtfs_zip_path) as gtfs_zip:
+        with gtfs_zip.open('trips.txt') as file:
+            for row in read_csv(map(bytes.decode, file)):
+                if row['service_id'] not in services:
+                    services[row['service_id']] = BusService(row['service_id'])
+                route = routes_per_id[row['route_id']]
+                trip = GtfsTrip(
+                    reference = row['trip_id'],
+                    route = route,
+                    service = services[row['service_id']],
+                    length = shape_distances.get(row.get('shape_id'), 1) * float(profile['metrics']['shape-modifier']),
+                    block_id = row.get('block_id') or row['service_id'],
+                    shape = row.get('shape_id')
+                )
+                route.trips.add(trip)
+                if trip.name in all_trips:
+                    print('Trip %s already exists' % trip.name)
+                else:
+                    all_trips[trip.name] = trip
+        print('%d trips' % len(all_trips), file = stderr)
+
+        def read_date(teksti):
+            return date(int(teksti[:4]), int(teksti[4:6]), int(teksti[6:]))
+
+        def read_time(teksti):
+            hour, minute, second = map(int, teksti.split(':'))
+            return timedelta(hours = hour, minutes = minute, seconds = second)
+
+        print('Loading dates... ', file = stderr, flush = True)
+        viimeinen_käyttöpäivä = date.today()
+
+        def date_range(start_date, end_date, *, include_end = False):
+            ''' Generates dates from start_date to end_date. If include_end is True, then end_date will be yielded. '''
+            current_date = start_date
+            while current_date < end_date:
+                yield current_date
+                current_date += timedelta(1)
+            if include_end:
+                yield end_date
+
+        def add_day_to_service(service_name, day):
+            try:
+                service = services[service_name]
+            except KeyError:
+                return
+            else:
+                service.dates.add(day)
+                if day not in services_for_day:
+                    services_for_day[day] = set()
+                services_for_day[day].add(service)
+                global viimeinen_käyttöpäivä
+                viimeinen_käyttöpäivä = max(day, viimeinen_käyttöpäivä)
+
+        def filter_day(row, day):
+            day_names = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
+            return int(row[day_names[day.isoweekday() - 1]])
+
+        with gtfs_zip.open('calendar.txt') as file:
+            for row in read_csv(map(bytes.decode, file)):
+                for day in date_range(read_date(row['start_date']), read_date(row['end_date']), include_end = True):
+                    if filter_day(row, day):
+                        add_day_to_service(service_name = row['service_id'], day = day)
+
+        with gtfs_zip.open('calendar_dates.txt') as file:
+            for row in read_csv(map(bytes.decode, file)):
+                add_day_to_service(service_name = row['service_id'], day = read_date(row['date']))
+
+        def services_available_at(day):
+            for service in services.values():
+                if day in service.dates:
+                    yield service
+
+        print('Loading stops... ', file = stderr, end = '', flush = True)
+        with gtfs_zip.open('stops.txt') as file:
+            for row in read_csv(map(bytes.decode, file)):
+                location = Location(float(row['stop_lat']), float(row['stop_lon']))
+                stop = BusStop(
+                    reference = row['stop_id'],
+                    name = row['stop_name'],
+                    location = location,
+                    code = row.get('stop_code', row['stop_id']),
+                )
+                bus_stops[stop.reference] = stop
+        if profile['regions']['use-regions']:
+            with open('regions-per-stop.json') as file:
+                for stop_reference, region in json.load(file).items():
+                    try:
+                        bus_stops[stop_reference].region = region
+                    except KeyError:
+                        pass
+        for bus_stop in bus_stops.values():
+            if not hasattr(bus_stop, 'region'):
+                bus_stop.region = None
+        print('%d stops' % len(bus_stops), file = stderr)
+
+        from collections import defaultdict
+        bus_stops_by_name = defaultdict(set)
+        for bus_stop in bus_stops.values():
+            bus_stops_by_name[bus_stop.name].add(bus_stop)
+        bus_stops_by_name = dict(bus_stops_by_name)
+
+        # group bus_stops by name
+        global all_clusters
+        all_clusters = []
+        def cluster_gtfs_stops():
+            sorted_gtfs_stops = sorted(bus_stops.values(), key = lambda bus_stop: bus_stop.name)
+            for bus_stop in sorted_gtfs_stops:
+                if not bus_stop.cluster:
+                    stops_to_cluster = {bus_stop}
+                    # find the stop's same-named counterparts
+                    for pair_candidate in bus_stops_by_name[bus_stop.name]:
+                        distance = pair_candidate.location.distance(bus_stop.location)
+                        if pair_candidate is not bus_stop and distance <= 0.4:
+                            stops_to_cluster.add(pair_candidate)
+                    for stop_to_cluster in stops_to_cluster:
+                        if stop_to_cluster.cluster:
+                            cluster = stop_to_cluster.cluster
+                            break
+                    else:
+                        cluster = BusStopCluster()
+                        all_clusters.append(cluster)
+                    for stop_to_cluster in stops_to_cluster:
+                        if not stop_to_cluster.cluster:
+                            cluster.add_stop(stop_to_cluster)
+            # Record each stop's counterparts using the current clustering information
+            for bus_stop in bus_stops.values():
+                if bus_stop.cluster:
+                    bus_stop.pairs = bus_stop.cluster.stops - {bus_stop}
+            # Cluster the bus_stops that had no counterpart of their own together with other stops
+            for bus_stop in sorted_gtfs_stops:
+                if len(bus_stop.cluster.stops) == 1:
+                    possibilities = set()
+                    for cluster in all_clusters:
+                        if cluster is not bus_stop.cluster:
+                            distance = cluster.center.distance(bus_stop.location)
+                            if distance <= 0.4:
+                                possibilities.add((distance, cluster))
+                    if possibilities:
+                        best = min(possibilities)[1]
+                        all_clusters.remove(bus_stop.cluster)
+                        best.merge(bus_stop.cluster)
+
+        def shared_elements_in_n_sets(sets):
+            from itertools import combinations
+            result = set()
+            for pair in combinations(sets, 2):
+                result |= pair[0] & pair[1]
+            return result
+
+        def name_clusters():
+            from collections import defaultdict
+            clusters_per_name = defaultdict(set)
+            for cluster in all_clusters:
+                name_representing_stop = min((len(stop.reference), stop.reference, stop) for stop in cluster.stops)[2]
+                clusters_per_name[name_representing_stop.name].add(cluster)
+            for name, clusters in clusters_per_name.items():
+                if len(clusters) == 1:
+                    # Simple case: this cluster is the only one that wants this name.
+                    next(iter(clusters)).name = name
+                else:
+                    if profile['regions']['use-regions']:
+                        # Find out if all clusters are in different areas
+                        common_regions = shared_elements_in_n_sets({stop.region for stop in cluster.stops} for cluster in clusters)
+                        # Proposal: cluster -> the areas unique to the cluster
+                        proposal = {
+                            cluster: {stop.region for stop in cluster.stops} - common_regions - {None}
+                            for cluster in clusters
+                        }
+                        # If at most one cluster is without its own unique region, name the others by region and this one without any.
+                        if sum([1 for unique_areas in proposal.values() if not unique_areas]) <= 1:
+                            for cluster, unique_areas in proposal.items():
+                                individual_cluster_name = name
+                                if unique_areas:
+                                    individual_cluster_name += ' (' + min(unique_areas) + ')'
+                                cluster.name = individual_cluster_name
+                            continue
+                    # If all else fails, just number them.
+                    for n, (_, cluster) in enumerate(sorted(
+                        min((stop.reference.lower(), cluster) for stop in cluster.stops)
+                        for cluster in clusters
+                    ), 1):
+                        individual_cluster_name = name + '-' + str(n)
+                        cluster.name = individual_cluster_name
+
+        print('Clustering bus stops...')
+        cluster_gtfs_stops()
+        name_clusters()
+        for cluster in all_clusters:
+            if cluster.url_name in clusters_by_name:
+                print('Warning: Clusters %r and %r share the same URL name: %r' % (cluster.name, clusters_by_name[cluster.url_name].name, cluster.url_name))
+            else:
+                clusters_by_name[cluster.url_name] = cluster
+        print('Loading schedules... ', end = '', flush = True, file = stderr)
+        with gtfs_zip.open('stop_times.txt') as file:
+            row_count = sum(line.count(b'\n') for line in file)
+        with gtfs_zip.open('stop_times.txt') as file:
+            progress = 0
+            for row in read_csv(map(bytes.decode, file)):
+                if int(row.get('pickup_type', '') or '0') and int(row.get('drop_off_type', '') or '0'):
+                    continue
+                trip = all_trips[transform_trip_reference(row['trip_id'])]
+                arrival_time = read_time(row['arrival_time'])
+                departure_time = read_time(row['departure_time'])
+                stop = bus_stops[row['stop_id']]
+                traveled_distance = float(row.get('shape_dist_traveled', 1)) * float(profile['metrics']['shape-modifier'])
+                visitnumber = len(trip.schedule) + 1
+                trip.schedule.append(BusHalt(arrival_time, departure_time, stop, trip, traveled_distance, visitnumber))
+                stop.involved_trips.add(trip)
+                progress += 1
+                if progress % 1000 == 0:
+                    print('\rLoading schedules... %.1f%%' % (progress * 100 / row_count), end = ' ', file = stderr)
+        print('\rLoading schedules... complete', file = stderr)
+        for trip in all_trips.values():
+            from busroute import simplify_name
+            schedule = trip.concise_schedule()
+            try:
+                trip.from_place = simplify_name(schedule[0])
+                trip.to_place = simplify_name(schedule[-1])
+            except IndexError:
+                trip.from_place = ''
+                trip.to_place = ''
+        for route in routes.values():
+            from collections import Counter
+            from busroute import simplify_name
+            tally = Counter()
+            for trip in route.trips:
+                schedule = trip.concise_schedule()
+                places = set(schedule)
+                do_add = True
+                assert type(schedule) is list
+                for candidate in tally:
+                    if places.issubset(set(candidate)):
+                        do_add = False
+                        tally.update({tuple(candidate)})
+                if do_add:
+                    tally.update({tuple(schedule)})
+            try:
+                most_common_route = tally.most_common(1)[0][0]
+                route.description = simplify_name(most_common_route[0]) + ' - ' + simplify_name(most_common_route[-1])
+            except:
+                route.description = ''
+            route.trips = sorted(route.trips, key = lambda trip: trip.schedule and trip.schedule[0].departure_time or timedelta())
+        if 'compatibility' in profile and profile['compatibility'].get('fix-destination-times', False):
+            # There seems to be something strange going on in Föli's gtfs data.
+            # It seems that sometimes the arrival time of the last stop is
+            # completely off, so try to estimate when the bus will really arrive
+            # there based on the last leg distance.
+            # I noticed this for bus 220's arrival time at Mylly several years
+            # ago. Possibly this has been fixed in the data by now?
+            for trip in all_trips.values():
+                if len(trip.schedule) >= 2:
+                    bus_speed_coefficient = 750 # meters per minute
+                    last_leg_distance = trip.schedule[-1].traveled_distance - trip.schedule[-2].traveled_distance
+                    trip.schedule[-1].arrival_time = trip.schedule[-2].departure_time + timedelta(minutes = last_leg_distance / bus_speed_coefficient)
+        # Add services to all bus stops
+        for route in routes.values():
+            for trip in route.trips:
+                for halt in trip.schedule:
+                    halt.stop.services.add(route.service)
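
The densest part of the changeset is cluster_gtfs_stops(). The standalone sketch below restates only its first pass, the rule that stops sharing a name and lying within 0.4 km of each other belong to one cluster. It is illustrative only: the Stop dataclass, the sample coordinates and the equirectangular distance helper are assumptions made for the sketch, not part of buses.py, which uses its own BusStop, BusStopCluster and Location.distance and additionally merges leftover single-stop clusters into the nearest cluster within the same 0.4 km radius.

# Minimal sketch of the same-name clustering rule; not the code from buses.py.
from dataclasses import dataclass
from math import cos, hypot, radians

@dataclass(frozen = True)
class Stop:
    reference: str
    name: str
    lat: float
    lon: float

def distance_km(a, b):
    # Equirectangular approximation; adequate at city scale.
    mean_lat = radians((a.lat + b.lat) / 2)
    dx = radians(b.lon - a.lon) * cos(mean_lat) * 6371.0
    dy = radians(b.lat - a.lat) * 6371.0
    return hypot(dx, dy)

def cluster_by_name(stops, threshold_km = 0.4):
    clusters = []    # list of sets of Stop
    cluster_of = {}  # Stop -> the set it belongs to
    for stop in sorted(stops, key = lambda s: s.name):
        if stop in cluster_of:
            continue
        # Same-named stops within the threshold are candidates for one cluster.
        nearby = {other for other in stops
                  if other.name == stop.name and distance_km(stop, other) <= threshold_km}
        # Reuse an existing cluster if any same-named neighbour already has one.
        target = next((cluster_of[s] for s in nearby if s in cluster_of), None)
        if target is None:
            target = set()
            clusters.append(target)
        for s in nearby:
            if s not in cluster_of:
                target.add(s)
                cluster_of[s] = target
    return clusters

if __name__ == '__main__':
    stops = [
        Stop('1', 'Kauppatori', 60.4515, 22.2665),
        Stop('2', 'Kauppatori', 60.4512, 22.2671),  # opposite platform, same name
        Stop('3', 'Kauppatori', 60.4700, 22.3000),  # same name but too far away
    ]
    for cluster in cluster_by_name(stops):
        print(sorted(s.reference for s in cluster))

Running the sketch prints ['1', '2'] followed by ['3']: the two same-named platforms are grouped, while the distant namesake stays in its own cluster.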