# HG changeset patch
# User Teemu Piippo
# Date 1596055553 -10800
# Node ID f9788970fa46e59341e9f4f016e3b04d2d776bf4
# Parent 659ab465152ea99397bfd5cb5aac799dc6d3fbb0
begin work on bus compiler

diff -r 659ab465152e -r f9788970fa46 .hgignore
--- a/.hgignore	Tue Jul 28 21:51:54 2020 +0300
+++ b/.hgignore	Wed Jul 29 23:45:53 2020 +0300
@@ -1,2 +1,4 @@
 syntax:glob
 gtfs.zip
+__pycache__
+*.db
diff -r 659ab465152e -r f9788970fa46 buses.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/buses.py	Wed Jul 29 23:45:53 2020 +0300
@@ -0,0 +1,263 @@

def old_load_gtfs(gtfs_zip_path):
    global viimeinen_käyttöpäivä
    from zipfile import ZipFile
    with ZipFile(gtfs_zip_path) as gtfs_zip:
        with gtfs_zip.open('trips.txt') as file:
            for row in read_csv(map(bytes.decode, file)):
                if row['service_id'] not in services:
                    services[row['service_id']] = BusService(row['service_id'])
                route = routes_per_id[row['route_id']]
                trip = GtfsTrip(
                    reference = row['trip_id'],
                    route = route,
                    service = services[row['service_id']],
                    length = shape_distances.get(row.get('shape_id'), 1) * float(profile['metrics']['shape-modifier']),
                    block_id = row.get('block_id') or row['service_id'],
                    shape = row.get('shape_id')
                )
                route.trips.add(trip)
                if trip.name in all_trips:
                    print('Trip %s already exists' % trip.name)
                else:
                    all_trips[trip.name] = trip
        print('%d trips' % len(all_trips), file = stderr)

        def read_date(teksti):
            return date(int(teksti[:4]), int(teksti[4:6]), int(teksti[6:]))

        def read_time(teksti):
            hour, minute, second = map(int, teksti.split(':'))
            return timedelta(hours = hour, minutes = minute, seconds = second)

        print('Loading dates... ', file = stderr, flush = True)
        viimeinen_käyttöpäivä = date.today()

        def date_range(start_date, end_date, *, include_end = False):
            ''' Generates dates from start_date to end_date. If include_end is True, end_date is yielded as well. '''
            current_date = start_date
            while current_date < end_date:
                yield current_date
                current_date += timedelta(1)
            if include_end:
                yield end_date

        def add_day_to_service(service_name, day):
            global viimeinen_käyttöpäivä
            try:
                service = services[service_name]
            except KeyError:
                return
            else:
                service.dates.add(day)
                if day not in services_for_day:
                    services_for_day[day] = set()
                services_for_day[day].add(service)
                viimeinen_käyttöpäivä = max(day, viimeinen_käyttöpäivä)

        def filter_day(row, day):
            day_names = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
            return int(row[day_names[day.isoweekday() - 1]])

        with gtfs_zip.open('calendar.txt') as file:
            for row in read_csv(map(bytes.decode, file)):
                for day in date_range(read_date(row['start_date']), read_date(row['end_date']), include_end = True):
                    if filter_day(row, day):
                        add_day_to_service(service_name = row['service_id'], day = day)

        with gtfs_zip.open('calendar_dates.txt') as file:
            for row in read_csv(map(bytes.decode, file)):
                add_day_to_service(service_name = row['service_id'], day = read_date(row['date']))

        def services_available_at(day):
            for service in services.values():
                if day in service.dates:
                    yield service
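        # Hedged note: GTFS calendar_dates.txt rows also carry an
        # exception_type field (1 = service added on that date, 2 = service
        # removed). The calendar_dates.txt loop above treats every row as an
        # addition, which is only correct for feeds that never use removals.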
        print('Loading stops... ', file = stderr, end = '', flush = True)
        with gtfs_zip.open('stops.txt') as file:
            for row in read_csv(map(bytes.decode, file)):
                location = Location(float(row['stop_lat']), float(row['stop_lon']))
                stop = BusStop(
                    reference = row['stop_id'],
                    name = row['stop_name'],
                    location = location,
                    code = row.get('stop_code', row['stop_id']),
                )
                bus_stops[stop.reference] = stop
        if profile['regions']['use-regions']:
            with open('regions-per-stop.json') as file:
                for stop_reference, region in json.load(file).items():
                    try:
                        bus_stops[stop_reference].region = region
                    except KeyError:
                        pass
        for bus_stop in bus_stops.values():
            if not hasattr(bus_stop, 'region'):
                bus_stop.region = None
        print('%d stops' % len(bus_stops), file = stderr)

        from collections import defaultdict
        bus_stops_by_name = defaultdict(set)
        for bus_stop in bus_stops.values():
            bus_stops_by_name[bus_stop.name].add(bus_stop)
        bus_stops_by_name = dict(bus_stops_by_name)

        # group bus_stops by name
        global all_clusters
        all_clusters = []
        def cluster_gtfs_stops():
            sorted_gtfs_stops = sorted(bus_stops.values(), key = lambda bus_stop: bus_stop.name)
            for bus_stop in sorted_gtfs_stops:
                if not bus_stop.cluster:
                    stops_to_cluster = {bus_stop}
                    # find the stop's same-named counterparts
                    for pair_candidate in bus_stops_by_name[bus_stop.name]:
                        distance = pair_candidate.location.distance(bus_stop.location)
                        if pair_candidate is not bus_stop and distance <= 0.4:
                            stops_to_cluster.add(pair_candidate)
                    for stop_to_cluster in stops_to_cluster:
                        if stop_to_cluster.cluster:
                            cluster = stop_to_cluster.cluster
                            break
                    else:
                        cluster = BusStopCluster()
                        all_clusters.append(cluster)
                    for stop_to_cluster in stops_to_cluster:
                        if not stop_to_cluster.cluster:
                            cluster.add_stop(stop_to_cluster)
            # Record each stop's counterparts based on the clustering so far
            for bus_stop in bus_stops.values():
                if bus_stop.cluster:
                    bus_stop.pairs = bus_stop.cluster.stops - {bus_stop}
            # Group the bus_stops that had no counterpart of their own with other stops
            for bus_stop in sorted_gtfs_stops:
                if len(bus_stop.cluster.stops) == 1:
                    possibilities = set()
                    for cluster in all_clusters:
                        if cluster is not bus_stop.cluster:
                            distance = cluster.center.distance(bus_stop.location)
                            if distance <= 0.4:
                                possibilities.add((distance, cluster))
                    if possibilities:
                        best = min(possibilities, key = lambda pair: pair[0])[1]
                        all_clusters.remove(bus_stop.cluster)
                        best.merge(bus_stop.cluster)
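        # Illustration of the rules above (coordinates invented): two stops
        # named 'Kauppatori' 80 m apart share a name and lie within the 0.4 km
        # limit, so they form one cluster; a lone single-stop cluster is merged
        # afterwards only if some other cluster's center is within 0.4 km.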
        def shared_elements_in_n_sets(sets):
            from itertools import combinations
            result = set()
            for pair in combinations(sets, 2):
                result |= pair[0] & pair[1]
            return result

        def name_clusters():
            from collections import defaultdict
            clusters_per_name = defaultdict(set)
            for cluster in all_clusters:
                name_representing_stop = min((len(stop.reference), stop.reference, stop) for stop in cluster.stops)[2]
                clusters_per_name[name_representing_stop.name].add(cluster)
            for name, clusters in clusters_per_name.items():
                if len(clusters) == 1:
                    # Simple case: this cluster is the only one that wants this name.
                    next(iter(clusters)).name = name
                else:
                    if profile['regions']['use-regions']:
                        # Find out if all clusters are in different areas
                        common_regions = shared_elements_in_n_sets({stop.region for stop in cluster.stops} for cluster in clusters)
                        # Proposal: cluster -> the areas unique to the cluster
                        proposal = {
                            cluster: {stop.region for stop in cluster.stops} - common_regions - {None}
                            for cluster in clusters
                        }
                        # If at most one cluster is without its own unique region,
                        # name the others by region and leave that one plain.
                        if sum(1 for unique_areas in proposal.values() if not unique_areas) <= 1:
                            for cluster, unique_areas in proposal.items():
                                individual_cluster_name = name
                                if unique_areas:
                                    individual_cluster_name += ' (' + min(unique_areas) + ')'
                                cluster.name = individual_cluster_name
                            continue
                    # If all else fails, just number them.
                    for n, (_, cluster) in enumerate(sorted(
                        min((stop.reference.lower(), cluster) for stop in cluster.stops)
                        for cluster in clusters
                    ), 1):
                        individual_cluster_name = name + '-' + str(n)
                        cluster.name = individual_cluster_name

        print('Clustering bus stops...')
        cluster_gtfs_stops()
        name_clusters()
        for cluster in all_clusters:
            if cluster.url_name in clusters_by_name:
                print('Warning: Clusters %r and %r share the same URL name: %r' % (cluster.name, clusters_by_name[cluster.url_name].name, cluster.url_name))
            else:
                clusters_by_name[cluster.url_name] = cluster
        print('Loading schedules... ', end = '', flush = True, file = stderr)
        with gtfs_zip.open('stop_times.txt') as file:
            row_count = sum(line.count(b'\n') for line in file)
        with gtfs_zip.open('stop_times.txt') as file:
            progress = 0
            for row in read_csv(map(bytes.decode, file)):
                # pickup_type/drop_off_type value 1 means "not available", so
                # skip halts where the bus neither picks up nor drops off
                if int(row.get('pickup_type', '') or '0') and int(row.get('drop_off_type', '') or '0'):
                    continue
                trip = all_trips[transform_trip_reference(row['trip_id'])]
                arrival_time = read_time(row['arrival_time'])
                departure_time = read_time(row['departure_time'])
                stop = bus_stops[row['stop_id']]
                traveled_distance = float(row.get('shape_dist_traveled') or 1) * float(profile['metrics']['shape-modifier'])
                visitnumber = len(trip.schedule) + 1
                trip.schedule.append(BusHalt(arrival_time, departure_time, stop, trip, traveled_distance, visitnumber))
                stop.involved_trips.add(trip)
                progress += 1
                if progress % 1000 == 0:
                    print('\rLoading schedules... %.1f%%' % (progress * 100 / row_count), end = ' ', file = stderr)
        print('\rLoading schedules... complete', file = stderr)
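        # Hedged note: concise_schedule() is not defined in this file; judging
        # by its use below it returns the list of place names the trip passes
        # through, so schedule[0] and schedule[-1] are origin and terminus.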
        for trip in all_trips.values():
            from busroute import simplify_name
            schedule = trip.concise_schedule()
            try:
                trip.from_place = simplify_name(schedule[0])
                trip.to_place = simplify_name(schedule[-1])
            except IndexError:
                trip.from_place = ''
                trip.to_place = ''
        for route in routes.values():
            from collections import Counter
            from busroute import simplify_name
            tally = Counter()
            for trip in route.trips:
                schedule = trip.concise_schedule()
                places = set(schedule)
                do_add = True
                assert type(schedule) is list
                for candidate in tally:
                    if places.issubset(set(candidate)):
                        do_add = False
                        tally.update({tuple(candidate)})
                if do_add:
                    tally.update({tuple(schedule)})
            try:
                most_common_route = tally.most_common(1)[0][0]
                route.description = simplify_name(most_common_route[0]) + ' - ' + simplify_name(most_common_route[-1])
            except IndexError:
                route.description = ''
            route.trips = sorted(route.trips, key = lambda trip: trip.schedule and trip.schedule[0].departure_time or timedelta())
        if 'compatibility' in profile and profile['compatibility'].get('fix-destination-times', False):
            # There seems to be something strange going on in Föli's GTFS data:
            # sometimes the arrival time at the last stop is completely off, so
            # estimate when the bus really arrives there from the length of the
            # last leg. I noticed this with bus 220's arrival time at Mylly
            # several years ago; possibly the data has been fixed since.
            for trip in all_trips.values():
                if len(trip.schedule) >= 2:
                    bus_speed_coefficient = 750 # meters per minute
                    last_leg_distance = trip.schedule[-1].traveled_distance - trip.schedule[-2].traveled_distance
                    trip.schedule[-1].arrival_time = trip.schedule[-2].departure_time + timedelta(minutes = last_leg_distance / bus_speed_coefficient)
        # Add services to all bus stops
        for route in routes.values():
            for trip in route.trips:
                for halt in trip.schedule:
                    halt.stop.services.add(route.service)
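As a worked example of the destination-time fix above (numbers invented): with the 750 m/min coefficient, a 3 km final leg adds four minutes to the second-to-last departure.

from datetime import timedelta

last_leg_distance = 3000                  # meters, invented
departure = timedelta(hours = 10)         # second-to-last stop, 10:00:00
arrival = departure + timedelta(minutes = last_leg_distance / 750)
assert arrival == timedelta(hours = 10, minutes = 4)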
+ ''' + # prefer longer destination signs on longer routes + length = ((trip_length / 600) * 3 + len(itinerary) * 2) / 5 + # collect regions along the itinerary + have_already = set() + i = 0 + if not itinerary: + # not going anywhere? + return '' + while i < len(itinerary): + region = regions.get(itinerary[i]) + if not itinerary[i] or itinerary[i] in have_already: + del itinerary[i] + else: + have_already.add(itinerary[i]) + i += 1 + from_place = itinerary[0] + destination = itinerary[-1] + route_weights = {} + # create weights for all places along the way. Transforming by x^-0.3 + # lessens weights for places further out in the itinerary. + f = lambda i: i**-0.3 + # this factor scales the weights so that they become comparable against + # constant values + factor = 1 / max(f(i + 1) for i in range(len(itinerary))) + while via_factor(itinerary[-1], regions = regions) < 0: + del itinerary[-1] + if not itinerary: + return '' + destination = itinerary[-1] + for i, stop in enumerate(itinerary): + # transform index by: + # - our gradually decreasing x^-0.3 curve, + # - our normalising factor, + # - and the via_factor of the stop + route_weights[stop] = f(i + 1) * factor * via_factor(stop, regions = regions) + # ensure that the starting region does not make it into the destinations + # sign by setting its weight to 0 + if from_place in route_weights: + route_weights[from_place] = 0 + # ensure that the destination does make it to the signpost + route_weights[destination] = 1.0e+10 + # sort destinations by importance + weights = sorted( + [ + (stop, route_weights[stop], i) + for i, stop in enumerate(itinerary) + if route_weights[stop] >= 1 + ], key = lambda stop: -stop[1]) + # now consider what do we want to display: + if format == 'long': + # long format, just take at most four destinations + weights = weights[:4] + elif format == 'short': + # short format, take at most two destinations + weights = weights[:2] + # possibly drop the via-region as well + try: + if weights[1][0] != destination and weights[1][1] < (500 / length ** 1.15): + del weights[1] + except IndexError: + pass + elif format == 'medium': + # regular format, at most three destinations + weights = weights[:3] + # if the third sign value is not significant enough, drop it + try: + if weights[2][0] != destination and weights[2][1] < (725 / length ** 0.8): + del weights[2] + except IndexError: + pass + # and repeat for the second sign value + try: + if weights[1][0] != destination and weights[1][1] < (500 / length ** 1.15): + del weights[1] + except IndexError: + pass + else: + raise ValueError(str.format('Unknown format {format}', format = repr(format))) + # restore the signpost back to itinerary order + weights.sort(key = lambda weight_data: weight_data[2]) + # form the sign + sign = [paino[0] for paino in weights] + old_sign = sign.copy() + sign = [] + for place in old_sign: + if place not in sign: + sign.append(place) + if whole: + # whole format, also include the starting point + sign = [from_place] + sign + if not sign: + sign = [destination] + return sign diff -r 659ab465152e -r f9788970fa46 compute_regions.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/compute_regions.py Wed Jul 29 23:45:53 2020 +0300 @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 +import sys, json +from misc import * +from geometry import * +from configparser import ConfigParser + +class Blockmap: + ''' + The blockmap is a grid of latitude and longitude lines and models + a block -> set relation. A block is a latitude and longitude square. 
+ ''' + block_size = 200.0 # How big are blocks? + def __init__(self, blocks = None): + from collections import defaultdict + self.blocks = blocks or defaultdict(set) + def __getitem__(self, blockid): + ''' + Returns a block for block coordinates. The block is a set that can + contain anything. + ''' + return self.blocks[blockid] + def blockpoint(self, point): + ''' + Returns blockmap coordinates for geographical coordinates. + The blockmap coordinates refer to a block in the blockmap. + ''' + block_coordinate = lambda x: int(x * self.block_size) + return block_coordinate(point.x), block_coordinate(point.y) + +def blocks_in_shape(blockmap, shape): + ''' + Finds all blocks inside the bounding box of a shape. + ''' + from itertools import product + min_x, max_x = minmax(point.x for point in shape.points) + min_y, max_y = minmax(point.y for point in shape.points) + min_blockpoint = blockmap.blockpoint(Location(min_x, min_y)) + max_blockpoint = blockmap.blockpoint(Location(max_x, max_y)) + range_x = range(min_blockpoint[0], max_blockpoint[0] + 1) + range_y = range(min_blockpoint[1], max_blockpoint[1] + 1) + yield from (blockmap[x, y] for x, y in product(range_x, range_y)) + +def create_blockmap(regions): + ''' + Creates a blockmap of regions + ''' + blockmap = Blockmap() + for region in regions.values(): + # Minor shapes contain major shapes, so just use those + for shape in (region['minor_shapes'] or region['major_shapes']): + for block in blocks_in_shape(blockmap, shape): + set.add(block, region['name']) + return blockmap + +def get_args(): + from argparse import ArgumentParser + parser = ArgumentParser() + parser.add_argument('gtfs_zip') + parser.add_argument('profile') + return parser.parse_args() + +def test_shapes(shapes, point): + return any(shape.contains_point(point) for shape in shapes) + +class RegionalLocation: + def __init__(self, *, region, region_class): + self.region, self.region_class = region, region_class + def __repr__(self): + return str.format( + 'RegionalLocation(region = {region}, region_class = {region_class})', + region = repr(self.region), + region_class = repr(self.region_class), + ) + +def locate_regionally(position, region): + if test_shapes(region['major_shapes'], position): + return RegionalLocation(region = region['name'], region_class = 'major') + elif test_shapes(region['minor_shapes'], position): + return RegionalLocation(region = region['name'], region_class = 'minor') + else: + return None + +def find_region_for_point(position, regions, blockmap): + for region_name in blockmap[blockmap.blockpoint(position)]: + region = regions[region_name] + classification = locate_regionally(position, region) + if classification: + return classification + +class RegionTester: + def __init__(self, regions): + self.regions = regions + self.blockmap = create_blockmap(regions) + def __call__(self, latitude, longitude): + return find_region_for_point( + position = Location(float(latitude), float(longitude)), + regions = self.regions, + blockmap = self.blockmap, + ) diff -r 659ab465152e -r f9788970fa46 datamodel.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datamodel.py Wed Jul 29 23:45:53 2020 +0300 @@ -0,0 +1,55 @@ +import sqlalchemy.ext.declarative +GtfsBase = sqlalchemy.ext.declarative.declarative_base(name = 'GtfsBase') + +class GtfsRoute(GtfsBase): + __tablename__ = 'routes' + id = sqlalchemy.Column(sqlalchemy.String, primary_key = True) + reference = sqlalchemy.Column(sqlalchemy.String) + description = sqlalchemy.Column(sqlalchemy.String) + type = 
diff -r 659ab465152e -r f9788970fa46 datamodel.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/datamodel.py	Wed Jul 29 23:45:53 2020 +0300
@@ -0,0 +1,55 @@
import sqlalchemy
import sqlalchemy.orm
import sqlalchemy.ext.declarative
GtfsBase = sqlalchemy.ext.declarative.declarative_base(name = 'GtfsBase')

class GtfsRoute(GtfsBase):
    __tablename__ = 'routes'
    id = sqlalchemy.Column(sqlalchemy.String, primary_key = True)
    reference = sqlalchemy.Column(sqlalchemy.String)
    description = sqlalchemy.Column(sqlalchemy.String)
    type = sqlalchemy.Column(sqlalchemy.Integer)

class GtfsShape(GtfsBase):
    __tablename__ = 'shapes'
    id = sqlalchemy.Column(sqlalchemy.String, primary_key = True)
    length = sqlalchemy.Column(sqlalchemy.Numeric, default = 0)
    shape_coordinates = sqlalchemy.Column(sqlalchemy.String)

class GtfsTrip(GtfsBase):
    __tablename__ = 'trips'
    id = sqlalchemy.Column(sqlalchemy.String, primary_key = True)
    route_id = sqlalchemy.Column(sqlalchemy.String, sqlalchemy.ForeignKey(GtfsRoute.id))
    service = sqlalchemy.Column(sqlalchemy.String)
    shape_id = sqlalchemy.Column(sqlalchemy.String, sqlalchemy.ForeignKey(GtfsShape.id))
    route = sqlalchemy.orm.relationship('GtfsRoute', foreign_keys = 'GtfsTrip.route_id')
    shape = sqlalchemy.orm.relationship('GtfsShape', foreign_keys = 'GtfsTrip.shape_id')

class GtfsService(GtfsBase):
    __tablename__ = 'services'
    id = sqlalchemy.Column(sqlalchemy.String, primary_key = True)

class GtfsStop(GtfsBase):
    __tablename__ = 'stops'
    stop_id = sqlalchemy.Column(sqlalchemy.String, primary_key = True)
    stop_name = sqlalchemy.Column(sqlalchemy.String)
    stop_latitude = sqlalchemy.Column(sqlalchemy.Numeric)
    stop_longitude = sqlalchemy.Column(sqlalchemy.Numeric)
    stop_region = sqlalchemy.Column(sqlalchemy.String)
    stop_region_major = sqlalchemy.Column(sqlalchemy.Boolean)

class GtfsRegion(GtfsBase):
    # Not a GTFS data set, but the Gtfs prefix is kept for consistency
    __tablename__ = 'regions'
    region_name = sqlalchemy.Column(sqlalchemy.String, primary_key = True)
    region_name_sv = sqlalchemy.Column(sqlalchemy.String)
    region_name_en = sqlalchemy.Column(sqlalchemy.String)
    region_name_ja = sqlalchemy.Column(sqlalchemy.String)
    region_short_name = sqlalchemy.Column(sqlalchemy.String)
    region_short_name_sv = sqlalchemy.Column(sqlalchemy.String)
    region_short_name_en = sqlalchemy.Column(sqlalchemy.String)
    region_short_name_ja = sqlalchemy.Column(sqlalchemy.String)
    region_internal_name = sqlalchemy.Column(sqlalchemy.String)
    region_internal_name_sv = sqlalchemy.Column(sqlalchemy.String)
    region_internal_name_en = sqlalchemy.Column(sqlalchemy.String)
    region_internal_name_ja = sqlalchemy.Column(sqlalchemy.String)
    municipality = sqlalchemy.Column(sqlalchemy.String, nullable = False)
    external = sqlalchemy.Column(sqlalchemy.Boolean)
diff -r 659ab465152e -r f9788970fa46 föli.ini
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/föli.ini	Wed Jul 29 23:45:53 2020 +0300
@@ -0,0 +1,13 @@
[metrics]
shape-modifier = 1

[compatibility]
fix-destination-times = 1

[regions]
use-regions = 1
osm-path = föli.osm

[date-exceptions]
1.5. = sunday
30.4. = weekday
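A sketch of how this profile is read; getboolean is my suggestion, since buses.py currently truth-tests the raw string:

from configparser import ConfigParser

profile = ConfigParser()
profile.read('föli.ini')
shape_modifier = float(profile['metrics']['shape-modifier'])    # 1.0
use_regions = profile['regions'].getboolean('use-regions')      # True
# note: profile['regions']['use-regions'] is the *string* '1'; even '0'
# would be truthy, so getboolean is the safer test.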
diff -r 659ab465152e -r f9788970fa46 föli.osm
--- a/föli.osm	Tue Jul 28 21:51:54 2020 +0300
+++ b/föli.osm	Wed Jul 29 23:45:53 2020 +0300
[region-boundary edits to the OSM file: the XML markup did not survive extraction, so the hunks are omitted]
diff -r 659ab465152e -r f9788970fa46 geometry.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/geometry.py	Wed Jul 29 23:45:53 2020 +0300
@@ -0,0 +1,124 @@
from math import sqrt, hypot, radians, sin, cos, atan2

class Location:
    def __init__(self, latitude, longitude):
        self.latitude, self.longitude = latitude, longitude
    def distance(self, other):
        # haversine distance in kilometers
        # https://stackoverflow.com/a/365853
        a = sin(radians(self.latitude - other.latitude) / 2) ** 2
        a += sin(radians(self.longitude - other.longitude) / 2) ** 2 * cos(radians(self.latitude)) * cos(radians(other.latitude))
        return 6371 * 2 * atan2(sqrt(a), sqrt(1 - a))
    def __repr__(self):
        return '%s(%r, %r)' % (type(self).__name__, self.latitude, self.longitude)
    def __str__(self):
        return '%.5f, %.5f' % (self.latitude, self.longitude)
    @property
    def x(self):
        return self.latitude
    @property
    def y(self):
        return self.longitude
    @property
    def link_to_map(self):
        return 'http://www.openstreetmap.org/#map=19/%f/%f' % (self.latitude, self.longitude)

class Ring:
    ''' Wraps a sequence so that indexing wraps around both ends. '''
    def __init__(self, container):
        self.container = container
    def __getitem__(self, i):
        return self.container[i % len(self.container)]
    def __iter__(self):
        return iter(self.container)
    def __len__(self):
        return len(self.container)

class Polygon:
    def __init__(self, *points):
        self.points = points
    def __repr__(self):
        return '%s(%s)' % (type(self).__name__, ', '.join(map(repr, self.points)))
    def area(self):
        # signed area by the shoelace formula
        ring = Ring(self.points)
        return sum(
            ring[i].x * ring[i + 1].y - ring[i + 1].x * ring[i].y
            for i in range(len(ring))
        ) / 2
    def circumference(self):
        ring = Ring(self.points)
        return sum(
            sqrt((ring[i + 1].x - ring[i].x)**2 + (ring[i + 1].y - ring[i].y)**2)
            for i in range(len(ring))
        )
    def centroid(self):
        ring = Ring(self.points)
        area = self.area()
        x = sum(
            (ring[i].x + ring[i + 1].x) * (ring[i].x * ring[i + 1].y - ring[i + 1].x * ring[i].y)
            for i in range(len(ring))
        ) / 6 / area
        y = sum(
            (ring[i].y + ring[i + 1].y) * (ring[i].x * ring[i + 1].y - ring[i + 1].x * ring[i].y)
            for i in range(len(ring))
        ) / 6 / area
        return self.point_type()(x, y)
    def point_type(self):
        if len(self.points):
            return type(self.points[0])
        else:
            return Point
    def segments(self):
        ring = Ring(self.points)
        for i in range(len(ring)):
            yield LineSegment(ring[i], ring[i + 1])
    def contains_point(self, point):
        # ray casting: draw a segment to a point guaranteed to be outside the
        # polygon and count how many edges it crosses
        outer_point = self.point_type()(
            min(p.x for p in self.points) - 1,
            min(p.y for p in self.points) - 1
        )
        outer_segment = LineSegment(point, outer_point)
        intersections = 0
        for segment in self.segments():
            if segment.intersection(outer_segment) is not None:
                intersections += 1
        return bool(intersections & 1)

class LineSegment:
    def __init__(self, p1, p2):
        self.p1, self.p2 = p1, p2
    def __repr__(self):
        return 'LineSegment(%r, %r)' % (self.p1, self.p2)
    def length(self):
        return hypot(self.p1.x - self.p2.x, self.p1.y - self.p2.y)
    def intersection(self, other):
        # line-line intersection, restricted to the two segments
        point_type = type(self.p1)
        x = (self.p1.x, self.p2.x, other.p1.x, other.p2.x)
        y = (self.p1.y, self.p2.y, other.p1.y, other.p2.y)
        try:
            denominator = (x[0] - x[1]) * (y[2] - y[3]) - (y[0] - y[1]) * (x[2] - x[3])
            Px = ((x[0] * y[1] - y[0] * x[1]) * (x[2] - x[3]) - (x[0] - x[1]) * (x[2] * y[3] - y[2] * x[3])) / denominator
            Py = ((x[0] * y[1] - y[0] * x[1]) * (y[2] - y[3]) - (y[0] - y[1]) * (x[2] * y[3] - y[2] * x[3])) / denominator
            distance = lambda n: hypot(Px - x[n], Py - y[n])
            if max(distance(0), distance(1)) <= self.length() and max(distance(2), distance(3)) <= other.length():
                return point_type(Px, Py)
            else:
                return None
        except ZeroDivisionError:
            return None

class Point:
    def __init__(self, x, y):
        self.x, self.y = x, y
    def __repr__(self):
        return 'Point(%r, %r)' % (self.x, self.y)

if __name__ == '__main__':
    # development leftovers: a sample polygon and two segments
    A = Polygon(
        Point(2, 3),
        Point(1, 1),
        Point(4, 0),
        Point(6, 2),
        Point(4, 4))
    L1 = LineSegment(Point(1, 1), Point(-1, 5))
    L2 = LineSegment(Point(1, 5), Point(5, 1))
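A small self-check of the ray-casting and intersection code above (the test points are chosen so the cast ray avoids polygon corners, which this simple crossing count does not handle):

from geometry import Polygon, Point, LineSegment

square = Polygon(Point(0, 0), Point(4, 0), Point(4, 4), Point(0, 4))
assert square.contains_point(Point(2, 1))        # inside
assert not square.contains_point(Point(5, 2))    # outside
crossing = LineSegment(Point(0, 0), Point(2, 2)).intersection(
    LineSegment(Point(0, 2), Point(2, 0)))
assert (crossing.x, crossing.y) == (1.0, 1.0)    # diagonals meet at (1, 1)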
diff -r 659ab465152e -r f9788970fa46 gtfsc.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gtfsc.py	Wed Jul 29 23:45:53 2020 +0300
@@ -0,0 +1,184 @@
#!/usr/bin/env python3
import sys
import sqlalchemy
import sqlalchemy.orm
from datamodel import *

ROUTE_TYPES = {
    '0': 'tram',
    '1': 'subway',
    '2': 'rail',
    '3': 'bus',
    '4': 'ferry',
    '5': 'cable-tram',
    '6': 'aerial-lift',
    '7': 'funicular',
    '11': 'trolleybus',
    '12': 'monorail',
}

def read_csv(file):
    import csv
    reader = csv.reader(file)
    keys = next(reader)
    for i in range(len(keys)):
        # strip the UTF-8 byte order mark and stray whitespace from headers
        keys[i] = keys[i].replace('\ufeff', '').strip()
    for row in reader:
        yield dict(zip(keys, row))

def load_gtfs_routes(gtfs_zip):
    with gtfs_zip.open('routes.txt') as file:
        for row in read_csv(map(bytes.decode, file)):
            route = GtfsRoute(
                id = row['route_id'],
                reference = row['route_short_name'],
                description = row['route_long_name'],
                type = int(row['route_type']),
            )
            yield route.id, route

def load_shapes(gtfs_zip):
    shapes = dict()
    with gtfs_zip.open('shapes.txt') as file:
        for row in read_csv(map(bytes.decode, file)):
            shape_id = row['shape_id']
            if shape_id not in shapes:
                shapes[shape_id] = GtfsShape(
                    id = shape_id,
                    shape_coordinates = '',
                    length = 0,
                )
            shape = shapes[shape_id]
            if len(shape.shape_coordinates) > 0:
                shape.shape_coordinates += ' '
            shape.shape_coordinates += str.format(
                '{shape_pt_lat} {shape_pt_lon}',
                **row,
            )
            shape.length = max(shape.length, float(row['shape_dist_traveled']))
    return shapes.values()

def trip_length(trip, *, shapes, profile):
    if trip.shape_id:
        return shapes[trip.shape_id].length * float(profile['metrics']['shape-modifier'])
    else:
        return 0

def load_trips(gtfs_zip):
    services = set()
    with gtfs_zip.open('trips.txt') as file:
        for row in read_csv(map(bytes.decode, file)):
            if row['service_id'] not in services:
                services.add(row['service_id'])
                yield GtfsService(id = row['service_id'])
            yield GtfsTrip(
                id = row['trip_id'],
                route_id = row['route_id'],
                service = row['service_id'],
                shape_id = row.get('shape_id')
            )

def load_stops(gtfs_zip):
    with gtfs_zip.open('stops.txt') as file:
        for row in read_csv(map(bytes.decode, file)):
            lat = float(row['stop_lat'])
            lon = float(row['stop_lon'])
            yield GtfsStop(
                stop_id = row['stop_id'],
                stop_name = row['stop_name'],
                stop_latitude = lat,
                stop_longitude = lon,
            )

def gtfs_stop_spatial_testing(session, regions):
    print('Finding out in which regions bus stops are...')
    from compute_regions import RegionTester
    regiontester = RegionTester(regions)
    for bus_stop in session.query(GtfsStop):
        classification = regiontester(
            latitude = bus_stop.stop_latitude,
            longitude = bus_stop.stop_longitude,
        )
        if classification:
            bus_stop.stop_region = classification.region
            bus_stop.stop_region_major = classification.region_class == 'major'

def load_with_loading_text(fn, what, device):
    print(
        str.format('Loading {}s... ', what),
        file = device,
        end = '',
        flush = True,
    )
    result = fn()
    print(
        str.format(
            '{n} {what}s',
            n = len(result if type(result) is not tuple else result[0]),
            what = what,
        ),
        file = device,
    )
    return result

def load_gtfs(
    gtfs_zip_path,
    *,
    profile,
    session,
    device = sys.stderr
):
    from zipfile import ZipFile
    with ZipFile(gtfs_zip_path) as gtfs_zip:
        print('Loading routes...', file = device)
        for route_id, route in load_gtfs_routes(gtfs_zip):
            session.add(route)
        print('Loading stops...', file = device)
        for stop in load_stops(gtfs_zip):
            session.add(stop)
        print('Loading shapes...', file = device)
        for shape in load_shapes(gtfs_zip):
            session.add(shape)
        print('Loading trips...', file = device)
        for trip_or_service in load_trips(gtfs_zip):
            session.add(trip_or_service)

def parse_yesno(value):
    return bool(value) and value != 'no'

def regions_to_db(regions):
    from itertools import product
    for region in regions.values():
        names = dict()
        for prefix, language in product(
            ['', 'short_', 'internal_'],
            ['', ':sv', ':en', ':ja'],
        ):
            key = 'region_' + prefix + 'name' + language.replace(':', '_')
            names[key] = region.get(prefix + 'name' + language)
        yield GtfsRegion(
            **names,
            municipality = region.get('municipality'),
            external = parse_yesno(region.get('external')),
        )

if __name__ == '__main__':
    from configparser import ConfigParser
    from regions import parse_regions
    profile = ConfigParser()
    profile.read('föli.ini')
    engine = sqlalchemy.create_engine('sqlite:///gtfs.db')
    GtfsBase.metadata.create_all(engine)
    session = sqlalchemy.orm.sessionmaker(bind = engine)()
    regions = parse_regions('föli.osm')
    for region in regions_to_db(regions):
        session.add(region)
    session.commit()
    load_gtfs('gtfs.zip', profile = profile, session = session)
    gtfs_stop_spatial_testing(session = session, regions = regions)
    print('Committing to database...')
    session.commit()
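Once gtfsc.py has been run and gtfs.db exists, the tables can be inspected through the same models; a hypothetical query, not part of the patch:

import sqlalchemy, sqlalchemy.orm
from datamodel import GtfsRoute

engine = sqlalchemy.create_engine('sqlite:///gtfs.db')
session = sqlalchemy.orm.sessionmaker(bind = engine)()
for route in session.query(GtfsRoute).limit(3):
    print(route.id, route.reference, route.description)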
diff -r 659ab465152e -r f9788970fa46 katakana.py
--- a/katakana.py	Tue Jul 28 21:51:54 2020 +0300
+++ b/katakana.py	Wed Jul 29 23:45:53 2020 +0300
@@ -153,14 +153,13 @@
         katakana[latin[0] + latin] = 'ッ' + katakana[latin]
     # add long vowel versions
     for latin in copy(list(katakana.keys())):
-        katakana[latin + latin[-1]] = katakana[latin] + 'ー'
+        if latin != 'n':
+            katakana[latin + latin[-1]] = katakana[latin] + 'ー'
     return katakana
 
 def katakana_keys(kana_table):
     return sorted(kana_table.keys(), key = len)[::-1]
 
-katakana_table = full_katakana_table(RAW_KATAKANA_TABLE)
-
 def finnish_to_romaji(finnish):
     # translates finnish text to Japanese romaji
     # does not, however, fill in 'u' vowels to consonants, that is done
@@ -175,6 +174,8 @@
         .replace('l', 'r')
         .replace('ä', 'a')
         .replace('ö', 'o')
+        .replace('x', 'ks')
+        .replace('c', 'k')
         .replace('å', 'oo'))
 
 def splice_romaji(romaji, keys):
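The first hunk guards the long-vowel rule: doubling the final letter of a key normally yields a long vowel, but a doubled 'n' (as in Finnish 'Anna') is a consonant pair that should go through the ン path rather than become 'ンー'. A sketch of the rule in isolation, assuming 'ka' maps to 'カ':

katakana = {'ka': 'カ', 'n': 'ン'}
for latin in list(katakana.keys()):
    if latin != 'n':
        katakana[latin + latin[-1]] = katakana[latin] + 'ー'
assert katakana['kaa'] == 'カー'
assert 'nn' not in katakana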
diff -r 659ab465152e -r f9788970fa46 misc.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/misc.py	Wed Jul 29 23:45:53 2020 +0300
@@ -0,0 +1,21 @@
from configparser import ConfigParser
profile = ConfigParser()

def minmax(data):
    '''
    From: http://code.activestate.com/recipes/577916-fast-minmax-function/
    Computes the minimum and maximum values in one pass using only
    1.5 * len(data) comparisons
    '''
    import itertools
    it = iter(data)
    try:
        lo = hi = next(it)
    except StopIteration:
        raise ValueError('minmax() arg is an empty sequence')
    for x, y in itertools.zip_longest(it, it, fillvalue = lo):
        if x > y:
            x, y = y, x
        lo = min(x, lo)
        hi = max(y, hi)
    return lo, hi
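minmax pairs the elements up and compares within each pair first, which is what saves comparisons over calling min() and max() separately. For example:

from misc import minmax

assert minmax([3, 1, 4, 1, 5, 9, 2, 6]) == (1, 9)
assert minmax(iter([7])) == (7, 7)    # works on any iterable, even one-element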
diff -r 659ab465152e -r f9788970fa46 regions.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/regions.py	Wed Jul 29 23:45:53 2020 +0300
@@ -0,0 +1,88 @@
#!/usr/bin/env python3
from xml.etree import ElementTree
from geometry import *

REGION_TYPES = ['major', 'minor']
REGION_KEY_VALUES = [x + '_region' for x in REGION_TYPES]
SHAPE_KEYS = [x + '_shapes' for x in REGION_TYPES]

def parse_nodes(root):
    nodes = {}
    for child in root:
        if child.tag == 'node':
            lat, lon = float(child.attrib['lat']), float(child.attrib['lon'])
            nodes[child.attrib['id']] = Location(lat, lon)
    return nodes

def parse_way(way, nodes):
    def choose_shapes(way, boundary):
        return (way['major_shapes']
            if boundary == 'major_region'
            else way['minor_shapes'])
    result = {'minor_shapes': [], 'major_shapes': []}
    shape = []
    for child in way:
        if child.tag == 'nd':
            shape.append(child.attrib['ref'])
        elif child.tag == 'tag':
            key = child.attrib['k']
            if key in SHAPE_KEYS:
                raise ValueError(str.format('tag "{}" is not allowed', key))
            result[key] = child.attrib['v']
            if key == 'boundary' and result['boundary'] not in REGION_KEY_VALUES:
                return None # we're not interested in it!
    if shape[-1] != shape[0]:
        raise ValueError('polygon is not closed: %r' % result)
    if 'boundary' not in result:
        raise ValueError('polygon not tagged as a boundary: %r' % result)
    shape = [nodes[ref] for ref in shape[:-1]]
    choose_shapes(result, result['boundary']).append(Polygon(*shape))
    return result

def parse_boundaries(root, *, nodes):
    for child in root:
        if child.tag == 'way':
            way = parse_way(child, nodes = nodes)
            if way:
                yield way

def parse_regions(filename):
    from katakana import transliterate as transliterate_katakana
    tree = ElementTree.parse(filename)
    root = tree.getroot()
    nodes = parse_nodes(root)
    regions = dict()
    extra_shapes = list()
    for way in parse_boundaries(root, nodes = nodes):
        if 'boundary' in way and way['boundary'] != 'subregion' and 'name' in way:
            # defines a region
            way['via_factor'] = int(way.get('via_factor', 1))
            if way['name'] in regions:
                raise ValueError(str.format(
                    'Region {name} defined twice',
                    name = repr(way['name']),
                ))
            regions[way['name']] = way
            del way['boundary']
            if 'external' in way:
                way['boundary'] = 'minor_region'
            for prefix in ['', 'short_', 'internal_']:
                name_key = prefix + 'name'
                if name_key in way and way[name_key] and name_key + ':ja' not in way:
                    way[name_key + ':ja'] = transliterate_katakana(way[name_key])
        elif 'boundary' in way and 'is_in' in way:
            # adds an extra shape to an existing region
            extra_shapes.append(way)
    for extra_shape in extra_shapes:
        name = extra_shape['is_in']
        try:
            region = regions[name]
        except KeyError:
            raise ValueError(str.format(
                'Extra shape refers to {name} which was not found: {extra_shape}',
                name = repr(name),
                extra_shape = repr(extra_shape),
            ))
        for key in SHAPE_KEYS:
            region[key].extend(extra_shape[key])
    return regions
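Putting the new pieces together; a hypothetical driver mirroring gtfsc.py's __main__, with the coordinates being roughly central Turku:

from regions import parse_regions
from compute_regions import RegionTester

regions = parse_regions('föli.osm')
tester = RegionTester(regions)
print(tester(60.4518, 22.2666))
# -> RegionalLocation(region = ..., region_class = 'major') when the point
#    falls inside some region's major shapes, 'minor' when only inside its
#    minor shapes, or None when no region matches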