# HG changeset patch # User Teemu Piippo # Date 1537956731 -10800 # Node ID 7a55abeab5fd06d19b19d04901850483238bd186 # Parent 369e242edc5d15b113ad62fb9b4e99cb4737b0c7 refactor diff -r 369e242edc5d -r 7a55abeab5fd compute-regions.py --- a/compute-regions.py Tue Sep 25 22:22:10 2018 +0300 +++ b/compute-regions.py Wed Sep 26 13:12:11 2018 +0300 @@ -4,54 +4,93 @@ from geometry import * from zipfile import ZipFile from configparser import ConfigParser -from regions import parse_regions - -representatives = {} - -regions = parse_regions(sys.argv[2]) -bus_stops = {} -block_factor = 0.005 class Blockmap: - ''' Models a map of blocks. Maps each location to a square area. ''' + ''' + Models a map of blocks. Maps each location to a square area. + ''' + block_factor = 0.005 def __init__(self, default = None): from collections import defaultdict self.blocks = default or defaultdict(set) def __getitem__(self, blockid): - ''' Returns a block for block coordinates. The block is a set that can contain anything. ''' + ''' + Returns a block for block coordinates. The block is a set that can + contain anything. + ''' return self.blocks[blockid] def blockpoint(self, point): - ''' Returns a block point for a location. The block point is a coordinate in the blockmap. ''' - return int(point.x / block_factor), int(point.y / block_factor) + ''' + Returns a block point for a location. The block point is a + coordinate in the blockmap. + ''' + block_coordinate = lambda x: int(x / self.block_factor) + return block_coordinate(point.x), block_coordinate(point.y) + +def minmax(data): + ''' + Computes the minimum and maximum values in one-pass using only + 1.5*len(data) comparisons + ''' + import itertools + it = iter(data) + try: + lo = hi = next(it) + except StopIteration: + raise ValueError('minmax() arg is an empty sequence') + for x, y in itertools.zip_longest(it, it, fillvalue = lo): + if x > y: + x, y = y, x + lo = min(x, lo) + hi = max(y, hi) + return lo, hi def blocks_in_shape(blockmap, shape): - ''' Finds all blocks inside the bounding box of a shape. ''' - min_x = min(point.x for point in shape.points) - min_y = min(point.y for point in shape.points) - max_x = max(point.x for point in shape.points) - max_y = max(point.y for point in shape.points) + ''' + Finds all blocks inside the bounding box of a shape. + ''' + from itertools import product + min_x, max_x = minmax(point.x for point in shape.points) + min_y, max_y = minmax(point.y for point in shape.points) min_blockpoint = blockmap.blockpoint(Location(min_x, min_y)) max_blockpoint = blockmap.blockpoint(Location(max_x, max_y)) - for x in range(min_blockpoint[0], max_blockpoint[0] + 1): - for y in range(min_blockpoint[1], max_blockpoint[1] + 1): - yield blockmap[x, y] + range_x = range(min_blockpoint[0], max_blockpoint[0] + 1) + range_y = range(min_blockpoint[1], max_blockpoint[1] + 1) + yield from (blockmap[x, y] for x, y in product(range_x, range_y)) -def main(): - with ZipFile(sys.argv[1]) as archive: +def location_from_row(row): + return Location(float(row['stop_lat']), float(row['stop_lon'])) + +def read_bus_stops(archive_path): + bus_stops = dict() + with ZipFile(archive_path) as archive: with archive.open('stops.txt') as file: - for row in read_csv(map(bytes.decode, file)): - location = Location(float(row['stop_lat']), float(row['stop_lon'])) - reference = row['stop_id'] - bus_stops[reference] = location - region_shapes = list() - districts = dict() - bus_stop_regions = dict() + return { + row['stop_id']: location_from_row(row) + for row in read_csv(map(bytes.decode, file)) + } + +def create_blockmap(regions): + ''' + Creates a blockmap of regions + ''' blockmap = Blockmap() - # Find all regions for every block. for region in regions.values(): for block in blocks_in_shape(blockmap, region['shape']): set.add(block, region['name']) + return blockmap + +def get_args(): + from argparse import ArgumentParser + parser = ArgumentParser() + parser.add_argument('gtfs_zip') + parser.add_argument('mapfile') + return parser.parse_args() + +def compute_bus_stop_regions(regions, bus_stops): # Find the region every node is in + blockmap = create_blockmap(regions) + bus_stop_regions = dict() for stop_id, stop_position in bus_stops.items(): for region_name in blockmap[blockmap.blockpoint(stop_position)]: region = regions[region_name] @@ -60,9 +99,21 @@ break else: bus_stop_regions[stop_id] = None + return bus_stop_regions + +def percentage(a): + return '%.1f%%' % (a * 100) + +def main(): + from regions import parse_regions + args = get_args() + regions = parse_regions(args.mapfile) + bus_stops = read_bus_stops(args.gtfs_zip) + bus_stop_regions = compute_bus_stop_regions(regions, bus_stops) covered = sum(1 if value else 0 for value in bus_stop_regions.values()) total = len(bus_stops) - print('%.1f%% bus stops covered.' % (covered * 100 / total), file = sys.stderr) + print('%s bus stops covered.' % percentage(covered / total), + file = sys.stderr) json.dump(bus_stop_regions, sys.stdout, indent = 2) if __name__ == '__main__':