Fri, 05 Feb 2021 12:16:29 +0200
update
#!/usr/bin/env python3
from xml.etree import ElementTree
from geometry import *

# Region kinds recognised in way tags; the derived lists below are the
# accepted values of the 'boundary' tag and the keys under which parsed
# polygons are stored in each way dict.
REGION_TYPES = ['major', 'minor']
REGION_KEY_VALUES = [x + '_region' for x in REGION_TYPES]
SHAPE_KEYS = [x + '_shapes' for x in REGION_TYPES]


def parse_nodes(root):
    """Map the ``id`` of every ``node`` child of *root* to a ``Location``.

    The location is built from the node's ``lat`` and ``lon`` attributes,
    both converted to ``float``. Children with any other tag are ignored.
    """
    return {
        child.attrib['id']: Location(
            float(child.attrib['lat']),
            float(child.attrib['lon']),
        )
        for child in root
        if child.tag == 'node'
    }


def parse_way(way, nodes):
    """Convert one ``way`` element into a dict of its tags and shapes.

    ``nd`` children contribute node references to the outline and ``tag``
    children are copied into the result as ``k -> v``. If the way carries a
    ``boundary`` tag, the outline is resolved through *nodes* into a
    ``Polygon`` and stored under ``'major_shapes'`` or ``'minor_shapes'``.

    Returns:
        The result dict, or ``None`` when the way has a ``boundary`` tag
        whose value is not one of ``REGION_KEY_VALUES``.

    Raises:
        ValueError: if a tag uses one of the reserved ``SHAPE_KEYS`` names,
            if the way has no node references at all, or if its first and
            last node references differ (the outline is not closed).
        KeyError: if the outline references a node id missing from *nodes*.
    """
    result = {'minor_shapes': [], 'major_shapes': []}
    shape = []
    for child in way:
        if child.tag == 'nd':
            shape.append(child.attrib['ref'])
        elif child.tag == 'tag':
            key = child.attrib['k']
            # The shape keys are reserved for the polygon lists above.
            if key in SHAPE_KEYS:
                raise ValueError(str.format('tag "{}" is not allowed', key))
            result[key] = child.attrib['v']
            if key == 'boundary' and result['boundary'] not in REGION_KEY_VALUES:
                return None  # we're not interested in it!
    # Guard against a way without any node references: indexing an empty
    # list would otherwise surface as an unhelpful IndexError.
    if not shape:
        raise ValueError('way has no nodes: %r' % result)
    if shape[-1] != shape[0]:
        raise ValueError('polygon is not closed: %r' % result)
    if 'boundary' in result:
        # The last reference repeats the first one, so it is dropped.
        corners = [nodes[ref] for ref in shape[:-1]]
        if result['boundary'] == 'major_region':
            shapes_key = 'major_shapes'
        else:
            shapes_key = 'minor_shapes'
        result[shapes_key].append(Polygon(*corners))
    return result


def parse_boundaries(root, *, nodes):
    """Yield the parsed dict of every ``way`` child of *root*.

    Ways for which ``parse_way`` returns a falsy value are skipped.
    """
    for child in root:
        if child.tag == 'way':
            way = parse_way(child, nodes=nodes)
            if way:
                yield way


def parse_regions(filename):
    """Parse *filename* and return its regions keyed by reference string.

    A way that has ``boundary``, ``municipality`` and ``name:fi`` tags
    defines a region. Its reference is ``MUNICIPALITY/name`` with ``/`` and
    ``;`` stripped from both parts and the municipality upper-cased. A way
    that has ``boundary`` and ``is_in`` tags instead contributes additional
    shapes to the region named by ``is_in``.

    Returns:
        A dict mapping each region reference to its way dict.

    Raises:
        ValueError: if two ways define the same region reference, or if an
            extra shape names a region that was not defined.
    """
    from katakana import transliterate as transliterate_katakana

    def clean(x):
        # '/' separates the two halves of a reference, so it must not occur
        # inside either half; ';' is stripped as well.
        return x.replace('/', '').replace(';', '')

    tree = ElementTree.parse(filename)
    root = tree.getroot()
    nodes = parse_nodes(root)
    regions = {}
    extra_shapes = []
    for way in parse_boundaries(root, nodes=nodes):
        if 'boundary' in way and 'municipality' in way and 'name:fi' in way:
            # defines a region
            way['ref'] = str.format(
                '{municipality}/{name}',
                municipality=str.upper(clean(way['municipality'])),
                name=clean(way['name:fi']),
            )
            way['via_factor'] = int(way.get('via_factor', 1))
            if way['ref'] in regions:
                raise ValueError(str.format(
                    'Region {ref} defined twice',
                    ref=repr(way['ref']),
                ))
            regions[way['ref']] = way
            # The boundary tag is dropped from stored regions, except that
            # ways tagged 'external' are always recorded as minor regions.
            del way['boundary']
            if 'external' in way:
                way['boundary'] = 'minor_region'
            # Fill in missing Japanese names by transliterating the
            # corresponding non-empty Finnish name.
            for prefix in ['', 'short_', 'internal_']:
                name_key = prefix + 'name:fi'
                ja_name_key = prefix + 'name:ja'
                if name_key in way and way[name_key] and ja_name_key not in way:
                    way[ja_name_key] = transliterate_katakana(way[name_key])
        elif 'boundary' in way and 'is_in' in way:
            # adds an extra shape to an existing region
            extra_shapes.append(way)
    # Extra shapes are merged only after all ways are read, so a shape may
    # appear in the file before the region it belongs to.
    for extra_shape in extra_shapes:
        name = extra_shape['is_in']
        try:
            region = regions[name]
        except KeyError as err:
            raise ValueError(str.format(
                'Extra shape refers to {name} which was not found: {extra_shape}',
                name=repr(name),
                extra_shape=repr(extra_shape),
            )) from err
        for key in SHAPE_KEYS:
            region[key].extend(extra_shape[key])
    return regions