scripts/get-external-data.py

Mon, 14 Sep 2020 22:55:45 +0300

author
Teemu Piippo <teemu@hecknology.net>
date
Mon, 14 Sep 2020 22:55:45 +0300
changeset 0
b0eb3af2f9ee
permissions
-rwxr-xr-x

restore .hg...

0
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
1 #!/usr/bin/env python3
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
2 '''This script is designed to load quasi-static data into a PostGIS database
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
3 for rendering maps. It differs from the usual scripts to do this in that it is
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
4 designed to take its configuration from a file rather than be a series of shell
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
5 commands.
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
6
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
7 Some implicit assumptions are
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
8 - Time spent querying (rendering) the data is more valuable than the one-time
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
9 cost of loading it
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
10 - The script will not be running multiple times in parallel. This is not
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
11 normally likely because the script is likely to be called daily or less,
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
12 not minutely.
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
13 - Usage patterns will be similar to typical map rendering
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
14 '''
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
15
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
16 import yaml
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
17 import os
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
18 import re
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
19 import argparse
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
20 import shutil
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
21
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
22 # modules for getting data
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
23 import zipfile
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
24 import requests
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
25 import io
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
26
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
27 # modules for converting and postgres loading
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
28 import subprocess
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
29 import psycopg2
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
30
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
31 import logging
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
32
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
33
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
def database_setup(conn, temp_schema, schema, metadata_table):
    '''Create the temporary schema and the metadata table if they are missing.

    conn is a DB-API style connection (cursor() usable as a context manager,
    plus commit()). temp_schema is the schema new data is loaded into,
    schema is the destination schema and metadata_table is the name of the
    table in the destination schema that records, per loaded table, the
    Last-Modified value of the download it was built from.

    Both statements use IF NOT EXISTS, so calling this again is harmless.
    '''
    with conn.cursor() as cur:
        # The schema name is quoted like every other identifier in this
        # script. Unquoted, PostgreSQL would fold the name to lower case
        # here while all later statements look it up case-sensitively.
        cur.execute('''CREATE SCHEMA IF NOT EXISTS "{temp_schema}";'''
                    .format(temp_schema=temp_schema))
        cur.execute(('''CREATE TABLE IF NOT EXISTS "{schema}"."{metadata_table}"'''
                     ''' (name text primary key, last_modified text);''')
                    .format(schema=schema, metadata_table=metadata_table))
    conn.commit()
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
42
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
43
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
class Table:
    '''A single externally sourced table.

    New data is expected to be loaded into the temporary schema under the
    table's name; the methods here clean up before such a load, post-process
    the loaded table, and finally move it into the destination schema while
    recording the new last-modified value in the metadata table.
    '''

    def __init__(self, name, conn, temp_schema, schema, metadata_table):
        self._conn = conn
        self._name = name
        self._temp_schema = temp_schema
        self._dst_schema = schema
        self._metadata_table = metadata_table

    def _sql(self, template):
        '''Fill the identifier placeholders of an SQL template for this table.

        Value placeholders (%s) are left alone for the database driver.
        '''
        return template.format(name=self._name,
                               temp_schema=self._temp_schema,
                               schema=self._dst_schema,
                               metadata_table=self._metadata_table)

    # Clean up the temporary schema in preparation for loading
    def clean_temp(self):
        drop_staged = self._sql('''DROP TABLE IF EXISTS "{temp_schema}"."{name}"''')
        with self._conn.cursor() as cur:
            cur.execute(drop_staged)
        self._conn.commit()

    # get the last modified date from the metadata table
    def last_modified(self):
        '''Return the stored last_modified value, or None if there is no row.'''
        lookup = self._sql('''SELECT last_modified FROM "{schema}"."{metadata_table}" WHERE name = %s''')
        with self._conn.cursor() as cur:
            cur.execute(lookup, [self._name])
            row = cur.fetchone()
        return None if row is None else row[0]

    def index(self):
        '''Post-process the freshly loaded table in the temporary schema.'''
        steps = (
            # Disable autovacuum while manipulating the table, since it'll
            # get clustered towards the end.
            '''ALTER TABLE "{temp_schema}"."{name}" SET ( autovacuum_enabled = FALSE );''',
            # ogr creates a ogc_fid column we don't need
            '''ALTER TABLE "{temp_schema}"."{name}" DROP COLUMN ogc_fid;''',
            # Null geometries are useless for rendering
            '''DELETE FROM "{temp_schema}"."{name}" WHERE way IS NULL;''',
            '''ALTER TABLE "{temp_schema}"."{name}" ALTER COLUMN way SET NOT NULL;''',
            # sorting static tables helps performance and reduces size from
            # the column drop above
            ('''CREATE INDEX "{name}_order" ON "{temp_schema}"."{name}" '''
             '''(ST_Envelope(way));'''
             '''CLUSTER "{temp_schema}"."{name}" '''
             '''USING "{name}_order";'''
             '''DROP INDEX "{temp_schema}"."{name}_order";'''
             '''CREATE INDEX ON "{temp_schema}"."{name}" '''
             '''USING GIST (way) WITH (fillfactor=100);'''),
            # Reset autovacuum. The table is static, so this doesn't really
            # matter since it'll never need a vacuum.
            '''ALTER TABLE "{temp_schema}"."{name}" RESET ( autovacuum_enabled );''',
        )
        with self._conn.cursor() as cur:
            for step in steps:
                cur.execute(self._sql(step))
        self._conn.commit()

        # VACUUM can't be run in transaction, so autocommit needs to be
        # turned on; the previous setting is put back whatever happens.
        previous_autocommit = self._conn.autocommit
        try:
            self._conn.autocommit = True
            with self._conn.cursor() as cur:
                cur.execute(self._sql('''VACUUM ANALYZE "{temp_schema}"."{name}";'''))
        finally:
            self._conn.autocommit = previous_autocommit

    def replace(self, new_last_modified):
        '''Swap the loaded table into the destination schema and record
        new_last_modified for it in the metadata table.'''
        swap = self._sql('''DROP TABLE IF EXISTS "{schema}"."{name}";'''
                         '''ALTER TABLE "{temp_schema}"."{name}" SET SCHEMA "{schema}";''')
        probe = self._sql('''SELECT 1 FROM "{schema}"."{metadata_table}" WHERE name = %s''')
        insert = self._sql('''INSERT INTO "{schema}"."{metadata_table}" '''
                           '''(name, last_modified) VALUES (%s, %s)''')
        update = self._sql('''UPDATE "{schema}"."{metadata_table}" SET last_modified = %s WHERE name = %s''')

        with self._conn.cursor() as cur:
            cur.execute('''BEGIN;''')
            cur.execute(swap)

            # Find out whether the metadata table already has a row for
            # this table, then update that row or add a new one.
            cur.execute(probe, [self._name])
            if cur.rowcount != 0:
                cur.execute(update, [new_last_modified, self._name])
            else:
                cur.execute(insert, [self._name, new_last_modified])
        self._conn.commit()
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
128
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
129
b0eb3af2f9ee restore .hg...
Teemu Piippo <teemu@hecknology.net>
parents:
diff changeset
def _ogr_connection_string(database, host, port, user):
    '''Build the "PG:..." datasource string handed to ogr2ogr.

    Options that are None are left out instead of being rendered as the
    text "None", so that the PostgreSQL client library can apply its own
    defaults for them.
    '''
    options = (("dbname", database), ("port", port), ("user", user), ("host", host))
    return "PG:" + " ".join("{}={}".format(key, value)
                            for key, value in options if value is not None)


def main():
    '''Load every source listed in the configuration file into the database.

    For each source the file is downloaded (conditionally on the stored
    Last-Modified value unless --force is given), unpacked if it is a zip
    archive, loaded into the temporary schema with ogr2ogr, post-processed
    and finally swapped into the destination schema.

    Raises RuntimeError for an unsupported table name or when ogr2ogr
    fails; HTTP errors are raised by requests via raise_for_status().
    '''
    # parse options
    parser = argparse.ArgumentParser(description="Load external data into a database")

    parser.add_argument("-f", "--force", action="store_true", help="Download new data, even if not required")

    parser.add_argument("-c", "--config", action="store", default="external-data.yml",
                        help="Name of configuration file (default external-data.yml)")
    parser.add_argument("-D", "--data", action="store", help="Override data download directory")

    parser.add_argument("-d", "--database", action="store", help="Override database name to connect to")
    parser.add_argument("-H", "--host", action="store",
                        help="Override database server host or socket directory")
    parser.add_argument("-p", "--port", action="store", help="Override database server port")
    parser.add_argument("-U", "--username", action="store", help="Override database user name")
    parser.add_argument("-v", "--verbose", action="store_true", help="Be more verbose. Overrides -q")
    parser.add_argument("-q", "--quiet", action="store_true", help="Only report serious problems")

    opts = parser.parse_args()

    if opts.verbose:
        logging.basicConfig(level=logging.DEBUG)
    elif opts.quiet:
        logging.basicConfig(level=logging.WARNING)
    else:
        logging.basicConfig(level=logging.INFO)

    with open(opts.config) as config_file:
        config = yaml.safe_load(config_file)

    settings = config["settings"]
    data_dir = opts.data or settings["data_dir"]
    os.makedirs(data_dir, exist_ok=True)

    # If the DB options are unspecified both on the command line and in the
    # config file, they stay None and libpq will pick what to use.
    database = opts.database or settings.get("database")
    host = opts.host or settings.get("host")
    port = opts.port or settings.get("port")
    user = opts.username or settings.get("username")

    temp_schema = settings["temp_schema"]
    dst_schema = settings["schema"]
    metadata_table = settings["metadata_table"]

    with requests.Session() as s, \
            psycopg2.connect(database=database,
                             host=host, port=port,
                             user=user) as conn:

        s.headers.update({'User-Agent': 'get-external-data.py/osm-carto'})

        # DB setup
        database_setup(conn, temp_schema, dst_schema, metadata_table)

        for name, source in config["sources"].items():
            logging.info("Checking table {}".format(name))
            # Don't attempt to handle strange names.
            # Even if there was code to escape them properly here, you don't
            # want them in a style with all the quoting headaches.
            # fullmatch is used because "$" in re.match would also accept a
            # name that ends in a newline, and the name goes into SQL.
            if not re.fullmatch(r'[a-zA-Z0-9_]+', name):
                raise RuntimeError("Only ASCII alphanumeric table are names supported")

            workingdir = os.path.join(data_dir, name)
            # Clean up anything left over from an aborted run
            shutil.rmtree(workingdir, ignore_errors=True)

            os.makedirs(workingdir, exist_ok=True)

            this_table = Table(name, conn, temp_schema, dst_schema, metadata_table)
            this_table.clean_temp()

            # Ask the server to skip the download if nothing changed, but
            # only when a previous Last-Modified value is actually known.
            headers = {}
            if not opts.force:
                known_last_modified = this_table.last_modified()
                if known_last_modified is not None:
                    headers['If-Modified-Since'] = known_last_modified

            download = s.get(source["url"], headers=headers)
            download.raise_for_status()

            if download.status_code != 200:
                logging.info("Table {} did not require updating".format(name))
                continue

            new_last_modified = download.headers.get("Last-Modified")

            # NOTE(review): only zip archives are unpacked; for any other
            # source nothing here writes the download into workingdir.
            archive = source.get("archive")
            if archive is not None and archive["format"] == "zip":
                with zipfile.ZipFile(io.BytesIO(download.content)) as archive_file:
                    for member in archive["files"]:
                        archive_file.extract(member, workingdir)

            ogrpg = _ogr_connection_string(database, host, port, user)

            ogrcommand = ["ogr2ogr",
                          '-f', 'PostgreSQL',
                          '-lco', 'GEOMETRY_NAME=way',
                          '-lco', 'SPATIAL_INDEX=FALSE',
                          '-lco', 'EXTRACT_SCHEMA_FROM_LAYER_NAME=YES',
                          '-nln', "{}.{}".format(temp_schema, name)]

            if "ogropts" in source:
                ogrcommand += source["ogropts"]

            ogrcommand += [ogrpg, os.path.join(workingdir, source["file"])]

            logging.debug("running {}".format(subprocess.list2cmdline(ogrcommand)))

            # ogr2ogr can raise errors here, so they need to be caught.
            # stderr is merged into stdout so that the error text ends up in
            # e.output and therefore in the log below.
            try:
                subprocess.check_output(ogrcommand, stderr=subprocess.STDOUT, universal_newlines=True)
            except subprocess.CalledProcessError as e:
                # Add more detail on stdout for the logs
                logging.critical("ogr2ogr returned {} with layer {}".format(e.returncode, name))
                logging.critical("Command line was {}".format(subprocess.list2cmdline(e.cmd)))
                logging.critical("Output was\n{}".format(e.output))
                raise RuntimeError("ogr2ogr error when loading table {}".format(name)) from e

            this_table.index()
            this_table.replace(new_last_modified)


if __name__ == '__main__':
    main()

mercurial