things

Thu, 05 Nov 2020 14:52:50 +0200

author
Teemu Piippo <teemu@hecknology.net>
date
Thu, 05 Nov 2020 14:52:50 +0200
changeset 3
10ce28475e9c
parent 2
7378b802ddf8
child 4
ac067a42b00f

things

.hgignore file | annotate | diff | comparison | revisions
föli.osm file | annotate | diff | comparison | revisions
itienary_processing.py file | annotate | diff | comparison | revisions
regiontest.py file | annotate | diff | comparison | revisions
--- a/.hgignore	Thu Jul 30 21:52:31 2020 +0300
+++ b/.hgignore	Thu Nov 05 14:52:50 2020 +0200
@@ -2,3 +2,4 @@
 gtfs.zip
 __pycache__
 *.db
+gtfs
--- a/föli.osm	Thu Jul 30 21:52:31 2020 +0300
+++ b/föli.osm	Thu Nov 05 14:52:50 2020 +0200
@@ -2223,8 +2223,6 @@
   <node id='-755518' action='modify' visible='true' lat='60.42240086317' lon='22.20225332022' />
   <node id='-755520' action='modify' visible='true' lat='60.4265418945' lon='22.20429179907' />
   <node id='-755522' action='modify' visible='true' lat='60.4265313043' lon='22.21356151342' />
-  <node id='-755524' action='modify' visible='true' lat='60.51781749962' lon='22.30113027334' />
-  <node id='-755526' action='modify' visible='true' lat='60.51803926842' lon='22.29801891088' />
   <node id='-755528' action='modify' visible='true' lat='60.51840887971' lon='22.29437110662' />
   <node id='-755530' action='modify' visible='true' lat='60.51232559748' lon='22.29334113836' />
   <node id='-755532' action='modify' visible='true' lat='60.51126935572' lon='22.30615136862' />
@@ -3058,6 +3056,7 @@
     <tag k='name:sv' v='Åbo universitetcentralsjukhus' />
     <tag k='ref' v='Tyks' />
     <tag k='short_name:fi' v='Tyks' />
+    <tag k='short_name:ja' v='Tyks' />
     <tag k='short_name:sv' v='Åucs' />
   </way>
   <way id='-755870' action='modify' visible='true'>
@@ -6078,7 +6077,7 @@
     <nd ref='-753440' />
     <tag k='boundary' v='major_region' />
     <tag k='municipality' v='Turku' />
-    <tag k='name:en' v='Fair center' />
+    <tag k='name:en' v='Fair centre' />
     <tag k='name:fi' v='Messukeskus' />
     <tag k='name:ru' v='Ярморочный павильон' />
     <tag k='name:sv' v='Mässcentrum' />
@@ -6325,6 +6324,7 @@
     <tag k='boundary' v='minor_region' />
     <tag k='municipality' v='Turku' />
     <tag k='name:fi' v='Itäkeskus' />
+    <tag k='name:sv' v='Östra centrum' />
     <tag k='ref' v='Vsi' />
   </way>
   <way id='-756107' action='modify' visible='true'>
--- a/itienary_processing.py	Thu Jul 30 21:52:31 2020 +0300
+++ b/itienary_processing.py	Thu Nov 05 14:52:50 2020 +0200
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 import sqlalchemy
 import sqlalchemy.orm
+import multiprocessing
 from regions import parse_regions
 from datamodel import *
 engine = sqlalchemy.create_engine('sqlite:///gtfs.db')
@@ -24,27 +25,42 @@
 def length_left(stoptime):
 	return stoptime.trip.shape.length - stoptime.shape_distance_traveled
 
+from datetime import datetime, timedelta
+time_in_db = timedelta(0)
+time_in_process = timedelta(0)
 amount = session.query(GtfsStopTime).filter(GtfsStopTime.destination == None).count()
 k = 0
 got_stoptimes = amount != 0
+from threading import Thread
+
+def get_filtered_itinerary(stoptime):
+	return list(filter_itinerary(
+		entry.stop.stop_region
+		for entry in session.query(GtfsStopTime)
+			.filter(GtfsStopTime.trip_id == stoptime.trip_id)
+			.filter(GtfsStopTime.stop_sequence > stoptime.stop_sequence)
+	))
+
+def get_destination(stoptime, itinerary):
+	from busroute import destinations_list
+	dests = destinations_list(
+		itinerary = itinerary,
+		trip_length = float(length_left(stoptime)),
+		regions = regions,
+	)
+	return '-'.join(dests)
 
 while got_stoptimes:
-	print('%.2f%%' % (k * 100 / amount))
+	if k > 0:
+		print('%.2f%% done' % (k * 100 / amount))
+		#print('%s spent in query, %s spent in processing (%.1f%% of time spent processing)' % (time_in_db, time_in_process, (time_in_process / (time_in_db + time_in_process) * 100)))
 	got_stoptimes = False
-	for stoptime in get_stoptimes(session):
+	stoptimes = list(get_stoptimes(session))
+	with multiprocessing.Pool(12) as p:
+		itineraries = list(p.map(get_filtered_itinerary, stoptimes))
+	for stoptime, itinerary in zip(stoptimes, itineraries):
 		got_stoptimes = True
+		destination = get_destination(stoptime, itinerary)
+		stoptime.destination = destination
 		k += 1
-		itinerary = list(filter_itinerary(
-			entry.stop.stop_region
-			for entry in session.query(GtfsStopTime)
-				.filter(GtfsStopTime.trip_id == stoptime.trip_id)
-				.filter(GtfsStopTime.stop_sequence > stoptime.stop_sequence)
-		))
-		from busroute import destinations_list
-		dests = destinations_list(
-			itinerary = itinerary,
-			trip_length = float(length_left(stoptime)),
-			regions = regions,
-		)
-		stoptime.destination = '-'.join(dests)
 	session.commit()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/regiontest.py	Thu Nov 05 14:52:50 2020 +0200
@@ -0,0 +1,11 @@
+
+	from collections import defaultdict
+	region_names_per_ref = defaultdict(set)
+	for ref, region in dict.items(regions):
+		set.add(region_names_per_ref[ref], region['name:fi'])
+	problem_regions = dict(filter(lambda k: len(region_names_per_ref[k]) > 1, region_names_per_ref))
+	if problem_regions:
+		from sys import stderr, exit
+		print('Found problems with regions', file = stderr)
+		print(problem_regions, file = stderr)
+		exit(1)

mercurial