itienary_processing.py

changeset 3
10ce28475e9c
parent 2
7378b802ddf8
child 4
ac067a42b00f
--- a/itienary_processing.py	Thu Jul 30 21:52:31 2020 +0300
+++ b/itienary_processing.py	Thu Nov 05 14:52:50 2020 +0200
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 import sqlalchemy
 import sqlalchemy.orm
+import multiprocessing
 from regions import parse_regions
 from datamodel import *
 engine = sqlalchemy.create_engine('sqlite:///gtfs.db')
@@ -24,27 +25,42 @@
 def length_left(stoptime):
 	return stoptime.trip.shape.length - stoptime.shape_distance_traveled
 
+from datetime import datetime, timedelta
+time_in_db = timedelta(0)  # running total; in this hunk only the commented-out timing print in the loop refers to it
+time_in_process = timedelta(0)  # running total; in this hunk only the commented-out timing print in the loop refers to it
 amount = session.query(GtfsStopTime).filter(GtfsStopTime.destination == None).count()
 k = 0
 got_stoptimes = amount != 0
+from threading import Thread  # NOTE(review): Thread is not used anywhere in this hunk - confirm the import is needed
+
+def get_filtered_itinerary(stoptime):  # stop regions of the same trip's entries after `stoptime`, run through filter_itinerary; invoked via Pool.map in the loop below
+	return list(filter_itinerary(  # materialised as a list so a concrete value is returned to the caller
+		entry.stop.stop_region
+		for entry in session.query(GtfsStopTime)  # uses the module-level session; NOTE(review): confirm it is safe to use from pool worker processes
+			.filter(GtfsStopTime.trip_id == stoptime.trip_id)  # restrict to the trip this stop time belongs to
+			.filter(GtfsStopTime.stop_sequence > stoptime.stop_sequence)  # strictly greater stop_sequence only; NOTE(review): no order_by here - confirm row order does not matter
+	))
+
+def get_destination(stoptime, itinerary):  # returns the destination names from destinations_list joined with '-'
+	from busroute import destinations_list  # imported at call time, exactly as the inline code this function replaces did
+	dests = destinations_list(
+		itinerary = itinerary,
+		trip_length = float(length_left(stoptime)),  # length_left: trip shape length minus the distance already traveled
+		regions = regions,  # module-level name that is not defined within this hunk
+	)
+	return '-'.join(dests)
 
 while got_stoptimes:
-	print('%.2f%%' % (k * 100 / amount))
+	if k > 0:  # no progress line until at least one stop time has been handled
+		print('%.2f%% done' % (k * 100 / amount))
+		#print('%s spent in query, %s spent in processing (%.1f%% of time spent processing)' % (time_in_db, time_in_process, (time_in_process / (time_in_db + time_in_process) * 100)))
 	got_stoptimes = False
-	for stoptime in get_stoptimes(session):
+	stoptimes = list(get_stoptimes(session))  # materialised so the same sequence can be mapped over and then zipped with the results
+	with multiprocessing.Pool(12) as p:  # a fresh pool of 12 worker processes is created on every pass of the while loop
+		itineraries = list(p.map(get_filtered_itinerary, stoptimes))  # Pool.map keeps input order, so results line up with stoptimes; NOTE(review): the stop time objects must be picklable to reach the workers - confirm
+	for stoptime, itinerary in zip(stoptimes, itineraries):  # pair each stop time with the itinerary computed for it
 		got_stoptimes = True
+		destination = get_destination(stoptime, itinerary)  # runs in the parent process, not in the pool
+		stoptime.destination = destination  # followed by the session.commit() at the end of this pass
 		k += 1
-		itinerary = list(filter_itinerary(
-			entry.stop.stop_region
-			for entry in session.query(GtfsStopTime)
-				.filter(GtfsStopTime.trip_id == stoptime.trip_id)
-				.filter(GtfsStopTime.stop_sequence > stoptime.stop_sequence)
-		))
-		from busroute import destinations_list
-		dests = destinations_list(
-			itinerary = itinerary,
-			trip_length = float(length_left(stoptime)),
-			regions = regions,
-		)
-		stoptime.destination = '-'.join(dests)
 	session.commit()

mercurial