|
1 |
|
def old_load_gtfs(gtfs_zip_path):
    # Load a GTFS feed from the zip archive at gtfs_zip_path, populating the
    # module-level registries (services, routes, trips, stops, clusters).
    # NOTE(review): relies on module globals defined elsewhere in this file
    # (read_csv, services, routes_per_id, shape_distances, profile,
    # all_trips, stderr) — confirm they are initialized before this runs.
    global viimeinen_käyttöpäivä
    from zipfile import ZipFile
    with ZipFile(gtfs_zip_path) as gtfs_zip:
        # trips.txt: one row per scheduled trip. Services are created
        # lazily on first sight; each trip is attached to its route.
        with gtfs_zip.open('trips.txt') as file:
            for row in read_csv(map(bytes.decode, file)):
                if row['service_id'] not in services:
                    services[row['service_id']] = BusService(row['service_id'])
                route = routes_per_id[row['route_id']]
                trip = GtfsTrip(
                    reference = row['trip_id'],
                    route = route,
                    # Trips without a shape fall back to length 1;
                    # shape-modifier rescales raw shape distances.
                    service = services[row['service_id']],
                    length = shape_distances.get(row.get('shape_id'), 1) * float(profile['metrics']['shape-modifier']),
                    # block_id groups trips run back-to-back by one vehicle;
                    # default to the service id when absent or empty.
                    block_id = row.get('block_id') or row['service_id'],
                    shape = row.get('shape_id')
                )
                route.trips.add(trip)
                # Duplicate trip names are reported and the first one wins.
                if trip.name in all_trips:
                    print('Trip %s already exists' % trip.name)
                else:
                    all_trips[trip.name] = trip
        print('%d trips' % len(all_trips), file = stderr)
|
25 |
|
def read_date(teksti):
    ''' Parse a GTFS date string of the form YYYYMMDD into a datetime.date. '''
    year = int(teksti[:4])
    month = int(teksti[4:6])
    day = int(teksti[6:])
    return date(year, month, day)
|
28 |
|
def read_time(teksti):
    ''' Parse a GTFS HH:MM:SS string into a timedelta since midnight.

    GTFS allows hours past 23 for trips running over midnight; timedelta
    represents those naturally.
    '''
    hour, minute, second = (int(field) for field in teksti.split(':'))
    return timedelta(hours = hour, minutes = minute, seconds = second)
|
32 |
|
# Dates: record which calendar days each service operates on. The global
# viimeinen_käyttöpäivä ("last day of use") tracks the last date covered by
# the feed; start from today and push it forward while loading.
print('Loading dates... ', file = stderr, flush = True)
viimeinen_käyttöpäivä = date.today()
|
35 |
|
def date_range(start_date, end_date, *, include_end = False):
    ''' Generate consecutive dates from start_date up to end_date.

    end_date itself is excluded unless include_end is True. For an empty
    range (start_date > end_date) nothing is yielded; previously end_date
    leaked out of an empty range when include_end was set.
    '''
    current_date = start_date
    while current_date < end_date:
        yield current_date
        current_date += timedelta(1)
    # Guard so that a degenerate range stays empty even with include_end.
    if include_end and start_date <= end_date:
        yield end_date
|
44 |
|
def add_day_to_service(service_name, day):
    ''' Record that the service named service_name operates on the given day.

    Unknown service names are ignored silently. Maintains the per-day
    service index and pushes the global last-date-of-data forward.
    '''
    global viimeinen_käyttöpäivä
    service = services.get(service_name)
    if service is None:
        # calendar data may reference services absent from trips.txt
        return
    service.dates.add(day)
    services_for_day.setdefault(day, set()).add(service)
    viimeinen_käyttöpäivä = max(day, viimeinen_käyttöpäivä)
|
57 |
|
def filter_day(row, day):
    ''' Return the 0/1 weekday flag from a calendar.txt row for the given date. '''
    weekday_columns = ('monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday')
    # date.weekday() is already 0-based with Monday == 0.
    return int(row[weekday_columns[day.weekday()]])
|
61 |
|
# calendar.txt: a service interval plus one boolean column per weekday.
with gtfs_zip.open('calendar.txt') as file:
    for row in read_csv(map(bytes.decode, file)):
        for day in date_range(read_date(row['start_date']), read_date(row['end_date']), include_end = True):
            if filter_day(row, day):
                add_day_to_service(service_name = row['service_id'], day = day)

# calendar_dates.txt: single-day exceptions. NOTE(review): the GTFS
# exception_type column is not inspected here, so removal entries
# (exception_type = 2), if the feed contains any, would be treated as
# additions — confirm against the actual data.
with gtfs_zip.open('calendar_dates.txt') as file:
    for row in read_csv(map(bytes.decode, file)):
        add_day_to_service(service_name = row['service_id'], day = read_date(row['date']))
|
71 |
|
def services_available_at(day):
    ''' Yield every known service that operates on the given day. '''
    yield from (service for service in services.values() if day in service.dates)
|
76 |
|
print('Loading stops... ', file = stderr, end = '', flush = True)
# stops.txt: one row per physical stop/platform.
with gtfs_zip.open('stops.txt') as file:
    for row in read_csv(map(bytes.decode, file)):
        location = Location(float(row['stop_lat']), float(row['stop_lon']))
        stop = BusStop(
            reference = row['stop_id'],
            name = row['stop_name'],
            location = location,
            # Some feeds omit stop_code; fall back to the stop id.
            code = row.get('stop_code', row['stop_id']),
        )
        bus_stops[stop.reference] = stop
if profile['regions']['use-regions']:
    # Optional sidecar file mapping stop ids to region names.
    with open('regions-per-stop.json') as file:
        for stop_reference, region in json.load(file).items():
            try:
                bus_stops[stop_reference].region = region
            except KeyError:
                # The sidecar may mention stops absent from this feed.
                pass
# Ensure every stop has a region attribute, even when regions are
# disabled or the sidecar did not cover the stop.
for bus_stop in bus_stops.values():
    if not hasattr(bus_stop, 'region'):
        bus_stop.region = None
print('%d stops' % len(bus_stops), file = stderr)
|
99 |
|
# Group stops by their public name; same-named stops are the primary
# candidates for clustering into one logical stop.
from collections import defaultdict
bus_stops_by_name = defaultdict(set)
for bus_stop in bus_stops.values():
    bus_stops_by_name[bus_stop.name].add(bus_stop)
# Freeze to a plain dict so later lookups cannot silently create groups.
bus_stops_by_name = dict(bus_stops_by_name)

# group bus_stops by name
global all_clusters
all_clusters = []
|
def cluster_gtfs_stops():
    # Group nearby same-named stops into BusStopClusters (typically the
    # opposite sides of one street). Iterate in name order so the result
    # does not depend on dict iteration order.
    sorted_gtfs_stops = sorted(bus_stops.values(), key = lambda bus_stop: bus_stop.name)
    for bus_stop in sorted_gtfs_stops:
        if not bus_stop.cluster:
            stops_to_cluster = {bus_stop}
            # Find the stop's same-named counterparts within 0.4 distance
            # units (presumably kilometres — confirm Location.distance).
            for pair_candidate in bus_stops_by_name[bus_stop.name]:
                distance = pair_candidate.location.distance(bus_stop.location)
                if pair_candidate is not bus_stop and distance <= 0.4:
                    stops_to_cluster.add(pair_candidate)
            # Reuse the first existing cluster among the candidates, or
            # start a new one when none of them is clustered yet.
            for stop_to_cluster in stops_to_cluster:
                if stop_to_cluster.cluster:
                    cluster = stop_to_cluster.cluster
                    break
            else:
                cluster = BusStopCluster()
                all_clusters.append(cluster)
            for stop_to_cluster in stops_to_cluster:
                if not stop_to_cluster.cluster:
                    cluster.add_stop(stop_to_cluster)
    # Record each stop's counterpart stops using the current clustering.
    for bus_stop in bus_stops.values():
        if bus_stop.cluster:
            bus_stop.pairs = bus_stop.cluster.stops - {bus_stop}
    # Merge stops that ended up alone into the nearest other cluster
    # within 0.4 distance units, regardless of name.
    for bus_stop in sorted_gtfs_stops:
        if len(bus_stop.cluster.stops) == 1:
            possibilities = set()
            for cluster in all_clusters:
                if cluster is not bus_stop.cluster:
                    distance = cluster.center.distance(bus_stop.location)
                    if distance <= 0.4:
                        possibilities.add((distance, cluster))
            if possibilities:
                # min() orders by distance first; on an exact distance tie
                # it falls back to comparing the cluster objects themselves.
                best = min(possibilities)[1]
                all_clusters.remove(bus_stop.cluster)
                best.merge(bus_stop.cluster)
|
146 |
|
def shared_elements_in_n_sets(sets):
    ''' Return the set of elements that occur in at least two of the given sets.

    Equivalent to unioning all pairwise intersections, but counts each
    element once per set, running in time linear in the total number of
    elements instead of quadratic in the number of sets.
    '''
    from collections import Counter
    from itertools import chain
    counts = Counter(chain.from_iterable(sets))
    return {element for element, count in counts.items() if count >= 2}
|
153 |
|
def name_clusters():
    # Assign a display name to every cluster. The base name comes from the
    # cluster's stop with the shortest (then lexicographically smallest)
    # reference; name collisions between clusters are resolved below.
    from collections import defaultdict
    clusters_per_name = defaultdict(set)
    for cluster in all_clusters:
        name_representing_stop = min((len(stop.reference), stop.reference, stop) for stop in cluster.stops)[2]
        clusters_per_name[name_representing_stop.name].add(cluster)
    for name, clusters in clusters_per_name.items():
        if len(clusters) == 1:
            # Simple case: this cluster is the only one that wants this name.
            next(iter(clusters)).name = name
        else:
            if profile['regions']['use-regions']:
                # Find out if all clusters are in different areas
                common_regions = shared_elements_in_n_sets({stop.region for stop in cluster.stops} for cluster in clusters)
                # Proposal: cluster -> the areas unique to the cluster
                proposal = {
                    cluster: {stop.region for stop in cluster.stops} - common_regions - {None}
                    for cluster in clusters
                }
                # If at most one cluster is without its own unique region, name the others by region and this one without any.
                if sum([1 for unique_areas in proposal.values() if not unique_areas]) <= 1:
                    for cluster, unique_areas in proposal.items():
                        individual_cluster_name = name
                        if unique_areas:
                            individual_cluster_name += ' (' + min(unique_areas) + ')'
                        cluster.name = individual_cluster_name
                    # NOTE(review): this `break` exits the outer loop over
                    # clusters_per_name, leaving every remaining name
                    # collision unnamed; a `continue` (skip only the
                    # numbering fallback for this name) was probably
                    # intended — confirm before changing.
                    break
            # If all else fails, just number them.
            for n, (_, cluster) in enumerate(sorted(
                min((stop.reference.lower(), cluster) for stop in cluster.stops)
                for cluster in clusters
            ), 1):
                individual_cluster_name = name + '-' + str(n)
                cluster.name = individual_cluster_name
|
188 |
|
print('Clustering bus stops...')
cluster_gtfs_stops()
name_clusters()
# Index clusters by their URL-safe name; on a collision the first cluster
# keeps the slot and the clash is only reported.
for cluster in all_clusters:
    if cluster.url_name in clusters_by_name:
        print('Warning: Clusters %r and %r share the same URL name: %r' % (cluster.name, clusters_by_name[cluster.url_name].name, cluster.url_name))
    else:
        clusters_by_name[cluster.url_name] = cluster
|
print('Loading schedules... ', end = '', flush = True, file = stderr)
# First pass: count newlines so progress can be shown as a percentage.
with gtfs_zip.open('stop_times.txt') as file:
    row_count = sum(line.count(b'\n') for line in file)
with gtfs_zip.open('stop_times.txt') as file:
    progress = 0
    for row in read_csv(map(bytes.decode, file)):
        # Skip halts where the bus neither picks up nor drops off
        # passengers (both pickup_type and drop_off_type nonzero).
        # Empty or missing fields default to '0' (= regular stop).
        if int(row.get('pickup_type', '') or '0') and int(row.get('drop_off_type', '') or '0'):
            continue
        trip = all_trips[transform_trip_reference(row['trip_id'])]
        arrival_time = read_time(row['arrival_time'])
        departure_time = read_time(row['departure_time'])
        stop = bus_stops[row['stop_id']]
        # NOTE(review): float(row.get('shape_dist_traveled', 1)) raises
        # ValueError when the column exists but the field is empty —
        # confirm the feed always fills it.
        traveled_distance = float(row.get('shape_dist_traveled', 1)) * float(profile['metrics']['shape-modifier'])
        # 1-based position of this halt within the trip's schedule.
        visitnumber = len(trip.schedule) + 1
        trip.schedule.append(BusHalt(arrival_time, departure_time, stop, trip, traveled_distance, visitnumber))
        stop.involved_trips.add(trip)
        progress += 1
        if progress % 1000 == 0:
            print('\rLoading schedules... %.1f%%' % (progress * 100 / row_count), end = ' ', file = stderr)
print('\rLoading schedules... complete', file = stderr)
|
# Derive human-readable endpoint names for every trip from its schedule.
for trip in all_trips.values():
    from busroute import simplify_name
    schedule = trip.concise_schedule()
    try:
        trip.from_place = simplify_name(schedule[0])
        trip.to_place = simplify_name(schedule[-1])
    except IndexError:
        # A trip with an empty schedule gets empty endpoint names.
        trip.from_place = ''
        trip.to_place = ''
|
# Pick a description for each route: tally the trips' concise schedules,
# where a schedule that is a subset of an already-seen one counts towards
# the longer one, then use the most common schedule's endpoints.
for route in routes.values():
    from collections import Counter
    from busroute import simplify_name
    tally = Counter()
    for trip in route.trips:
        schedule = trip.concise_schedule()
        places = set(schedule)
        do_add = True
        assert type(schedule) is list
        for candidate in tally:
            if places.issubset(set(candidate)):
                # Covered by an existing candidate; credit that one
                # instead of adding a new key (no new key is created, so
                # mutating the Counter during iteration is safe here).
                do_add = False
                tally.update({tuple(candidate)})
        if do_add:
            tally.update({tuple(schedule)})
    try:
        most_common_route = tally.most_common(1)[0][0]
        route.description = simplify_name(most_common_route[0]) + ' - ' + simplify_name(most_common_route[-1])
    except:
        # NOTE(review): bare except hides real errors. An empty tally
        # raises IndexError above; narrowing to IndexError looks safe,
        # but confirm simplify_name cannot fail on valid data first.
        route.description = ''
    # Sort trips by first departure; trips with no schedule sort first.
    route.trips = sorted(route.trips, key = lambda trip: trip.schedule and trip.schedule[0].departure_time or timedelta())
|
# Optional workaround controlled by the profile's compatibility section.
if 'compatibility' in profile and profile['compatibility'].get('fix-destination-times', False):
    # There seems to be something strange going on in Föli's gtfs data.
    # It seems that sometimes the arrival time of the last stop is
    # completely off, so try estimate when the bus will really arrive
    # there based on the last leg distance.
    # I noticed this for bus 220's arrival time at Mylly several years
    # ago. Possibly this has been fixed in the data by now?
    for trip in all_trips.values():
        if len(trip.schedule) >= 2:
            bus_speed_coefficient = 750 # meters per minute
            last_leg_distance = trip.schedule[-1].traveled_distance - trip.schedule[-2].traveled_distance
            # Overwrite the feed's final arrival time with the estimate.
            trip.schedule[-1].arrival_time = trip.schedule[-2].departure_time + timedelta(minutes = last_leg_distance / bus_speed_coefficient)
|
# Add services to all bus stops: every stop learns which services halt
# at it, via each route's trips and their schedules.
for route in routes.values():
    for trip in route.trips:
        for halt in trip.schedule:
            halt.stop.services.add(route.service)