This demonstration shows the power of the datashader library to make geospatial visualizations of public transport data in Chicago. The osmnx is used to compute the edge bearings of the Chicago GTFS data, the distribution of bearings properties of Chicago GTFS shows grid pattern in histgram distribution plotting and polar plotting.
The General Transit Feed Specification—or GTFS—is an open format for packaging scheduled service data. GTFS data is produced by hundreds of transit agencies around the world to deliver content for inclusion in maps and directions-giving services, including Google Maps.
Setup the environment, loading library, download the GTFS data in Chicago, and load the GTFS data in Chicago
%matplotlib inline
import osmnx as ox, matplotlib.pyplot as plt
ox.config(log_console=True, use_cache=True)
import networkx as nx
import pandas as pd
import requests
import shutil
import os
import zipfile
req = requests.get('http://gtfs.s3.amazonaws.com/chicago-transit-authority_20160416_0123.zip', stream=True)
with open('chicago-transit-authority_20160416_0123.zip', 'wb') as file:
shutil.copyfileobj(req.raw, file)
if not os.path.exists('./gtfsdata'):
os.mkdir('./gtfsdata')
with zipfile.ZipFile('chicago-transit-authority_20160416_0123.zip', 'r') as file:
file.extractall('./gtfsdata')
os.listdir('./gtfsdata')
trips = pd.read_csv('gtfsdata/trips.txt', low_memory=False)
shapes = pd.read_csv('gtfsdata/shapes.txt', low_memory=False)
stops = pd.read_csv('gtfsdata/stops.txt', low_memory=False)
print ('The numer of trips is:{}'.format(len(trips)))
trips.head(5)
print ('The numer of shapes is:{}'.format(len(shapes)))
shapes.head(5)
print ('The numer of stops is:{}'.format(len(stops)))
stops.head(5)
import matplotlib.pyplot as plt
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(24, 10))
fig.suptitle('Distributions of latitude and longitude for Chicago GTFS dataset', fontsize=16)
ax1.hist(shapes.shape_pt_lat)
ax1.set_xlabel('Latitude', fontsize=13)
ax1.set_ylabel('Frequency', fontsize=13)
ax2.hist(shapes.shape_pt_lon)
ax2.set_xlabel('Longitude', fontsize=13)
ax2.set_ylabel('Frequency', fontsize=13)
#Heatmap for the GTFS data in Chicago
import matplotlib.pyplot as plt
plt.figure(figsize = (20,16))
plt.hist2d(shapes.shape_pt_lon, shapes.shape_pt_lat, bins=150, cmap='hot')
plt.colorbar().set_label('Number of properties')
plt.xlabel('Longitude', fontsize=14)
plt.ylabel('Latitude', fontsize=14)
plt.title('Number of properties for GTFS data in Chicago', fontsize=17)
plt.show()
Datashader is a graphics pipeline system for creating meaningful representations of large datasets quickly and flexibly. Datashader breaks the creation of images into a series of explicit steps that allow computations to be done on intermediate representations (https://datashader.org/).
from __future__ import division
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
plt.style.use('ggplot')
import os
import glob
import datashader as ds
import datashader.transfer_functions as tf
import folium
# Show GTFS data in Chicago
#maindt = shapes[abs(shapes.shape_pt_lon +87.6298) <= 0.95]
#maindt = maindt[abs(maindt.shape_pt_lat -41.8781 ) <= 0.95]
cvs = ds.Canvas(plot_width=800, plot_height=800)
agg = cvs.points(shapes,'shape_pt_lon','shape_pt_lat')
img = tf.shade(agg, cmap=['lightblue','darkblue'],how='log')
img
def filter_by_int(seq):
for el in seq:
res = None
try:
res = int(el)
except ValueError:
pass
if res: yield res
rts = pd.read_csv('gtfsdata/routes.txt', low_memory=False)
unique_routes = set(rts[~rts['route_short_name'].isnull()]['route_short_name'].values)
sorted_unique = sorted(filter_by_int(unique_routes))
print('The number of Routes in feed is : {}'.format(len(sorted_unique)))
print('Routes in feed: {}'.format(sorted_unique))
Extract each shape related to that route separately. Because routes in GTFS can have multiple shapes.
#trips = gtfs_dfs['trips']
routes = rts
all_rts_shapes = {}
for rte_id in set(routes['route_id'].values):
rte_trips = trips[trips['route_id'] == rte_id]
rte_shapes = set(rte_trips['shape_id'].values)
#shapes = gtfs_dfs['shapes']
rte_sh_table = shapes[shapes['shape_id'].isin(rte_shapes)]
sh_dict = {}
for shid in rte_shapes:
one_shid = rte_sh_table[rte_sh_table['shape_id'] == shid]
sorted_shid_table = one_shid.sort_values(by='shape_pt_sequence', ascending=True)
shid_list = []
for id, row in sorted_shid_table.iterrows():
shid_list.append({
'id': id,
'lon': row.shape_pt_lon,
'lat': row.shape_pt_lat,
'seq': row.shape_pt_sequence,
'dist': row.shape_dist_traveled
})
sh_dict[shid] = shid_list
# now update the reference list
all_rts_shapes[rte_id] = sh_dict
This stepd will need much time to process the Chicago network data
import hashlib
def _make_id(shape_id, item):
s = ''.join([shape_id, str(item['seq'])]).replace('_', '')
s = s.encode('utf-8')
s_int = abs(int(hashlib.sha1(s).hexdigest(), 16) % (10 ** 12))
return str(s_int)
def add_new_route_shape(route, shape_id, pot_nodes, G):
# first add nodes to network
kept_nodes = []
for item in pot_nodes:
#print (shape_id,item['id'])
node_id = item['id'] #_make_id(shape_id, item)
# if we can, check last node appended
if len(kept_nodes):
last_node = kept_nodes[-1]
# make sure it isn't at same distance along
# the route
if last_node['dist'] == item['dist']:
continue
# add to the graph
G.add_node(node_id, route=route,
shape_id=shape_id,
osmid=node_id,
x=item['lon'],
y=item['lat'])
# and update list for tracking
kept_nodes.append(item)
# now add edges
for a, b in zip(kept_nodes[:-1], kept_nodes[1:]):
a_id = a['id']#_make_id(shape_id, a)
b_id = b['id']#_make_id(shape_id, b)
length = ox.utils.great_circle_vec(a['lat'],
a['lon'],
b['lat'],
b['lon'])
G.add_edge(a_id, b_id, attr_dict={'length': length,
'route': route,
'shape_id': shape_id})
G_rts = nx.MultiDiGraph(name='all_rts', crs={'init':'epsg:4326'})
all_rts_shapes_keys = list(all_rts_shapes.keys())
for rte_key in all_rts_shapes_keys:
rte_shapes_dict = all_rts_shapes[rte_key]
for rte_shape_id in rte_shapes_dict.keys():
add_new_route_shape(rte_key, rte_shape_id, rte_shapes_dict[rte_shape_id], G_rts)
%time fig, ax = ox.plot_graph(ox.project_graph(G_rts), fig_height=20, node_size=1, dpi=600)
import matplotlib.pyplot as plt
%time G_rts = ox.add_edge_bearings(G_rts)
bearings = pd.Series([data['bearing'] for u, v, k, data in G_rts.edges(keys=True, data=True)])
ax = bearings.hist(bins=30, zorder=2, alpha=0.8)
xlim = ax.set_xlim(0, 360)
ax.set_title('Chicago MTS route bearings')
plt.show()
n = 30
count, division = np.histogram(bearings, bins=[ang*360/n for ang in range(0,n+1)])
division = division[0:-1]
width = 2 * np.pi/n
ax = plt.subplot(111, projection='polar')
ax.set_theta_zero_location('N')
ax.set_theta_direction('clockwise')
bars = ax.bar(division * np.pi/180 - width * 0.5 , count, width=width, bottom=20.0)
ax.set_title('Chicago MTS route bearings', y=1.1)
plt.show()
edge_colors = ox.get_edge_colors_by_attr(G_rts, 'bearing', num_bins=10, cmap='viridis', start=0, stop=1)
fig, ax = ox.plot_graph(ox.project_graph(G_rts), fig_height=20, node_size=1, edge_color=edge_colors, dpi=600)