Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Multipage #8

Merged
merged 7 commits into from
Mar 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 58 additions & 38 deletions streamlit_app.py → Home.py
Original file line number Diff line number Diff line change
@@ -1,58 +1,55 @@
# import os,sys
# sys.path.append(os.getcwd())
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from datetime import datetime
from viz import plot_ridership_average, map_bus_routes
from app.viz import plot_ridership_average, map_bus_routes, plot_recovery_over_this_quarter, plot_bar_top_n_for_daterange
from app.load_data import get_rides,get_rides_quarterly, get_route_linestrings
import geopandas as gpd
from streamlit_extras.badges import badge


from streamlit_extras.dataframe_explorer import dataframe_explorer
st.set_page_config(
layout="wide",
page_title="MTA Bus Ridership"
page_icon="🚌",
page_title="Compare MTA Bus Routes by Ridership",
)

@st.cache_data
def get_rides(file_path="data/mta_bus_ridership.parquet"):
    """Load the MTA bus ridership parquet file, trimmed to four columns.

    Args:
        file_path: Location of the parquet file to read.

    Returns:
        A DataFrame holding only the ``route``, ``date``,
        ``ridership_weekday`` and ``ridership`` columns, in that order.
    """
    wanted_columns = ["route", "date", "ridership_weekday", "ridership"]
    ridership_table = pd.read_parquet(file_path)
    return ridership_table[wanted_columns]

@st.cache_data
def get_rides_quarterly(file_path="data/mta_bus_ridership_quarterly.parquet"):
    """Load the quarterly MTA bus ridership parquet file, trimmed to five columns.

    Args:
        file_path: Location of the quarterly parquet file to read.

    Returns:
        A DataFrame holding only the ``route``, ``date``,
        ``ridership_weekday``, ``quarter_year`` and ``ridership`` columns,
        in that order.
    """
    wanted_columns = [
        "route",
        "date",
        "ridership_weekday",
        "quarter_year",
        "ridership",
    ]
    quarterly_table = pd.read_parquet(file_path)
    return quarterly_table[wanted_columns]

@st.cache_data
def get_route_linestrings(file_path="data/mta_bus_route_linestring.geojson"):
    """Read the bus route geometry file into a GeoDataFrame.

    Args:
        file_path: Location of the GeoJSON file to read.

    Returns:
        The GeoDataFrame produced by ``gpd.read_file``, returned as-is.
    """
    # NOTE: the geometry column contains many multiline strings. No conversion
    # to single linestrings is performed here; the file is returned unchanged.
    return gpd.read_file(file_path)
@st.cache_data
def convert_df(df):
    """Serialise a DataFrame to UTF-8 encoded CSV bytes.

    The result is cached by ``st.cache_data`` so the conversion is not
    recomputed on every rerun of the app.

    Args:
        df: The DataFrame to serialise.

    Returns:
        The CSV text produced by ``df.to_csv()``, encoded as UTF-8 bytes.
    """
    csv_text = df.to_csv()
    return csv_text.encode("utf-8")




rides = get_rides_quarterly()
route_linestrings = get_route_linestrings()
csv = convert_df(rides)
# Streamlit app
st.title("MTA Bus Ridership")
st.sidebar.title("TransitScope Baltimore")
freq = "Quarterly"
if freq == "Quarterly":
rides = get_rides_quarterly()
csv = convert_df(rides)
else:
rides = get_rides()
csv = convert_df(rides)
# Get the top 5 routes from 2022, group by route number and sum the ridership
top_5_routes = rides[rides["date"] >= datetime(2022, 1, 1)].groupby("route")["ridership"].sum().sort_values(ascending=False).head(5).reset_index()["route"].tolist()
print(type(top_5_routes))
route_numbers = st.sidebar.multiselect(
"Select routes", list(rides["route"].unique()), default=top_5_routes,
)

freq=st.sidebar.selectbox("Choose frequency", ["Quarterly", "Monthly"])
if freq == "Quarterly":
rides = get_rides_quarterly()
csv = convert_df(rides)
else:
rides = get_rides()
csv = convert_df(rides)
highlight_routes=st.sidebar.checkbox("Show unselected bus routes on map", value=False)

if route_numbers:
Expand All @@ -66,6 +63,12 @@ def convert_df(df):
end_date=datetime(2022, 12, 31),
y_axis_zero = True
)
# Add a toggle to set y-axis to start at 0
fig2 = plot_recovery_over_this_quarter(rides,
# Do the top 5 routes from 2022
route_numbers=route_numbers, )


col1, col2 = st.columns([3,2])
with col1:
st.plotly_chart(
Expand All @@ -76,30 +79,47 @@ def convert_df(df):
)
y_axis_zero = st.sidebar.checkbox("Y-axis starts at 0", value=True)


with col2:

# Add 3 blank lines
st.markdown("### ")
st.markdown("### ")
st.markdown("### ")
map_bus_routes(route_linestrings, route_numbers,highlight_routes=highlight_routes)
# st.markdown("### Ridership Data")
# dataframe = (rides[rides["route"].isin(route_numbers)])
# filtered_dataframe = dataframe_explorer(dataframe)
# st.dataframe(filtered_dataframe, use_container_width=True)
st.write("NOTE: This is quarterly data. The quarterly data is calculated by taking the sum of the total ridership in each quarter, and dividing it by the number of weekdays in that quarter.")
# Add a download link for the data
st.download_button(
label="Download full dataset as CSV",
data=csv,
file_name='mta_bus_ridership_quarterly.csv',
mime='text/csv',
)



# Add a date selector, NOT another markdown header
st.plotly_chart(fig2, use_container_width=True, )
st.markdown("### Explore the top routes over a date range")
start_date = st.date_input("Start date", datetime(2022, 1, 1))
end_date = st.date_input("End date", datetime(2022, 12, 31))

fig3 = plot_bar_top_n_for_daterange(rides,top_n=5,col='ridership',daterange=(start_date,end_date))
st.plotly_chart(fig3, use_container_width=True, )
# Show the ridership recovery chart

with st.expander("Data details"):
st.write("NOTE: This is quarterly data. The quarterly data is calculated by taking the sum of the total ridership in each quarter, and dividing it by the number of weekdays in that quarter.")
st.write("The routes displayed on the map do not include the supplemental services that provide service to Baltimore City Schools. These riders **are** included in the ridership data.")
st.markdown(":red[Maps may not reflect service changes. These should be considered as a guide to the general service area only.]")
# Add a download link for the data



else:
# Show a message if no routes are selected
st.warning("Please select at least one route.")



badge(type="twitter", name="willfedder")
badge(type="github", name="fedderw/transitscope-baltimore")
st.sidebar.write("App created by [Will Fedder](https://linkedin.com/in/fedderw).")
st.sidebar.write("Data provided by [MDOT MTA](https://www.arcgis.com/apps/dashboards/1bbc19f2abfe4fde94e4c563f5e8371c). To view a geographic system map in PDF format, visit the [MTA's website](https://s3.amazonaws.com/mta-website-staging/mta-website-staging/files/System%20Maps/Geographic_System_Map_08_2022.pdf).")
st.sidebar.write("Data extracted using this [script](https://github.com/jamespizzurro/mta-bus-ridership-scraper) authored by James Pizzurro.")

# Pause for 20 seconds, then ask Streamlit to rerun the script.
# NOTE(review): no button press is awaited here — the sleep and rerun happen
# unconditionally each time execution reaches this point; confirm this
# periodic-refresh behaviour is intended.
import time
time.sleep(20)
st.experimental_rerun()

24 changes: 24 additions & 0 deletions app/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from pathlib import Path

# Relative path to the raw-data directory; resolved against the process's
# current working directory when used.
data_raw_dir = Path("data/raw")
# Relative path to the top-level data directory (same working-directory caveat).
data_dir = Path("data")

# Hex colour codes keyed by CityLink route name, plus an "Other" entry.
#
# The literal previously listed "CityLink Navy" twice ("#00337C" followed by
# "#486581"); Python keeps only the last value for a repeated key, so the
# effective colour was always "#486581". The dead entry has been removed.
# Navy candidates tried previously: #3A1078, #00337C, #13005A, #0F3460.
CITYLINK_COLORS = {
    "CityLink Red": "#FF0000",
    "CityLink Blue": "#4169E1",
    "CityLink Green": "#008000",
    "CityLink Yellow": "#FFFF00",
    "CityLink Brown": "#A52A2A",
    "CityLink Orange": "#FF6600",
    "CityLink Purple": "#800080",
    "CityLink Pink": "#FFC0CB",
    "CityLink Lime": "#00FF00",
    "CityLink Navy": "#486581",
    "CityLink Silver": "#C0C0C0",
    "CityLink Gold": "#FFD700",
    "Other": "#FFFFFF",
}
76 changes: 76 additions & 0 deletions app/load_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import pandas as pd
import geopandas as gpd
import streamlit as st
import numpy as np
from datetime import datetime


def add_ridership_weekday_2019(
    df: pd.DataFrame, freq: str = "quarter"
) -> pd.DataFrame:
    """Attach a 2019 baseline and a recovery ratio to a ridership table.

    For every row, the summed ``ridership_weekday`` of the same route in the
    same calendar period (quarter or month) of 2019 is attached as
    ``ridership_weekday_2019``. ``recovery_over_2019`` is the row's own
    ``ridership_weekday`` divided by that baseline.

    Args:
        df (pd.DataFrame): Table with ``route``, ``date`` and
            ``ridership_weekday`` columns. ``date`` may already be
            datetime-typed or be anything ``pd.to_datetime`` can parse.
            The caller's frame is not modified.
        freq (str, optional): Name of the period column to create.
            ``"quarter"`` uses the calendar quarter; any other value uses
            the calendar month. Defaults to 'quarter'.

    Returns:
        pd.DataFrame: A new frame with the input columns plus ``freq``,
        ``year``, ``ridership_weekday_2019`` and ``recovery_over_2019``.
        The recovery ratio is NaN for rows dated before 2020 and for
        route/period pairs that have no 2019 rows. A zero baseline is not
        special-cased.
    """
    # Work on a copy so the helper columns are not written into the
    # caller's DataFrame.
    df = df.copy()

    # Coerce up front so the .dt accessors below work for string/object dates.
    df["date"] = pd.to_datetime(df["date"])

    df[freq] = (
        df["date"].dt.quarter if freq == "quarter" else df["date"].dt.month
    )
    df["year"] = df["date"].dt.year

    # 2019 totals per route and period; these act as the baseline.
    baseline_2019 = (
        df[df["year"] == 2019]
        .groupby(["route", freq])[["ridership_weekday"]]
        .sum()
        .reset_index()
    )

    # Left merge keeps every input row; the baseline column receives the
    # "_2019" suffix because its name collides with the original column.
    merged_df = df.merge(
        baseline_2019, on=["route", freq], how="left", suffixes=("", "_2019")
    )

    # Recovery is only reported from 2020 onwards.
    merged_df["recovery_over_2019"] = np.where(
        merged_df["date"].dt.year < 2020,
        np.nan,
        merged_df["ridership_weekday"] / merged_df["ridership_weekday_2019"],
    )

    return merged_df


# Names of the core ridership columns.
# NOTE(review): no function visible in this module references this list —
# confirm whether it is still needed.
cols = ["route", "date", "ridership_weekday", "ridership", ]


@st.cache_data
def get_rides(file_path="data/mta_bus_ridership.parquet"):
    """Load the MTA bus ridership data with month-level 2019 recovery columns.

    Args:
        file_path: Location of the parquet file to read.

    Returns:
        The result of ``add_ridership_weekday_2019`` applied to the parquet
        contents with ``freq="month"``.
    """
    raw_rides = pd.read_parquet(file_path)
    return add_ridership_weekday_2019(raw_rides, freq="month")


@st.cache_data
def get_rides_quarterly(file_path="data/mta_bus_ridership_quarterly.parquet"):
    """Load the quarterly ridership data with quarter-level 2019 recovery columns.

    Args:
        file_path: Location of the quarterly parquet file to read.

    Returns:
        The result of ``add_ridership_weekday_2019`` applied to the parquet
        contents with ``freq="quarter"``.
    """
    raw_quarterly = pd.read_parquet(file_path)
    return add_ridership_weekday_2019(raw_quarterly, freq="quarter")


@st.cache_data
def get_route_linestrings(file_path="data/mta_bus_route_linestring.geojson"):
    """Read the bus route geometry file into a GeoDataFrame.

    Args:
        file_path: Location of the GeoJSON file to read.

    Returns:
        The GeoDataFrame produced by ``gpd.read_file``, returned as-is.
    """
    # NOTE: the geometry column contains many multiline strings. No conversion
    # to single linestrings is performed here; the file is returned unchanged.
    return gpd.read_file(file_path)
Loading