From 9b71aadf3090c4698d5998b78d4ef61b7cc1c490 Mon Sep 17 00:00:00 2001 From: Marc Cenac Date: Thu, 10 Jan 2019 21:32:52 -0600 Subject: [PATCH] Adding initial notebook to explore 311 data --- Getting_Started.ipynb | 158 ++++++++++++++++++++++++++++++++++++++++++ README.md | 36 ++++------ 2 files changed, 171 insertions(+), 23 deletions(-) create mode 100644 Getting_Started.ipynb diff --git a/Getting_Started.ipynb b/Getting_Started.ipynb new file mode 100644 index 0000000..00e01bf --- /dev/null +++ b/Getting_Started.ipynb @@ -0,0 +1,158 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### You can hit the `run` button or press `shift + enter` to run the selected cell" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# first import the libraries you want to work with\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# then download some data into a pandas data frame\n", + "# df311 = pd.read_csv('https://data.nola.gov/api/views/3iz8-nghx/rows.csv')\n", + "df311 = pd.read_csv('data/nola311_raw.csv')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> If you ran the setup.sh script, you can load the data from data/nola311_raw.csv, otherwise you can download from the URL directly (but it will take a little while)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "# now we can use the data frame to explore the data\n", + "# first let's see what columns we have to work with \n", + "df311.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# now you can get more info about the columns with .info()\n", + "df311.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, +
"outputs": [], + "source": [ + "# a cell only shows its last expression, so display() is used to show both .head() and .tail()\n", + "display(df311.head())\n", + "display(df311.tail(2))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# let's inspect a column and see how many issues we have for each issue_type\n", + "df311.issue_type.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# let's use matplotlib to graph this\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df311.issue_type.value_counts().plot.bar()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# maybe it would look better horizontal\n", + "df311.issue_type.value_counts().plot.barh()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# maybe we should make it a little bigger\n", + "# figsize is (width, height) in inches\n", + "df311.issue_type.value_counts().plot.barh(figsize=(20,10))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " # Now go forth and explore more data! There are tons of videos, blog posts, and documentation to get started. 
Here's [a great site](http://www.firstpythonnotebook.org/index.html) that goes into more detail on working with Python and the Jupyter Notebooks " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/README.md b/README.md index ab48eea..03394a0 100644 --- a/README.md +++ b/README.md @@ -8,31 +8,19 @@ Also, you can submit a 311 ticket online now! https://www.nola.gov/311/ This purpose of this project will now focus on working with the 311 data for data analysis use cases. -As a citizen, -* I want to lookup info about my 311 request (by entering a reference # received from 311 or searching my previous history). -* I want to visualize ticket types with bar charts (counts) and pie graphs (percentage). -* I want to visualize the data on a map around me and filter and sort by ticket type, open/closed, date range. -* I want to browse curated datasets before exploring the data myself (maybe showing less data that's -more recent data will be useful; maybe by sharing my location, I can see more relevant data on a map -zoomed to my address). -* I want to see open requests near me. -* I want to submit issues that integrate with the City's system (the city 311 system can notify the user). -* I want the ability to choose the amount of information to share about myself (email required to submit ticket?) 
-Other nice to have features: -* Commenting and upvoting on issues nearby me -* Get notified about issues created by others (star/follow) -* See filter of all issues a user has submitted (email required) -* Map feature: Request per district (styled where color gets darker for more requests) -* Frequency: analyze the frequency of 311 incidents (median time, types - that stay open the longest, etc) +## Prerequisites +Ensure you have [Docker](https://www.docker.com/community-edition) b/c that makes the database installation easier. -## prerequisites +You will also need to install Python and Jupyter Notebooks to work with the data. We recommend downloading +[Anaconda](https://www.anaconda.com/download/) to do this. -Ensure you have installed [Node.js](https://nodejs.org/en/download/current/) and [Docker](https://www.docker.com/community-edition). +> You probably want to download the Python 3 version b/c Python 2 will become unsupported [soon](https://pythonclock.org/) -## database setup + + +## Database setup For local development, you can use Docker to setup the database ``` @@ -56,13 +44,15 @@ NOLA311_DB_PORT=5432 \ ./setup.sh ``` -## app setup +### Getting Started + +Once `jupyter` is installed, you can start the notebook server and open the "Getting Started" notebook -For local development, you can use Docker to run the application ``` -docker-compose up -d app +jupyter notebook Getting_Started.ipynb ``` + ## some sample queries on the database Login to the db with psql `psql -h localhost -U nola311` and run some queries: