forked from fivethirtyeight/data
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrape.R
41 lines (27 loc) · 769 Bytes
/
scrape.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# Dallas shooting scraping
library(dplyr)
library(rvest)
library(readr)
library(tidyr)
library(lubridate)
library(stringr)
library(ggplot2)
# Base URL of the per-year search pages; the year is appended to it.
yearly_url <- "https://www.odmp.org/search/year/"
years <- seq(1791, 2016)
# CSS selector for the nodes whose text is read as one record per node:
# every <p> element plus every <a> nested inside a <p>.
selector_yearly <- "p , p a"

# Scrape the page for a single year.
#
# Args:
#   year: value appended to `yearly_url` to build the page URL.
#
# Returns:
#   A data frame with character columns `person`, `dept`, `eow` and `cause`,
#   one row per node whose text split into at least two pieces. Returns NULL
#   (after a warning naming the year) when the page could not be fetched or
#   parsed, so one bad request does not discard the rest of the scrape.
scrape_year <- function(year) {
  page <- tryCatch(
    read_html(paste0(yearly_url, year)),
    error = function(e) {
      warning("Skipping year ", year, ": ", conditionMessage(e),
              call. = FALSE)
      NULL
    }
  )
  if (is.null(page)) {
    return(NULL)
  }

  incidents <- page %>%
    html_nodes(selector_yearly) %>%
    html_text()

  data.frame(incident = incidents, stringsAsFactors = FALSE) %>%
    # Fields inside a node's text are delimited by a newline followed by nine
    # tabs. Nodes with fewer than four pieces are expected (they are removed
    # by the filter below), so pad them with NA on the right without warning.
    separate(incident, sep = "\n\t\t\t\t\t\t\t\t\t",
             into = c("person", "dept", "eow", "cause"),
             fill = "right") %>%
    # Text that did not split at all has no `dept`; drop those rows.
    filter(!is.na(dept))
}

# Scrape data
# Collect one data frame per year and bind them once at the end instead of
# growing `all_data` with rbind() on every iteration. bind_rows() ignores the
# NULL entries left by years that were skipped.
all_data <- bind_rows(lapply(years, scrape_year))

write_csv(all_data, "all_data_fallen_officers.csv")