Skip to content

Commit c51afbd

Browse files
committed
Merge branch 'master' of https://github.com/CSTARS/nass-summary
2 parents c4072ac + 6ab4273 commit c51afbd

File tree

1 file changed

+137
-0
lines changed

1 file changed

+137
-0
lines changed

src/budget.r

+137
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
## Budget
2+
## Download area harvested (acres) from the NASS Quick Stats API
3+
## Yu Pei
4+
## May-2015
5+
# NOTE: relative paths used later in this script (e.g. './output/') are
# resolved against this directory, so it has to exist on the machine that
# runs the script.
setwd('~/Documents/GSR/budget/')

# NASS Quick Stats API key. It is read from the NASS_API_KEY environment
# variable so the key never has to be committed to the repository; when the
# variable is not set this falls back to the previous value, an empty string.
yukey <- Sys.getenv('NASS_API_KEY', unset = '')

# State FIPS codes (sent to the API as `state_fips_code`) to download.
states <- c('06', '16', '30', '41', '53')
9+
## Specify What, Where, and When
10+
## Download the NASS Quick Stats "AREA HARVESTED" records (unit: acres) for
## one state and one year and reduce them to one row per crop and location.
##
## Arguments:
##   mykey   - Quick Stats API key (defaults to the global `yukey`).
##   year    - year to request.
##   state   - state FIPS code (defaults to the first entry of the global
##             `states`).
##   verbose - if TRUE, print progress messages.
##
## Returns a data.frame with the columns
##   source_desc, crops, location, year, total, irrigated
## where `location` is the state FIPS code for state rows, state + county
## code for county rows and state + "ag" + district code for agricultural
## district rows. `total` is the harvested area and `irrigated` the value of
## the matching ", IRRIGATED" record (NA when there is none). When both a
## CENSUS and a SURVEY record exist for the same crop and location only the
## CENSUS record is kept. A data.frame with zero rows (same columns) is
## returned when nothing survives the filters.
getArea <- function(mykey = yukey, year = 2005, state = states[1], verbose = FALSE) {

  # Grab all the data in one state, in one year
  query <- paste0('http://quickstats.nass.usda.gov/api/api_GET/?key=', mykey,
                  '&year=', year, '&sector_desc=CROPS', '&unit_desc=ACRES',
                  '&state_fips_code=', state,
                  '&statisticcat_desc=AREA%20HARVESTED', '&freq_desc=ANNUAL',
                  '&format=CSV')
  res <- read.csv(query, stringsAsFactors = FALSE)

  if (verbose) print('Completed downloading data.')

  # Get rid of useless columns eg country etc
  res <- res[, c("source_desc", "commodity_desc", "short_desc",
                 "domain_desc", "domaincat_desc", "agg_level_desc",
                 "state_fips_code", "asd_code", "county_code",
                 "year", "Value", 'reference_period_desc')]

  # Keep yearly TOTAL records at state / agricultural district / county level.
  # which() drops rows whose condition is NA instead of turning them into
  # all-NA rows.
  keep <- res$domain_desc == 'TOTAL' & res$reference_period_desc == 'YEAR' &
    res$agg_level_desc %in% c('AGRICULTURAL DISTRICT', 'COUNTY', 'STATE') &
    !is.na(res$short_desc)
  res <- res[which(keep), ]

  ## Remove the irrigation breakdown, just keep total and irrigated, i.e. drop
  #   "BARLEY, IRRIGATED, ENTIRE CROP"
  #   "BARLEY, IRRIGATED, NONE OF CROP"
  #   "BARLEY, IRRIGATED, PART OF CROP, IRRIGATED PORTION"
  #   "BARLEY, NON-IRRIGATED"
  # but keep "BARLEY" and "BARLEY, IRRIGATED".
  breakdown <- paste(c(', NON-IRRIGATED',
                       ', IRRIGATED, ENTIRE CROP',
                       ', IRRIGATED, NONE OF CROP',
                       ', PART OF CROP, IRRIGATED PORTION'),
                     collapse = '|')
  res <- res[!grepl(breakdown, res$short_desc), ]

  ## Remove 998 (combined county)
  res <- res[!(res$county_code %in% 998), ]

  # Nothing left: return an empty table with the final layout instead of
  # failing further down (formatC() and the row renumbering do not cope with
  # zero rows).
  if (nrow(res) == 0) {
    return(data.frame(source_desc = character(0), crops = character(0),
                      location = character(0), year = integer(0),
                      total = numeric(0), irrigated = numeric(0),
                      stringsAsFactors = FALSE))
  }

  # Get crop names: everything in front of " - ACRES"
  res$crops <- gsub('(.*) - ACRES.*', '\\1', res$short_desc)

  # Convert Value to numbers; entries that are not numbers become NA, which
  # is why the coercion warning is silenced.
  res$Value <- suppressWarnings(as.numeric(gsub(',', '', res$Value)))

  # Zero-pad the codes that make up the location column
  res$state_fips_code <- formatC(res$state_fips_code, width = 2, format = 'd', flag = '0')
  res$county_code <- formatC(res$county_code, width = 3, format = 'd', flag = '0')
  res$asd_code <- formatC(res$asd_code, width = 2, format = 'd', flag = '0')

  if (verbose) print('Finished Preprocessing, final step')
  res$location <- ifelse(res$agg_level_desc == 'STATE', res$state_fips_code,
                         ifelse(res$agg_level_desc == 'COUNTY',
                                paste0(res$state_fips_code, res$county_code),
                                paste0(res$state_fips_code, "ag", res$asd_code)))

  # Remove redundant columns
  res <- res[, c("source_desc", "year", 'crops', 'location', "Value")]
  names(res)[names(res) == 'Value'] <- 'total'

  # Split out the ", IRRIGATED" records and join them back onto the matching
  # total record as a separate `irrigated` column.
  is_irrigated <- grepl(', IRRIGATED', res$crops)
  irrigated <- res[is_irrigated, c("source_desc", 'crops', 'location', 'total')]
  names(irrigated)[names(irrigated) == 'total'] <- 'irrigated'
  irrigated$crops <- gsub(', IRRIGATED', '', irrigated$crops)
  res <- merge(res[!is_irrigated, ], irrigated,
               by = c('source_desc', 'crops', 'location'), all.x = TRUE)

  # If a crop/location has both CENSUS and SURVEY data, only take the CENSUS
  # row: sort by crop, location and source ('CENSUS' sorts before 'SURVEY';
  # order() keeps ties in their existing order) and keep the first row of
  # every crop/location pair.
  res <- res[order(res$crops, res$location, res$source_desc), ]
  dat <- res[!duplicated(paste(res$crops, res$location, sep = '\r')), ]
  row.names(dat) <- NULL

  dat # Return
}
94+
95+
96+
### Wrapper to get the data of all states for one year
97+
## Download the harvested-area table of several states for one year and stack
## them into a single data.frame.
##
## Arguments:
##   curyear   - year passed on to getArea().
##   statelist - character vector of state FIPS codes to download; the default
##               is the set of five states this wrapper has always fetched.
##
## Returns the getArea() results of all states row-bound in the order of
## `statelist` (NULL when `statelist` is empty).
getall <- function(curyear = 2005, statelist = c('06', '16', '30', '41', '53')) {
  per_state <- lapply(statelist, function(st) getArea(year = curyear, state = st))
  do.call(rbind, per_state)
}
107+
108+
# Download every year from 2005 to 2014 and write one CSV file per year.
# The output folder is created up front (no-op if it already exists) so that
# write.csv() cannot fail on a missing directory after the download of a
# whole year has already been done.
dir.create('./output', showWarnings = FALSE, recursive = TRUE)
for (yr in 2005:2014) {
  dat <- getall(yr)
  write.csv(dat, paste0('./output/harvest', yr, '.csv'), row.names = FALSE)
  print(paste('finished', yr))
}
113+
114+
115+
116+
117+
118+
119+
120+
121+
######### Under construction
122+
123+
124+
## SURVEY vs CENSUS
125+
126+
## Combine rows based on domaincat_desc
127+
128+
129+
## Get yields
## NOTE(review): the query below requests the PRICE RECEIVED statistic, not a
## yield -- confirm which one is intended.
res <- read.csv(paste0('http://quickstats.nass.usda.gov/api/api_GET/?key=',
                       # was `mykey`, which only exists as an argument of
                       # getArea() and is undefined at the top level
                       yukey,
                       '&year=', 2007, '&sector_desc=CROPS',
                       '&state_fips_code=', '41',
                       '&statisticcat_desc=PRICE%20RECEIVED', '&freq_desc=ANNUAL',
                       '&format=CSV'),
                stringsAsFactors = FALSE)
# Show a handful of columns, selected by position
res[, c(1, 4, 9, 10, 38)]

0 commit comments

Comments
 (0)