|
| 1 | +## Budget |
| 2 | +## Download area harvested from NASS quick stat |
| 3 | +## Yu Pei |
| 4 | +## May-2015 |
# Project working directory: the relative './output/' path used when writing
# results is resolved against this location.
setwd('~/Documents/GSR/budget/')

# NASS Quick Stats API key. Read from the NASS_API_KEY environment variable so
# the key does not have to be committed to the script; falls back to the
# previous hard-coded value (an empty string) when the variable is not set.
yukey <- Sys.getenv('NASS_API_KEY', unset = '')

# State FIPS codes (as zero-padded strings) used as the default states to query.
states <- c('06', '16', '30', '41', '53')
| 9 | +## Specify What, Where, and When |
# Download and tidy NASS Quick Stats "AREA HARVESTED" acreage for one state
# and one year.
#
# Arguments:
#   mykey    NASS Quick Stats API key (defaults to the file-level `yukey`).
#   year     Year to request.
#   state    State FIPS code as a zero-padded string (defaults to the first
#            entry of the file-level `states`).
#   verbose  If TRUE, print progress messages.
#
# Returns:
#   A data.frame with one row per crop and location and the columns
#   source_desc, crops, location, year, total, irrigated.
#   `location` is the state FIPS code for state rows, state + county code for
#   county rows, and state + "ag" + district code for agricultural-district
#   rows. When both CENSUS and SURVEY rows exist for a crop/location, the
#   CENSUS row is kept. A zero-row data.frame with the same columns is
#   returned when nothing survives the filters.
getArea <- function(mykey = yukey, year = 2005, state = states[1],
                    verbose = FALSE) {

  # Grab all the data in one state, in one year
  query_url <- paste0(
    'http://quickstats.nass.usda.gov/api/api_GET/?key=', mykey,
    '&year=', year, '&sector_desc=CROPS', '&unit_desc=ACRES',
    '&state_fips_code=', state,
    '&statisticcat_desc=AREA%20HARVESTED', '&freq_desc=ANNUAL',
    '&format=CSV'
  )
  res <- read.csv(query_url, stringsAsFactors = FALSE)

  if (verbose) print('Completed downloading data.')

  # Get rid of columns that are not needed (e.g. country)
  res <- res[, c("source_desc", "commodity_desc", "short_desc",
                 "domain_desc", "domaincat_desc", "agg_level_desc",
                 "state_fips_code", "asd_code", "county_code",
                 "year", "Value", "reference_period_desc")]

  # Irrigation breakdown rows to discard; only the plain total and the
  # ", IRRIGATED" rows are kept, e.g. for barley:
  #   keep: "BARLEY", "BARLEY, IRRIGATED"
  #   drop: "BARLEY, NON-IRRIGATED", "BARLEY, IRRIGATED, ENTIRE CROP",
  #         "BARLEY, IRRIGATED, NONE OF CROP",
  #         "BARLEY, IRRIGATED, PART OF CROP, IRRIGATED PORTION"
  breakdown_pattern <- paste(c(', NON-IRRIGATED',
                               ', IRRIGATED, ENTIRE CROP',
                               ', IRRIGATED, NONE OF CROP',
                               ', PART OF CROP, IRRIGATED PORTION'),
                             collapse = '|')

  # All row filters in one mask. %in% is used instead of == so that NA fields
  # yield FALSE rather than injecting all-NA rows into the result.
  keep <- res$domain_desc %in% 'TOTAL' &
    res$reference_period_desc %in% 'YEAR' &
    res$agg_level_desc %in% c('AGRICULTURAL DISTRICT', 'COUNTY', 'STATE') &
    !grepl(breakdown_pattern, res$short_desc) &
    !(res$county_code %in% 998)        # 998 = combined counties
  res <- res[keep, ]

  # Nothing left: return an empty, correctly shaped table. (formatC() returns
  # a length-one "" for zero-length input, which cannot be assigned as a
  # column of a zero-row data.frame.)
  if (nrow(res) == 0) {
    return(data.frame(source_desc = character(0), crops = character(0),
                      location = character(0), year = integer(0),
                      total = numeric(0), irrigated = numeric(0),
                      stringsAsFactors = FALSE))
  }

  # Crop name is everything before " - ACRES" in short_desc
  res$crops <- gsub('(.*) - ACRES.*', '\\1', res$short_desc)

  # Convert Value to numbers: strip thousands separators; entries that are
  # not numeric become NA (the coercion warning is expected and silenced).
  res$Value <- suppressWarnings(as.numeric(gsub(',', '', res$Value)))

  # Zero-pad the location codes
  res$state_fips_code <- formatC(res$state_fips_code, width = 2,
                                 format = 'd', flag = '0')
  res$county_code <- formatC(res$county_code, width = 3,
                             format = 'd', flag = '0')
  res$asd_code <- formatC(res$asd_code, width = 2, format = 'd', flag = '0')

  if (verbose) print('Finished Preprocessing, final step')
  res$location <- ifelse(
    res$agg_level_desc == 'STATE',
    res$state_fips_code,
    ifelse(res$agg_level_desc == 'COUNTY',
           paste0(res$state_fips_code, res$county_code),
           paste0(res$state_fips_code, "ag", res$asd_code))
  )

  # Remove redundant columns
  res <- res[, c("source_desc", "year", "crops", "location", "Value")]
  names(res) <- c("source_desc", "year", "crops", "location", "total")

  # Split out the irrigated rows and join them back as a new column
  is_irrigated <- grepl(', IRRIGATED', res$crops)
  irrigated <- res[is_irrigated, c("source_desc", "crops", "location", "total")]
  names(irrigated) <- c("source_desc", "crops", "location", "irrigated")
  irrigated$crops <- gsub(', IRRIGATED', '', irrigated$crops)
  res <- merge(res[!is_irrigated, ], irrigated,
               by = c("source_desc", "crops", "location"), all.x = TRUE)

  # One row per crop/location. Sorting CENSUS rows first makes the preference
  # explicit, so the first row of each crop/location pair is the CENSUS one
  # whenever it exists.
  res <- res[order(res$crops, res$location, res$source_desc != 'CENSUS'), ]
  res <- res[!duplicated(res[, c("crops", "location")]), ]
  row.names(res) <- NULL

  res # Return
}
| 94 | + |
| 95 | + |
| 96 | +### Wrapper to get all year and all state data |
# Download harvested-area data for several states in one year and stack the
# per-state tables.
#
# Arguments:
#   curyear      Year to request from getArea().
#   state_codes  State FIPS codes (zero-padded strings) to query; defaults to
#                the five states this script was written for.
#
# Returns:
#   The row-bound result of getArea() for every state, in the order given.
#   NULL when `state_codes` is empty.
getall <- function(curyear = 2005,
                   state_codes = c('06', '16', '30', '41', '53')) {
  # unname() keeps the list unnamed so rbind() does not prefix row names
  per_state <- lapply(unname(state_codes), function(code) {
    getArea(year = curyear, state = code)
  })
  do.call(rbind, per_state)
}
| 107 | + |
# Download every year from 2005 to 2014 and write one CSV per year.
# The output directory is created up front so that write.csv() cannot fail
# on a missing directory after the downloads have already been done.
out_dir <- './output'
if (!dir.exists(out_dir)) {
  dir.create(out_dir, recursive = TRUE)
}

for (yr in 2005:2014) {
  dat <- getall(yr)
  write.csv(dat, paste0(out_dir, '/harvest', yr, '.csv'), row.names = FALSE)
  print(paste('finished', yr))
}
| 113 | + |
| 114 | + |
| 115 | + |
| 116 | + |
| 117 | + |
| 118 | + |
| 119 | + |
| 120 | + |
| 121 | +######### Under construction |
| 122 | + |
| 123 | + |
| 124 | +## SURVEY vs CENSUS |
| 125 | + |
| 126 | +## Combine rows based on domaincat_desc |
| 127 | + |
| 128 | + |
## Draft query (under construction): the request below asks for
## statisticcat_desc = PRICE RECEIVED for Oregon (FIPS 41) in 2007.
## NOTE(review): this section was labelled "Get yields" -- confirm whether
## prices or yields are intended.
## Uses the file-level API key `yukey`; `mykey` only exists as a parameter
## inside getArea() and is not defined at top level.
res <- read.csv(paste0('http://quickstats.nass.usda.gov/api/api_GET/?key=',
                       yukey,
                       '&year=', 2007, '&sector_desc=CROPS',
                       '&state_fips_code=', '41',
                       '&statisticcat_desc=PRICE%20RECEIVED', '&freq_desc=ANNUAL',
                       '&format=CSV'),
                stringsAsFactors = FALSE)
# Quick look at a few columns, selected by position
res[, c(1, 4, 9, 10, 38)]
0 commit comments