Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add state compensation and wage data to replace employment #49

Merged
merged 19 commits into from
Mar 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions R/CommodityFlowFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,18 +79,19 @@ calculateCommodityFlowRatios <- function(state, year, flow_ratio_type, specs, io
# Determine BEA sectors that need allocation
allocation_sectors <- SCTGtoBEA[duplicated(SCTGtoBEA$SCTG) |
duplicated(SCTGtoBEA$SCTG, fromLast = TRUE), ]
# Use State Emp to allocate
StateEmp <- getStateEmploymentbyBEASummary(year, specs)
# Merge StateEmp with allocation_sectors
Emp <- merge(StateEmp, allocation_sectors, by = BEA_col)
# Use State Compensation data to allocate
# StateDF <- getStateEmploymentbyBEASummary(year, specs)
StateDF <- getStateCompensationbyBEASummary(year, specs)
# Merge with allocation_sectors
state_df <- merge(StateDF, allocation_sectors, by = BEA_col)
# Merge FAF_2r and Emp
FAF_2r <- merge(FAF_2r, Emp[Emp$State == state, ],
FAF_2r <- merge(FAF_2r, state_df[state_df$State == state, ],
by = c("SCTG", BEA_col), all.x = TRUE)
FAF_2r[is.na(FAF_2r$State), "State"] <- state
FAF_2r[is.na(FAF_2r$Emp), "Emp"] <- 1
FAF_2r[is.na(FAF_2r$Value), "Value"] <- 1
for (sctg in unique(FAF_2r$SCTG)) {
# Calculate allocation factor
weight_vector <- FAF_2r[FAF_2r$SCTG == sctg, "Emp"]
weight_vector <- FAF_2r[FAF_2r$SCTG == sctg, "Value"]
allocation_factor <- weight_vector/sum(weight_vector/4, na.rm = TRUE)
# Allocate Value
value <- FAF_2r[FAF_2r$SCTG == sctg, "VALUE"]*allocation_factor
Expand Down
133 changes: 100 additions & 33 deletions R/StateSupplyFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,37 @@ calculateStatetoBEASummaryAllocationFactor <- function(year, allocationweightsou
cw <- loadDatafromUSEEIOR(paste0('MasterCrosswalk', schema), appendSchema = FALSE)
crosswalk <- cw[cw[, BEA_col] %in% allocation_codes, ]
# Generate allocation_weight df based on pre-saved data
if (allocationweightsource == "Employment") {
# Load BEA state Emp
EmpFBS <- getFlowsaData("Employment", year, specs$model_ver)
EmpFBS <- mapFlowBySectorfromNAICStoBEA(EmpFBS, year, "Summary", specs)
EmpFBS$GeoName <- mapFIPS5toLocationNames(EmpFBS$FIPS, "FIPS")
EmpFBS$FIPS <- NULL
names(EmpFBS)[names(EmpFBS) == 'FlowAmount'] <- 'Weight'
allocation_weight <- EmpFBS
if (allocationweightsource == "Employment" || allocationweightsource == "Compensation") {
# Load BEA State data to BEA Summary mapping
DatatoBEAmapping <- loadBEAStateDatatoBEASummaryMapping(allocationweightsource)
sectors <- unique(crosswalk[crosswalk$BEA_2017_Sector_Code %in% c("44RT", "FIRE", "G"), BEA_col])
DatatoBEAmapping <- DatatoBEAmapping[DatatoBEAmapping[, BEA_col] %in% sectors, ]
# For real estate (FIRE) and gov (G) sectors, calculate allocation factors using US GVA by industry
Summary_ValueAdded_IO <- loadDatafromUSEEIOR("Summary_ValueAdded_IO")
allocation_factors <- merge(DatatoBEAmapping,
Summary_ValueAdded_IO[, year_col, drop = FALSE],
by.x = BEA_col, by.y = 0)
for (linecode in unique(allocation_factors$LineCode)) {
weight_vector <- allocation_factors[allocation_factors$LineCode == linecode, year_col]
allocation_factors[allocation_factors$LineCode == linecode, "factor"] <- weight_vector/sum(weight_vector)
}
# Load BEA state data
name <- ifelse(allocationweightsource == "Compensation",
"State_CompensationByIndustry_", "State_Employment_")
BEAStateData <- loadStateIODataFile(paste0(name, year), ver = model_ver)
# Map BEA state data (from LineCode) to BEA Summary
BEAStateData <- merge(BEAStateData[BEAStateData$GeoName %in%
c(state.name, "District of Columbia"),
c("GeoName", "LineCode", year_col)],
allocation_factors[, c(BEA_col, "LineCode", "factor")],
by = "LineCode")
# Adjust BEA state data value based on allocation factor
BEAStateData[, year_col] <- BEAStateData[, year_col]*BEAStateData$factor
allocation_weight <- stats::aggregate(BEAStateData[, year_col],
by = list(BEAStateData$GeoName,
BEAStateData[, BEA_col]),
sum)
colnames(allocation_weight) <- c("GeoName", BEA_col, "Weight")
}
# Add US allocation weight (Summary Gross Output)
Summary_GrossOutput_IO <- loadDatafromUSEEIOR("Summary_GrossOutput_IO")
Expand Down Expand Up @@ -115,7 +138,8 @@ calculateStatetoBEASummaryAllocationFactor <- function(year, allocationweightsou
#' @param statetablename Name of pre-saved state table,
#' can be GVA, Tax, Employment Compensation, and GOS.
#' @param year A numeric value between 2007 and 2017 specifying the year of interest.
#' @param allocationweightsource Source of allocation weight, can be "Employment".
#' @param allocationweightsource Source of allocation weight, can be "Employment"
#' or "Compensation".
#' @param specs A list of model specs including 'BaseIOSchema'
#' @return A data frame contains allocated state value
#' for all states with row names being BEA sector code.
Expand Down Expand Up @@ -185,7 +209,7 @@ calculateStateUSValueAddedRatio <- function(year, specs) {
BEA_col <- paste0("BEA_", schema, "_Summary_Code")
year_col <- as.character(year)
# Generate state GVA (value added) table
StateValueAdded <- allocateStateTabletoBEASummary("GVA", year, "Employment", specs)
StateValueAdded <- allocateStateTabletoBEASummary("GVA", year, "Compensation", specs)
# Extract US value added
US_VA <- StateValueAdded[StateValueAdded$GeoName == "United States *", ]
# Extract state value added
Expand Down Expand Up @@ -298,25 +322,74 @@ estimateStateCommodityOutputRatiofromAlternativeSources <- function(year, specs)
#' Map to BEA Summary sectors.
#' @param year A numeric value between 2007 and 2017 specifying the year of interest.
#' @param specs A list of model specs including 'BaseIOSchema',
#' @param datasource A str to differentiate primary data source for employment
#' @return A data frame contains State Employment by BEA Summary.
getStateEmploymentbyBEASummary <- function(year,specs) {
getStateEmploymentbyBEASummary <- function(year, specs, datasource="BEA") {
# Switch to flowsa from BEA was implemented in 652b3ae
# Define BEA_col
schema <- specs$BaseIOSchema
BEA_col <- paste0("BEA_", schema, "_Summary_Code")
# Employment FlowBySector from flowsa
EmpFBS <- getFlowsaData("Employment", year, specs$model_ver)
EmpFBS <- mapFlowBySectorfromNAICStoBEA(EmpFBS, year, "Summary", specs)
EmpFBS$State <- mapFIPS5toLocationNames(EmpFBS$FIPS, "FIPS")
names(EmpFBS)[names(EmpFBS) == 'FlowAmount'] <- 'Emp'

# Make sure 0 values are expicit
combinations <- expand.grid(State = unique(EmpFBS$State), Summary = unique(EmpFBS[[BEA_col]]))
EmpFBS <- merge(EmpFBS, combinations, by.x = c("State", BEA_col), by.y = c("State", "Summary"), all.y = TRUE)
EmpFBS$Emp[is.na(EmpFBS$Emp)] <- 0
StateEmp <- EmpFBS[, c(BEA_col, "State", "Emp")]
if(datasource == "BEA") {
# BEA State Emp
BEAStateEmp <- loadStateIODataFile(paste0("State_Employment_", year),
ver = specs$model_ver)
EmptoBEAmapping <- loadBEAStateDatatoBEASummaryMapping("Employment")
BEAStateEmp <- merge(BEAStateEmp[, c("GeoName", "LineCode", as.character(year))],
EmptoBEAmapping, by = "LineCode")
# Aggregate StateEmployment by BEA
BEAStateEmp <- stats::aggregate(BEAStateEmp[, as.character(year)],
by = list(BEAStateEmp[[BEA_col]],
BEAStateEmp$GeoName), sum)
colnames(BEAStateEmp) <- c(BEA_col, "State", "Emp")

# Prioritize BEAStateEmp, replace NAs in Emp with values from EmpFBS
StateEmp <- merge(BEAStateEmp[BEAStateEmp$State %in% EmpFBS$State, ],
EmpFBS, by.x = c("State", BEA_col), by.y = c("State",BEA_col), all = TRUE)
StateEmp[is.na(StateEmp$Emp), "Emp"] <- StateEmp[is.na(StateEmp$Emp), "FlowAmount"]
# Replace the remaining NAs in Emp with zero
StateEmp[is.na(StateEmp$Emp), "Emp"] <- 0
# Drop unwanted columns
StateEmp <- StateEmp[, colnames(BEAStateEmp)]
} else if (datasource == "Flowsa") {
names(EmpFBS)[names(EmpFBS) == 'FlowAmount'] <- 'Emp'

# Make sure 0 values are explicit
combinations <- expand.grid(State = unique(EmpFBS$State), Summary = unique(EmpFBS[[BEA_col]]))
EmpFBS <- merge(EmpFBS, combinations, by.x = c("State", BEA_col), by.y = c("State", "Summary"), all.y = TRUE)
EmpFBS$Emp[is.na(EmpFBS$Emp)] <- 0
StateEmp <- EmpFBS[, c(BEA_col, "State", "Emp")]
}
return(StateEmp)
}

#' Load BEA State Compensation data from pre-saved .rds files.
#' Map to BEA Summary sectors.
#' @param year A numeric value between 2007 and 2023 specifying the year of interest.
#' @param specs A list of model specs including 'BaseIOSchema',
#' @return A data frame contains State Compensation by BEA Summary.
getStateCompensationbyBEASummary <- function(year,specs) {
# Define BEA_col
schema <- specs$BaseIOSchema
BEA_col <- paste0("BEA_", schema, "_Summary_Code")
# BEA State Emp
StateDF <- loadStateIODataFile(paste0("State_CompensationByIndustry_", year),
ver = model_ver)
DatatoBEAmapping <- loadBEAStateDatatoBEASummaryMapping("Compensation")
StateDF <- merge(StateDF[, c("GeoName", "LineCode", as.character(year))],
DatatoBEAmapping, by = "LineCode")
# Aggregate by BEA
StateDF <- stats::aggregate(StateDF[, as.character(year)],
by = list(StateDF[[BEA_col]],
StateDF$GeoName), sum)
colnames(StateDF) <- c(BEA_col, "State", "Value")

return(StateDF)
}

#' Estimate state Ag, Fishery and Forestry commodity output ratios
#' @param year A numeric value between 2007 and 2021 specifying the year of interest.
#' @param specs A list of model specs including 'BaseIOSchema',
Expand Down Expand Up @@ -382,14 +455,6 @@ getFAFCommodityOutput <- function(year, specs) {
package = "stateior"))
# Load pre-saved FAF4 commodity flow data
FAF <- loadStateIODataFile(paste("FAF", year, sep = "_"), ver = model_ver)
# Define value_col and origin_col
if (year == 2012) {
value_col <- paste0("value_", year)
} else if (year %in% c(2013:2018)) {
value_col <- paste0("curval_", year)
} else {
value_col <- paste0("current_value_", year)
}
origin_col <- colnames(FAF)[startsWith(colnames(FAF), "dms_orig")]
# Keep domestic and export trade, keep useful columns, then rename
FAF <- FAF[FAF$trade_type %in% c(1, 3),
Expand All @@ -411,10 +476,12 @@ getFAFCommodityOutput <- function(year, specs) {
duplicated(SCTGtoBEA$SCTG, fromLast = TRUE), ]
allocation_sectors <- allocation_sectors[!allocation_sectors[, BEA_col]
%in% c("111CA", "113FF", "311FT"), ]
# Use State Emp to allocate
StateEmp <- getStateEmploymentbyBEASummary(year,specs)
# Merge StateEmp with allocation_sectors
StateEmp <- merge(StateEmp, allocation_sectors, by = BEA_col)
# Use State Compensation data to allocate
# StateDF <- getStateEmploymentbyBEASummary(year,specs)
StateDF <- getStateCompensationbyBEASummary(year,specs)
names(StateDF)[names(StateDF) == 'Value'] <- 'Compensation'
# Merge StateDF with allocation_sectors
StateDF <- merge(StateDF, allocation_sectors, by = BEA_col)
# Process FAF for each state
# Generate AFF
AgFisheryForestry <- getAgFisheryForestryCommodityOutput(year, specs)
Expand All @@ -440,12 +507,12 @@ getFAFCommodityOutput <- function(year, specs) {
sum)
colnames(FAF_state_1) <- c("State", BEA_col, "Value")
# Step 2.2. Allocate FAF_state_2 from SCTG to BEA using BEA state employment
Emp <- StateEmp[StateEmp$State == state, ]
state_df <- StateDF[StateDF$State == state, ]
# Merge with FAF_state_2
FAF_state_2 <- merge(FAF_state_2, Emp, by = c("State", "SCTG", BEA_col))
FAF_state_2 <- merge(FAF_state_2, state_df, by = c("State", "SCTG", BEA_col))
for (sctg in unique(FAF_state_2$SCTG)) {
# Calculate allocation factor
weight_vector <- FAF_state_2[FAF_state_2$SCTG == sctg, "Emp"]
weight_vector <- FAF_state_2[FAF_state_2$SCTG == sctg, "Compensation"]
allocation_factor <- weight_vector/sum(weight_vector, na.rm = TRUE)
# Allocate Value
value <- FAF_state_2[FAF_state_2$SCTG == sctg, "Value"]*allocation_factor
Expand Down
5 changes: 3 additions & 2 deletions R/StateUseFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -655,7 +655,7 @@ calculateStateUSEmpCompensationRatio <- function(year, specs) {
schema <- specs$BaseIOSchema
BEA_col <- paste0("BEA_", schema, "_Summary_Code")
# Generate state Employee Compensation table
StateEmpComp <- allocateStateTabletoBEASummary("EmpCompensation", year, "Employment", specs)
StateEmpComp <- allocateStateTabletoBEASummary("EmpCompensation", year, "Compensation", specs)
# Separate into state Employee Compensation
StateEmpComp <- StateEmpComp[StateEmpComp$GeoName != "United States *", ]
# Map US Employee Compensation to BEA
Expand All @@ -665,7 +665,8 @@ calculateStateUSEmpCompensationRatio <- function(year, specs) {
allocation_sectors <- GVAtoBEAmapping[duplicated(GVAtoBEAmapping$LineCode) |
duplicated(GVAtoBEAmapping$LineCode, fromLast = TRUE), ]
USEmpComp <- merge(USEmpComp, GVAtoBEAmapping, by = "LineCode")
USEmpComp <- merge(USEmpComp,useeior::Summary_GrossOutput_IO[, as.character(year), drop = FALSE],
Summary_GrossOutput_IO <- loadDatafromUSEEIOR("Summary_GrossOutput_IO")
USEmpComp <- merge(USEmpComp, Summary_GrossOutput_IO[, as.character(year), drop = FALSE],
by.x = BEA_col, by.y = 0)
USEmpComp[, as.character(year)] <- USEmpComp[, paste0(year, ".x")]
for (linecode in unique(allocation_sectors$LineCode)) {
Expand Down
Loading