diff --git a/NAMESPACE b/NAMESPACE index 181d46c..f11815a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,6 +7,13 @@ export(pm_city_detect) export(pm_city_none) export(pm_city_parse) export(pm_city_std) +export(pm_country_all) +export(pm_country_any) +export(pm_country_detect) +export(pm_country_none) +export(pm_country_parse) +export(pm_country_std) +export(pm_country_trim) export(pm_dictionary) export(pm_has_address) export(pm_has_uid) diff --git a/R/build.R b/R/build.R index a7c4457..36afccd 100644 --- a/R/build.R +++ b/R/build.R @@ -162,7 +162,9 @@ pm_rebuild <- function(.data, start, end, new_address, include_commas = FALSE, endQN <- rlang::quo_name(rlang::enquo(end)) if (endQN == "end"){ - if ("pm.zip4" %in% names(.data) == TRUE){ + if ("pm.country" %in% names(.data) == TRUE) { + endQ <- rlang::quo(!! rlang::sym("pm.country")) + } else if ("pm.zip4" %in% names(.data) == TRUE){ endQ <- rlang::quo(!! rlang::sym("pm.zip4")) } else if ("pm.zip4" %in% names(.data) == FALSE){ endQ <- rlang::quo(!! rlang::sym("pm.zip")) @@ -239,7 +241,7 @@ pm_rebuild <- function(.data, start, end, new_address, include_commas = FALSE, "pm.houseFrac", "pm.houseSuf", "pm.preDir", "pm.street", "pm.streetSuf", "pm.sufDir", "pm.unitType", "pm.unitNum", "pm.city", - "pm.state", "pm.zip", "pm.zip4"), + "pm.state", "pm.zip", "pm.zip4", "pm.country"), stringsAsFactors = FALSE ) diff --git a/R/country.R b/R/country.R new file mode 100644 index 0000000..8298931 --- /dev/null +++ b/R/country.R @@ -0,0 +1,381 @@ +#' Does Country Dictionary Return Any Matches +#' +#' @description Determine whether the country dictionary returns any matches. +#' +#' @usage pm_country_any(.data, dictionary) +#' +#' @param .data A postmastr object created with \link{pm_prep} +#' @param dictionary Optional; a tbl created with \code{pm_dictionary} to be used +#' as a master list for countries. If none is specified, the full default +#' country dictionary will be used. 
+#' +#' @return A logical scalar is returned that is \code{TRUE} if the data contains at +#' least one country name or abbreviation in the given dictionary and \code{FALSE} +#' if it does not. +#' +#' @export +pm_country_any <- function(.data, dictionary){ + + # check for object and key variables + if (pm_has_uid(.data) == FALSE){ + stop("The variable 'pm.uid' is missing from the given object. Create a postmastr object with pm_identify and pm_prep before proceeding.") + } + + if (pm_has_address(.data) == FALSE){ + stop("The variable 'pm.address' is missing from the given object. Create a postmastr object with pm_prep before proceeding.") + } + + # detect countries, passing along the supplied dictionary if one is given + if (missing(dictionary) == TRUE){ + .data <- pm_country_detect(.data) + } else if (missing(dictionary) == FALSE){ + .data <- pm_country_detect(.data, dictionary = dictionary) + } + + # create output + out <- any(.data$pm.hasCountry) + + # return output + return(out) + +} + +#' Does Country Dictionary Return a Match for All Observations +#' +#' @description Determine whether the country dictionary returns matches for all observations. +#' +#' @usage pm_country_all(.data, dictionary) +#' +#' @param .data A postmastr object created with \link{pm_prep} +#' @param dictionary Optional; a tbl created with \code{pm_dictionary} to be used +#' as a master list for countries. If none is specified, the full default +#' country dictionary will be used. +#' +#' @return A logical scalar is returned that is \code{TRUE} if the data contains a country +#' name or abbreviation for every observation in the data set and \code{FALSE} otherwise. +#' +#' @export +pm_country_all <- function(.data, dictionary){ + + # check for object and key variables + if (pm_has_uid(.data) == FALSE){ + stop("The variable 'pm.uid' is missing from the given object. Create a postmastr object with pm_identify and pm_prep before proceeding.") + } + + if (pm_has_address(.data) == FALSE){ + stop("The variable 'pm.address' is missing from the given object. 
Create a postmastr object with pm_prep before proceeding.") + } + + # detect countries, passing along the supplied dictionary if one is given + if (missing(dictionary) == TRUE){ + .data <- pm_country_detect(.data) + } else if (missing(dictionary) == FALSE){ + .data <- pm_country_detect(.data, dictionary = dictionary) + } + + # create output + out <- all(.data$pm.hasCountry) + + # return output + return(out) + +} + +#' Detect Presence of Country +#' +#' @description Determine the presence of country names or abbreviations +#' at the end of a string. +#' +#' @usage pm_country_detect(.data, dictionary) +#' +#' @param .data A postmastr object created with \link{pm_prep} +#' @param dictionary Optional; a tbl created with \code{pm_dictionary} to be used +#' as a master list for countries. If none is specified, the full default +#' country dictionary will be used. +#' +#' @return A tibble with a new logical variable \code{pm.hasCountry} that is +#' \code{TRUE} if a country name or abbreviation from the given dictionary is +#' found at the end of the address and \code{FALSE} otherwise. +#' +#' @importFrom dplyr %>% +#' @importFrom dplyr mutate +#' @importFrom stringr str_c +#' @importFrom stringr str_detect +#' +#' @export +pm_country_detect <- function(.data, dictionary){ + + # create bindings for global variables + pm.address = NULL + + # check for object and key variables + if (pm_has_uid(.data) == FALSE){ + stop("The variable 'pm.uid' is missing from the given object. Create a postmastr object with pm_identify and pm_prep before proceeding.") + } + + if (pm_has_address(.data) == FALSE){ + stop("The variable 'pm.address' is missing from the given object. 
Create a postmastr object with pm_prep before proceeding.") + } + + # load dictionary if not specified + if (missing(dictionary) == TRUE){ + dictionary <- pm_dictionary(type = "country") + } + + # minimize dictionary + dict <- paste(dictionary$con.input, collapse = "|") + + # check observations + .data <- dplyr::mutate(.data, pm.hasCountry = stringr::str_detect(pm.address, + pattern = stringr::str_c("\\b(", dict, ")\\b$"))) + + # return output + return(.data) + +} + + +#' Return Only Unmatched Observations From pm_country_detect +#' +#' @description Automatically subset the results of \link{pm_country_detect} to +#' return only observations that were not found in the dictionary. +#' +#' @usage pm_country_none(.data, dictionary) +#' +#' @param .data A postmastr object created with \link{pm_prep} +#' @param dictionary Optional; a tbl created with \code{pm_dictionary} to be used +#' as a master list for countries. If none is specified, the full default +#' country dictionary will be used. +#' +#' @importFrom dplyr %>% +#' @importFrom dplyr filter +#' @importFrom dplyr select +#' +#' @export +pm_country_none <- function(.data, dictionary){ + + # global bindings + pm.hasCountry = NULL + + # check for object and key variables + if (pm_has_uid(.data) == FALSE){ + stop("The variable 'pm.uid' is missing from the given object. Create a postmastr object with pm_identify and pm_prep before proceeding.") + } + + if (pm_has_address(.data) == FALSE){ + stop("The variable 'pm.address' is missing from the given object. Create a postmastr object with pm_prep before proceeding.") + } + + # load dictionary if not specified + if (missing(dictionary) == TRUE){ + dictionary <- pm_dictionary(type = "country") + } + + # create output + .data %>% + pm_country_detect(dictionary = dictionary) %>% + dplyr::filter(pm.hasCountry == FALSE) %>% + dplyr::select(-pm.hasCountry) -> .data + + # return output + return(.data) + +} + +#' Parse Country +#' +#' @description Parse a country from a string. 
These data +#' should be at the end of the string (i.e. the last word or words). +#' +#' @usage pm_country_parse(.data, dictionary) +#' +#' @param .data A postmastr object created with \link{pm_prep} +#' @param dictionary Optional; a tbl created with \code{pm_dictionary} to be used +#' as a master list for countries. If none is specified, the full default +#' country dictionary will be used. +#' +#' @importFrom dplyr %>% +#' @importFrom dplyr mutate +#' @importFrom dplyr select +#' @importFrom stringr str_c +#' @importFrom stringr str_count +#' @importFrom stringr str_extract +#' @importFrom stringr word +#' +#' @export +pm_country_parse <- function(.data, dictionary){ + + # create bindings for global variables + pm.address = pm.country = NULL + + # check for object and key variables + if (pm_has_uid(.data) == FALSE){ + stop("The variable 'pm.uid' is missing from the given object. Create a postmastr object with pm_identify and pm_prep before proceeding.") + } + + if (pm_has_address(.data) == FALSE){ + stop("The variable 'pm.address' is missing from the given object. 
Create a postmastr object with pm_prep before proceeding.") + } + + # load dictionary if not specified + if (missing(dictionary) == TRUE){ + dictionary <- pm_dictionary(type = "country") + } + + # load dictionary if NULL + if (is.null(dictionary) == TRUE){ + dictionary <- pm_dictionary(type = "country") + } + + # minimize dictionary + dict <- paste(dictionary$con.input, collapse = "|") + + # parse countries + ## extract the dictionary term found at the end of the address, NA if none + .data <- dplyr::mutate(.data, pm.country = + stringr::str_extract(pm.address, + pattern = stringr::str_c("\\b(", dict, ")\\b$"))) + + ## clean address data by dropping as many trailing words as the matched country contains + .data %>% + dplyr::mutate(pm.address = ifelse(is.na(pm.country) == FALSE, + stringr::word(pm.address, start = 1, end = -1-stringr::str_count(pm.country, pattern = "\\w+")), + pm.address)) %>% + pm_country_std(var = pm.country, dictionary = dictionary) -> .data + + # re-order data + vars <- pm_reorder(.data) + .data <- dplyr::select(.data, vars) + + # return output + return(.data) + +} + +#' Standardize Parsed Countries +#' +#' @description Convert countries to the standardized output (\code{con.output}) given in the country dictionary. +#' +#' @usage pm_country_std(.data, var, dictionary) +#' +#' @param .data A postmastr object created with \link{pm_prep} +#' @param var A character variable that may contain countries +#' @param dictionary Optional; a tbl created with \code{pm_dictionary} to be used +#' as a master list for countries. If none is specified, the full default +#' country dictionary will be used. +#' +#' @importFrom dplyr %>% +#' @importFrom dplyr left_join +#' @importFrom dplyr mutate +#' @importFrom dplyr select +#' @importFrom dplyr rename +#' @importFrom rlang := +#' @importFrom rlang enquo +#' @importFrom rlang quo +#' @importFrom rlang sym +#' +#' @export +pm_country_std <- function(.data, var, dictionary){ + + # global variables + . 
= con.input = con.output = NULL + + # save parameters to list + paramList <- as.list(match.call()) + + # unquote + if (!is.character(paramList$var)) { + varQ <- rlang::enquo(var) + } else if (is.character(paramList$var)) { + varQ <- rlang::quo(!! rlang::sym(var)) + } + + varQN <- rlang::quo_name(rlang::enquo(var)) + + # load dictionary if not specified + if (missing(dictionary) == TRUE){ + dictionary <- pm_dictionary(type = "country") + } + + # rename the dictionary's con.input column to match var so it can serve as the join key + dictionary %>% + dplyr::rename(!!varQ := con.input) -> dictionary + + # standardize country names, keeping the original value where the dictionary has no match + .data %>% + dplyr::left_join(., dictionary, by = varQN) %>% + dplyr::mutate(!!varQ := ifelse(is.na(con.output) == FALSE, con.output, !!varQ)) %>% + dplyr::select(-con.output) -> out + + # return output + return(out) + +} + +#' Trim Country +#' +#' @description Remove a country from an address without parsing. These data +#' should be at the end of the string (i.e. the last word or words). +#' +#' @usage pm_country_trim(.data, dictionary) +#' +#' @param .data A postmastr object created with \link{pm_prep} +#' @param dictionary Optional; a tbl created with \code{pm_dictionary} to be used +#' as a master list for countries. If none is specified, the full default +#' country dictionary will be used. +#' +#' @importFrom dplyr %>% +#' @importFrom dplyr mutate +#' @importFrom dplyr select +#' @importFrom stringr str_c +#' @importFrom stringr str_count +#' @importFrom stringr str_extract +#' @importFrom stringr word +#' +#' @export +pm_country_trim <- function(.data, dictionary){ + + # create bindings for global variables + pm.address = pm.country = NULL + + # check for object and key variables + if (pm_has_uid(.data) == FALSE){ + stop("The variable 'pm.uid' is missing from the given object. 
Create a postmastr object with pm_identify and pm_prep before proceeding.") + } + + if (pm_has_address(.data) == FALSE){ + stop("The variable 'pm.address' is missing from the given object. 
Create a postmastr object with pm_prep before proceeding.") + } + + # load dictionary if not specified + if (missing(dictionary) == TRUE){ + dictionary <- pm_dictionary(type = "country") + } + + # load dictionary if NULL + if (is.null(dictionary) == TRUE){ + dictionary <- pm_dictionary(type = "country") + } + + # minimize dictionary + dict <- paste(dictionary$con.input, collapse = "|") + + # parse countries + ## extract the dictionary term found at the end of the address, NA if none + .data <- dplyr::mutate(.data, pm.country = + stringr::str_extract(pm.address, + pattern = stringr::str_c("\\b(", dict, ")\\b$"))) + + ## clean address data by dropping as many trailing words as the matched country contains + .data %>% + dplyr::mutate(pm.address = ifelse(is.na(pm.country) == FALSE, + stringr::word(pm.address, start = 1, end = -1-stringr::str_count(pm.country, pattern = "\\w+")), + pm.address)) -> .data + + # remove the temporary pm.country variable + .data <- dplyr::select(.data, -pm.country) + + # return output + return(.data) + +} diff --git a/R/dictionary.R b/R/dictionary.R index 8199089..472b55d 100644 --- a/R/dictionary.R +++ b/R/dictionary.R @@ -21,7 +21,7 @@ #' @usage pm_dictionary(type, append, filter, case = c("title", "lower", "upper"), locale = "us") #' #' @param type A string indicating the grammatical address element the dictionary -#' should represent. Current options are \code{"state"}, \code{"city"}, +#' should represent. Current options are \code{"country"}, \code{"state"}, \code{"city"}, #' \code{"directional"}, and \code{"suffix"}. 
#' @param append An optional dictionary appendix object created with \code{\link{pm_append}} #' @param filter An optional character scalar or vector with output elements that should @@ -139,6 +139,20 @@ pm_dictionary <- function(type, append, filter, case = c("title", "lower", "uppe out <- pm_case(working, locale = locale, type = type, case = case) + } else if (type == "country"){ + + if (missing(append) == FALSE & missing(filter) == FALSE){ + working <- pm_dictionary_country(append = append, filter = filter) + } else if (missing(append) == FALSE & missing(filter) == TRUE){ + working <- pm_dictionary_country(append = append) + } else if (missing(append) == TRUE & missing(filter) == FALSE){ + working <- pm_dictionary_country(filter = filter) + } else if (missing(append) == TRUE & missing(filter) == TRUE){ + working <- pm_dictionary_country() + } + + out <- pm_case(working, locale = locale, type = type, case = case) + } } @@ -333,6 +347,36 @@ pm_dictionary_us_suffix <- function(append, filter){ } +# country dictionary: loads dic_country, then optionally appends rows and filters on con.output +pm_dictionary_country <- function(append, filter){ + + # global bindings + con.output = NULL + + # load data + out <- postmastr::dic_country + + # optionally append + if (missing(append) == FALSE){ + + # bind rows + out <- dplyr::bind_rows(out, append) + + # re-order observations + out <- out[order(out$con.output),] + + } + + # optionally filter + if (missing(filter) == FALSE){ + out <- dplyr::filter(out, con.output %in% filter) + } + + # return output + return(out) + +} + # Dictionary Case pm_case <- function(.data, locale, type, case){ @@ -344,6 +388,8 @@ pm_case <- function(.data, locale, type, case){ out <- pm_convert_case(.data, var = "dir.input", orderVar = "dir.output", case = case) } else if (type == "suffix"){ out <- pm_convert_case(.data, var = "suf.input", orderVar = "suf.output", case = case) + } else if (type == "country"){ + out <- pm_convert_case(.data, var = 
"con.input", orderVar = "con.output", case = case) } } @@ -405,7 +451,7 @@ 
pm_convert_case <- function(.data, var, orderVar, case){ #' @usage pm_append(type, input, output, locale = "us") #' #' @param type A string indicating the grammatical address element the dictionary -#' should represent. Current options are \code{"state"}, \code{"city"}, +#' should represent. Current options are \code{"country"}, \code{"state"}, \code{"city"}, #' \code{"street"}, \code{"house suffix"}, \code{"directional"}, and \code{"suffix"}. #' @param input A character scalar or vector containing possible terms existing in #' the data. This should be the same length as \code{output}. @@ -533,6 +579,15 @@ pm_append <- function(type, input, output, locale = "us"){ # re-order observations out <- out[order(out$houseSuf.input),] + } else if (type == "country"){ + + out <- dplyr::tibble( + con.output = c(output), + con.input = c(input)) + + # re-order observations by the country input column + out <- out[order(out$con.input),] + } } @@ -610,3 +665,22 @@ pm_append <- function(type, input, output, locale = "us"){ #' head(dic_us_suffix) #' "dic_us_suffix" + +#' Country Dictionary +#' +#' @description A dictionary of names and abbreviations for the United States. +#' +#' @docType data +#' +#' @usage data(dic_country) +#' +#' @format A tibble with 6 rows and 2 variables: +#' \describe{ +#' \item{con.output}{standard output} +#' \item{con.input}{full names and abbreviations} +#' } +#' +#' @examples +#' head(dic_country) +#' +"dic_country" diff --git a/R/parse.R b/R/parse.R index 3262f5e..ed3ab70 100644 --- a/R/parse.R +++ b/R/parse.R @@ -2,7 +2,8 @@ #' #' @description A wrapper around the parse functions that can be used to shorten all #' of \code{postmastr}'s core code down to a single function call once dictionaries -#' have been created and tested against the data. +#' have been created and tested against the data. By default, any country indicators +#' like "US" or "USA" will be removed from output addresses. 
#' #' @usage pm_parse(.data, input, address, output, new_address, ordinal = TRUE, #' unnest = FALSE, include_commas = FALSE, include_unit = TRUE, @@ -205,6 +206,7 @@ pm_parse <- function(.data, input, address, output, new_address, ordinal = TRUE, source %>% pm_prep(var = "address") %>% + pm_country_trim() %>% pm_postal_parse(locale = locale) %>% pm_state_parse(dictionary = state_dict, locale = locale) %>% pm_city_parse(dictionary = city_dict, locale = locale) %>% @@ -224,6 +226,7 @@ pm_parse <- function(.data, input, address, output, new_address, ordinal = TRUE, source %>% pm_prep(var = "address") %>% + pm_country_trim() %>% pm_postal_parse(locale = locale) %>% pm_state_parse(dictionary = state_dict, locale = locale) %>% pm_city_parse(dictionary = city_dict, locale = locale) %>% diff --git a/R/utils.R b/R/utils.R index baff4ed..bb96dcf 100644 --- a/R/utils.R +++ b/R/utils.R @@ -12,7 +12,7 @@ pm_reorder <- function(.data, locale = "us"){ "pm.hasHouseFrac", "pm.houseFrac", "pm.hasAlpha", "pm.hasHouseSuf", "pm.houseSuf", "pm.hasDir", "pm.preDir", "pm.street", "pm.hasStreetSuf", "pm.streetSuf", "pm.sufDir", "pm.hasUnit", "pm.unitType", "pm.unitNum", "pm.hasCity", "pm.city", - "pm.hasState", "pm.state", "pm.hasZip", "pm.zip", "pm.zip4"), + "pm.hasState", "pm.state", "pm.hasZip", "pm.zip", "pm.zip4", "pm.hasCountry", "pm.country"), stringsAsFactors = FALSE ) diff --git a/_pkgdown.yml b/_pkgdown.yml index 55296e3..096635e 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -27,6 +27,17 @@ reference: - pm_has_address - pm_has_uid + - title: "Countries" + desc: "Work with Country Names and Abbreviations" + contents: + - pm_country_any + - pm_country_all + - pm_country_detect + - pm_country_none + - pm_country_parse + - pm_country_trim + - pm_country_std + - title: "Postal Codes" desc: "Work with Postal (Zip) Codes" contents: diff --git a/data/dic_country.rda b/data/dic_country.rda new file mode 100644 index 0000000..6d80a5b Binary files /dev/null and b/data/dic_country.rda 
differ diff --git a/docs/articles/postmastr.html b/docs/articles/postmastr.html index d1c1f3e..97b0dd8 100644 --- a/docs/articles/postmastr.html +++ b/docs/articles/postmastr.html @@ -75,7 +75,7 @@

Address Parsing in R

Christopher Prener, Ph.D.

-

2019-03-12

+

2019-03-21

Source: vignettes/postmastr.Rmd diff --git a/docs/index.html b/docs/index.html index 7a8624a..bfb5e3b 100644 --- a/docs/index.html +++ b/docs/index.html @@ -10,10 +10,10 @@ - diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 72cdd14..791d87e 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -1,4 +1,4 @@ -pandoc: 2.3.1 +pandoc: 2.2.3.2 pkgdown: 1.3.0 pkgdown_sha: ~ articles: diff --git a/docs/reference/dic_country.html b/docs/reference/dic_country.html new file mode 100644 index 0000000..ee6c2e1 --- /dev/null +++ b/docs/reference/dic_country.html @@ -0,0 +1,170 @@ + + + + + + + + +Country Dictionary — dic_country • postmastr + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+ + +
+ +

A dictionary of names and abbreviations for the United States.

+ +
+ +
data(dic_country)
+ +

Format

+ +

A tibble with 6 rows and 2 variables:

+
con.output

standard output

+
con.input

full names and abbreviations

+
+ + +

Examples

+
head(dic_country)
#> # A tibble: 6 x 2 +#> con.output con.input +#> <chr> <chr> +#> 1 US US +#> 2 US USA +#> 3 US United States +#> 4 US United States of America +#> 5 US The United States of America +#> 6 US America
+
+
+ +
+ + +
+ + + + + + diff --git a/docs/reference/index.html b/docs/reference/index.html index 94d650d..a7222f5 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -200,6 +200,56 @@

Validate postmastr pm.uid Variable

+ + + +

Countries

+

Work with Country Names and Abbreviations

+ + + + + +

pm_country_any()

+ +

Does Country Dictionary Return Any Matches

+ + + +

pm_country_all()

+ +

Does Country Dictionary Return a Match for All Observations

+ + + +

pm_country_detect()

+ +

Detect Presence of Country

+ + + +

pm_country_none()

+ +

Return Only Unmatched Observations From pm_country_detect

+ + + +

pm_country_parse()

+ +

Parse Country

+ + + +

pm_country_trim()

+ +

Trim Country

+ + + +

pm_country_std()

+ +

Standardize Parsed Countries

+ @@ -670,6 +720,7 @@

Contents

  • Parse Data
  • Dictionaries
  • Prepare Data
  • +
  • Countries
  • Postal Codes
  • States
  • Cities
  • diff --git a/docs/reference/pm_append.html b/docs/reference/pm_append.html index 8a94465..e4c33b2 100644 --- a/docs/reference/pm_append.html +++ b/docs/reference/pm_append.html @@ -142,7 +142,7 @@

    Arg type

    A string indicating the grammatical address element the dictionary -should represent. Current options are "state", "city", +should represent. Current options are "country", "state", "city", "street", "house suffix", "directional", and "suffix".

    diff --git a/docs/reference/pm_country_all.html b/docs/reference/pm_country_all.html new file mode 100644 index 0000000..e86a356 --- /dev/null +++ b/docs/reference/pm_country_all.html @@ -0,0 +1,171 @@ + + + + + + + + +Does Country Dictionary Return a Match for All Observations — pm_country_all • postmastr + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    Determine whether the country dictionary returns matches for all observations.

    + +
    + +
    pm_country_all(.data, dictionary)
    + +

    Arguments

    + + + + + + + + + + +
    .data

    A postmastr object created with pm_prep

    dictionary

    Optional; a tbl created with pm_dictionary to be used +as a master list for countries. If none is specified, the full default +country dictionary will be used.

    + +

    Value

    + +

    A logical scalar is returned that is TRUE if the data contains a country + name or abbreviation for every observation in the data set and FALSE otherwise.

    + + +
    + +
    + + +
    + + + + + + diff --git a/docs/reference/pm_country_any.html b/docs/reference/pm_country_any.html new file mode 100644 index 0000000..29afe6d --- /dev/null +++ b/docs/reference/pm_country_any.html @@ -0,0 +1,172 @@ + + + + + + + + +Does Country Dictionary Return Any Matches — pm_country_any • postmastr + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    Determine whether the country dictionary returns any matches.

    + +
    + +
    pm_country_any(.data, dictionary)
    + +

    Arguments

    + + + + + + + + + + +
    .data

    A postmastr object created with pm_prep

    dictionary

    Optional; a tbl created with pm_dictionary to be used +as a master list for countries. If none is specified, the full default +country dictionary will be used.

    + +

    Value

    + +

A logical scalar is returned that is TRUE if the data contains at + least one country name or abbreviation in the given dictionary and FALSE + if it does not.

    + + +
    + +
    + + +
    + + + + + + diff --git a/docs/reference/pm_country_detect.html b/docs/reference/pm_country_detect.html new file mode 100644 index 0000000..a9520a4 --- /dev/null +++ b/docs/reference/pm_country_detect.html @@ -0,0 +1,174 @@ + + + + + + + + +Detect Presence of Country — pm_country_detect • postmastr + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    Determine the presence of country names or abbreviations + at the end of a string.

    + +
    + +
    pm_country_detect(.data, dictionary)
    + +

    Arguments

    + + + + + + + + + + +
    .data

    A postmastr object created with pm_prep

    dictionary

    Optional; a tbl created with pm_dictionary to be used +as a master list for countries. If none is specified, the full default +country dictionary will be used.

    + +

    Value

    + +

    A tibble with a new logical variable pm.hasCountry that is + TRUE if a country name or abbreviation from the given dictionary is + found at the end of the address and FALSE otherwise.

    + + +
    + +
    + + +
    + + + + + + diff --git a/docs/reference/pm_country_none.html b/docs/reference/pm_country_none.html new file mode 100644 index 0000000..7bdbee3 --- /dev/null +++ b/docs/reference/pm_country_none.html @@ -0,0 +1,166 @@ + + + + + + + + +Return Only Unmatched Observations From pm_country_detect — pm_country_none • postmastr + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    Automatically subset the results of pm_country_detect to + return only observations that were not found in the dictionary.

    + +
    + +
    pm_country_none(.data, dictionary)
    + +

    Arguments

    + + + + + + + + + + +
    .data

    A postmastr object created with pm_prep

    dictionary

    Optional; a tbl created with pm_dictionary to be used +as a master list for countries. If none is specified, the full default +country dictionary will be used.

    + + +
    + +
    + + +
    + + + + + + diff --git a/docs/reference/pm_country_parse.html b/docs/reference/pm_country_parse.html new file mode 100644 index 0000000..25bd9a1 --- /dev/null +++ b/docs/reference/pm_country_parse.html @@ -0,0 +1,166 @@ + + + + + + + + +Parse Country — pm_country_parse • postmastr + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    Parse a country from a string. These data + should be at the end of the string (i.e. the last word or words).

    + +
    + +
    pm_country_parse(.data, dictionary)
    + +

    Arguments

    + + + + + + + + + + +
    .data

    A postmastr object created with pm_prep

    dictionary

    Optional; a tbl created with pm_dictionary to be used +as a master list for countries. If none is specified, the full default +country dictionary will be used.

    + + +
    + +
    + + +
    + + + + + + diff --git a/docs/reference/pm_country_std.html b/docs/reference/pm_country_std.html new file mode 100644 index 0000000..1fd10ab --- /dev/null +++ b/docs/reference/pm_country_std.html @@ -0,0 +1,168 @@ + + + + + + + + +Standardize Parsed Countries — pm_country_std • postmastr + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    Convert countries to USPS preferred two-letter abbreviation.

    + +
    + +
    pm_country_std(.data, var, dictionary)
    + +

    Arguments

    + + + + + + + + + + + + + + +
    .data

    A postmastr object created with pm_prep

    var

    A character variable that may contain countries

    dictionary

    Optional; a tbl created with pm_dictionary to be used +as a master list for countries. If none is specified, the full default +country dictionary will be used.

    + + +
    + +
    + + +
    + + + + + + diff --git a/docs/reference/pm_country_trim.html b/docs/reference/pm_country_trim.html new file mode 100644 index 0000000..b2126dc --- /dev/null +++ b/docs/reference/pm_country_trim.html @@ -0,0 +1,166 @@ + + + + + + + + +Trim Country — pm_country_trim • postmastr + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + +
    + +
    +
    + + +
    + +

    Remove a country from an address without parsing. These data + should be at the end of the string (i.e. the last word or words).

    + +
    + +
    pm_country_trim(.data, dictionary)
    + +

    Arguments

    + + + + + + + + + + +
    .data

    A postmastr object created with pm_prep

    dictionary

    Optional; a tbl created with pm_dictionary to be used +as a master list for countries. If none is specified, the full default +country dictionary will be used.

    + + +
    + +
    + + +
    + + + + + + diff --git a/docs/reference/pm_dictionary.html b/docs/reference/pm_dictionary.html index fcf40e4..474c731 100644 --- a/docs/reference/pm_dictionary.html +++ b/docs/reference/pm_dictionary.html @@ -144,7 +144,7 @@

    Arg type

    A string indicating the grammatical address element the dictionary -should represent. Current options are "state", "city", +should represent. Current options are "country", "state", "city", "directional", and "suffix".

    diff --git a/docs/reference/pm_parse.html b/docs/reference/pm_parse.html index f1c05f6..aff4b6b 100644 --- a/docs/reference/pm_parse.html +++ b/docs/reference/pm_parse.html @@ -34,7 +34,8 @@ + have been created and tested against the data. By default, any country indicators + like "US" or "USA" will be removed from output addresses." /> @@ -118,7 +119,8 @@

    Parse Street Addresses

    A wrapper around the parse functions that can be used to shorten all of postmastr's core code down to a single function call once dictionaries - have been created and tested against the data.

    + have been created and tested against the data. By default, any country indicators + like "US" or "USA" will be removed from output addresses.

    diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 3caeaeb..e814900 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -3,6 +3,9 @@ https://slu-openGIS.github.io/postmastr//index.html + + https://slu-openGIS.github.io/postmastr//reference/dic_country.html + https://slu-openGIS.github.io/postmastr//reference/dic_us_dir.html @@ -33,6 +36,27 @@ https://slu-openGIS.github.io/postmastr//reference/pm_city_std.html + + https://slu-openGIS.github.io/postmastr//reference/pm_country_all.html + + + https://slu-openGIS.github.io/postmastr//reference/pm_country_any.html + + + https://slu-openGIS.github.io/postmastr//reference/pm_country_detect.html + + + https://slu-openGIS.github.io/postmastr//reference/pm_country_none.html + + + https://slu-openGIS.github.io/postmastr//reference/pm_country_parse.html + + + https://slu-openGIS.github.io/postmastr//reference/pm_country_std.html + + + https://slu-openGIS.github.io/postmastr//reference/pm_country_trim.html + https://slu-openGIS.github.io/postmastr//reference/pm_dictionary.html diff --git a/inst/extdata/countryData.R b/inst/extdata/countryData.R new file mode 100644 index 0000000..f500cdc --- /dev/null +++ b/inst/extdata/countryData.R @@ -0,0 +1,10 @@ +# Create Country Dictionary + +dic_country <- dplyr::tibble( + con.output = c("US", "US", "US", "US", "US", "US"), + con.input = c("US", "USA", "United States", "United States of America", "The United States of America", "America") +) + +dic_country <- dic_country[order(dic_country$con.output),] + +usethis::use_data(dic_country, overwrite = TRUE) diff --git a/inst/extdata/testCountry.R b/inst/extdata/testCountry.R new file mode 100644 index 0000000..a99a574 --- /dev/null +++ b/inst/extdata/testCountry.R @@ -0,0 +1,6 @@ +postmastr::sushi1 %>% + filter(name != "Drunken Fish - Ballpark Village") %>% + mutate(address = stringr::str_c(address, "USA", sep = " ")) %>% + pm_identify(var = address) %>% + pm_prep(var = "address") %>% + pm_country_trim() diff --git 
a/man/dic_country.Rd b/man/dic_country.Rd new file mode 100644 index 0000000..c418669 --- /dev/null +++ b/man/dic_country.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dictionary.R +\docType{data} +\name{dic_country} +\alias{dic_country} +\title{Country Dictionary} +\format{A tibble with 6 rows and 2 variables: +\describe{ + \item{con.output}{standard output} + \item{con.input}{full names and abbreviations} +}} +\usage{ +data(dic_country) +} +\description{ +A list of abbreviations for the United States. +} +\examples{ +head(dic_country) + +} +\keyword{datasets} diff --git a/man/pm_append.Rd b/man/pm_append.Rd index 866cd46..e683a4f 100644 --- a/man/pm_append.Rd +++ b/man/pm_append.Rd @@ -8,7 +8,7 @@ pm_append(type, input, output, locale = "us") } \arguments{ \item{type}{A string indicating the grammatical address element the dictionary -should represent. Current options are \code{"state"}, \code{"city"}, +should represent. Current options are \code{"country"}, \code{"state"}, \code{"city"}, \code{"street"}, \code{"house suffix"}, \code{"directional"}, and \code{"suffix"}.} \item{input}{A character scalar or vector containing possible terms existing in diff --git a/man/pm_country_all.Rd b/man/pm_country_all.Rd new file mode 100644 index 0000000..517191f --- /dev/null +++ b/man/pm_country_all.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/country.R +\name{pm_country_all} +\alias{pm_country_all} +\title{Does Country Dictionary Return a Match for All Observations} +\usage{ +pm_country_all(.data, dictionary) +} +\arguments{ +\item{.data}{A postmastr object created with \link{pm_prep}} + +\item{dictionary}{Optional; a tbl created with \code{pm_dictionary} to be used +as a master list for countries. 
If none is specified, the full default +country dictionary will be used.} +} +\value{ +A logical scalar is returned that is \code{TRUE} if the data contains a country + name or abbreviation for every observation in the data set and \code{FALSE} otherwise. +} +\description{ +Determine whether the country dictionary returns matches for all observations. +} diff --git a/man/pm_country_any.Rd b/man/pm_country_any.Rd new file mode 100644 index 0000000..a10446a --- /dev/null +++ b/man/pm_country_any.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/country.R +\name{pm_country_any} +\alias{pm_country_any} +\title{Does Country Dictionary Return Any Matches} +\usage{ +pm_country_any(.data, dictionary) +} +\arguments{ +\item{.data}{A postmastr object created with \link{pm_prep}} + +\item{dictionary}{Optional; a tbl created with \code{pm_dictionary} to be used +as a master list for countries. If none is specified, the full default +country dictionary will be used.} +} +\value{ +A logical scalar is returned that is \code{TRUE} if the data contains at + least one country name or abbreviation in the given dictionary and \code{FALSE} + if it does not. +} +\description{ +Determine whether the country dictionary returns any matches. +} diff --git a/man/pm_country_detect.Rd b/man/pm_country_detect.Rd new file mode 100644 index 0000000..6e7e60f --- /dev/null +++ b/man/pm_country_detect.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/country.R +\name{pm_country_detect} +\alias{pm_country_detect} +\title{Detect Presence of Country} +\usage{ +pm_country_detect(.data, dictionary) +} +\arguments{ +\item{.data}{A postmastr object created with \link{pm_prep}} + +\item{dictionary}{Optional; a tbl created with \code{pm_dictionary} to be used +as a master list for countries. 
If none is specified, the full default +country dictionary will be used.} +} +\value{ +A tibble with a new logical variable \code{pm.hasCountry} that is + \code{TRUE} if a country name or abbreviation from the given dictionary is + found at the end of the address and \code{FALSE} otherwise. +} +\description{ +Determine the presence of country names or abbreviations + at the end of a string. +} diff --git a/man/pm_country_none.Rd b/man/pm_country_none.Rd new file mode 100644 index 0000000..3be1e36 --- /dev/null +++ b/man/pm_country_none.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/country.R +\name{pm_country_none} +\alias{pm_country_none} +\title{Return Only Unmatched Observations From pm_country_detect} +\usage{ +pm_country_none(.data, dictionary) +} +\arguments{ +\item{.data}{A postmastr object created with \link{pm_prep}} + +\item{dictionary}{Optional; a tbl created with \code{pm_dictionary} to be used +as a master list for countries. If none is specified, the full default +country dictionary will be used.} +} +\description{ +Automatically subset the results of \link{pm_country_detect} to + return only observations that were not found in the dictionary. +} diff --git a/man/pm_country_parse.Rd b/man/pm_country_parse.Rd new file mode 100644 index 0000000..b9ab341 --- /dev/null +++ b/man/pm_country_parse.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/country.R +\name{pm_country_parse} +\alias{pm_country_parse} +\title{Parse Country} +\usage{ +pm_country_parse(.data, dictionary) +} +\arguments{ +\item{.data}{A postmastr object created with \link{pm_prep}} + +\item{dictionary}{Optional; a tbl created with \code{pm_dictionary} to be used +as a master list for countries. If none is specified, the full default +country dictionary will be used.} +} +\description{ +Parse a country from a string. These data + should be at the end of the string (i.e. 
the last word or words). +} diff --git a/man/pm_country_std.Rd b/man/pm_country_std.Rd new file mode 100644 index 0000000..a1314e4 --- /dev/null +++ b/man/pm_country_std.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/country.R +\name{pm_country_std} +\alias{pm_country_std} +\title{Standardize Parsed Countries} +\usage{ +pm_country_std(.data, var, dictionary) +} +\arguments{ +\item{.data}{A postmastr object created with \link{pm_prep}} + +\item{var}{A character variable that may contain countries} + +\item{dictionary}{Optional; a tbl created with \code{pm_dictionary} to be used +as a master list for countries. If none is specified, the full default +country dictionary will be used.} +} +\description{ +Convert countries to USPS preferred two-letter abbreviation. +} diff --git a/man/pm_country_trim.Rd b/man/pm_country_trim.Rd new file mode 100644 index 0000000..59bcee0 --- /dev/null +++ b/man/pm_country_trim.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/country.R +\name{pm_country_trim} +\alias{pm_country_trim} +\title{Trim Country} +\usage{ +pm_country_trim(.data, dictionary) +} +\arguments{ +\item{.data}{A postmastr object created with \link{pm_prep}} + +\item{dictionary}{Optional; a tbl created with \code{pm_dictionary} to be used +as a master list for countries. If none is specified, the full default +country dictionary will be used.} +} +\description{ +Remove a country from an address without parsing. These data + should be at the end of the string (i.e. the last word or words). +} diff --git a/man/pm_dictionary.Rd b/man/pm_dictionary.Rd index 7ae210f..c33ff34 100644 --- a/man/pm_dictionary.Rd +++ b/man/pm_dictionary.Rd @@ -8,7 +8,7 @@ pm_dictionary(type, append, filter, case = c("title", "lower", "upper"), locale } \arguments{ \item{type}{A string indicating the grammatical address element the dictionary -should represent. 
Current options are \code{"state"}, \code{"city"}, +should represent. Current options are \code{"country"}, \code{"state"}, \code{"city"}, \code{"directional"}, and \code{"suffix"}.} \item{append}{An optional dictionary appendix object created with \code{\link{pm_append}}} diff --git a/man/pm_parse.Rd b/man/pm_parse.Rd index 2854b1d..16ca214 100644 --- a/man/pm_parse.Rd +++ b/man/pm_parse.Rd @@ -90,7 +90,8 @@ An updated version of the source data with, at a minimum, a new variable contain \description{ A wrapper around the parse functions that can be used to shorten all of \code{postmastr}'s core code down to a single function call once dictionaries - have been created and tested against the data. + have been created and tested against the data. By default, any country indicators + like "US" or "USA" will be removed from output addresses. } \examples{ # construct dictionaries