Added test for grouped and other extended data structures.

wangyuchen · Feb 26, 2017 · deb86a0 · deb86a0
1 parent 03ef09a
commit deb86a0
Show file tree

Hide file tree

Showing 7 changed files with 180 additions and 4 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: extdplyr
 Type: Package
-Title: Data Manipulation Extension Based on 'Dplyr' and 'Tidyr'
-Version: 0.1.3
+Title: Data Manipulation Extensions of 'Dplyr' and 'Tidyr'
+Version: 0.1.3.9000
 Authors@R: person("Yuchen", "Wang", email = "[email protected]",
     role = c("aut", "cre"))
 Description: If 'dplyr' is a grammar for data manipulation, 'extdplyr' is like
@@ -17,3 +17,4 @@ Imports:
     tidyr,
     lazyeval
 RoxygenNote: 6.0.1
+Suggests: testthat
diff --git a/R/grp_routine.R b/R/grp_routine.R
@@ -70,6 +70,7 @@ ind_to_char_ <- function(data, col, from, ret_factor = FALSE, remove = TRUE,
   # check if it's indicator. Indicators should be integer 0 or 1.
   # According to coercion rule, logical - integer - double - character,
   # Here convert to logical first for safety.
+
   int_df <- data[from]
   int_df[] <- lapply(int_df, function(x) as.integer(as.logical(x)))
 
@@ -94,9 +95,14 @@ ind_to_char_ <- function(data, col, from, ret_factor = FALSE, remove = TRUE,
 
   if (ret_factor) char_vec <- as.factor(char_vec)
 
-  ret <- dplyr::mutate_(data, .dots = named_expr(col, ~ char_vec))
+  first_col <- which(names(data) %in% from)[1]
+  ret <- append_col(data, char_vec, col, first_col - 1)
+
+  # Give back groups
+  if (dplyr::is.grouped_df(data))
+    ret <- dplyr::group_by_(ret, .dots = dplyr::groups(data))
 
-  if (remove) ret <- dplyr::select(ret, -dplyr::one_of(from))
+  if (remove) ret <- ret[setdiff(names(ret), from)]
 
   ret
 }

diff --git a/R/utilities.R b/R/utilities.R
@@ -25,3 +25,19 @@ check_se_column <- function(col_name) {
 common_params <- function(data, col, .dots) {
   NULL
 }
+
+
+# Adapted from tidyr
+append_df <- function (x, values, after = length(x)) {
+  y <- append(x, values, after = after)
+  class(y) <- class(x)
+  attr(y, "row.names") <- attr(x, "row.names")
+  y
+}
+
+append_col <- function (x, col, name, after = length(x)) {
+  name <- enc2utf8(name)
+  append_df(x, named_expr(name, col), after = after)
+}
+
+
diff --git a/examples/ind_to_char_ex.R b/examples/ind_to_char_ex.R
@@ -1,3 +1,21 @@
+# Supports converting the following atomic types to indicator
+
+df <- data.frame(integer_ind = c(2L, 0L, 0L, 0L, 0L, 0L),
+                 # non-zero integer is 1, otherwise 0.
+                 logcal_ind = c(FALSE, TRUE, FALSE, FALSE, FALSE, FALSE),
+                 # TRUE is 1.
+                 double_ind = c(0, 0, 2.0, 0, 0, 0),
+                 # non-zero double is 1.
+                 char_ind = c("FALSE", "FALSE", "F", "TRUE", "T", "FALSE"),
+                 # "T" and "TRUE" converts to 1.
+                 factor_ind = factor(c(1, 1, 1, 1, 1, 0), levels = c(0, 1),
+                                     labels = c(TRUE, FALSE)),
+                 # Factors are converted based on levels.
+                 stringsAsFactors = FALSE)
+
+ind_to_char_(df, col = "new_y", from = names(df), remove = FALSE)
+
+
 # ind_to_char as complement to use model.matrix on a factor
 df <- data.frame(x = 1:6, y = factor(c(letters[1:5], NA)))
 ind_df <- as.data.frame(model.matrix(~ x + y - 1,

diff --git a/man/ind_to_char.Rd b/man/ind_to_char.Rd
diff --git a/tests/testthat.R b/tests/testthat.R
@@ -0,0 +1,4 @@
+library(testthat)
+library(extdplyr)
+
+test_check("extdplyr")
diff --git a/tests/testthat/test_grp_routine.R b/tests/testthat/test_grp_routine.R
@@ -0,0 +1,113 @@
+library(extdplyr)
+context("ind_to_char_ and grp_routine_")
+
+test_that("ind_to_char_ works with regular data.frame", {
+  df <- data.frame(x = 1:5, y = factor(c(letters[1:5])))
+  ind_df <- as.data.frame(model.matrix(~ x + y - 1, df))
+
+  # Using SE
+  df_ret <- ind_to_char_(ind_df, col = "new_y",
+                         from = c("ya", "yb", "yc", "yd", "ye"))
+
+  expect_equal(ncol(df_ret), 2)
+  expect_equal(df_ret[['new_y']], c("ya", "yb", "yc", "yd", "ye"))
+
+  df_ret2 <- ind_to_char_(ind_df, col = "new_y",
+                          from = c("ya", "yb", "yc", "yd", "ye"),
+                          remove = FALSE)
+  expect_equal(ncol(df_ret2), 7)
+  expect_equal(df_ret2[["new_y"]], c("ya", "yb", "yc", "yd", "ye"))
+})
+
+
+test_that("ind_to_char_ works with non-integer indicators", {
+  df <- data.frame(integer_ind = c(1L, 0L, 0L, 0L, 0L, 0L),
+                   logcal_ind = c(FALSE, TRUE, FALSE, FALSE, FALSE, FALSE),
+                   double_ind = c(0, 0, 2.0, 0, 0, 0),
+                   char_ind = c("FALSE", "FALSE", "F", "TRUE", "T", "FALSE"),
+                   factor_ind = factor(c(1, 1, 1, 1, 1, 0), levels = c(0, 1),
+                                       labels = c(TRUE, FALSE)),
+                   stringsAsFactors = FALSE)
+
+  # Using SE
+  df_ret <- ind_to_char_(df, col = "new_y", from = names(df), remove = FALSE)
+
+  expect_equal(ncol(df_ret), 6)
+  expect_equal(which(names(df_ret) == "new_y"), 1)
+  expect_equal(df_ret[['new_y']],
+               c("integer_ind", "logcal_ind", "double_ind", "char_ind",
+                 "char_ind", "factor_ind"))
+
+})
+
+
+
+library(dplyr)
+test_that("ind_to_char_ works with tbl_df, tbl, data.frame", {
+  df <- data.frame(x = 1:5, y = factor(c(letters[1:5])))
+  ind_df <- as_data_frame(model.matrix(~ x + y - 1, df))
+
+  # Using SE
+  df_ret <- ind_to_char_(ind_df, col = "new_y",
+                         from = c("ya", "yb", "yc", "yd", "ye"))
+
+  expect_equal(class(ind_df), class(df_ret))
+  expect_equal(ncol(df_ret), 2)
+  expect_equal(df_ret[['new_y']], c("ya", "yb", "yc", "yd", "ye"))
+
+  df_ret2 <- ind_to_char_(ind_df, col = "new_y",
+                          from = c("ya", "yb", "yc", "yd", "ye"),
+                          remove = FALSE)
+
+  expect_equal(class(ind_df), class(df_ret2))
+  expect_equal(ncol(df_ret2), 7)
+  expect_equal(df_ret2[["new_y"]], c("ya", "yb", "yc", "yd", "ye"))
+})
+
+
+test_that("ind_to_char_ works with grouped_df, tbl_df, tbl, data.frame", {
+  df <- data.frame(x = 1:5, y = factor(c(letters[1:5])))
+  ind_df <- as_data_frame(model.matrix(~ x + y - 1, df)) %>%
+    group_by(z = x > 3)
+
+  # Using SE
+  df_ret <- ind_to_char_(ind_df, col = "new_y",
+                         from = c("ya", "yb", "yc", "yd", "ye"))
+
+  expect_equal(class(ind_df), class(df_ret))
+  expect_equal(ncol(df_ret), 3)
+  expect_equal(df_ret[['new_y']], c("ya", "yb", "yc", "yd", "ye"))
+
+  df_ret2 <- ind_to_char_(ind_df, col = "new_y",
+                          from = c("ya", "yb", "yc", "yd", "ye"),
+                          remove = FALSE)
+
+  expect_equal(class(ind_df), class(df_ret2))
+  expect_equal(ncol(df_ret2), 8)
+  expect_equal(df_ret2[["new_y"]], c("ya", "yb", "yc", "yd", "ye"))
+})
+
+
+library(data.table)
+test_that("ind_to_char_ works with tbl_df, tbl, data.frame", {
+  df <- data.frame(x = 1:5, y = factor(c(letters[1:5])))
+  ind_df <- data.table(model.matrix(~ x + y - 1, df))
+
+  # Using SE
+  df_ret <- ind_to_char_(ind_df, col = "new_y",
+                         from = c("ya", "yb", "yc", "yd", "ye"))
+
+  expect_equal(class(ind_df), class(df_ret))
+  expect_equal(ncol(df_ret), 2)
+  expect_equal(df_ret[['new_y']], c("ya", "yb", "yc", "yd", "ye"))
+
+  df_ret2 <- ind_to_char_(ind_df, col = "new_y",
+                          from = c("ya", "yb", "yc", "yd", "ye"),
+                          remove = FALSE)
+
+  expect_equal(class(ind_df), class(df_ret2))
+  expect_equal(ncol(df_ret2), 7)
+  expect_equal(df_ret2[["new_y"]], c("ya", "yb", "yc", "yd", "ye"))
+})
+
+