Skip to content

Commit

Permalink
Added test for grouped and other extended data structures.
Browse files Browse the repository at this point in the history
  • Loading branch information
wangyuchen committed Feb 26, 2017
1 parent 03ef09a commit deb86a0
Show file tree
Hide file tree
Showing 7 changed files with 180 additions and 4 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: extdplyr
Type: Package
Title: Data Manipulation Extension Based on 'Dplyr' and 'Tidyr'
Version: 0.1.3
Title: Data Manipulation Extensions of 'Dplyr' and 'Tidyr'
Version: 0.1.3.9000
Authors@R: person("Yuchen", "Wang", email = "[email protected]",
role = c("aut", "cre"))
Description: If 'dplyr' is a grammar for data manipulation, 'extdplyr' is like
Expand All @@ -17,3 +17,4 @@ Imports:
tidyr,
lazyeval
RoxygenNote: 6.0.1
Suggests: testthat
10 changes: 8 additions & 2 deletions R/grp_routine.R
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ ind_to_char_ <- function(data, col, from, ret_factor = FALSE, remove = TRUE,
# check if it's indicator. Indicators should be integer 0 or 1.
# According to coercion rule, logical - integer - double - character,
# Here convert to logical first for safety.

int_df <- data[from]
int_df[] <- lapply(int_df, function(x) as.integer(as.logical(x)))

Expand All @@ -94,9 +95,14 @@ ind_to_char_ <- function(data, col, from, ret_factor = FALSE, remove = TRUE,

if (ret_factor) char_vec <- as.factor(char_vec)

ret <- dplyr::mutate_(data, .dots = named_expr(col, ~ char_vec))
first_col <- which(names(data) %in% from)[1]
ret <- append_col(data, char_vec, col, first_col - 1)

# Give back groups
if (dplyr::is.grouped_df(data))
ret <- dplyr::group_by_(ret, .dots = dplyr::groups(data))

if (remove) ret <- dplyr::select(ret, -dplyr::one_of(from))
if (remove) ret <- ret[setdiff(names(ret), from)]

ret
}
Expand Down
16 changes: 16 additions & 0 deletions R/utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,19 @@ check_se_column <- function(col_name) {
common_params <- function(data, col, .dots) {
NULL
}


# Adapted from tidyr
append_df <- function (x, values, after = length(x)) {
y <- append(x, values, after = after)
class(y) <- class(x)
attr(y, "row.names") <- attr(x, "row.names")
y
}

append_col <- function (x, col, name, after = length(x)) {
name <- enc2utf8(name)
append_df(x, named_expr(name, col), after = after)
}


18 changes: 18 additions & 0 deletions examples/ind_to_char_ex.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,21 @@
# Supports converting the following atomic types to indicator

df <- data.frame(integer_ind = c(2L, 0L, 0L, 0L, 0L, 0L),
# non-zero integer is 1, otherwise 0.
logcal_ind = c(FALSE, TRUE, FALSE, FALSE, FALSE, FALSE),
# TRUE is 1.
double_ind = c(0, 0, 2.0, 0, 0, 0),
# non-zero double is 1.
char_ind = c("FALSE", "FALSE", "F", "TRUE", "T", "FALSE"),
# "T" and "TRUE" converts to 1.
factor_ind = factor(c(1, 1, 1, 1, 1, 0), levels = c(0, 1),
labels = c(TRUE, FALSE)),
# Factors are converted based on levels.
stringsAsFactors = FALSE)

ind_to_char_(df, col = "new_y", from = names(df), remove = FALSE)


# ind_to_char as complement to use model.matrix on a factor
df <- data.frame(x = 1:6, y = factor(c(letters[1:5], NA)))
ind_df <- as.data.frame(model.matrix(~ x + y - 1,
Expand Down
18 changes: 18 additions & 0 deletions man/ind_to_char.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions tests/testthat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
library(testthat)
library(extdplyr)

test_check("extdplyr")
113 changes: 113 additions & 0 deletions tests/testthat/test_grp_routine.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
library(extdplyr)
context("ind_to_char_ and grp_routine_")

test_that("ind_to_char_ works with regular data.frame", {
df <- data.frame(x = 1:5, y = factor(c(letters[1:5])))
ind_df <- as.data.frame(model.matrix(~ x + y - 1, df))

# Using SE
df_ret <- ind_to_char_(ind_df, col = "new_y",
from = c("ya", "yb", "yc", "yd", "ye"))

expect_equal(ncol(df_ret), 2)
expect_equal(df_ret[['new_y']], c("ya", "yb", "yc", "yd", "ye"))

df_ret2 <- ind_to_char_(ind_df, col = "new_y",
from = c("ya", "yb", "yc", "yd", "ye"),
remove = FALSE)
expect_equal(ncol(df_ret2), 7)
expect_equal(df_ret2[["new_y"]], c("ya", "yb", "yc", "yd", "ye"))
})


test_that("ind_to_char_ works with non-integer indicators", {
df <- data.frame(integer_ind = c(1L, 0L, 0L, 0L, 0L, 0L),
logcal_ind = c(FALSE, TRUE, FALSE, FALSE, FALSE, FALSE),
double_ind = c(0, 0, 2.0, 0, 0, 0),
char_ind = c("FALSE", "FALSE", "F", "TRUE", "T", "FALSE"),
factor_ind = factor(c(1, 1, 1, 1, 1, 0), levels = c(0, 1),
labels = c(TRUE, FALSE)),
stringsAsFactors = FALSE)

# Using SE
df_ret <- ind_to_char_(df, col = "new_y", from = names(df), remove = FALSE)

expect_equal(ncol(df_ret), 6)
expect_equal(which(names(df_ret) == "new_y"), 1)
expect_equal(df_ret[['new_y']],
c("integer_ind", "logcal_ind", "double_ind", "char_ind",
"char_ind", "factor_ind"))

})



library(dplyr)
test_that("ind_to_char_ works with tbl_df, tbl, data.frame", {
df <- data.frame(x = 1:5, y = factor(c(letters[1:5])))
ind_df <- as_data_frame(model.matrix(~ x + y - 1, df))

# Using SE
df_ret <- ind_to_char_(ind_df, col = "new_y",
from = c("ya", "yb", "yc", "yd", "ye"))

expect_equal(class(ind_df), class(df_ret))
expect_equal(ncol(df_ret), 2)
expect_equal(df_ret[['new_y']], c("ya", "yb", "yc", "yd", "ye"))

df_ret2 <- ind_to_char_(ind_df, col = "new_y",
from = c("ya", "yb", "yc", "yd", "ye"),
remove = FALSE)

expect_equal(class(ind_df), class(df_ret2))
expect_equal(ncol(df_ret2), 7)
expect_equal(df_ret2[["new_y"]], c("ya", "yb", "yc", "yd", "ye"))
})


test_that("ind_to_char_ works with grouped_df, tbl_df, tbl, data.frame", {
df <- data.frame(x = 1:5, y = factor(c(letters[1:5])))
ind_df <- as_data_frame(model.matrix(~ x + y - 1, df)) %>%
group_by(z = x > 3)

# Using SE
df_ret <- ind_to_char_(ind_df, col = "new_y",
from = c("ya", "yb", "yc", "yd", "ye"))

expect_equal(class(ind_df), class(df_ret))
expect_equal(ncol(df_ret), 3)
expect_equal(df_ret[['new_y']], c("ya", "yb", "yc", "yd", "ye"))

df_ret2 <- ind_to_char_(ind_df, col = "new_y",
from = c("ya", "yb", "yc", "yd", "ye"),
remove = FALSE)

expect_equal(class(ind_df), class(df_ret2))
expect_equal(ncol(df_ret2), 8)
expect_equal(df_ret2[["new_y"]], c("ya", "yb", "yc", "yd", "ye"))
})


library(data.table)
test_that("ind_to_char_ works with tbl_df, tbl, data.frame", {
df <- data.frame(x = 1:5, y = factor(c(letters[1:5])))
ind_df <- data.table(model.matrix(~ x + y - 1, df))

# Using SE
df_ret <- ind_to_char_(ind_df, col = "new_y",
from = c("ya", "yb", "yc", "yd", "ye"))

expect_equal(class(ind_df), class(df_ret))
expect_equal(ncol(df_ret), 2)
expect_equal(df_ret[['new_y']], c("ya", "yb", "yc", "yd", "ye"))

df_ret2 <- ind_to_char_(ind_df, col = "new_y",
from = c("ya", "yb", "yc", "yd", "ye"),
remove = FALSE)

expect_equal(class(ind_df), class(df_ret2))
expect_equal(ncol(df_ret2), 7)
expect_equal(df_ret2[["new_y"]], c("ya", "yb", "yc", "yd", "ye"))
})


0 comments on commit deb86a0

Please sign in to comment.