Skip to content

Commit

Permalink
more data sets
Browse files Browse the repository at this point in the history
  • Loading branch information
dicook committed Dec 5, 2024
1 parent 89c5f25 commit b9c45b6
Show file tree
Hide file tree
Showing 10 changed files with 121 additions and 2 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: mulgar
Title: Functions for Pre-Processing Data for Multivariate Data Visualisation using Tours
Version: 1.0.2
Version: 1.0.3
Authors@R: c(
person("Dianne", "Cook",
role = c("aut", "cre"),
Expand Down Expand Up @@ -36,6 +36,6 @@ Encoding: UTF-8
LazyData: true
LazyDataCompression: bzip2
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.2
URL: https://dicook.github.io/mulgar/, https://github.com/dicook/mulgar
BugReports: https://github.com/dicook/mulgar/issues
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# mulgar 1.0.3

* Added five simulated data sets containing anomalies: `anomaly1`, `anomaly2`, `anomaly3`, `anomaly4` and `anomaly5`

# mulgar 1.0.2

* Fix with is_tibble
Expand Down
19 changes: 19 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -333,3 +333,22 @@ NULL
#' ggplot(c1, aes(x=x1, y=x2)) +
#' geom_point() + theme(aspect.ratio=1)
NULL

#' Data sets with anomalies
#'
#' Simulated data sets, each with four numeric variables and a single
#' anomalous observation added to the simulated points.
#'
#' \describe{
#' \item{x1, x2, x3, x4}{numeric variables}
#' }
#'
#' @name anomaly1
#' @aliases anomaly2 anomaly3 anomaly4 anomaly5
#' @docType data
#' @format Data sets with four numeric variables, each containing one anomaly
#' @keywords datasets
#' @examples
#' library(GGally)
#' ggscatmat(anomaly1)
#' ggscatmat(anomaly2)
NULL
69 changes: 69 additions & 0 deletions data-raw/simulate.R
Original file line number Diff line number Diff line change
Expand Up @@ -167,3 +167,72 @@ ggplot(as.data.frame(df_tsne$Y), aes(x=V1, y=V2)) +

# Store the simulated data under its package name and write it to data/.
clusters_nonlin <- df
# Files in data/ need a recognised extension to be picked up as package
# data, so use .rda as the other save() calls in this script do.
save(clusters_nonlin, file="data/clusters_nonlin.rda")

# Hiding anomalies
# Simulates 200 points with x1, x2, x3 uniform on [-1, 1] and x4 equal to x3
# plus a small uniform perturbation, then appends one extra point that breaks
# the x3/x4 relationship (x3 = -0.5, x4 = 0.5).
# The order of the runif() calls determines the values drawn for this seed.
set.seed(946)
d <- tibble(x1=runif(200, -1, 1),
x2=runif(200, -1, 1),
x3=runif(200, -1, 1))
d <- d %>%
mutate(x4 = x3 + runif(200, -0.1, 0.1))
# The anomaly: the only point with x4 far from x3
d <- bind_rows(d, c(x1=0, x2=0, x3=-0.5, x4=0.5))

# Linearly mix x1 with x3 and x2 with x4 using cos/sin of pi/6.
# NOTE(review): mutate() evaluates its arguments in order, so the x3
# expression uses the already-updated x1 (and x4 the already-updated x2).
# The result is therefore not a pure rotation by pi/6 - presumably a rotation
# was intended; confirm before changing, because anomaly2 was generated with
# this code.
d_r <- d %>%
mutate(x1 = cos(pi/6)*x1 + sin(pi/6)*x3,
x3 = -sin(pi/6)*x1 + cos(pi/6)*x3,
x2 = cos(pi/6)*x2 + sin(pi/6)*x4,
x4 = -sin(pi/6)*x2 + cos(pi/6)*x4)

# Check data
# Visual checks only; nothing below modifies d or d_r.
# animate_xy() is not attached in this section - presumably tourr is loaded
# earlier in the script; verify.
library(GGally)
ggscatmat(d)
ggscatmat(d_r)
animate_xy(d)
animate_xy(d_r)

# Save
# anomaly1 is the original data, anomaly2 the transformed version
anomaly1 <- d
anomaly2 <- d_r
save(anomaly1, file="data/anomaly1.rda")
save(anomaly2, file="data/anomaly2.rda")

# 4D sphere with point in middle
library(geozoo)

# Take 96 points from geozoo's sphere.hollow() in 4 dimensions and name the
# columns x1..x4 to match the other anomaly data sets.
set.seed(626)
d <- sphere.hollow(p = 4, n = 96)$points |>
  as_tibble() |>
  rename(x1 = V1, x2 = V2, x3 = V3, x4 = V4)
# The anomaly: a single point at the origin
d <- d |>
  bind_rows(c(x1 = 0, x2 = 0, x3 = 0, x4 = 0))
# Visual check only.
# NOTE(review): if animate_xy() draws random numbers for its tour path, the
# shuffle below depends on how long the animation ran - confirm, and consider
# shuffling before the visual check.
animate_xy(d, axes="off")

# Save
# Shuffle the rows so the anomaly is not the last observation. The row count
# is taken from the data instead of being hard-coded; for the 97 rows built
# above this yields the same permutation as sample(1:97).
anomaly3 <- d[sample(seq_len(nrow(d))), ]
save(anomaly3, file="data/anomaly3.rda")

# 2D parabola with point
# Simulates 200 points with x1, x2, x3 uniform on [-1, 1] and x4 a quadratic
# function of x3 plus a small uniform perturbation, then appends one extra
# point lying off the parabola (x3 = 0, x4 = 0.7).
# The order of the random draws determines the values produced for this seed.
set.seed(946)
d <- tibble(x1 = runif(200, -1, 1),
            x2 = runif(200, -1, 1),
            x3 = runif(200, -1, 1))
d <- d %>%
  mutate(x4 = (x3^2 - 0.5) * 2 + runif(200, -0.1, 0.1))
# The anomaly
d <- bind_rows(d, c(x1 = 0, x2 = 0, x3 = 0, x4 = 0.7))

# Linearly mix x1 with x3 and x2 with x4 using cos/sin of pi/6.
# NOTE(review): mutate() evaluates its arguments in order, so the x3
# expression uses the already-updated x1 (and x4 the already-updated x2).
# The result is therefore not a pure rotation by pi/6 - presumably a rotation
# was intended; confirm before changing, because anomaly5 was generated with
# this code.
d_r <- d %>%
  mutate(x1 = cos(pi/6)*x1 + sin(pi/6)*x3,
         x3 = -sin(pi/6)*x1 + cos(pi/6)*x3,
         x2 = cos(pi/6)*x2 + sin(pi/6)*x4,
         x4 = -sin(pi/6)*x2 + cos(pi/6)*x4)
# Visual checks only.
# NOTE(review): if animate_xy() draws random numbers for its tour path, the
# shuffles below depend on how long the animation ran - confirm.
ggscatmat(d)
ggscatmat(d_r)
animate_xy(d_r, axes="off")

# Shuffle the rows so the anomaly is not the last observation, then save.
# seq_len(nrow(.)) gives the same index vector as 1:nrow(.) here and stays
# correct if the data ever has zero rows.
anomaly4 <- d[sample(seq_len(nrow(d))), ]
anomaly5 <- d_r[sample(seq_len(nrow(d_r))), ]
save(anomaly4, file="data/anomaly4.rda")
save(anomaly5, file="data/anomaly5.rda")


# Simulating different association
Binary file added data/anomaly1.rda
Binary file not shown.
Binary file added data/anomaly2.rda
Binary file not shown.
Binary file added data/anomaly3.rda
Binary file not shown.
Binary file added data/anomaly4.rda
Binary file not shown.
Binary file added data/anomaly5.rda
Binary file not shown.
27 changes: 27 additions & 0 deletions man/anomaly1.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit b9c45b6

Please sign in to comment.