Skip to content

Commit da2dd3d

Browse files
authored
addition of whoi_small_plankton and whoi_plankton datasets (#236)
1 parent f26d82e commit da2dd3d

33 files changed

+598
-119
lines changed

DESCRIPTION

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,9 @@ Imports:
4848
glue,
4949
zeallot
5050
Suggests:
51+
arrow,
5152
magick,
53+
prettyunits,
5254
testthat,
5355
coro,
5456
R.matlab,

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,8 @@ export(transform_ten_crop)
188188
export(transform_to_tensor)
189189
export(transform_vflip)
190190
export(vision_make_grid)
191+
export(whoi_plankton_dataset)
192+
export(whoi_small_plankton_dataset)
191193
importFrom(grDevices,dev.off)
192194
importFrom(graphics,polygon)
193195
importFrom(jsonlite,fromJSON)

NEWS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
* Added `lfw_people_dataset()` and `lfw_pairs_dataset()` for loading Labelled Faces in the Wild (LFW) datasets (@DerrickUnleashed, #203).
66
* Added `places365_dataset()` for loading the Places365 dataset (@koshtiakanksha, #196).
77
* Added `pascal_segmentation_dataset()`, and `pascal_detection_dataset()` for loading the Pascal Visual Object Classes datasets (@DerrickUnleashed, #209).
8+
* Added `whoi_plankton_dataset()`, and `whoi_small_plankton_dataset()` (@cregouby, #236).
89

910
## New models
1011

R/dataset-caltech.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ caltech101_dataset <- torch::dataset(
8181
self$image_indices <- c(self$image_indices, seq_along(imgs))
8282
}
8383

84-
cli_inform("{.cls {class(self)[[1]]}} dataset loaded with {length(self$img_path)} images across {length(self$classes)} classes.")
84+
cli_inform("{.cls {class(self)[[1]]}} dataset loaded with {self$.length()} images across {length(self$classes)} classes.")
8585
},
8686

8787
.getitem = function(index) {
@@ -205,7 +205,7 @@ caltech256_dataset <- torch::dataset(
205205
}, seq_along(self$classes), images_per_class, SIMPLIFY = FALSE),
206206
use.names = FALSE
207207
)
208-
cli_inform("{.cls {class(self)[[1]]}} dataset loaded with {length(self$img_path)} images across {length(self$classes)} classes.")
208+
cli_inform("{.cls {class(self)[[1]]}} dataset loaded with {self$.length()} images across {length(self$classes)} classes.")
209209
},
210210

211211
check_exists = function() {

R/dataset-coco.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ coco_detection_dataset <- torch::dataset(
6464
) {
6565

6666
year <- match.arg(year)
67-
split <- if (train) "train" else "val"
67+
split <- ifelse(train, "train", "val")
6868

6969
root <- fs::path_expand(root)
7070
self$root <- root
@@ -76,7 +76,7 @@ coco_detection_dataset <- torch::dataset(
7676

7777
self$data_dir <- fs::path(root, glue::glue("coco{year}"))
7878

79-
image_year <- if (year == "2016") "2014" else year
79+
image_year <- ifelse(year == "2016", "2014", year)
8080
self$image_dir <- fs::path(self$data_dir, glue::glue("{split}{image_year}"))
8181
self$annotation_file <- fs::path(self$data_dir, "annotations",
8282
glue::glue("instances_{split}{year}.json"))
@@ -288,7 +288,7 @@ coco_caption_dataset <- torch::dataset(
288288
) {
289289

290290
year <- match.arg(year)
291-
split <- if (train) "train" else "val"
291+
split <- ifelse(train, "train", "val")
292292

293293
root <- fs::path_expand(root)
294294
self$root <- root
@@ -329,7 +329,7 @@ coco_caption_dataset <- torch::dataset(
329329
image_id <- ann$image_id
330330
y <- ann$caption
331331

332-
prefix <- if (self$split == "train") "COCO_train2014_" else "COCO_val2014_"
332+
prefix <- ifelse(self$split == "train", "COCO_train2014_", "COCO_val2014_")
333333
filename <- paste0(prefix, sprintf("%012d", image_id), ".jpg")
334334
image_path <- fs::path(self$image_dir, filename)
335335

R/dataset-eurosat.R

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
#' @inheritParams mnist_dataset
1313
#' @param root (Optional) Character. The root directory where the dataset will be stored.
1414
#' if empty, will use the default `rappdirs::user_cache_dir("torch")`.
15-
#' @param split Character. Must be one of `train`, `val`, or `test`.
15+
#' @param split One of `"train"`, `"val"`, or `"test"`. Default is `"val"`.
1616
#'
1717
#' @return A `torch::dataset` object. Each item is a list with:
1818
#' * `x`: a 64x64 image tensor with 3 (RGB) or 13 (all bands) channels
@@ -39,7 +39,7 @@ eurosat_dataset <- torch::dataset(
3939

4040
initialize = function(
4141
root = tempdir(),
42-
split = "train",
42+
split = "val",
4343
download = FALSE,
4444
transform = NULL,
4545
target_transform = NULL
@@ -53,7 +53,7 @@ eurosat_dataset <- torch::dataset(
5353
self$images_dir <- file.path(self$root, class(self)[1], "images")
5454
self$split_file <- file.path(self$root, fs::path_ext_remove(basename(self$split_url)))
5555

56-
if (download){
56+
if (download) {
5757
cli_inform("Dataset {.cls {class(self)[[1]]}} (~{.emph {self$archive_size}}) will be downloaded and processed if not already available.")
5858
self$download()
5959
}
@@ -184,5 +184,3 @@ eurosat100_dataset <- torch::dataset(
184184
split_url = "https://huggingface.co/datasets/torchgeo/eurosat/resolve/main/eurosat-100-{split}.txt?download=true",
185185
archive_size = "7 MB"
186186
)
187-
188-

R/dataset-fer.R

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,17 @@
3131
fer_dataset <- dataset(
3232
name = "fer_dataset",
3333
archive_size = "90 MB",
34+
url = "https://huggingface.co/datasets/JimmyUnleashed/FER-2013/resolve/main/fer2013.tar.gz",
35+
md5 = "ca95d94fe42f6ce65aaae694d18c628a",
36+
classes = c(
37+
"Angry",
38+
"Disgust",
39+
"Fear",
40+
"Happy",
41+
"Sad",
42+
"Surprise",
43+
"Neutral"
44+
),
3445

3546
initialize = function(
3647
root = tempdir(),
@@ -39,25 +50,25 @@ fer_dataset <- dataset(
3950
target_transform = NULL,
4051
download = FALSE
4152
) {
42-
4353
self$root <- root
4454
self$train <- train
4555
self$transform <- transform
4656
self$target_transform <- target_transform
47-
self$split <- if (train) "Train" else "Test"
57+
self$split <- ifelse(train, "Train", "Test")
4858
self$folder_name <- "fer2013"
49-
self$url <- "https://huggingface.co/datasets/JimmyUnleashed/FER-2013/resolve/main/fer2013.tar.gz"
50-
self$md5 <- "ca95d94fe42f6ce65aaae694d18c628a"
51-
self$classes <- c("Angry", "Disgust", "Fear", "Happy", "Sad", "Surprise", "Neutral")
5259
self$class_to_idx <- setNames(seq_along(self$classes), self$classes)
5360

54-
if (download){
55-
cli_inform("Dataset {.cls {class(self)[[1]]}} (~{.emph {self$archive_size}}) will be downloaded and processed if not already available.")
61+
if (download) {
62+
cli_inform(
63+
"Dataset {.cls {class(self)[[1]]}} (~{.emph {self$archive_size}}) will be downloaded and processed if not already available."
64+
)
5665
self$download()
5766
}
5867

5968
if (!self$check_files()) {
60-
runtime_error("Dataset not found. You can use `download = TRUE` to download it.")
69+
runtime_error(
70+
"Dataset not found. You can use `download = TRUE` to download it."
71+
)
6172
}
6273

6374
csv_file <- file.path(self$root, self$folder_name, "fer2013.csv")
@@ -87,11 +98,13 @@ fer_dataset <- dataset(
8798

8899
y <- self$y[i]
89100

90-
if (!is.null(self$transform))
101+
if (!is.null(self$transform)) {
91102
x <- self$transform(x)
103+
}
92104

93-
if (!is.null(self$target_transform))
105+
if (!is.null(self$target_transform)) {
94106
y <- self$target_transform(y)
107+
}
95108

96109
list(x = x, y = y)
97110
},
@@ -112,11 +125,14 @@ fer_dataset <- dataset(
112125

113126
archive <- download_and_cache(self$url)
114127

115-
if (!tools::md5sum(archive) == self$md5)
128+
if (!tools::md5sum(archive) == self$md5) {
116129
runtime_error("Corrupt file! Delete the file in {archive} and try again.")
130+
}
117131

118132
untar(archive, exdir = self$root)
119-
cli_inform("Dataset {.cls {class(self)[[1]]}} downloaded and extracted successfully.")
133+
cli_inform(
134+
"Dataset {.cls {class(self)[[1]]}} downloaded and extracted successfully."
135+
)
120136
},
121137

122138
check_files = function() {

R/dataset-fgvc.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ fgvc_aircraft_dataset <- dataset(
7878
target_transform = NULL,
7979
download = FALSE
8080
) {
81-
81+
8282
self$root <- root
8383
self$split <- split
8484
self$annotation_level <- annotation_level
@@ -132,10 +132,10 @@ fgvc_aircraft_dataset <- dataset(
132132
.getitem = function(index) {
133133
x <- jpeg::readJPEG(self$image_paths[index]) * 255
134134

135-
y <- if (self$annotation_level == "all") {
136-
as.integer(self$labels_df[index, ])
135+
if (self$annotation_level == "all") {
136+
y <- as.integer(self$labels_df[index, ])
137137
} else {
138-
self$labels_df[[self$annotation_level]][index]
138+
y <- self$labels_df[[self$annotation_level]][index]
139139
}
140140

141141
if (!is.null(self$transform)) {

R/dataset-flickr.R

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,8 @@ flickr8k_caption_dataset <- torch::dataset(
6363
self$transform <- transform
6464
self$target_transform <- target_transform
6565
self$train <- train
66-
self$split <- if (train) "train" else "test"
67-
66+
self$split <- ifelse(train, "train", "test")
67+
6868
if (download)
6969
cli_inform("Dataset {.cls {class(self)[[1]]}} (~{.emph {self$archive_size}}) will be downloaded and processed if not already available.")
7070
self$download()
@@ -130,7 +130,7 @@ flickr8k_caption_dataset <- torch::dataset(
130130

131131
download = function() {
132132

133-
if (self$check_exists())
133+
if (self$check_exists())
134134
return()
135135

136136
cli_inform("Downloading {.cls {class(self)[[1]]}}...")
@@ -173,10 +173,10 @@ flickr8k_caption_dataset <- torch::dataset(
173173
caption_index <- self$captions[[index]]
174174
y <- self$classes[[caption_index]]
175175

176-
if (!is.null(self$transform))
176+
if (!is.null(self$transform))
177177
x <- self$transform(x)
178178

179-
if (!is.null(self$target_transform))
179+
if (!is.null(self$target_transform))
180180
y <- self$target_transform(y)
181181

182182
list(x = x, y = y)
@@ -225,13 +225,13 @@ flickr30k_caption_dataset <- torch::dataset(
225225
self$transform <- transform
226226
self$target_transform <- target_transform
227227
self$train <- train
228-
self$split <- if (train) "train" else "test"
228+
self$split <- ifelse(train, "train", "test")
229229

230230
if (download)
231231
cli_inform("Dataset {.cls {class(self)[[1]]}} (~{.emph {self$archive_size}}) will be downloaded and processed if not already available.")
232232
self$download()
233233

234-
if (!self$check_exists())
234+
if (!self$check_exists())
235235
cli_abort("Dataset not found. Use `download = TRUE` to download it.")
236236

237237
captions_path <- file.path(self$raw_folder, "dataset_flickr30k.json")

R/dataset-lfw.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ lfw_people_dataset <- torch::dataset(
132132
self$classes <- class_names
133133
self$class_to_idx <- class_to_idx
134134

135-
cli_inform("{.cls {class(self)[[1]]}} dataset loaded with {length(self$img_path)} images across {length(self$classes)} classes.")
135+
cli_inform("{.cls {class(self)[[1]]}} dataset loaded with {self$.length()} images across {length(self$classes)} classes.")
136136
},
137137

138138
download = function() {
@@ -283,7 +283,7 @@ lfw_pairs_dataset <- torch::dataset(
283283
self$pairs <- do.call(rbind, pair_list)
284284
self$img_path <- c(self$pairs$img1, self$pairs$img2)
285285

286-
cli_inform("{.cls {class(self)[[1]]}} dataset loaded with {length(self$img_path)} images across {length(self$classes)} classes.")
286+
cli_inform("{.cls {class(self)[[1]]}} dataset loaded with {self$.length()} images across {length(self$classes)} classes.")
287287
},
288288

289289
.getitem = function(index) {

0 commit comments

Comments (0)