diff --git a/crates/ark/src/data_explorer/r_data_explorer.rs b/crates/ark/src/data_explorer/r_data_explorer.rs index 7d3a854cf..73cbece60 100644 --- a/crates/ark/src/data_explorer/r_data_explorer.rs +++ b/crates/ark/src/data_explorer/r_data_explorer.rs @@ -62,8 +62,9 @@ use harp::exec::RFunction; use harp::exec::RFunctionExt; use harp::object::RObject; use harp::r_symbol; +use harp::table_kind; use harp::tbl_get_column; -use harp::TableInfo; +use harp::ColumnNames; use harp::TableKind; use itertools::Itertools; use libr::*; @@ -562,22 +563,27 @@ impl RDataExplorer { fn r_get_shape(table: RObject) -> anyhow::Result { unsafe { let table = table.clone(); - let object = *table; - let info = table_info_or_bail(object)?; + let Some(kind) = table_kind(table.sexp) else { + return Err(anyhow!("Unsupported type for the data viewer")); + }; - let harp::TableInfo { - kind, - dims: - harp::TableDim { - num_rows, - num_cols: total_num_columns, - }, - col_names: column_names, - } = info; + // `DataFrame::n_row()` will materialize duckplyr compact row names, but we + // are ok with that for the data explorer and don't provide a hook to opt out. + let (n_row, n_col, column_names) = match kind { + TableKind::Dataframe => ( + harp::DataFrame::n_row(table.sexp)?, + harp::DataFrame::n_col(table.sexp)?, + ColumnNames::from_data_frame(table.sexp)?, + ), + TableKind::Matrix => { + let (n_row, n_col) = harp::Matrix::dim(table.sexp)?; + (n_row, n_col, ColumnNames::from_matrix(table.sexp)?) + }, + }; let mut column_schemas = Vec::::new(); - for i in 0..(total_num_columns as isize) { + for i in 0..(n_col as isize) { let column_name = match column_names.get_unchecked(i) { Some(name) => name, None => String::from(""), @@ -586,8 +592,8 @@ impl RDataExplorer { // TODO: handling for nested data frame columns let col = match kind { - harp::TableKind::Dataframe => VECTOR_ELT(object, i), - harp::TableKind::Matrix => object, + harp::TableKind::Dataframe => VECTOR_ELT(table.sexp, i), + harp::TableKind::Matrix => table.sexp, }; let type_name = WorkspaceVariableDisplayType::from(col, false).display_type; @@ -610,7 +616,7 @@ impl RDataExplorer { Ok(DataObjectShape { columns: column_schemas, kind, - num_rows, + num_rows: n_row, }) } } @@ -1073,10 +1079,6 @@ impl RDataExplorer { } } -fn table_info_or_bail(x: SEXP) -> anyhow::Result { - harp::table_info(x).ok_or(anyhow!("Unsupported type for data viewer")) -} - /// Open an R object in the data viewer. /// /// This function is called from the R side to open an R object in the data viewer. diff --git a/crates/ark/src/modules/positron/methods.R b/crates/ark/src/modules/positron/methods.R index 31128fa38..7408697d9 100644 --- a/crates/ark/src/modules/positron/methods.R +++ b/crates/ark/src/modules/positron/methods.R @@ -24,7 +24,7 @@ ark_methods_table$ark_positron_variable_get_children <- new.env( ) lockEnvironment(ark_methods_table, TRUE) -ark_methods_allowed_packages <- c("torch", "reticulate") +ark_methods_allowed_packages <- c("torch", "reticulate", "duckplyr") # check if the calling package is allowed to touch the methods table check_caller_allowed <- function() { @@ -77,7 +77,9 @@ check_register_args <- function(generic, class) { check_register_args(generic, class) for (cls in class) { - if (exists(cls, envir = ark_methods_table[[generic]], inherits = FALSE)) { + if ( + exists(cls, envir = ark_methods_table[[generic]], inherits = FALSE) + ) { remove(list = cls, envir = ark_methods_table[[generic]]) } } diff --git a/crates/ark/src/srcref.rs b/crates/ark/src/srcref.rs index 290c7dda6..fe29ff716 100644 --- a/crates/ark/src/srcref.rs +++ b/crates/ark/src/srcref.rs @@ -139,7 +139,7 @@ fn generate_source( } // Ignore functions that already have sources - if let Some(_) = old.attr("srcref") { + if let Some(_) = old.get_attribute("srcref") { return Ok(None); } diff --git a/crates/ark/src/variables/variable.rs b/crates/ark/src/variables/variable.rs index 48cb9cee2..8a528ed97 100644 --- a/crates/ark/src/variables/variable.rs +++ b/crates/ark/src/variables/variable.rs @@ -25,7 +25,6 @@ use harp::object::RObject; use harp::r_null; use harp::r_symbol; use harp::symbol::RSymbol; -use harp::table_info; use harp::utils::pairlist_size; use harp::utils::r_altrep_class; use harp::utils::r_assert_type; @@ -51,7 +50,7 @@ use harp::vector::CharacterVector; use harp::vector::IntegerVector; use harp::vector::Vector; use harp::List; -use harp::TableDim; +use harp::TableKind; use itertools::Itertools; use libr::*; use stdext::local; @@ -90,7 +89,7 @@ impl WorkspaceVariableDisplayValue { let out = match r_typeof(value) { NILSXP => Self::new(String::from("NULL"), false), - VECSXP if r_inherits(value, "data.frame") => Self::from_data_frame(value), + VECSXP if r_is_data_frame(value) => Self::from_data_frame(value)?, VECSXP if !r_inherits(value, "POSIXlt") => Self::from_list(value), LISTSXP => Self::empty(), SYMSXP if value == unsafe { R_MissingArg } => { @@ -157,15 +156,17 @@ impl WorkspaceVariableDisplayValue { Ok(Self::new(display_value, truncated)) } - fn from_data_frame(value: SEXP) -> Self { - let dim = match unsafe { harp::df_dim(value) } { - Ok(dim) => dim, - // FIXME: Needs more type safety - Err(_) => TableDim { - num_rows: -1, - num_cols: -1, - }, - }; + fn from_data_frame(value: SEXP) -> anyhow::Result { + // Classes should provide an `ark_positron_variable_display_value()` method + // if they need to opt out of ALTREP materialization here. + let n_row = harp::DataFrame::n_row(value)?; + let n_col = harp::DataFrame::n_col(value)?; + + let row_word = plural("row", n_row); + let col_word = plural("column", n_col); + + let n_row = n_row.to_string(); + let n_col = n_col.to_string(); let class = match r_classes(value) { None => String::from(""), @@ -175,15 +176,9 @@ impl WorkspaceVariableDisplayValue { }, }; - let value = format!( - "[{} {} x {} {}]{}", - dim.num_rows, - plural("row", dim.num_rows), - dim.num_cols, - plural("column", dim.num_cols), - class - ); - Self::new(value, false) + let value = format!("[{n_row} {row_word} x {n_col} {col_word}]{class}"); + + Ok(Self::new(value, false)) } fn from_list(value: SEXP) -> Self { @@ -305,13 +300,7 @@ impl WorkspaceVariableDisplayValue { } fn from_matrix(value: SEXP) -> anyhow::Result { - let (n_row, n_col) = match harp::table_info(value) { - Some(info) => (info.dims.num_rows, info.dims.num_cols), - None => { - log::error!("Failed to get matrix dimensions"); - (-1, -1) - }, - }; + let (n_row, n_col) = harp::Matrix::dim(value)?; let class = match r_classes(value) { None => String::from(" "), @@ -496,13 +485,23 @@ impl WorkspaceVariableDisplayType { let dfclass = classes.get_unchecked(0).unwrap(); match include_length { true => { - let dim = table_info(value); - let shape = match dim { - Some(info) => { - format!("{}, {}", info.dims.num_rows, info.dims.num_cols) + // Classes should provide an `ark_positron_variable_display_type()` method + // if they need to opt out of ALTREP materialization here. + let n_row: String = match harp::DataFrame::n_row(value) { + Ok(n_row) => n_row.to_string(), + Err(error) => { + log::error!("Can't compute number of rows: {error}"); + String::from("?") }, - None => String::from("?, ?"), }; + let n_col = match harp::DataFrame::n_col(value) { + Ok(n_col) => n_col.to_string(), + Err(error) => { + log::error!("Can't compute number of columns: {error}"); + String::from("?") + }, + }; + let shape = format!("{n_row}, {n_col}"); let display_type = format!("{} [{}]", dfclass, shape); Self::simple(display_type) }, @@ -731,8 +730,26 @@ impl PositronVariable { fn variable_length(x: SEXP) -> usize { // Check for tabular data - if let Some(info) = harp::table_info(x) { - return info.dims.num_cols as usize; + // We don't currently provide an ark hook for variable length, so we are somewhat + // careful to only query n-col and never n-row for data frames, to avoid duckplyr + // query materialization. + if let Some(kind) = harp::table_kind(x) { + return match kind { + TableKind::Dataframe => match harp::DataFrame::n_col(x) { + Ok(n_col) => n_col as usize, + Err(error) => { + log::error!("Can't compute number of data frame columns: {error}"); + 0 + }, + }, + TableKind::Matrix => match harp::Matrix::dim(x) { + Ok((_n_row, n_col)) => n_col as usize, + Err(error) => { + log::error!("Can't compute matrix dimensions: {error}"); + 0 + }, + }, + }; } // Otherwise treat as vector @@ -1201,14 +1218,7 @@ impl PositronVariable { fn inspect_matrix(matrix: SEXP) -> anyhow::Result> { let matrix = RObject::new(matrix); - - let n_col = match harp::table_info(matrix.sexp) { - Some(info) => info.dims.num_cols, - None => { - log::warn!("Unexpected matrix object. Couldn't get dimensions."); - return Ok(vec![]); - }, - }; + let (_n_row, n_col) = harp::Matrix::dim(matrix.sexp)?; let make_variable = |access_key, display_name, display_value, is_truncated| Variable { access_key, diff --git a/crates/harp/src/attrib.rs b/crates/harp/src/attrib.rs index 59c93c6c6..2d6209c4f 100644 --- a/crates/harp/src/attrib.rs +++ b/crates/harp/src/attrib.rs @@ -20,7 +20,7 @@ fn zap_srcref_fn(x: SEXP) -> RObject { unsafe { let x = RObject::view(x).shallow_duplicate(); - x.set_attr("srcref", r_null()); + x.set_attribute("srcref", r_null()); libr::SET_BODY(x.sexp, zap_srcref(libr::R_ClosureExpr(x.sexp)).sexp); x @@ -64,7 +64,7 @@ fn zap_srcref_expr(x: SEXP) -> RObject { fn zap_srcref_attrib(x: SEXP) { let x = RObject::view(x); - x.set_attr("srcfile", r_null()); - x.set_attr("srcref", r_null()); - x.set_attr("wholeSrcref", r_null()); + x.set_attribute("srcfile", r_null()); + x.set_attribute("srcref", r_null()); + x.set_attribute("wholeSrcref", r_null()); } diff --git a/crates/harp/src/column_names.rs b/crates/harp/src/column_names.rs new file mode 100644 index 000000000..37a15dd7a --- /dev/null +++ b/crates/harp/src/column_names.rs @@ -0,0 +1,57 @@ +use libr::*; + +use crate::exec::RFunction; +use crate::exec::RFunctionExt; +use crate::utils::*; +use crate::vector::Vector; +use crate::CharacterVector; +use crate::RObject; + +/// Column names +/// +/// Column names represent an optional character vector of names. This class is mostly +/// useful for ergonomics, since [ColumnNames::get_unchecked()] will propagate [None] +/// when used on a vector without column names. +pub struct ColumnNames { + names: Option, +} + +impl ColumnNames { + pub fn new(names: SEXP) -> Self { + let names = if r_typeof(names) == STRSXP { + Some(unsafe { CharacterVector::new_unchecked(names) }) + } else { + None + }; + Self { names } + } + + pub fn from_data_frame(x: SEXP) -> crate::Result { + if !r_is_data_frame(x) { + return Err(crate::anyhow!("`x` must be a data frame.")); + } + let Some(column_names) = RObject::view(x).get_attribute_names() else { + return Err(crate::anyhow!("`x` must have column names.")); + }; + Ok(Self::new(column_names.sexp)) + } + + pub fn from_matrix(x: SEXP) -> crate::Result { + if !r_is_matrix(x) { + return Err(crate::anyhow!("`x` must be a matrix.")); + } + let column_names = RFunction::from("colnames").add(x).call()?; + Ok(Self::new(column_names.sexp)) + } + + pub fn get_unchecked(&self, index: isize) -> Option { + if let Some(names) = &self.names { + if let Some(name) = names.get_unchecked(index) { + if name.len() > 0 { + return Some(name); + } + } + } + None + } +} diff --git a/crates/harp/src/data_frame.rs b/crates/harp/src/data_frame.rs index caed5c7c3..600246956 100644 --- a/crates/harp/src/data_frame.rs +++ b/crates/harp/src/data_frame.rs @@ -1,3 +1,7 @@ +use libr::*; + +use crate::r_length; +use crate::utils::*; use crate::vector::Vector; use crate::List; use crate::RObject; @@ -23,9 +27,11 @@ impl DataFrame { let list = List::new(sexp)?; harp::assert_class(sexp, "data.frame")?; - let dim = unsafe { harp::df_dim(list.obj.sexp) }?; - let nrow = dim.num_rows as usize; - let ncol = list.obj.length() as usize; + // This materializes ALTREP compact row names (duckplyr) and we are okay with + // that. If you just need the number of columns without full validation, use + // the static method [DataFrame::n_col()]. + let nrow = Self::n_row(list.obj.sexp)? as usize; + let ncol = Self::n_col(list.obj.sexp)? as usize; let Some(names) = list.obj.names() else { return Err(harp::anyhow!("Data frame must have names")); @@ -71,6 +77,67 @@ impl DataFrame { .get(idx as isize)? .ok_or_else(|| harp::unreachable!("missing column")) } + + /// Compute the number of columns of a data frame + /// + /// # Notes + /// + /// In general, prefer [DataFrame::new()] followed by accessing the `ncol` field, + /// as that validates the data frame on the way in. Use this static method if you + /// need maximal performance, or if you only need the number of columns, and computing + /// the number of rows would materialize ALTREP objects unnecessarily. + pub fn n_col(x: libr::SEXP) -> crate::Result { + if !r_is_data_frame(x) { + return Err(crate::anyhow!("`x` must be a data frame")); + } + + match i32::try_from(r_length(x)) { + Ok(n_col) => Ok(n_col), + Err(_) => Err(crate::anyhow!( + "Number of columns of `x` must fit in a `i32`." + )), + } + } + + /// Compute the number of rows of a data frame + /// + /// # Notes + /// + /// In general, prefer [DataFrame::new()] followed by accessing the `nrow` field, + /// as that validates the data frame on the way in. Use this static method if you + /// need maximal performance. + pub fn n_row(x: SEXP) -> crate::Result { + if !r_is_data_frame(x) { + return Err(crate::anyhow!("`x` must be a data frame")); + } + + // Note that this turns compact row names of the form `c(NA, -5)` into ALTREP compact + // intrange objects. This is fine for our purposes because the row names are never + // fully expanded as we determine their length. + // + // There is a special case with duckplyr where the row names object can be an ALTREP + // integer vector that looks like an instance of compact row names like `c(NA, -5)`. + // Touching this with `INTEGER()` or `INTEGER_ELT()` to determine the number of rows + // will materialize the whole query (and run arbitrary R code). We've determined the + // only maintainable strategy for classes like this is to provide higher level ark + // hooks where packages like duckplyr can intercede before we even get here, providing + // their own custom methods (like for the variables pane). That keeps our hot path + // simpler, as we unconditionally materialize ALTREP vectors, while still providing a + // way to opt out. + let row_names = RObject::view(x).get_attribute_row_names(); + + let Some(row_names) = row_names else { + return Err(crate::anyhow!("`x` must have row names")); + }; + + // The row names object is typically an integer vector (possibly ALTREP compact + // intrange that knows its length) or character vector, and we just take the length of + // that to get the number of rows + match i32::try_from(r_length(row_names.sexp)) { + Ok(n_row) => Ok(n_row), + Err(_) => Err(crate::anyhow!("Number of rows of `x` must fit in a `i32`.")), + } + } } #[cfg(test)] @@ -80,6 +147,7 @@ mod tests { use crate::r_alloc_integer; use crate::r_chr_poke; use crate::r_list_poke; + use crate::r_null; use crate::vector::Vector; use crate::DataFrame; use crate::List; @@ -104,7 +172,7 @@ mod tests { fn test_data_frame_no_names() { crate::r_task(|| { let df = harp::parse_eval_base("data.frame(x = 1:2, y = 3:4)").unwrap(); - df.set_attr("names", RObject::null().sexp); + df.set_attribute("names", r_null()); let df = DataFrame::new(df.sexp); assert_match!(df, harp::Result::Err(err) => { @@ -117,7 +185,7 @@ mod tests { fn test_data_frame_bad_names() { crate::r_task(|| { let df = harp::parse_eval_base("data.frame(x = 1:2, y = 3:4)").unwrap(); - let nms = df.attr("names").unwrap(); + let nms = df.get_attribute("names").unwrap(); unsafe { r_chr_poke(nms.sexp, 0, libr::R_NaString); } diff --git a/crates/harp/src/lib.rs b/crates/harp/src/lib.rs index 00099ab0d..98df67010 100644 --- a/crates/harp/src/lib.rs +++ b/crates/harp/src/lib.rs @@ -6,6 +6,7 @@ // pub mod attrib; pub mod call; +mod column_names; pub mod command; pub mod data_frame; pub mod environment; @@ -20,6 +21,7 @@ pub mod format; pub mod json; pub mod library; pub mod line_ending; +mod matrix; pub mod modules; pub mod object; pub mod parse; @@ -41,8 +43,10 @@ pub mod vec_format; pub mod vector; // Reexport API +pub use column_names::*; pub use data_frame::*; pub use eval::*; +pub use matrix::*; pub use object::*; pub use parse::*; pub use parser::*; diff --git a/crates/harp/src/matrix.rs b/crates/harp/src/matrix.rs new file mode 100644 index 000000000..555b84e33 --- /dev/null +++ b/crates/harp/src/matrix.rs @@ -0,0 +1,34 @@ +use libr::*; + +use crate::r_dim; +use crate::r_int_get; +use crate::r_length; +use crate::utils::*; + +/// Matrix support +/// +/// # Notes +/// +/// Currently we only utilize this for the static [Matrix::dim()] method, +/// but we could actually wrap matrices here (with validation) and provide +/// additional methods, like [harp::DataFrame]. +pub struct Matrix {} + +impl Matrix { + /// Compute the dimensions of a matrix + pub fn dim(x: SEXP) -> crate::Result<(i32, i32)> { + if !r_is_matrix(x) { + return Err(crate::anyhow!("`x` must be a matrix")); + } + + let dim = r_dim(x); + + if r_typeof(dim) != INTSXP || r_length(dim) != 2 { + return Err(crate::anyhow!( + "`dim` must be an integer vector of length 2" + )); + } + + Ok((r_int_get(dim, 0), r_int_get(dim, 1))) + } +} diff --git a/crates/harp/src/object.rs b/crates/harp/src/object.rs index 178ae67ad..39184dc77 100644 --- a/crates/harp/src/object.rs +++ b/crates/harp/src/object.rs @@ -19,7 +19,6 @@ use libr::*; use crate::error::Error; use crate::exec::RFunction; use crate::exec::RFunctionExt; -use crate::protect::RProtect; use crate::r_inherits; use crate::r_symbol; use crate::size::r_size; @@ -148,6 +147,7 @@ pub fn r_length(x: SEXP) -> isize { unsafe { Rf_xlength(x) } } +/// Raw access to the underlying `R_DimSymbol` attribute pub fn r_dim(x: SEXP) -> SEXP { unsafe { Rf_getAttrib(x, R_DimSymbol) } } @@ -458,26 +458,16 @@ impl RObject { /// Gets a vector containing names for the object's values (from the `names` /// attribute). Returns `None` if the object's value(s) don't have names. pub fn names(&self) -> Option>> { - let names = unsafe { Rf_getAttrib(self.sexp, R_NamesSymbol) }; - let names = RObject::from(names); - match names.kind() { - STRSXP => Vec::>::try_from(names).ok(), - _ => None, + match self.get_attribute_names() { + Some(names) => match names.kind() { + STRSXP => Vec::>::try_from(names).ok(), + _ => None, + }, + None => None, } } - /// Gets a named attribute from the object. Returns `None` if the attribute - /// doesn't exist. - pub fn attr(&self, name: &str) -> Option { - let val = unsafe { Rf_getAttrib(self.sexp, r_symbol!(name)) }; - if r_is_null(val) { - None - } else { - Some(RObject::new(val)) - } - } - - pub fn set_attr(&self, name: &str, value: SEXP) { + pub fn set_attribute(&self, name: &str, value: SEXP) { unsafe { Rf_protect(value); Rf_setAttrib(self.sexp, r_symbol!(name), value); @@ -485,12 +475,47 @@ impl RObject { } } + /// Gets a named attribute from the object. Returns `None` if the attribute + /// was `NULL`. + pub fn get_attribute(&self, name: &str) -> Option { + self.get_attribute_from_symbol(unsafe { r_symbol!(name) }) + } + + /// Gets the [R_NamesSymbol] attribute from the object. Returns `None` if there are no + /// names. + pub fn get_attribute_names(&self) -> Option { + self.get_attribute_from_symbol(unsafe { R_NamesSymbol }) + } + + /// Gets the [R_RowNamesSymbol] attribute from the object. Returns `None` if there are + /// no row names. + /// + /// # Notes + /// + /// Note that [Rf_getAttrib()] will turn compact row names of the form `c(NA, -5)` + /// into ALTREP compact intrange objects. If you really need to avoid this, use + /// `.row_names_info(x, 0L)` instead, which goes through `getAttrib0()`, but note that + /// R core frowns on this. + /// https://github.com/wch/r-source/blob/e11e04d1f9966551991569b43da2ba6ab2251f30/src/main/attrib.c#L177-L187 + pub fn get_attribute_row_names(&self) -> Option { + self.get_attribute_from_symbol(unsafe { R_RowNamesSymbol }) + } + + fn get_attribute_from_symbol(&self, symbol: SEXP) -> Option { + let out = unsafe { Rf_getAttrib(self.sexp, symbol) }; + if r_is_null(out) { + None + } else { + Some(RObject::new(out)) + } + } + pub fn inherits(&self, class: &str) -> bool { return r_inherits(self.sexp, class); } pub fn class(&self) -> harp::Result>> { - let Some(class) = self.attr("class") else { + let Some(class) = self.get_attribute("class") else { return Ok(None); }; @@ -503,19 +528,6 @@ impl RObject { Ok(Some(class.try_into()?)) } - pub fn dim(&self) -> harp::Result>> { - let dim: RObject = r_dim(self.sexp).into(); - - if dim.sexp == harp::r_null() { - return Ok(None); - } - - let dim: Vec = dim.try_into()?; - let dim = dim.into_iter().map(|d| d as usize).collect(); - - Ok(Some(dim)) - } - pub fn duplicate(&self) -> RObject { unsafe { RObject::new(libr::Rf_duplicate(self.sexp)) } } @@ -1061,28 +1073,26 @@ impl TryFrom<&Vec> for RObject { impl TryFrom for HashMap { type Error = crate::error::Error; fn try_from(value: RObject) -> Result { - unsafe { - r_assert_type(*value, &[STRSXP, VECSXP])?; + r_assert_type(value.sexp, &[STRSXP, VECSXP])?; - let mut protect = RProtect::new(); - let names = protect.add(Rf_getAttrib(*value, R_NamesSymbol)); - r_assert_type(names, &[STRSXP])?; - - let value = protect.add(Rf_coerceVector(*value, STRSXP)); + let Some(names) = value.get_attribute_names() else { + return Err(Error::UnexpectedType(NILSXP, vec![STRSXP])); + }; - let n = Rf_xlength(names); - let mut map = HashMap::::with_capacity(n as usize); + let value = RObject::new(unsafe { Rf_coerceVector(value.sexp, STRSXP) }); - for i in (0..Rf_xlength(names)).rev() { - // Translate the name and value into Rust strings. - let lhs = r_chr_get_owned_utf8(names, i)?; - let rhs = r_chr_get_owned_utf8(value, i)?; + let n = names.length(); + let mut map = HashMap::::with_capacity(n as usize); - map.insert(lhs, rhs); - } + for i in (0..n).rev() { + // Translate the name and value into Rust strings. + let lhs = r_chr_get_owned_utf8(names.sexp, i)?; + let rhs = r_chr_get_owned_utf8(value.sexp, i)?; - Ok(map) + map.insert(lhs, rhs); } + + Ok(map) } } @@ -1091,28 +1101,26 @@ impl TryFrom for HashMap { impl TryFrom for HashMap { type Error = crate::error::Error; fn try_from(value: RObject) -> Result { - unsafe { - r_assert_type(*value, &[INTSXP, VECSXP])?; + r_assert_type(*value, &[INTSXP, VECSXP])?; - let mut protect = RProtect::new(); - let names = protect.add(Rf_getAttrib(*value, R_NamesSymbol)); - r_assert_type(names, &[STRSXP])?; - - let value = protect.add(Rf_coerceVector(*value, INTSXP)); + let Some(names) = value.get_attribute_names() else { + return Err(Error::UnexpectedType(NILSXP, vec![STRSXP])); + }; - let n = Rf_xlength(names); - let mut map = HashMap::::with_capacity(n as usize); + let value = RObject::new(unsafe { Rf_coerceVector(value.sexp, INTSXP) }); - for i in (0..Rf_xlength(names)).rev() { - // Translate the name and value into Rust strings. - let name = r_chr_get_owned_utf8(names, i)?; - let val = r_int_get(value, i); + let n = names.length(); + let mut map = HashMap::::with_capacity(n as usize); - map.insert(name, val); - } + for i in (0..n).rev() { + // Translate the name and value into Rust strings. + let name = r_chr_get_owned_utf8(names.sexp, i)?; + let val = r_int_get(value.sexp, i); - Ok(map) + map.insert(name, val); } + + Ok(map) } } @@ -1121,23 +1129,23 @@ impl TryFrom for HashMap { impl TryFrom for HashMap { type Error = crate::error::Error; fn try_from(value: RObject) -> Result { - unsafe { - let mut protect = RProtect::new(); - let names = protect.add(Rf_getAttrib(*value, R_NamesSymbol)); - r_assert_type(names, &[STRSXP])?; - - let n = Rf_xlength(names); - let mut map = HashMap::::with_capacity(n as usize); - - // iterate in the reverse order to keep the first occurence of a name - for i in (0..n).rev() { - let name = r_chr_get_owned_utf8(names, i)?; - let value: RObject = RObject::new(VECTOR_ELT(*value, i)); - map.insert(name, value); - } + r_assert_type(*value, &[VECSXP])?; + + let Some(names) = value.get_attribute_names() else { + return Err(Error::UnexpectedType(NILSXP, vec![STRSXP])); + }; - Ok(map) + let n = names.length(); + let mut map = HashMap::::with_capacity(n as usize); + + // iterate in the reverse order to keep the first occurence of a name + for i in (0..n).rev() { + let name = r_chr_get_owned_utf8(names.sexp, i)?; + let value = RObject::new(list_get(value.sexp, i)); + map.insert(name, value); } + + Ok(map) } } diff --git a/crates/harp/src/parser/srcref.rs b/crates/harp/src/parser/srcref.rs index 9f4176741..91562fd94 100644 --- a/crates/harp/src/parser/srcref.rs +++ b/crates/harp/src/parser/srcref.rs @@ -40,7 +40,7 @@ pub struct SrcFile { // attributes. impl RObject { pub fn srcrefs(&self) -> anyhow::Result> { - let srcref = unwrap!(self.attr("srcref"), None => { + let srcref = unwrap!(self.get_attribute("srcref"), None => { return Err(anyhow!("Can't find `srcref` attribute")); }); diff --git a/crates/harp/src/table.rs b/crates/harp/src/table.rs index 7da2acd04..a542a4761 100644 --- a/crates/harp/src/table.rs +++ b/crates/harp/src/table.rs @@ -2,14 +2,9 @@ use libr::*; use crate::exec::RFunction; use crate::exec::RFunctionExt; -use crate::object::r_length; use crate::object::RObject; -use crate::r_assert_type; use crate::utils::r_is_data_frame; use crate::utils::r_is_matrix; -use crate::utils::r_typeof; -use crate::vector::CharacterVector; -use crate::vector::Vector; #[derive(Clone, Copy)] pub enum TableKind { @@ -17,25 +12,14 @@ pub enum TableKind { Matrix, } -pub struct TableInfo { - pub kind: TableKind, - pub dims: TableDim, - pub col_names: ColumnNames, -} - -// TODO: Might want to encode as types with methods so that we can make -// assumptions about memory layout more safely. Also makes it possible -// to compute properties more lazily. -pub fn table_info(x: SEXP) -> Option { +pub fn table_kind(x: SEXP) -> Option { if r_is_data_frame(x) { - return df_info(x).ok(); - } - - if r_is_matrix(x) { - return mat_info(x).ok(); + Some(TableKind::Dataframe) + } else if r_is_matrix(x) { + Some(TableKind::Matrix) + } else { + None } - - None } /// Extracts a single column from a table. @@ -64,109 +48,3 @@ pub fn tbl_get_column(x: SEXP, column_index: i32, kind: TableKind) -> anyhow::Re }, } } - -pub fn df_info(x: SEXP) -> anyhow::Result { - unsafe { - let dims = df_dim(x)?; - let col_names = ColumnNames::new(Rf_getAttrib(x, R_NamesSymbol)); - - Ok(TableInfo { - kind: TableKind::Dataframe, - dims, - col_names, - }) - } -} - -pub fn mat_info(x: SEXP) -> anyhow::Result { - let dims = mat_dim(x); - - let col_names = RFunction::from("colnames").add(x).call()?; - let col_names = ColumnNames::new(col_names.sexp); - - Ok(TableInfo { - kind: TableKind::Matrix, - dims, - col_names, - }) -} - -pub struct TableDim { - pub num_rows: i32, - pub num_cols: i32, -} - -/// Safety: Assumes a data frame as input. -/// TODO: Extract row info from attribute. -pub unsafe fn df_dim(data: SEXP) -> harp::Result { - // FIXME: We shouldn't dispatch to methods here - let dims = RFunction::new("base", "dim.data.frame") - .add(data) - .call() - .unwrap(); - - let Ok(_) = r_assert_type(dims.sexp, &[libr::INTSXP]) else { - return Err(harp::anyhow!( - "Data frame dimensions must be an integer vector, instead it has type `{}`", - harp::r_type2char(dims.kind()) - )); - }; - if dims.length() != 2 { - return Err(harp::anyhow!( - "Data frame must have 2 dimensions, instead it has {}", - dims.length() - )); - } - - Ok(TableDim { - num_rows: INTEGER_ELT(dims.sexp, 0), - num_cols: INTEGER_ELT(dims.sexp, 1), - }) -} - -pub fn mat_dim(x: SEXP) -> TableDim { - unsafe { - let dims = Rf_getAttrib(x, R_DimSymbol); - - // Might want to return an error instead, or take a strongly typed input - if r_typeof(dims) != INTSXP || r_length(dims) != 2 { - return TableDim { - num_rows: r_length(x) as i32, - num_cols: 1, - }; - } - - TableDim { - num_rows: INTEGER_ELT(dims, 0), - num_cols: INTEGER_ELT(dims, 1), - } - } -} - -pub struct ColumnNames { - pub names: Option, -} - -impl ColumnNames { - pub fn new(names: SEXP) -> Self { - unsafe { - let names = if r_typeof(names) == STRSXP { - Some(CharacterVector::new_unchecked(names)) - } else { - None - }; - Self { names } - } - } - - pub fn get_unchecked(&self, index: isize) -> Option { - if let Some(names) = &self.names { - if let Some(name) = names.get_unchecked(index) { - if name.len() > 0 { - return Some(name); - } - } - } - None - } -} diff --git a/crates/harp/src/vector/formatted_vector.rs b/crates/harp/src/vector/formatted_vector.rs index 8e6ec3903..0dce38fac 100644 --- a/crates/harp/src/vector/formatted_vector.rs +++ b/crates/harp/src/vector/formatted_vector.rs @@ -17,7 +17,6 @@ use crate::r_format_vec; use crate::r_is_object; use crate::r_length; use crate::r_subset_vec; -use crate::table_info; use crate::utils::r_assert_type; use crate::utils::r_typeof; use crate::vector::CharacterVector; @@ -102,12 +101,9 @@ impl FormattedVector { } fn column_iter_indices(&self, column: isize) -> anyhow::Result> { - let dim = table_info(self.vector.sexp) - .ok_or(anyhow!("Not a mtrix"))? - .dims; - - let start = column as i64 * dim.num_rows as i64; - let end = start + dim.num_rows as i64; + let (n_row, _n_col) = harp::Matrix::dim(self.vector.sexp)?; + let start = column as i64 * n_row as i64; + let end = start + n_row as i64; Ok(start..end) } } diff --git a/crates/harp/src/vector/names.rs b/crates/harp/src/vector/names.rs index 0c23787bc..845f4c7e7 100644 --- a/crates/harp/src/vector/names.rs +++ b/crates/harp/src/vector/names.rs @@ -4,12 +4,9 @@ // Copyright (C) 2022 Posit Software, PBC. All rights reserved. // // -use libr::R_NamesSymbol; -use libr::Rf_getAttrib; use libr::SEXP; use crate::object::RObject; -use crate::utils::r_is_null; use crate::vector::CharacterVector; use crate::vector::Vector; @@ -21,18 +18,17 @@ pub struct Names { impl Names { pub fn new(x: SEXP, default: impl Fn(isize) -> String + 'static) -> Self { unsafe { - let names = RObject::new(Rf_getAttrib(x, R_NamesSymbol)); + let names = RObject::view(x).get_attribute_names(); let default = Box::new(default); - if r_is_null(*names) { - Self { - data: None, + match names { + Some(names) => Self { + data: Some(CharacterVector::new_unchecked(names.sexp)), default, - } - } else { - Self { - data: Some(CharacterVector::new_unchecked(names)), + }, + None => Self { + data: None, default, - } + }, } } }