Skip to content

Only force ALTREP compact row names (i.e. from duckplyr) if requested #745

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Apr 1, 2025
42 changes: 22 additions & 20 deletions crates/ark/src/data_explorer/r_data_explorer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,9 @@ use harp::exec::RFunction;
use harp::exec::RFunctionExt;
use harp::object::RObject;
use harp::r_symbol;
use harp::table_kind;
use harp::tbl_get_column;
use harp::TableInfo;
use harp::ColumnNames;
use harp::TableKind;
use itertools::Itertools;
use libr::*;
Expand Down Expand Up @@ -562,22 +563,27 @@ impl RDataExplorer {
fn r_get_shape(table: RObject) -> anyhow::Result<DataObjectShape> {
unsafe {
let table = table.clone();
let object = *table;

let info = table_info_or_bail(object)?;
let Some(kind) = table_kind(table.sexp) else {
return Err(anyhow!("Unsupported type for the data viewer"));
};

let harp::TableInfo {
kind,
dims:
harp::TableDim {
num_rows,
num_cols: total_num_columns,
},
col_names: column_names,
} = info;
// `DataFrame::n_row()` will materialize duckplyr compact row names, but we
// are ok with that for the data explorer and don't provide a hook to opt out.
let (n_row, n_col, column_names) = match kind {
TableKind::Dataframe => (
harp::DataFrame::n_row(table.sexp)?,
harp::DataFrame::n_col(table.sexp)?,
ColumnNames::from_data_frame(table.sexp)?,
),
TableKind::Matrix => {
let (n_row, n_col) = harp::Matrix::dim(table.sexp)?;
(n_row, n_col, ColumnNames::from_matrix(table.sexp)?)
},
};

let mut column_schemas = Vec::<ColumnSchema>::new();
for i in 0..(total_num_columns as isize) {
for i in 0..(n_col as isize) {
let column_name = match column_names.get_unchecked(i) {
Some(name) => name,
None => String::from(""),
Expand All @@ -586,8 +592,8 @@ impl RDataExplorer {
// TODO: handling for nested data frame columns

let col = match kind {
harp::TableKind::Dataframe => VECTOR_ELT(object, i),
harp::TableKind::Matrix => object,
harp::TableKind::Dataframe => VECTOR_ELT(table.sexp, i),
harp::TableKind::Matrix => table.sexp,
};

let type_name = WorkspaceVariableDisplayType::from(col, false).display_type;
Expand All @@ -610,7 +616,7 @@ impl RDataExplorer {
Ok(DataObjectShape {
columns: column_schemas,
kind,
num_rows,
num_rows: n_row,
})
}
}
Expand Down Expand Up @@ -1073,10 +1079,6 @@ impl RDataExplorer {
}
}

fn table_info_or_bail(x: SEXP) -> anyhow::Result<TableInfo> {
harp::table_info(x).ok_or(anyhow!("Unsupported type for data viewer"))
}

/// Open an R object in the data viewer.
///
/// This function is called from the R side to open an R object in the data viewer.
Expand Down
6 changes: 4 additions & 2 deletions crates/ark/src/modules/positron/methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ ark_methods_table$ark_positron_variable_get_children <- new.env(
)
lockEnvironment(ark_methods_table, TRUE)

ark_methods_allowed_packages <- c("torch", "reticulate")
ark_methods_allowed_packages <- c("torch", "reticulate", "duckplyr")

# check if the calling package is allowed to touch the methods table
check_caller_allowed <- function() {
Expand Down Expand Up @@ -77,7 +77,9 @@ check_register_args <- function(generic, class) {
check_register_args(generic, class)

for (cls in class) {
if (exists(cls, envir = ark_methods_table[[generic]], inherits = FALSE)) {
if (
exists(cls, envir = ark_methods_table[[generic]], inherits = FALSE)
) {
remove(list = cls, envir = ark_methods_table[[generic]])
}
}
Expand Down
2 changes: 1 addition & 1 deletion crates/ark/src/srcref.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ fn generate_source(
}

// Ignore functions that already have sources
if let Some(_) = old.attr("srcref") {
if let Some(_) = old.get_attribute("srcref") {
return Ok(None);
}

Expand Down
96 changes: 53 additions & 43 deletions crates/ark/src/variables/variable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ use harp::object::RObject;
use harp::r_null;
use harp::r_symbol;
use harp::symbol::RSymbol;
use harp::table_info;
use harp::utils::pairlist_size;
use harp::utils::r_altrep_class;
use harp::utils::r_assert_type;
Expand All @@ -51,7 +50,7 @@ use harp::vector::CharacterVector;
use harp::vector::IntegerVector;
use harp::vector::Vector;
use harp::List;
use harp::TableDim;
use harp::TableKind;
use itertools::Itertools;
use libr::*;
use stdext::local;
Expand Down Expand Up @@ -90,7 +89,7 @@ impl WorkspaceVariableDisplayValue {

let out = match r_typeof(value) {
NILSXP => Self::new(String::from("NULL"), false),
VECSXP if r_inherits(value, "data.frame") => Self::from_data_frame(value),
VECSXP if r_is_data_frame(value) => Self::from_data_frame(value)?,
VECSXP if !r_inherits(value, "POSIXlt") => Self::from_list(value),
LISTSXP => Self::empty(),
SYMSXP if value == unsafe { R_MissingArg } => {
Expand Down Expand Up @@ -157,15 +156,17 @@ impl WorkspaceVariableDisplayValue {
Ok(Self::new(display_value, truncated))
}

fn from_data_frame(value: SEXP) -> Self {
let dim = match unsafe { harp::df_dim(value) } {
Ok(dim) => dim,
// FIXME: Needs more type safety
Err(_) => TableDim {
num_rows: -1,
num_cols: -1,
},
};
fn from_data_frame(value: SEXP) -> anyhow::Result<Self> {
// Classes should provide an `ark_positron_variable_display_value()` method
// if they need to opt out of ALTREP materialization here.
let n_row = harp::DataFrame::n_row(value)?;
let n_col = harp::DataFrame::n_col(value)?;

let row_word = plural("row", n_row);
let col_word = plural("column", n_col);

let n_row = n_row.to_string();
let n_col = n_col.to_string();

let class = match r_classes(value) {
None => String::from(""),
Expand All @@ -175,15 +176,9 @@ impl WorkspaceVariableDisplayValue {
},
};

let value = format!(
"[{} {} x {} {}]{}",
dim.num_rows,
plural("row", dim.num_rows),
dim.num_cols,
plural("column", dim.num_cols),
class
);
Self::new(value, false)
let value = format!("[{n_row} {row_word} x {n_col} {col_word}]{class}");

Ok(Self::new(value, false))
}

fn from_list(value: SEXP) -> Self {
Expand Down Expand Up @@ -305,13 +300,7 @@ impl WorkspaceVariableDisplayValue {
}

fn from_matrix(value: SEXP) -> anyhow::Result<Self> {
let (n_row, n_col) = match harp::table_info(value) {
Some(info) => (info.dims.num_rows, info.dims.num_cols),
None => {
log::error!("Failed to get matrix dimensions");
(-1, -1)
},
};
let (n_row, n_col) = harp::Matrix::dim(value)?;

let class = match r_classes(value) {
None => String::from(" <matrix>"),
Expand Down Expand Up @@ -496,13 +485,23 @@ impl WorkspaceVariableDisplayType {
let dfclass = classes.get_unchecked(0).unwrap();
match include_length {
true => {
let dim = table_info(value);
let shape = match dim {
Some(info) => {
format!("{}, {}", info.dims.num_rows, info.dims.num_cols)
// Classes should provide an `ark_positron_variable_display_type()` method
// if they need to opt out of ALTREP materialization here.
let n_row: String = match harp::DataFrame::n_row(value) {
Ok(n_row) => n_row.to_string(),
Err(error) => {
log::error!("Can't compute number of rows: {error}");
String::from("?")
},
None => String::from("?, ?"),
};
let n_col = match harp::DataFrame::n_col(value) {
Ok(n_col) => n_col.to_string(),
Err(error) => {
log::error!("Can't compute number of columns: {error}");
String::from("?")
},
};
let shape = format!("{n_row}, {n_col}");
let display_type = format!("{} [{}]", dfclass, shape);
Self::simple(display_type)
},
Expand Down Expand Up @@ -731,8 +730,26 @@ impl PositronVariable {

fn variable_length(x: SEXP) -> usize {
// Check for tabular data
if let Some(info) = harp::table_info(x) {
return info.dims.num_cols as usize;
// We don't currently provide an ark hook for variable length, so we are somewhat
// careful to only query n-col and never n-row for data frames, to avoid duckplyr
// query materialization.
if let Some(kind) = harp::table_kind(x) {
return match kind {
TableKind::Dataframe => match harp::DataFrame::n_col(x) {
Ok(n_col) => n_col as usize,
Err(error) => {
log::error!("Can't compute number of data frame columns: {error}");
0
},
},
TableKind::Matrix => match harp::Matrix::dim(x) {
Ok((_n_row, n_col)) => n_col as usize,
Err(error) => {
log::error!("Can't compute matrix dimensions: {error}");
0
},
},
};
}

// Otherwise treat as vector
Expand Down Expand Up @@ -1201,14 +1218,7 @@ impl PositronVariable {

fn inspect_matrix(matrix: SEXP) -> anyhow::Result<Vec<Variable>> {
let matrix = RObject::new(matrix);

let n_col = match harp::table_info(matrix.sexp) {
Some(info) => info.dims.num_cols,
None => {
log::warn!("Unexpected matrix object. Couldn't get dimensions.");
return Ok(vec![]);
},
};
let (_n_row, n_col) = harp::Matrix::dim(matrix.sexp)?;

let make_variable = |access_key, display_name, display_value, is_truncated| Variable {
access_key,
Expand Down
8 changes: 4 additions & 4 deletions crates/harp/src/attrib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ fn zap_srcref_fn(x: SEXP) -> RObject {
unsafe {
let x = RObject::view(x).shallow_duplicate();

x.set_attr("srcref", r_null());
x.set_attribute("srcref", r_null());
libr::SET_BODY(x.sexp, zap_srcref(libr::R_ClosureExpr(x.sexp)).sexp);

x
Expand Down Expand Up @@ -64,7 +64,7 @@ fn zap_srcref_expr(x: SEXP) -> RObject {

fn zap_srcref_attrib(x: SEXP) {
let x = RObject::view(x);
x.set_attr("srcfile", r_null());
x.set_attr("srcref", r_null());
x.set_attr("wholeSrcref", r_null());
x.set_attribute("srcfile", r_null());
x.set_attribute("srcref", r_null());
x.set_attribute("wholeSrcref", r_null());
}
57 changes: 57 additions & 0 deletions crates/harp/src/column_names.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
use libr::*;

use crate::exec::RFunction;
use crate::exec::RFunctionExt;
use crate::utils::*;
use crate::vector::Vector;
use crate::CharacterVector;
use crate::RObject;

/// Column names
///
/// Column names represent an optional character vector of names. This class is mostly
/// useful for ergonomics, since [ColumnNames::get_unchecked()] will propagate [None]
/// when used on a vector without column names.
pub struct ColumnNames {
names: Option<CharacterVector>,
}

impl ColumnNames {
pub fn new(names: SEXP) -> Self {
let names = if r_typeof(names) == STRSXP {
Some(unsafe { CharacterVector::new_unchecked(names) })
} else {
None
};
Self { names }
}

pub fn from_data_frame(x: SEXP) -> crate::Result<Self> {
if !r_is_data_frame(x) {
return Err(crate::anyhow!("`x` must be a data frame."));
}
let Some(column_names) = RObject::view(x).get_attribute_names() else {
return Err(crate::anyhow!("`x` must have column names."));
};
Ok(Self::new(column_names.sexp))
}

pub fn from_matrix(x: SEXP) -> crate::Result<Self> {
if !r_is_matrix(x) {
return Err(crate::anyhow!("`x` must be a matrix."));
}
let column_names = RFunction::from("colnames").add(x).call()?;
Ok(Self::new(column_names.sexp))
}

pub fn get_unchecked(&self, index: isize) -> Option<String> {
if let Some(names) = &self.names {
if let Some(name) = names.get_unchecked(index) {
if name.len() > 0 {
return Some(name);
}
}
}
None
}
}
Loading