From 509b6c0f2f48eec5d093c67fbb56dcc4754917b1 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 6 Mar 2025 08:49:20 -0500 Subject: [PATCH 1/2] test(memtable): add test for empty memtable --- ibis/backends/tests/test_generic.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 57d47ff703c6..55972401445a 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -40,6 +40,7 @@ np = pytest.importorskip("numpy") pd = pytest.importorskip("pandas") tm = pytest.importorskip("pandas.testing") +pa = pytest.importorskip("pyarrow") NULL_BACKEND_TYPES = { "bigquery": "NULL", @@ -2516,3 +2517,19 @@ def test_table_describe_with_multiple_decimal_columns(con): expr = t.describe() result = con.to_pyarrow(expr) assert len(result) == 2 + + +@pytest.mark.parametrize( + "input", + [ + [], + pa.table([[]], pa.schema({"x": pa.int64()})), + ], +) +@pytest.mark.notyet(["druid"], raises=PyDruidProgrammingError) +@pytest.mark.notyet( + ["flink"], raises=ValueError, reason="flink doesn't support empty tables" +) +def test_empty_memtable(con, input): + t = ibis.memtable(input, schema={"x": "int64"}) + assert not len(con.to_pyarrow(t)) From 22826a61846c480b90ce957afa0e977e628b0312 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 6 Mar 2025 14:03:00 -0500 Subject: [PATCH 2/2] fix(memtable): ensure that empty memtables can be converted to pyarrow --- ibis/backends/tests/test_generic.py | 6 ++---- ibis/expr/api.py | 13 +++++++++++-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 55972401445a..962749c92f20 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -2521,10 +2521,8 @@ def test_table_describe_with_multiple_decimal_columns(con): @pytest.mark.parametrize( "input", - [ - [], - pa.table([[]], pa.schema({"x": pa.int64()})), - ], + [[], pa.table([[]], pa.schema({"x": pa.int64()}))], + ids=["list", "pyarrow-table"], ) @pytest.mark.notyet(["druid"], raises=PyDruidProgrammingError) @pytest.mark.notyet( diff --git a/ibis/expr/api.py b/ibis/expr/api.py index e4b512555151..8bf56710ab6e 100644 --- a/ibis/expr/api.py +++ b/ibis/expr/api.py @@ -429,15 +429,24 @@ def _memtable( schema: SchemaLike | None = None, name: str | None = None, ) -> Table: + import ibis + if hasattr(data, "__arrow_c_stream__"): # Support objects exposing arrow's PyCapsule interface import pyarrow as pa - data = pa.table(data) + data = pa.table( + data, + schema=ibis.schema(schema).to_pyarrow() if schema is not None else None, + ) else: import pandas as pd - data = pd.DataFrame(data, columns=columns) + data = pd.DataFrame( + data, + columns=columns + or (ibis.schema(schema).names if schema is not None else None), + ) return _memtable(data, columns=columns, schema=schema, name=name)