Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SNOW-1936603: fix limit 0 bug with show #3090

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@

### Snowpark Python API Updates

#### Bug Fixes

- Fixed a bug where `df.show` would still show rows after a `df.limit(0)` call was applied to it.

### Snowpark Local Testing Updates

#### New Features
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1375,7 +1375,7 @@ def limit(self, n: int, *, offset: int = 0) -> "SelectStatement":
else:
new = copy(self)
new.from_ = self.from_.to_subqueryable()
new.limit_ = min(self.limit_, n) if self.limit_ else n
new.limit_ = min(self.limit_, n) if self.limit_ is not None else n
new.offset = offset or self.offset
new.column_states = self.column_states
new.pre_actions = new.from_.pre_actions
Expand Down
2 changes: 1 addition & 1 deletion src/snowflake/snowpark/mock/_select_statement.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,7 @@ def set_operator(
def limit(self, n: int, *, offset: int = 0) -> "SelectStatement":
new = copy(self)
new.from_ = self.from_.to_subqueryable()
new.limit_ = min(self.limit_, n) if self.limit_ else n
new.limit_ = min(self.limit_, n) if self.limit_ is not None else n
new.offset = (self.offset + offset) if self.offset else offset
new._column_states = self._column_states
return new
Expand Down
101 changes: 101 additions & 0 deletions tests/ast/data/df_limit.test
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ df = df.select(col("*")).limit(3)

df = df.select(col("*")).limit(n=1, offset=1)

df = df.select(col("*")).limit(0)

## EXPECTED UNPARSER OUTPUT

df = session.table("table1")
Expand All @@ -18,6 +20,10 @@ df = df.select(col("*"))

df = df.limit(1, 1)

df = df.select(col("*"))

df = df.limit(0, 0)

## EXPECTED ENCODED AST

interned_value_table {
Expand Down Expand Up @@ -254,6 +260,101 @@ body {
}
}
}
body {
assign {
expr {
dataframe_select__columns {
cols {
args {
apply_expr {
fn {
builtin_fn {
name {
name {
name_flat {
name: "col"
}
}
}
}
}
pos_args {
string_val {
src {
end_column: 31
end_line: 31
file: 2
start_column: 23
start_line: 31
}
v: "*"
}
}
src {
end_column: 31
end_line: 31
file: 2
start_column: 23
start_line: 31
}
}
}
variadic: true
}
df {
dataframe_ref {
id {
bitfield1: 5
}
}
}
src {
end_column: 32
end_line: 31
file: 2
start_column: 13
start_line: 31
}
}
}
symbol {
value: "df"
}
uid: 6
var_id {
bitfield1: 6
}
}
}
body {
assign {
expr {
dataframe_limit {
df {
dataframe_ref {
id {
bitfield1: 6
}
}
}
src {
end_column: 41
end_line: 31
file: 2
start_column: 13
start_line: 31
}
}
}
symbol {
value: "df"
}
uid: 7
var_id {
bitfield1: 7
}
}
}
client_ast_version: 1
client_language {
python_language {
Expand Down
41 changes: 41 additions & 0 deletions tests/integ/scala/test_dataframe_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -3180,3 +3180,44 @@ def test_to_df(session):
assert "Invalid input type in to_df(), expected str or a list of strs." in str(
exc_info
)


def test_limit(session):
df = session.create_dataframe([[1, 2], [2, 3]]).to_df("a", "b")
# run show(), make sure no error is reported
res = df._show_string(_emit_ast=session.ast_enabled)
assert (
res
== """
-------------
|"A" |"B" |
-------------
|1 |2 |
|2 |3 |
-------------\n""".lstrip()
)

df1 = df.limit(1)
res = df1._show_string(_emit_ast=session.ast_enabled)
assert (
res
== """
-------------
|"A" |"B" |
-------------
|1 |2 |
-------------\n""".lstrip()
)

df2 = df.limit(0)

res = df2._show_string(_emit_ast=session.ast_enabled)
assert (
res
== """
-------------
|"A" |"B" |
-------------
| | |
-------------\n""".lstrip()
)
16 changes: 13 additions & 3 deletions tests/integ/test_simplifier_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -825,15 +825,25 @@ def test_filter(setup_reduce_cast, session, simplifier_table):
def test_limit(setup_reduce_cast, session, simplifier_table):
df = session.table(simplifier_table)
df = df.limit(10)
assert df.queries["queries"][-1] == f"SELECT * FROM {simplifier_table} LIMIT 10"
assert (
df.queries["queries"][-1].lower()
== f"select * from {simplifier_table.lower()} limit 10"
)

df = session.sql(f"select * from {simplifier_table}")
df = df.limit(10)
# we don't know if the original sql already has top/limit clause using a subquery is necessary.
# or else there will be SQL compile error.
assert (
df.queries["queries"][-1]
== f"SELECT * FROM (select * from {simplifier_table}) LIMIT 10"
df.queries["queries"][-1].lower()
== f"select * from (select * from {simplifier_table.lower()}) limit 10"
)

df = session.sql(f"select * from {simplifier_table}")
df = df.limit(0)
assert (
df.queries["queries"][-1].lower()
== f"select * from (select * from {simplifier_table.lower()}) limit 0"
)

# test for non-select sql statement
Expand Down
Loading