From 26f8a772f796493755678badfb2194d38e554a8e Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Fri, 15 Aug 2025 23:21:35 +0000 Subject: [PATCH 1/6] chore: implement comparison_ops for sqlglot compiler --- .../compile/sqlglot/expressions/binary_compiler.py | 11 +++++++++++ tests/system/small/engines/test_comparison_ops.py | 3 ++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/bigframes/core/compile/sqlglot/expressions/binary_compiler.py b/bigframes/core/compile/sqlglot/expressions/binary_compiler.py index b5d665e2e5..fc0e59fc7a 100644 --- a/bigframes/core/compile/sqlglot/expressions/binary_compiler.py +++ b/bigframes/core/compile/sqlglot/expressions/binary_compiler.py @@ -73,6 +73,17 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: ) +@BINARY_OP_REGISTRATION.register(ops.eq_op) +def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: + left_expr = left.expr + if left.dtype == dtypes.BOOL_DTYPE: + left_expr = sge.Cast(this=left_expr, to="INT64") + right_expr = right.expr + if right.dtype == dtypes.BOOL_DTYPE: + right_expr = sge.Cast(this=right_expr, to="INT64") + return sge.EQ(this=left_expr, expression=right_expr) + + @BINARY_OP_REGISTRATION.register(ops.div_op) def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: left_expr = left.expr diff --git a/tests/system/small/engines/test_comparison_ops.py b/tests/system/small/engines/test_comparison_ops.py index fefff93f58..6b97c8bfa8 100644 --- a/tests/system/small/engines/test_comparison_ops.py +++ b/tests/system/small/engines/test_comparison_ops.py @@ -48,7 +48,8 @@ def apply_op_pairwise( return new_arr -@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True) +# @pytest.mark.parametrize("engine", ["polars", "bq", "bq-sqlglot"], indirect=True) +@pytest.mark.parametrize("engine", ["bq-sqlglot"], indirect=True) @pytest.mark.parametrize( "op", [ From 6f83b54b613e27ee18bfb9d7eecb9bf911370a44 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Tue, 19 Aug 2025 23:27:24 +0000 Subject: [PATCH 2/6] implement eq_null_match_op --- .../sqlglot/expressions/binary_compiler.py | 20 +++++++++++++++++++ .../test_eq_null_match/out.sql | 14 +++++++++++++ .../expressions/test_binary_compiler.py | 6 ++++++ 3 files changed, 40 insertions(+) create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_eq_null_match/out.sql diff --git a/bigframes/core/compile/sqlglot/expressions/binary_compiler.py b/bigframes/core/compile/sqlglot/expressions/binary_compiler.py index fc0e59fc7a..b2440dca54 100644 --- a/bigframes/core/compile/sqlglot/expressions/binary_compiler.py +++ b/bigframes/core/compile/sqlglot/expressions/binary_compiler.py @@ -84,6 +84,26 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: return sge.EQ(this=left_expr, expression=right_expr) +@BINARY_OP_REGISTRATION.register(ops.eq_null_match_op) +def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: + left_expr = left.expr + if left.dtype == dtypes.BOOL_DTYPE and right.dtype != dtypes.BOOL_DTYPE: + left_expr = sge.Cast(this=left_expr, to="INT64") + + right_expr = right.expr + if right.dtype == dtypes.BOOL_DTYPE and left.dtype != dtypes.BOOL_DTYPE: + right_expr = sge.Cast(this=right_expr, to="INT64") + + sentinel = sge.convert("$NULL_SENTINEL$") + left_coalesce = sge.Coalesce( + this=sge.Cast(this=left_expr, to="STRING"), expressions=[sentinel] + ) + right_coalesce = sge.Coalesce( + this=sge.Cast(this=right_expr, to="STRING"), expressions=[sentinel] + ) + return sge.EQ(this=left_coalesce, expression=right_coalesce) + + @BINARY_OP_REGISTRATION.register(ops.div_op) def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: left_expr = left.expr diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_eq_null_match/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_eq_null_match/out.sql new file mode 100644 index 0000000000..90cbcfe5c7 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_eq_null_match/out.sql @@ -0,0 +1,14 @@ +WITH `bfcte_0` AS ( + SELECT + `bool_col` AS `bfcol_0`, + `int64_col` AS `bfcol_1` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + COALESCE(CAST(`bfcol_1` AS STRING), '$NULL_SENTINEL$') = COALESCE(CAST(CAST(`bfcol_0` AS INT64) AS STRING), '$NULL_SENTINEL$') AS `bfcol_4` + FROM `bfcte_0` +) +SELECT + `bfcol_4` AS `int64_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py b/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py index 0d3fd42607..f3e12ee556 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py @@ -102,6 +102,12 @@ def test_div_timedelta(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(bf_df.sql, "out.sql") +def test_eq_null_match(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["int64_col", "bool_col"]] + sql = _apply_binary_op(bf_df, ops.eq_null_match_op, "int64_col", "bool_col") + snapshot.assert_match(sql, "out.sql") + + def test_json_set(json_types_df: bpd.DataFrame, snapshot): bf_df = json_types_df[["json_col"]] sql = _apply_binary_op( From 7ad273a361f7986a6013a644095d2636fc630593 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Tue, 19 Aug 2025 23:41:48 +0000 Subject: [PATCH 3/6] implement ne_op --- .../sqlglot/expressions/binary_compiler.py | 11 ++++ .../test_ne_numeric/out.sql | 54 +++++++++++++++++++ .../expressions/test_binary_compiler.py | 12 +++++ 3 files changed, 77 insertions(+) create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_ne_numeric/out.sql diff --git a/bigframes/core/compile/sqlglot/expressions/binary_compiler.py b/bigframes/core/compile/sqlglot/expressions/binary_compiler.py index b2440dca54..3a99adb073 100644 --- a/bigframes/core/compile/sqlglot/expressions/binary_compiler.py +++ b/bigframes/core/compile/sqlglot/expressions/binary_compiler.py @@ -149,6 +149,17 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: return result +@BINARY_OP_REGISTRATION.register(ops.ne_op) +def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: + left_expr = left.expr + if left.dtype == dtypes.BOOL_DTYPE: + left_expr = sge.Cast(this=left_expr, to="INT64") + right_expr = right.expr + if right.dtype == dtypes.BOOL_DTYPE: + right_expr = sge.Cast(this=right_expr, to="INT64") + return sge.NEQ(this=left_expr, expression=right_expr) + + @BINARY_OP_REGISTRATION.register(ops.sub_op) def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: if dtypes.is_numeric(left.dtype) and dtypes.is_numeric(right.dtype): diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_ne_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_ne_numeric/out.sql new file mode 100644 index 0000000000..6fba4b960f --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_ne_numeric/out.sql @@ -0,0 +1,54 @@ +WITH `bfcte_0` AS ( + SELECT + `bool_col` AS `bfcol_0`, + `int64_col` AS `bfcol_1`, + `rowindex` AS `bfcol_2` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + `bfcol_2` AS `bfcol_6`, + `bfcol_1` AS `bfcol_7`, + `bfcol_0` AS `bfcol_8`, + `bfcol_1` <> `bfcol_1` AS `bfcol_9` + FROM `bfcte_0` +), `bfcte_2` AS ( + SELECT + *, + `bfcol_6` AS `bfcol_14`, + `bfcol_7` AS `bfcol_15`, + `bfcol_8` AS `bfcol_16`, + `bfcol_9` AS `bfcol_17`, + `bfcol_7` <> 1 AS `bfcol_18` + FROM `bfcte_1` +), `bfcte_3` AS ( + SELECT + *, + `bfcol_14` AS `bfcol_24`, + `bfcol_15` AS `bfcol_25`, + `bfcol_16` AS `bfcol_26`, + `bfcol_17` AS `bfcol_27`, + `bfcol_18` AS `bfcol_28`, + `bfcol_15` <> CAST(`bfcol_16` AS INT64) AS `bfcol_29` + FROM `bfcte_2` +), `bfcte_4` AS ( + SELECT + *, + `bfcol_24` AS `bfcol_36`, + `bfcol_25` AS `bfcol_37`, + `bfcol_26` AS `bfcol_38`, + `bfcol_27` AS `bfcol_39`, + `bfcol_28` AS `bfcol_40`, + `bfcol_29` AS `bfcol_41`, + CAST(`bfcol_26` AS INT64) <> `bfcol_25` AS `bfcol_42` + FROM `bfcte_3` +) +SELECT + `bfcol_36` AS `rowindex`, + `bfcol_37` AS `int64_col`, + `bfcol_38` AS `bool_col`, + `bfcol_39` AS `int_ne_int`, + `bfcol_40` AS `int_ne_1`, + `bfcol_41` AS `int_ne_bool`, + `bfcol_42` AS `bool_ne_int` +FROM `bfcte_4` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py b/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py index f3e12ee556..de99ae5538 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py @@ -175,3 +175,15 @@ def test_mul_timedelta(scalar_types_df: bpd.DataFrame, snapshot): def test_obj_make_ref(scalar_types_df: bpd.DataFrame, snapshot): blob_df = scalar_types_df["string_col"].str.to_blob() snapshot.assert_match(blob_df.to_frame().sql, "out.sql") + + +def test_ne_numeric(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["int64_col", "bool_col"]] + + bf_df["int_ne_int"] = bf_df["int64_col"] != bf_df["int64_col"] + bf_df["int_ne_1"] = bf_df["int64_col"] != 1 + + bf_df["int_ne_bool"] = bf_df["int64_col"] != bf_df["bool_col"] + bf_df["bool_ne_int"] = bf_df["bool_col"] != bf_df["int64_col"] + + snapshot.assert_match(bf_df.sql, "out.sql") From 68a77eb8b5c55d01cfa43e0c0b60f3b1d5566b97 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Tue, 19 Aug 2025 23:46:06 +0000 Subject: [PATCH 4/6] implement gt_op --- .../sqlglot/expressions/binary_compiler.py | 11 ++++ .../test_gt_numeric/out.sql | 54 +++++++++++++++++++ .../expressions/test_binary_compiler.py | 12 +++++ 3 files changed, 77 insertions(+) create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_gt_numeric/out.sql diff --git a/bigframes/core/compile/sqlglot/expressions/binary_compiler.py b/bigframes/core/compile/sqlglot/expressions/binary_compiler.py index 3a99adb073..c146471655 100644 --- a/bigframes/core/compile/sqlglot/expressions/binary_compiler.py +++ b/bigframes/core/compile/sqlglot/expressions/binary_compiler.py @@ -125,6 +125,17 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: return sge.GTE(this=left.expr, expression=right.expr) +@BINARY_OP_REGISTRATION.register(ops.gt_op) +def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: + left_expr = left.expr + if left.dtype == dtypes.BOOL_DTYPE: + left_expr = sge.Cast(this=left_expr, to="INT64") + right_expr = right.expr + if right.dtype == dtypes.BOOL_DTYPE: + right_expr = sge.Cast(this=right_expr, to="INT64") + return sge.GT(this=left_expr, expression=right_expr) + + @BINARY_OP_REGISTRATION.register(ops.JSONSet) def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: return sge.func("JSON_SET", left.expr, sge.convert(op.json_path), right.expr) diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_gt_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_gt_numeric/out.sql new file mode 100644 index 0000000000..b0c8768850 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_gt_numeric/out.sql @@ -0,0 +1,54 @@ +WITH `bfcte_0` AS ( + SELECT + `bool_col` AS `bfcol_0`, + `int64_col` AS `bfcol_1`, + `rowindex` AS `bfcol_2` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + `bfcol_2` AS `bfcol_6`, + `bfcol_1` AS `bfcol_7`, + `bfcol_0` AS `bfcol_8`, + `bfcol_1` > `bfcol_1` AS `bfcol_9` + FROM `bfcte_0` +), `bfcte_2` AS ( + SELECT + *, + `bfcol_6` AS `bfcol_14`, + `bfcol_7` AS `bfcol_15`, + `bfcol_8` AS `bfcol_16`, + `bfcol_9` AS `bfcol_17`, + `bfcol_7` > 1 AS `bfcol_18` + FROM `bfcte_1` +), `bfcte_3` AS ( + SELECT + *, + `bfcol_14` AS `bfcol_24`, + `bfcol_15` AS `bfcol_25`, + `bfcol_16` AS `bfcol_26`, + `bfcol_17` AS `bfcol_27`, + `bfcol_18` AS `bfcol_28`, + `bfcol_15` > CAST(`bfcol_16` AS INT64) AS `bfcol_29` + FROM `bfcte_2` +), `bfcte_4` AS ( + SELECT + *, + `bfcol_24` AS `bfcol_36`, + `bfcol_25` AS `bfcol_37`, + `bfcol_26` AS `bfcol_38`, + `bfcol_27` AS `bfcol_39`, + `bfcol_28` AS `bfcol_40`, + `bfcol_29` AS `bfcol_41`, + CAST(`bfcol_26` AS INT64) > `bfcol_25` AS `bfcol_42` + FROM `bfcte_3` +) +SELECT + `bfcol_36` AS `rowindex`, + `bfcol_37` AS `int64_col`, + `bfcol_38` AS `bool_col`, + `bfcol_39` AS `int_gt_int`, + `bfcol_40` AS `int_gt_1`, + `bfcol_41` AS `int_gt_bool`, + `bfcol_42` AS `bool_gt_int` +FROM `bfcte_4` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py b/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py index de99ae5538..f52b32b5ad 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py @@ -108,6 +108,18 @@ def test_eq_null_match(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") +def test_gt_numeric(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["int64_col", "bool_col"]] + + bf_df["int_gt_int"] = bf_df["int64_col"] > bf_df["int64_col"] + bf_df["int_gt_1"] = bf_df["int64_col"] > 1 + + bf_df["int_gt_bool"] = bf_df["int64_col"] > bf_df["bool_col"] + bf_df["bool_gt_int"] = bf_df["bool_col"] > bf_df["int64_col"] + + snapshot.assert_match(bf_df.sql, "out.sql") + + def test_json_set(json_types_df: bpd.DataFrame, snapshot): bf_df = json_types_df[["json_col"]] sql = _apply_binary_op( From f485d26797175bdc56d29a549cb4f2a8b56cb838 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Tue, 19 Aug 2025 23:53:15 +0000 Subject: [PATCH 5/6] implement lt_op --- .../sqlglot/expressions/binary_compiler.py | 11 ++++ .../test_lt_numeric/out.sql | 54 +++++++++++++++++++ .../expressions/test_binary_compiler.py | 12 +++++ 3 files changed, 77 insertions(+) create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_lt_numeric/out.sql diff --git a/bigframes/core/compile/sqlglot/expressions/binary_compiler.py b/bigframes/core/compile/sqlglot/expressions/binary_compiler.py index c146471655..ff09e7b5ad 100644 --- a/bigframes/core/compile/sqlglot/expressions/binary_compiler.py +++ b/bigframes/core/compile/sqlglot/expressions/binary_compiler.py @@ -141,6 +141,17 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: return sge.func("JSON_SET", left.expr, sge.convert(op.json_path), right.expr) +@BINARY_OP_REGISTRATION.register(ops.lt_op) +def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: + left_expr = left.expr + if left.dtype == dtypes.BOOL_DTYPE: + left_expr = sge.Cast(this=left_expr, to="INT64") + right_expr = right.expr + if right.dtype == dtypes.BOOL_DTYPE: + right_expr = sge.Cast(this=right_expr, to="INT64") + return sge.LT(this=left_expr, expression=right_expr) + + @BINARY_OP_REGISTRATION.register(ops.mul_op) def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: left_expr = left.expr diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_lt_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_lt_numeric/out.sql new file mode 100644 index 0000000000..b244e3cbcc --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_lt_numeric/out.sql @@ -0,0 +1,54 @@ +WITH `bfcte_0` AS ( + SELECT + `bool_col` AS `bfcol_0`, + `int64_col` AS `bfcol_1`, + `rowindex` AS `bfcol_2` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + `bfcol_2` AS `bfcol_6`, + `bfcol_1` AS `bfcol_7`, + `bfcol_0` AS `bfcol_8`, + `bfcol_1` < `bfcol_1` AS `bfcol_9` + FROM `bfcte_0` +), `bfcte_2` AS ( + SELECT + *, + `bfcol_6` AS `bfcol_14`, + `bfcol_7` AS `bfcol_15`, + `bfcol_8` AS `bfcol_16`, + `bfcol_9` AS `bfcol_17`, + `bfcol_7` < 1 AS `bfcol_18` + FROM `bfcte_1` +), `bfcte_3` AS ( + SELECT + *, + `bfcol_14` AS `bfcol_24`, + `bfcol_15` AS `bfcol_25`, + `bfcol_16` AS `bfcol_26`, + `bfcol_17` AS `bfcol_27`, + `bfcol_18` AS `bfcol_28`, + `bfcol_15` < CAST(`bfcol_16` AS INT64) AS `bfcol_29` + FROM `bfcte_2` +), `bfcte_4` AS ( + SELECT + *, + `bfcol_24` AS `bfcol_36`, + `bfcol_25` AS `bfcol_37`, + `bfcol_26` AS `bfcol_38`, + `bfcol_27` AS `bfcol_39`, + `bfcol_28` AS `bfcol_40`, + `bfcol_29` AS `bfcol_41`, + CAST(`bfcol_26` AS INT64) < `bfcol_25` AS `bfcol_42` + FROM `bfcte_3` +) +SELECT + `bfcol_36` AS `rowindex`, + `bfcol_37` AS `int64_col`, + `bfcol_38` AS `bool_col`, + `bfcol_39` AS `int_lt_int`, + `bfcol_40` AS `int_lt_1`, + `bfcol_41` AS `int_lt_bool`, + `bfcol_42` AS `bool_lt_int` +FROM `bfcte_4` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py b/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py index f52b32b5ad..53ca8a8387 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py @@ -129,6 +129,18 @@ def test_json_set(json_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") +def test_lt_numeric(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["int64_col", "bool_col"]] + + bf_df["int_lt_int"] = bf_df["int64_col"] < bf_df["int64_col"] + bf_df["int_lt_1"] = bf_df["int64_col"] < 1 + + bf_df["int_lt_bool"] = bf_df["int64_col"] < bf_df["bool_col"] + bf_df["bool_lt_int"] = bf_df["bool_col"] < bf_df["int64_col"] + + snapshot.assert_match(bf_df.sql, "out.sql") + + def test_sub_numeric(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["int64_col", "bool_col"]] From 4b2a6518729b0d3156daffc362bd8928e44dc87d Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Wed, 20 Aug 2025 00:09:26 +0000 Subject: [PATCH 6/6] implement le --- .../sqlglot/expressions/binary_compiler.py | 19 ++++++- .../test_ge_numeric/out.sql | 54 +++++++++++++++++++ .../test_le_numeric/out.sql | 54 +++++++++++++++++++ .../expressions/test_binary_compiler.py | 24 +++++++++ 4 files changed, 150 insertions(+), 1 deletion(-) create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_ge_numeric/out.sql create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_le_numeric/out.sql diff --git a/bigframes/core/compile/sqlglot/expressions/binary_compiler.py b/bigframes/core/compile/sqlglot/expressions/binary_compiler.py index ff09e7b5ad..4b835eb2d7 100644 --- a/bigframes/core/compile/sqlglot/expressions/binary_compiler.py +++ b/bigframes/core/compile/sqlglot/expressions/binary_compiler.py @@ -122,7 +122,13 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: @BINARY_OP_REGISTRATION.register(ops.ge_op) def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: - return sge.GTE(this=left.expr, expression=right.expr) + left_expr = left.expr + if left.dtype == dtypes.BOOL_DTYPE: + left_expr = sge.Cast(this=left_expr, to="INT64") + right_expr = right.expr + if right.dtype == dtypes.BOOL_DTYPE: + right_expr = sge.Cast(this=right_expr, to="INT64") + return sge.GTE(this=left_expr, expression=right_expr) @BINARY_OP_REGISTRATION.register(ops.gt_op) @@ -152,6 +158,17 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: return sge.LT(this=left_expr, expression=right_expr) +@BINARY_OP_REGISTRATION.register(ops.le_op) +def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: + left_expr = left.expr + if left.dtype == dtypes.BOOL_DTYPE: + left_expr = sge.Cast(this=left_expr, to="INT64") + right_expr = right.expr + if right.dtype == dtypes.BOOL_DTYPE: + right_expr = sge.Cast(this=right_expr, to="INT64") + return sge.LTE(this=left_expr, expression=right_expr) + + @BINARY_OP_REGISTRATION.register(ops.mul_op) def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression: left_expr = left.expr diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_ge_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_ge_numeric/out.sql new file mode 100644 index 0000000000..494cb861a7 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_ge_numeric/out.sql @@ -0,0 +1,54 @@ +WITH `bfcte_0` AS ( + SELECT + `bool_col` AS `bfcol_0`, + `int64_col` AS `bfcol_1`, + `rowindex` AS `bfcol_2` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + `bfcol_2` AS `bfcol_6`, + `bfcol_1` AS `bfcol_7`, + `bfcol_0` AS `bfcol_8`, + `bfcol_1` >= `bfcol_1` AS `bfcol_9` + FROM `bfcte_0` +), `bfcte_2` AS ( + SELECT + *, + `bfcol_6` AS `bfcol_14`, + `bfcol_7` AS `bfcol_15`, + `bfcol_8` AS `bfcol_16`, + `bfcol_9` AS `bfcol_17`, + `bfcol_7` >= 1 AS `bfcol_18` + FROM `bfcte_1` +), `bfcte_3` AS ( + SELECT + *, + `bfcol_14` AS `bfcol_24`, + `bfcol_15` AS `bfcol_25`, + `bfcol_16` AS `bfcol_26`, + `bfcol_17` AS `bfcol_27`, + `bfcol_18` AS `bfcol_28`, + `bfcol_15` >= CAST(`bfcol_16` AS INT64) AS `bfcol_29` + FROM `bfcte_2` +), `bfcte_4` AS ( + SELECT + *, + `bfcol_24` AS `bfcol_36`, + `bfcol_25` AS `bfcol_37`, + `bfcol_26` AS `bfcol_38`, + `bfcol_27` AS `bfcol_39`, + `bfcol_28` AS `bfcol_40`, + `bfcol_29` AS `bfcol_41`, + CAST(`bfcol_26` AS INT64) >= `bfcol_25` AS `bfcol_42` + FROM `bfcte_3` +) +SELECT + `bfcol_36` AS `rowindex`, + `bfcol_37` AS `int64_col`, + `bfcol_38` AS `bool_col`, + `bfcol_39` AS `int_ge_int`, + `bfcol_40` AS `int_ge_1`, + `bfcol_41` AS `int_ge_bool`, + `bfcol_42` AS `bool_ge_int` +FROM `bfcte_4` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_le_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_le_numeric/out.sql new file mode 100644 index 0000000000..2f642d8cbb --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_le_numeric/out.sql @@ -0,0 +1,54 @@ +WITH `bfcte_0` AS ( + SELECT + `bool_col` AS `bfcol_0`, + `int64_col` AS `bfcol_1`, + `rowindex` AS `bfcol_2` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + `bfcol_2` AS `bfcol_6`, + `bfcol_1` AS `bfcol_7`, + `bfcol_0` AS `bfcol_8`, + `bfcol_1` <= `bfcol_1` AS `bfcol_9` + FROM `bfcte_0` +), `bfcte_2` AS ( + SELECT + *, + `bfcol_6` AS `bfcol_14`, + `bfcol_7` AS `bfcol_15`, + `bfcol_8` AS `bfcol_16`, + `bfcol_9` AS `bfcol_17`, + `bfcol_7` <= 1 AS `bfcol_18` + FROM `bfcte_1` +), `bfcte_3` AS ( + SELECT + *, + `bfcol_14` AS `bfcol_24`, + `bfcol_15` AS `bfcol_25`, + `bfcol_16` AS `bfcol_26`, + `bfcol_17` AS `bfcol_27`, + `bfcol_18` AS `bfcol_28`, + `bfcol_15` <= CAST(`bfcol_16` AS INT64) AS `bfcol_29` + FROM `bfcte_2` +), `bfcte_4` AS ( + SELECT + *, + `bfcol_24` AS `bfcol_36`, + `bfcol_25` AS `bfcol_37`, + `bfcol_26` AS `bfcol_38`, + `bfcol_27` AS `bfcol_39`, + `bfcol_28` AS `bfcol_40`, + `bfcol_29` AS `bfcol_41`, + CAST(`bfcol_26` AS INT64) <= `bfcol_25` AS `bfcol_42` + FROM `bfcte_3` +) +SELECT + `bfcol_36` AS `rowindex`, + `bfcol_37` AS `int64_col`, + `bfcol_38` AS `bool_col`, + `bfcol_39` AS `int_le_int`, + `bfcol_40` AS `int_le_1`, + `bfcol_41` AS `int_le_bool`, + `bfcol_42` AS `bool_le_int` +FROM `bfcte_4` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py b/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py index 53ca8a8387..c579cf76c2 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py @@ -120,6 +120,18 @@ def test_gt_numeric(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(bf_df.sql, "out.sql") +def test_ge_numeric(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["int64_col", "bool_col"]] + + bf_df["int_ge_int"] = bf_df["int64_col"] >= bf_df["int64_col"] + bf_df["int_ge_1"] = bf_df["int64_col"] >= 1 + + bf_df["int_ge_bool"] = bf_df["int64_col"] >= bf_df["bool_col"] + bf_df["bool_ge_int"] = bf_df["bool_col"] >= bf_df["int64_col"] + + snapshot.assert_match(bf_df.sql, "out.sql") + + def test_json_set(json_types_df: bpd.DataFrame, snapshot): bf_df = json_types_df[["json_col"]] sql = _apply_binary_op( @@ -141,6 +153,18 @@ def test_lt_numeric(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(bf_df.sql, "out.sql") +def test_le_numeric(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["int64_col", "bool_col"]] + + bf_df["int_le_int"] = bf_df["int64_col"] <= bf_df["int64_col"] + bf_df["int_le_1"] = bf_df["int64_col"] <= 1 + + bf_df["int_le_bool"] = bf_df["int64_col"] <= bf_df["bool_col"] + bf_df["bool_le_int"] = bf_df["bool_col"] <= bf_df["int64_col"] + + snapshot.assert_match(bf_df.sql, "out.sql") + + def test_sub_numeric(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["int64_col", "bool_col"]]