Skip to content

Commit dd01c71

Browse files
committed
fix: escape underscores when simplifying starts_with (#19076)
1 parent 90aa47e commit dd01c71

File tree

3 files changed

+48
-6
lines changed

3 files changed

+48
-6
lines changed

datafusion/functions/src/string/starts_with.rs

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -132,15 +132,18 @@ impl ScalarUDFImpl for StartsWithFunc {
132132
) -> Result<ExprSimplifyResult> {
133133
if let Expr::Literal(scalar_value, _) = &args[1] {
134134
// Convert starts_with(col, 'prefix') to col LIKE 'prefix%' with proper escaping
135-
// Example: starts_with(col, 'ja%') -> col LIKE 'ja\%%'
136-
// 1. 'ja%' (input pattern)
137-
// 2. 'ja\%' (escape special char '%')
138-
// 3. 'ja\%%' (add suffix for starts_with)
135+
// Escapes pattern characters: starts_with(col, 'j\_a%') -> col LIKE 'j\\\_a\%%'
136+
// 1. 'j\_a%' (input pattern)
137+
// 2. 'j\\\_a\%' (escape '\' first, then '%' and '_', so the backslashes added for '%' and '_' are not themselves doubled)
138+
// 3. 'j\\\_a\%%' (add unescaped % suffix for starts_with)
139139
let like_expr = match scalar_value {
140140
ScalarValue::Utf8(Some(pattern))
141141
| ScalarValue::LargeUtf8(Some(pattern))
142142
| ScalarValue::Utf8View(Some(pattern)) => {
143-
let escaped_pattern = pattern.replace("%", "\\%");
143+
let escaped_pattern = pattern
144+
.replace("\\", "\\\\")
145+
.replace("%", "\\%")
146+
.replace("_", "\\_");
144147
let like_pattern = format!("{escaped_pattern}%");
145148
Expr::Literal(ScalarValue::Utf8(Some(like_pattern)), None)
146149
}

datafusion/sqllogictest/test_files/string/string_literal.slt

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,25 @@ SELECT ends_with('foobar', 'foo')
207207
----
208208
false
209209

210+
query B
211+
SELECT ends_with(a, '%bar') from (values ('foobar'), ('foo%bar')) as t(a);
212+
----
213+
false
214+
true
215+
216+
query B
217+
SELECT ends_with(a, '_bar') from (values ('foobar'), ('foo_bar')) as t(a);
218+
----
219+
false
220+
true
221+
222+
query B
223+
SELECT ends_with(a, '\_bar') from (values ('foobar'), ('foo\\bar'), ('foo\_bar')) as t(a);
224+
----
225+
false
226+
false
227+
true
228+
210229
query I
211230
SELECT levenshtein('kitten', 'sitting')
212231
----
@@ -846,6 +865,26 @@ SELECT starts_with('foobar', 'bar')
846865
----
847866
false
848867

868+
869+
query B
870+
SELECT starts_with(a, 'foo%') from (values ('foobar'), ('foo%bar')) as t(a);
871+
----
872+
false
873+
true
874+
875+
query B
876+
SELECT starts_with(a, 'foo\_') from (values ('foobar'), ('foo\\_bar'), ('foo\_bar')) as t(a);
877+
----
878+
false
879+
false
880+
true
881+
882+
query B
883+
SELECT starts_with(a, 'foo_') from (values ('foobar'), ('foo_bar')) as t(a);
884+
----
885+
false
886+
true
887+
849888
query TT
850889
select ' ', '|'
851890
----

datafusion/sqllogictest/test_files/string/string_view.slt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,7 @@ EXPLAIN SELECT
370370
FROM test;
371371
----
372372
logical_plan
373-
01)Projection: test.column1_utf8 LIKE Utf8("foo\%%") AS c1, test.column1_large_utf8 LIKE LargeUtf8("foo\%%") AS c2, test.column1_utf8view LIKE Utf8View("foo\%%") AS c3, test.column1_utf8 LIKE Utf8("f_o%") AS c4, test.column1_large_utf8 LIKE LargeUtf8("f_o%") AS c5, test.column1_utf8view LIKE Utf8View("f_o%") AS c6
373+
01)Projection: test.column1_utf8 LIKE Utf8("foo\%%") AS c1, test.column1_large_utf8 LIKE LargeUtf8("foo\%%") AS c2, test.column1_utf8view LIKE Utf8View("foo\%%") AS c3, test.column1_utf8 LIKE Utf8("f\_o%") AS c4, test.column1_large_utf8 LIKE LargeUtf8("f\_o%") AS c5, test.column1_utf8view LIKE Utf8View("f\_o%") AS c6
374374
02)--TableScan: test projection=[column1_utf8, column1_large_utf8, column1_utf8view]
375375

376376
## Test STARTS_WITH works with column arguments

0 commit comments

Comments
 (0)