-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Support logic optimize rule to pass the case that Utf8view datatype combined with Utf8 datatype #15239
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support logic optimize rule to pass the case that Utf8view datatype combined with Utf8 datatype #15239
Changes from all commits
66d2897
36df94e
308307b
c7c0c98
c75cd88
e1e57a8
9b8a011
4187e9f
3ef7b29
a10f46a
2fbe7a6
70934b0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -564,6 +564,7 @@ impl DFSchema { | |
} | ||
|
||
/// Check to see if fields in 2 Arrow schemas are compatible | ||
#[deprecated(since = "47.0.0", note = "This method is no longer used")] | ||
pub fn check_arrow_schema_type_compatible( | ||
alamb marked this conversation as resolved.
Show resolved
Hide resolved
|
||
&self, | ||
arrow_schema: &Schema, | ||
|
@@ -604,26 +605,57 @@ impl DFSchema { | |
}) | ||
} | ||
|
||
/// Returns true if the two schemas have the same qualified named | ||
/// fields with the same data types. Returns false otherwise. | ||
#[deprecated(since = "47.0.0", note = "Use `has_equivalent_names_and_types` instead")] | ||
pub fn equivalent_names_and_types(&self, other: &Self) -> bool { | ||
self.has_equivalent_names_and_types(other).is_ok() | ||
} | ||
|
||
/// Returns Ok if the two schemas have the same qualified named | ||
/// fields with the compatible data types. | ||
/// | ||
/// This is a specialized version of Eq that ignores differences | ||
/// in nullability and metadata. | ||
/// Returns an `Err` with a message otherwise. | ||
/// | ||
/// This is a specialized version of Eq that ignores differences in | ||
/// nullability and metadata. | ||
/// | ||
/// Use [`DFSchema::logically_equivalent_names_and_types`] for weaker | ||
/// logical type checking, which for example would consider a dictionary | ||
/// encoded UTF8 array to be equivalent to a plain UTF8 array. | ||
pub fn equivalent_names_and_types(&self, other: &Self) -> bool { | ||
pub fn has_equivalent_names_and_types(&self, other: &Self) -> Result<()> { | ||
// case 1 : schema length mismatch | ||
if self.fields().len() != other.fields().len() { | ||
return false; | ||
_plan_err!( | ||
"Schema mismatch: the schema length are not same \ | ||
Expected schema length: {}, got: {}", | ||
self.fields().len(), | ||
other.fields().len() | ||
) | ||
} else { | ||
// case 2 : schema length match, but fields mismatch | ||
// check if the fields name are the same and have the same data types | ||
self.fields() | ||
.iter() | ||
.zip(other.fields().iter()) | ||
.try_for_each(|(f1, f2)| { | ||
if f1.name() != f2.name() | ||
|| (!DFSchema::datatype_is_semantically_equal( | ||
f1.data_type(), | ||
f2.data_type(), | ||
) && !can_cast_types(f2.data_type(), f1.data_type())) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @zhuqi-lucas this addition of "can_cast_types" broke the Substrait consumer (since we use this logic to decide if we should cast things to get names to match - this now always passes). But also overall using … There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thank you @Blizzara for testing and feedback. I am sorry for breaking the Substrait consumer, which I was not aware of. I will help review when you submit the fix.
|
||
{ | ||
_plan_err!( | ||
"Schema mismatch: Expected field '{}' with type {:?}, \ | ||
but got '{}' with type {:?}.", | ||
f1.name(), | ||
f1.data_type(), | ||
f2.name(), | ||
f2.data_type() | ||
) | ||
} else { | ||
Ok(()) | ||
} | ||
}) | ||
} | ||
let self_fields = self.iter(); | ||
let other_fields = other.iter(); | ||
self_fields.zip(other_fields).all(|((q1, f1), (q2, f2))| { | ||
q1 == q2 | ||
&& f1.name() == f2.name() | ||
&& Self::datatype_is_semantically_equal(f1.data_type(), f2.data_type()) | ||
}) | ||
} | ||
|
||
/// Checks if two [`DataType`]s are logically equal. This is a notably weaker constraint | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we have a replacement method for this one?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe we have a similar API already:
logically_equivalent_names_and_types