@@ -307,7 +307,6 @@ def _is_single_row_query_method(query: Any) -> bool:
307307 "get_column_max" ,
308308 "get_column_mean" ,
309309 "get_column_stdev" ,
310- "get_column_nonnull_count" ,
311310 "get_column_unique_count" ,
312311 }
313312 CONSTANT_ROW_QUERY_METHODS = {
@@ -331,6 +330,7 @@ def _is_single_row_query_method(query: Any) -> bool:
331330
332331 FIRST_PARTY_SINGLE_ROW_QUERY_METHODS = {
333332 "get_column_unique_count_dh_patch" ,
333+ "_get_column_cardinality" ,
334334 }
335335
336336 # We'll do this the inefficient way since the arrays are pretty small.
@@ -497,7 +497,20 @@ def _get_column_cardinality(
497497 self , column_spec : _SingleColumnSpec , column : str
498498 ) -> None :
499499 try :
500- nonnull_count = self .dataset .get_column_nonnull_count (column )
500+ # Don't use Great Expectations get_column_nonnull_count because it
501+ # generates this SQL:
502+ #
503+ # sum(CASE WHEN (mycolumn IN (NULL) OR mycolumn IS NULL) THEN 1 ELSE 0 END)
504+ #
505+ # which fails for complex types (such as Databricks maps) that don't
506+ # support the IN operator.
507+ nonnull_count = convert_to_json_serializable (
508+ self .dataset .engine .execute (
509+ sa .select (sa .func .count (sa .column (column ))).select_from (
510+ self .dataset ._table
511+ )
512+ ).scalar ()
513+ )
501514 column_spec .nonnull_count = nonnull_count
502515 except Exception as e :
503516 logger .debug (
0 commit comments