 from typing import Tuple
 
-import db_dtypes  # type:ignore
 import google.api_core.exceptions
 import pandas as pd
 import pandas.testing
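
Note: the hunk above drops the direct `db_dtypes` import from this test module; the assertions below go through the project's `dtypes` helpers (`dtypes.is_json_type`, `dtypes.JSON_DTYPE`) instead. A minimal, hypothetical sketch of what such a helper could look like, assuming `is_json_type` simply recognizes the db-dtypes JSON Arrow extension dtype (the real bigframes.dtypes implementation may differ):

    # Hypothetical sketch only; not the actual bigframes.dtypes module.
    import db_dtypes  # type: ignore
    import pandas as pd

    # Arrow-backed JSON dtype, matching the expression used in the replaced assertions.
    JSON_DTYPE = pd.ArrowDtype(db_dtypes.JSONArrowType())

    def is_json_type(dtype) -> bool:
        # Treat a dtype as JSON if it equals the JSON Arrow extension dtype.
        return dtype == JSON_DTYPE
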
@@ -307,10 +306,10 @@ def test_load_json_w_json_string_items(session):
             )
         ),
     """
-    df = session.read_gbq(sql, index_col="id")
-
-    assert df.dtypes["json_col"] == pd.ArrowDtype(db_dtypes.JSONArrowType())
+    # TODO(b/401630655): JSON is not compatible with allow_large_results=False
+    df = session.read_gbq(sql, index_col="id").to_pandas(allow_large_results=True)
 
+    assert dtypes.is_json_type(df.dtypes["json_col"])
     assert df["json_col"][0] == '{"boolean":true}'
     assert df["json_col"][1] == '{"int":100}'
     assert df["json_col"][2] == '{"float":0.98}'
@@ -325,17 +324,24 @@ def test_load_json_w_json_string_items(session):
 
 def test_load_json_to_pandas_has_correct_result(session):
     df = session.read_gbq("SELECT JSON_OBJECT('foo', 10, 'bar', TRUE) AS json_col")
-    assert df.dtypes["json_col"] == pd.ArrowDtype(db_dtypes.JSONArrowType())
-    result = df.to_pandas()
+    assert dtypes.is_json_type(df.dtypes["json_col"])
+
+    # TODO(b/401630655): JSON is not compatible with allow_large_results=False
+    result = df.to_pandas(allow_large_results=True)
 
     # These JSON strings are compatible with BigQuery's JSON storage,
     pd_df = pd.DataFrame(
         {"json_col": ['{"bar":true,"foo":10}']},
-        dtype=pd.ArrowDtype(db_dtypes.JSONArrowType()),
+        dtype=dtypes.JSON_DTYPE,
     )
     pd_df.index = pd_df.index.astype("Int64")
-    pd.testing.assert_series_equal(result.dtypes, pd_df.dtypes)
-    pd.testing.assert_series_equal(result["json_col"], pd_df["json_col"])
+    assert dtypes.is_json_type(pd_df.dtypes["json_col"])
+
+    # `check_exact=False` can workaround the known issue in pandas:
+    # https://github.com/pandas-dev/pandas/issues/60958
+    pd.testing.assert_series_equal(
+        result["json_col"], pd_df["json_col"], check_exact=False
+    )
 
 
 def test_load_json_in_struct(session):
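
The `check_exact=False` call above compares two JSON-typed series without byte-for-byte scalar equality, sidestepping pandas-dev/pandas#60958. A standalone illustration of the same comparison pattern, assuming a stack where `pd.ArrowDtype(db_dtypes.JSONArrowType())` is constructible from JSON strings (as the test's `pd.DataFrame(..., dtype=dtypes.JSON_DTYPE)` already relies on):

    # Illustrative only; mirrors the assertion pattern in the test above.
    import db_dtypes  # type: ignore
    import pandas as pd

    json_dtype = pd.ArrowDtype(db_dtypes.JSONArrowType())
    left = pd.Series(['{"bar":true,"foo":10}'], dtype=json_dtype)
    right = pd.Series(['{"bar":true,"foo":10}'], dtype=json_dtype)

    # check_exact=False works around https://github.com/pandas-dev/pandas/issues/60958
    pd.testing.assert_series_equal(left, right, check_exact=False)
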
@@ -363,13 +369,14 @@ def test_load_json_in_struct(session):
             )
         ), 7),
     """
-    df = session.read_gbq(sql, index_col="id")
+    # TODO(b/401630655): JSON is not compatible with allow_large_results=False
+    df = session.read_gbq(sql, index_col="id").to_pandas(allow_large_results=True)
 
     assert isinstance(df.dtypes["struct_col"], pd.ArrowDtype)
     assert isinstance(df.dtypes["struct_col"].pyarrow_dtype, pa.StructType)
 
     data = df["struct_col"].struct.field("data")
-    assert data.dtype == pd.ArrowDtype(db_dtypes.JSONArrowType())
+    assert dtypes.is_json_type(data.dtype)
 
     assert data[0] == '{"boolean":true}'
     assert data[1] == '{"int":100}'
@@ -400,14 +407,15 @@ def test_load_json_in_array(session):
             )
         ] AS array_col,
     """
-    df = session.read_gbq(sql, index_col="id")
+    # TODO(b/401630655): JSON is not compatible with allow_large_results=False
+    df = session.read_gbq(sql, index_col="id").to_pandas(allow_large_results=True)
 
     assert isinstance(df.dtypes["array_col"], pd.ArrowDtype)
     assert isinstance(df.dtypes["array_col"].pyarrow_dtype, pa.ListType)
 
     data = df["array_col"].list
     assert data.len()[0] == 7
-    assert data[0].dtype == pd.ArrowDtype(db_dtypes.JSONArrowType())
+    assert dtypes.is_json_type(data[0].dtype)
 
     assert data[0][0] == '{"boolean":true}'
     assert data[1][0] == '{"int":100}'
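
The struct and array hunks above rely on pandas' Arrow-backed accessors (`Series.struct.field` and `Series.list`, available in recent pandas, 2.2+). A small self-contained sketch of the struct access pattern, independent of BigQuery and using hypothetical field names:

    # Illustrative only; shows the .struct.field access used by the tests above.
    import pandas as pd
    import pyarrow as pa

    struct_type = pa.struct([("id", pa.int64()), ("data", pa.string())])
    s = pd.Series(
        [{"id": 0, "data": '{"boolean":true}'}],
        dtype=pd.ArrowDtype(struct_type),
    )

    # Extract one field of the struct column as its own Series.
    data = s.struct.field("data")
    assert data[0] == '{"boolean":true}'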