@@ -1282,6 +1282,66 @@ def analyzer_fn(inputs):
12821282 self .assertAnalyzerOutputs (
12831283 input_data , input_metadata , analyzer_fn , expected_outputs )
12841284
def testMaxWithSparseTensorReduceTrue(self):
  """tft.max over a sparse column with the default full reduction.

  With reduce_instance_dims left at its default (True) the analyzer
  collapses both the batch and instance dimensions into one scalar.
  """

  def _analyzer(inputs):
    return {'max': tft.max(inputs['sparse'])}

  instances = [
      {'sparse': ([0, 1], [0., 1.])},
      {'sparse': ([1, 3], [2., 3.])},
  ]
  sparse_schema = sch.ColumnSchema(
      tf.float32, [4],
      sch.SparseColumnRepresentation(
          'val', [sch.SparseIndexField('idx', False)]))
  metadata = dataset_metadata.DatasetMetadata({'sparse': sparse_schema})
  # The largest value across every instance is 3.
  expected = {'max': np.array(3., np.float32)}
  self.assertAnalyzerOutputs(instances, metadata, _analyzer, expected)
1305+
@tft_unit.parameters(
    (tf.int32,),
    (tf.int64,),
    (tf.float32,),
    (tf.float64,),
)
def testMaxWithSparseTensorReduceFalse(self, input_dtype):
  """Element-wise tft.max over a sparse column, one max per index.

  With reduce_instance_dims=False the analyzer reports a maximum for each
  of the 4 dense index positions independently.
  """

  def _analyzer(inputs):
    return {'max': tft.max(inputs['sparse'], False)}

  instances = [
      {'sparse': ([0, 1], [-1., 1.])},
      {'sparse': ([1, 3], [2., 3.])},
  ]
  metadata = dataset_metadata.DatasetMetadata({
      'sparse':
          sch.ColumnSchema(
              input_dtype, [4],
              sch.SparseColumnRepresentation(
                  'val', [sch.SparseIndexField('idx', False)]))
  })
  np_dtype = input_dtype.as_numpy_dtype
  # Index 2 is never populated: floats surface that as NaN, while integer
  # dtypes fall back to the dtype's minimum value.
  if input_dtype in (tf.float32, tf.float64):
    expected = {'max': np.array([-1., 2., float('nan'), 3.], np_dtype)}
  else:
    expected = {'max': np.array([-1, 2, np.iinfo(np_dtype).min, 3], np_dtype)}

  self.assertAnalyzerOutputs(instances, metadata, _analyzer, expected)
1344+
12851345 def testNumericMeanWithSparseTensorReduceTrue (self ):
12861346
12871347 def analyzer_fn (inputs ):
@@ -1341,6 +1401,70 @@ def analyzer_fn(inputs):
13411401 self .assertAnalyzerOutputs (input_data , input_metadata , analyzer_fn ,
13421402 expected_outputs )
13431403
@tft_unit.parameters(
    (tf.int32,),
    (tf.int64,),
    (tf.float32,),
    (tf.float64,),
)
def testVarWithSparseTensorReduceInstanceDimsTrue(self, input_dtype):
  """tft.var over a sparse column reduces every value to one scalar."""

  def _analyzer(inputs):
    return {'var': tft.var(inputs['sparse'])}

  instances = [
      {'sparse': ([0, 1], [0., 1.])},
      {'sparse': ([1, 3], [2., 3.])},
  ]
  metadata = dataset_metadata.DatasetMetadata({
      'sparse':
          sch.ColumnSchema(
              input_dtype, [4],
              sch.SparseColumnRepresentation(
                  'val', [sch.SparseIndexField('idx', False)]))
  })
  # Population variance of {0, 1, 2, 3} is 1.25.  Only float64 input keeps
  # a float64 result; every other dtype yields float32.
  out_dtype = np.float64 if input_dtype == tf.float64 else np.float32
  expected = {'var': np.array(1.25, out_dtype)}
  self.assertAnalyzerOutputs(instances, metadata, _analyzer, expected)
1433+
@tft_unit.parameters(
    (tf.int32,),
    (tf.int64,),
    (tf.float32,),
    (tf.float64,),
)
def testVarWithSparseTensorReduceInstanceDimsFalse(self, input_dtype):
  """Element-wise tft.var over a sparse column, one variance per index."""

  def _analyzer(inputs):
    return {'var': tft.var(inputs['sparse'], reduce_instance_dims=False)}

  instances = [
      {'sparse': ([0, 1], [0., 1.])},
      {'sparse': ([1, 3], [2., 3.])},
  ]
  metadata = dataset_metadata.DatasetMetadata({
      'sparse':
          sch.ColumnSchema(
              input_dtype, [4],
              sch.SparseColumnRepresentation(
                  'val', [sch.SparseIndexField('idx', False)]))
  })
  # Positions 0 and 3 hold one value each (variance 0), position 1 holds
  # {1, 2} (variance .25), and position 2 is never populated (NaN).  Only
  # float64 input keeps a float64 result; every other dtype yields float32.
  out_dtype = np.float64 if input_dtype == tf.float64 else np.float32
  expected = {'var': np.array([0., .25, float('nan'), 0.], out_dtype)}
  self.assertAnalyzerOutputs(instances, metadata, _analyzer, expected)
1467+
13441468 def testNumericAnalyzersWithSparseInputs (self ):
13451469 def repeat (in_tensor , value ):
13461470 batch_size = tf .shape (in_tensor )[0 ]
@@ -1358,11 +1482,6 @@ def min_fn(inputs):
13581482 return {'min' : repeat (inputs ['a' ], tft .min (inputs ['a' ]))}
13591483 _ = input_dataset | beam_impl .AnalyzeDataset (min_fn )
13601484
1361- with self .assertRaises (TypeError ):
1362- def max_fn (inputs ):
1363- return {'max' : repeat (inputs ['a' ], tft .max (inputs ['a' ]))}
1364- _ = input_dataset | beam_impl .AnalyzeDataset (max_fn )
1365-
13661485 with self .assertRaises (TypeError ):
13671486 def sum_fn (inputs ):
13681487 return {'sum' : repeat (inputs ['a' ], tft .sum (inputs ['a' ]))}
@@ -1373,11 +1492,6 @@ def size_fn(inputs):
13731492 return {'size' : repeat (inputs ['a' ], tft .size (inputs ['a' ]))}
13741493 _ = input_dataset | beam_impl .AnalyzeDataset (size_fn )
13751494
1376- with self .assertRaises (TypeError ):
1377- def var_fn (inputs ):
1378- return {'var' : repeat (inputs ['a' ], tft .var (inputs ['a' ]))}
1379- _ = input_dataset | beam_impl .AnalyzeDataset (var_fn )
1380-
13811495 def testStringToTFIDF (self ):
13821496 def preprocessing_fn (inputs ):
13831497 inputs_as_ints = tft .compute_and_apply_vocabulary (
0 commit comments