@@ -478,9 +478,14 @@ def test_concat_different_columns_per_modality_and_per_sample(run_component, sam
478
478
non_shared_features = data_sample1 .var_names .difference (data_sample2 .var_names )
479
479
assert concatenated_data .var .loc [non_shared_features , 'mod2:Feat4' ].isna ().all ()
480
480
481
- @pytest .mark .parametrize ("test_value,expected" , [("bar" , "bar" ), (True , True ), (0.1 , 0.1 ), (np .nan , pd .NA )])
481
+ @pytest .mark .parametrize ("test_value,test_value_dtype,expected" , [("bar" , "str" , "bar" ),
482
+ (True , pd .BooleanDtype (), True ),
483
+ (1 , pd .Int16Dtype (), 1 ),
484
+ (0.1 , float , 0.1 ),
485
+ (0.1 , np .float64 , 0.1 ),
486
+ (np .nan , np .float64 , pd .NA )])
482
487
def test_concat_remove_na (run_component , sample_1_h5mu , sample_2_h5mu ,
483
- write_mudata_to_file , random_h5mu_path , test_value , expected ,
488
+ write_mudata_to_file , random_h5mu_path , test_value , test_value_dtype , expected ,
484
489
change_column_contents ):
485
490
"""
486
491
Test concatenation of samples where the column from one sample contains NA values
@@ -492,7 +497,7 @@ def test_concat_remove_na(run_component, sample_1_h5mu, sample_2_h5mu,
492
497
"""
493
498
change_column_contents (sample_1_h5mu , 'var' , 'Shared_feat' , {'mod1' : np .nan , 'mod2' : np .nan })
494
499
change_column_contents (sample_2_h5mu , 'var' , 'Shared_feat' , {'mod1' : test_value , 'mod2' : np .nan })
495
-
500
+ sample_2_h5mu . var [ 'Shared_feat' ] = sample_2_h5mu . var [ 'Shared_feat' ]. astype ( test_value_dtype )
496
501
output_path = random_h5mu_path ()
497
502
498
503
run_component ([
@@ -547,9 +552,17 @@ def test_concat_invalid_h5_error_includes_path(run_component, tmp_path,
547
552
err .value .stdout .decode ('utf-8' ))
548
553
549
554
550
- @pytest .mark .parametrize ("test_value_1,test_value_2,expected" , [(1 , "1" , pd .CategoricalDtype (categories = ['1.0' , '1' ]))])
555
+ @pytest .mark .parametrize ("test_value_1,value_1_dtype,test_value_2,value_2_dtype,expected" ,
556
+ [(1 , float , "1" , str , pd .CategoricalDtype (categories = ['1.0' , '1' ])),
557
+ (1 , np .float64 , "1" , str , pd .CategoricalDtype (categories = ['1.0' , '1' ])),
558
+ (1 , pd .Int16Dtype (), 2.0 , pd .Int16Dtype (), pd .Int64Dtype ()),
559
+ (True , bool , False , bool , pd .BooleanDtype ()),
560
+ (True , pd .BooleanDtype (), False , bool , pd .BooleanDtype ()),
561
+ ("foo" , str , "bar" , str , pd .CategoricalDtype (categories = ['bar' , 'foo' ])),
562
+ ]
563
+ )
551
564
def test_concat_dtypes_per_modality (run_component , write_mudata_to_file , change_column_contents ,
552
- sample_1_h5mu , sample_2_h5mu , test_value_1 , test_value_2 ,
565
+ sample_1_h5mu , sample_2_h5mu , test_value_1 , value_1_dtype , test_value_2 , value_2_dtype ,
553
566
expected , random_h5mu_path ):
554
567
"""
555
568
Test joining columns with different dtypes to make sure that they are writable.
@@ -561,7 +574,10 @@ def test_concat_dtypes_per_modality(run_component, write_mudata_to_file, change_
561
574
for the test column in mod2 is still writable.
562
575
"""
563
576
change_column_contents (sample_1_h5mu , "var" , "test_col" , {"mod1" : test_value_1 , "mod2" : test_value_1 })
577
+ sample_1_h5mu .var ['test_col' ] = sample_1_h5mu .var ['test_col' ].astype (value_1_dtype )
564
578
change_column_contents (sample_2_h5mu , "var" , "test_col" , {"mod1" : test_value_2 , "mod2" : test_value_2 })
579
+ sample_2_h5mu .var ['test_col' ] = sample_2_h5mu .var ['test_col' ].astype (value_2_dtype )
580
+
565
581
output_file = random_h5mu_path ()
566
582
run_component ([
567
583
"--input_id" , "sample1;sample2" ,
@@ -573,6 +589,40 @@ def test_concat_dtypes_per_modality(run_component, write_mudata_to_file, change_
573
589
concatenated_data = md .read (output_file )
574
590
assert concatenated_data ['mod2' ].var ['test_col' ].dtype == expected
575
591
592
+
593
+ @pytest .mark .parametrize ("test_value,value_dtype,expected" ,
594
+ [(1 , float , pd .Int64Dtype ()),
595
+ (1 , np .float64 , pd .Int64Dtype ()),
596
+ (1 , pd .Int16Dtype (), pd .Int16Dtype ()),
597
+ (True , bool , pd .BooleanDtype ()),
598
+ (True , pd .BooleanDtype (), pd .BooleanDtype ()),
599
+ ("foo" , str , pd .CategoricalDtype (categories = ['foo' ])),
600
+ ]
601
+ )
602
+ def test_concat_dtypes_per_modality_multidim (run_component , write_mudata_to_file ,
603
+ sample_1_h5mu , sample_2_h5mu , test_value , value_dtype ,
604
+ expected , random_h5mu_path ):
605
+ """
606
+ Test if the result of concatenation is still writable when the input already contains
607
+ data in .varm and this data is kept. Because we are joining observations, the dtype of this
608
+ data may change and the result might not be writable anymore
609
+ """
610
+
611
+ sample_1_h5mu ['mod1' ].varm ['test_df' ] = pd .DataFrame (index = sample_1_h5mu ['mod1' ].var_names )
612
+ sample_1_h5mu ['mod1' ].varm ['test_df' ]['test_col' ] = test_value
613
+ sample_1_h5mu ['mod1' ].varm ['test_df' ]['test_col' ] = sample_1_h5mu ['mod1' ].varm ['test_df' ]['test_col' ].astype (value_dtype )
614
+
615
+ output_file = random_h5mu_path ()
616
+ run_component ([
617
+ "--input_id" , "sample1;sample2" ,
618
+ "--input" , write_mudata_to_file (sample_1_h5mu ),
619
+ "--input" , write_mudata_to_file (sample_2_h5mu ),
620
+ "--output" , output_file ,
621
+ "--other_axis_mode" , "move"
622
+ ])
623
+ concatenated_data = md .read (output_file )
624
+ assert concatenated_data ['mod1' ].varm ['test_df' ]['test_col' ].dtype == expected
625
+
576
626
@pytest .mark .parametrize ("test_value_1,test_value_2,expected" , [(1 , "1" , pd .CategoricalDtype (categories = ['1.0' , '1' ]))])
577
627
def test_concat_dtypes_global (run_component , write_mudata_to_file , change_column_contents ,
578
628
sample_1_h5mu , sample_2_h5mu , test_value_1 , test_value_2 ,
@@ -622,6 +672,8 @@ def test_non_overlapping_modalities(run_component, sample_2_h5mu, sample_3_h5mu,
622
672
"--output" , output_path ,
623
673
"--other_axis_mode" , "move"
624
674
])
675
+ output_data = md .read (output_path )
676
+ assert set (output_data .mod .keys ()) == {"mod1" , "mod2" , "mod3" }
625
677
626
678
627
679
def test_resolve_annotation_conflict_missing_column (run_component , sample_1_h5mu ,
0 commit comments