Skip to content

Commit 6a9cca8

Browse files
authored
Merge pull request #153 from sfiligoi/h5unifrac_multi
Add h5unifrac_all method
2 parents 9fc61fc + 5898295 commit 6a9cca8

File tree

4 files changed

+84
-14
lines changed

4 files changed

+84
-14
lines changed

.github/workflows/main.yml

+7
Original file line number | Diff line number | Diff line change
@@ -114,6 +114,13 @@ jobs:
114114
python -c "import h5py,skbio; dm = skbio.DistanceMatrix.read('ci/test.dm'); f_u=h5py.File('ci/test2.dm.h5','r'); dm_u=skbio.stats.distance.DistanceMatrix(f_u['matrix'][:,:],f_u['order'][:]); t=abs(dm_u.data-dm.data).max(); print(t); assert t < 0.1"
115115
python -c "import h5py; f_u=h5py.File('ci/test2.dm.h5','r'); print(f_u.keys()); assert len(f_u['stat_methods'][:]) == 1"
116116
python -c "import h5py; f_u=h5py.File('ci/test3.dm.h5','r'); print(f_u.keys()); assert len(f_u['pcoa_eigvals'][:]) == 2"
117+
# repeat using unifrac's h5 interfaces
118+
python -c "import unifrac; dm_u=unifrac.h5unifrac('ci/test.dm.h5'); dm_l=unifrac.h5unifrac_all('ci/test.dm.h5')"
119+
python -c "import unifrac,skbio; dm = skbio.DistanceMatrix.read('ci/test.dm'); dm_u=unifrac.h5unifrac('ci/test.dm.h5'); t=abs(dm_u.data-dm.data).max(); print(t); assert t < 0.1"
120+
python -c "import unifrac,skbio; dm = skbio.DistanceMatrix.read('ci/test.dm'); dm_u=unifrac.h5unifrac_all('ci/test.dm.h5')[0]; t=abs(dm_u.data-dm.data).max(); print(t); assert t < 0.1"
121+
python -c "import unifrac,skbio; dm = skbio.DistanceMatrix.read('ci/test.dm'); dm_u=unifrac.h5unifrac('ci/test2.dm.h5'); t=abs(dm_u.data-dm.data).max(); print(t); assert t < 0.1"
122+
python -c "import unifrac; st_l=unifrac.h5permanova_dict('ci/test2.dm.h5'); assert len(st_l) == 1"
123+
python -c "import unifrac; pc=unifrac.h5pcoa('ci/test3.dm.h5'); print(pc); assert len(pc.eigvals) == 2"
117124
if [[ "$(uname -s)" == "Linux" ]];
118125
then
119126
MD5=md5sum

README.md

+8-7
Original file line number | Diff line number | Diff line change
@@ -140,13 +140,14 @@ The library can be accessed directly from within Python. If operating in this mo
140140
>>> import unifrac
141141
>>> dir(unifrac)
142142
['__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__',
143-
'__package__', '__path__', '__spec__', '__version__', '_api', '_meta', '_methods', 'set_random_seed', 'faith_pd',
144-
'generalized', 'generalized_fp32', 'generalized_fp32_to_file', 'generalized_fp64', 'generalized_fp64_to_file', 'generalized_to_file',
145-
'h5pcoa', 'h5pcoa_all', 'h5permanova', 'h5permanova_dict', 'h5unifrac', 'meta', 'pkg_resources', 'ssu', 'ssu_fast', 'ssu_inmem', 'ssu_to_file', 'ssu_to_file_v2',
146-
'unweighted', 'unweighted_fp32', 'unweighted_fp32_to_file', 'unweighted_fp64', 'unweighted_fp64_to_file', 'unweighted_to_file',
147-
'weighted_normalized', 'weighted_normalized_fp32', 'weighted_normalized_fp32_to_file', 'weighted_normalized_fp64', 'weighted_normalized_fp64_to_file',
148-
'weighted_normalized_to_file', 'weighted_unnormalized', 'weighted_unnormalized_fp32', 'weighted_unnormalized_fp32_to_file', 'weighted_unnormalized_fp64',
149-
'weighted_unnormalized_fp64_to_file', 'weighted_unnormalized_to_file']
143+
'__package__', '__path__', '__spec__', '__version__', '_api', '_meta', '_methods',
144+
'faith_pd', 'generalized', 'generalized_fp32', 'generalized_fp32_to_file', 'generalized_fp64', 'generalized_fp64_to_file', 'generalized_to_file',
145+
'h5pcoa', 'h5pcoa_all', 'h5permanova', 'h5permanova_dict', 'h5unifrac', 'h5unifrac_all', 'meta', 'pkg_resources',
146+
'set_random_seed', 'ssu', 'ssu_fast', 'ssu_inmem', 'ssu_to_file', 'ssu_to_file_v2', 'unweighted', 'unweighted_fp32',
147+
'unweighted_fp32_to_file', 'unweighted_fp64', 'unweighted_fp64_to_file', 'unweighted_to_file', 'weighted_normalized',
148+
'weighted_normalized_fp32', 'weighted_normalized_fp32_to_file', 'weighted_normalized_fp64', 'weighted_normalized_fp64_to_file',
149+
'weighted_normalized_to_file', 'weighted_unnormalized', 'weighted_unnormalized_fp32', 'weighted_unnormalized_fp32_to_file',
150+
'weighted_unnormalized_fp64', 'weighted_unnormalized_fp64_to_file', 'weighted_unnormalized_to_file']
150151
>>> print(unifrac.unweighted.__doc__)
151152
Compute Unweighted UniFrac
152153

unifrac/__init__.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@
3333
weighted_unnormalized_fp32_to_file,
3434
generalized_fp32_to_file,
3535
meta,
36-
h5unifrac,
36+
h5unifrac, h5unifrac_all,
3737
h5pcoa, h5pcoa_all,
3838
h5permanova, h5permanova_dict)
3939
from unifrac._api import ssu, ssu_fast, faith_pd, set_random_seed
@@ -57,7 +57,7 @@
5757
'weighted_normalized_fp32_to_file',
5858
'weighted_unnormalized_fp32_to_file',
5959
'generalized_fp32_to_file',
60-
'h5unifrac', 'h5pcoa', 'h5pcoa_all',
60+
'h5unifrac', 'h5unifrac_all', 'h5pcoa', 'h5pcoa_all',
6161
'h5permanova', 'h5permanova_dict',
6262
'ssu', 'ssu_fast', 'faith_pd',
6363
'ssu_to_file', 'ssu_to_file_v2',

unifrac/_methods.py

+67-5
Original file line number | Diff line number | Diff line change
@@ -2508,7 +2508,7 @@ def generalized_fp32_to_file(table: str,
25082508

25092509

25102510
def h5unifrac(h5file: str) -> skbio.DistanceMatrix:
2511-
"""Read UniFrac from a hdf5 file
2511+
"""Read UniFrac distance matrix from a hdf5 file
25122512
25132513
Parameters
25142514
----------
@@ -2538,13 +2538,68 @@ def h5unifrac(h5file: str) -> skbio.DistanceMatrix:
25382538
"""
25392539

25402540
with h5py.File(h5file, "r") as f_u:
2541-
dm = skbio.DistanceMatrix(
2541+
if 'matrix:0' in f_u.keys():
2542+
# multi format
2543+
dm = skbio.DistanceMatrix(
2544+
f_u['matrix:0'][:, :],
2545+
[c.decode('ascii') for c in f_u['order'][:]])
2546+
else:
2547+
# single format
2548+
dm = skbio.DistanceMatrix(
25422549
f_u['matrix'][:, :],
25432550
[c.decode('ascii') for c in f_u['order'][:]])
25442551

25452552
return dm
25462553

25472554

2555+
def h5unifrac_all(h5file: str) -> list:
    """Read all UniFrac distance matrices from a hdf5 file

    Two file layouts are understood: the "multi" format, where the
    matrices are stored as consecutively numbered datasets ``matrix:0``,
    ``matrix:1``, ..., and the "single" format, where one matrix is stored
    as ``matrix``. In both cases the sample IDs are read from ``order``.

    Parameters
    ----------
    h5file : str
        A filepath to a hdf5 file.

    Returns
    -------
    list of skbio.DistanceMatrix
        The distance matrices, in index order. A single-format file
        yields a one-element list.

    Raises
    ------
    OSError
        If the hdf5 file is not found
    KeyError
        If the hdf5 does not have the necessary fields

    References
    ----------
    .. [1] Lozupone, C. & Knight, R. UniFrac: a new phylogenetic method for
       comparing microbial communities. Appl. Environ. Microbiol. 71, 8228-8235
       (2005).
    .. [2] Chang, Q., Luan, Y. & Sun, F. Variance adjusted weighted UniFrac: a
       powerful beta diversity measure for comparing communities based on
       phylogeny. BMC Bioinformatics 12:118 (2011).
    """

    with h5py.File(h5file, "r") as f_u:
        # IDs are stored as bytes; a missing 'order' raises KeyError here
        order = [c.decode('ascii') for c in f_u['order'][:]]

        # multi format: probed first so that element 0 of the result is
        # the same matrix h5unifrac returns, which also prefers 'matrix:0'
        dms = []
        i = 0
        while 'matrix:%i' % i in f_u:
            dms.append(skbio.DistanceMatrix(
                       f_u['matrix:%i' % i][:, :], order))
            i += 1

        if not dms:
            # single format; if 'matrix' is absent too, the lookup raises
            # the documented KeyError rather than returning an empty list
            dms.append(skbio.DistanceMatrix(
                       f_u['matrix'][:, :], order))

    return dms
2601+
2602+
25482603
def _build_pcoa(f_u, long_method_name, order_index,
25492604
eigval_key, samples_key, prop_key):
25502605
axis_labels = ["PC%d" % i for i in
@@ -2597,9 +2652,16 @@ def h5pcoa(h5file: str) -> skbio.OrdinationResults:
25972652
order_index = [c.decode('ascii')
25982653
for c in f_u['order'][:]]
25992654

2600-
pc = _build_pcoa(f_u, long_method_name, order_index,
2601-
'pcoa_eigvals', 'pcoa_samples',
2602-
'pcoa_proportion_explained')
2655+
if 'pcoa_eigvals:0' in f_u.keys():
2656+
# multi interface
2657+
pc = _build_pcoa(f_u, long_method_name, order_index,
2658+
'pcoa_eigvals:0', 'pcoa_samples:0',
2659+
'pcoa_proportion_explained:0')
2660+
else:
2661+
# single interface
2662+
pc = _build_pcoa(f_u, long_method_name, order_index,
2663+
'pcoa_eigvals', 'pcoa_samples',
2664+
'pcoa_proportion_explained')
26032665

26042666
return pc
26052667

0 commit comments

Comments
 (0)