Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Variance adjusted UniFrac #20

Merged
merged 28 commits into from
Aug 21, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
79f8ab5
Test for NULL on malloc...
wasade Aug 15, 2017
96db81c
Test for file existence
wasade Aug 15, 2017
b82cb31
Cleaning up warning messages
wasade Aug 16, 2017
fe30abf
ENH: generalized unifrac
wasade Aug 16, 2017
7e8cd6a
Merge branch 'higher_stack_threading' into g_unifrac
wasade Aug 16, 2017
a99808c
Expose via q2
wasade Aug 16, 2017
23af923
TST: d^0 and d^0.5
wasade Aug 16, 2017
f007742
Merge branch 'master' of github.com:wasade/q2-state-unifrac into g_un…
wasade Aug 16, 2017
b82b7e5
Missed import
wasade Aug 16, 2017
a98bf69
Refactoring
wasade Aug 16, 2017
2c083f2
regression: task param boundaries were set wrong
wasade Aug 16, 2017
98ca65c
Merge branch 'g_unifrac' into vaw
wasade Aug 16, 2017
e3804bd
Refactor tasks to separate file
wasade Aug 16, 2017
034cc4c
VAW methods
wasade Aug 16, 2017
eb904a3
Citations
wasade Aug 16, 2017
a91850c
Expose VAW
wasade Aug 16, 2017
97232c6
travis c++ needs another include
wasade Aug 17, 2017
2b12261
syntax...
wasade Aug 17, 2017
0993406
wrong param name
wasade Aug 17, 2017
9425e6a
Merge branch 'master' of github.com:wasade/q2-state-unifrac into vaw
wasade Aug 17, 2017
be69604
Missing a merge conflict
wasade Aug 17, 2017
2d30c62
Fix param
wasade Aug 17, 2017
7b5c105
Merge branch 'master' of github.com:wasade/q2-state-unifrac into vaw
wasade Aug 18, 2017
b8e7997
Merge conflict error
wasade Aug 19, 2017
aa72ae8
Merge hell
wasade Aug 19, 2017
a9df5fc
Not sure
wasade Aug 19, 2017
619aeb9
Addressing @eldeveloper's comments
wasade Aug 21, 2017
6a4f1a6
Missed close of comment
wasade Aug 21, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 21 additions & 9 deletions q2_state_unifrac/_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,61 +23,73 @@ def _sanity():
raise ValueError("ssu could not be located!")


def _run(table_fp, tree_fp, output_fp, threads, method):
def _run(table_fp, tree_fp, output_fp, threads, method,
variance_adjusted=False, alpha=None):
cmd = [resource_filename(*ARGS),
'-i', table_fp,
'-t', tree_fp,
'-o', output_fp,
'-n', threads,
'-m', method]

if variance_adjusted:
cmd.append('--vaw')

if alpha is not None:
cmd.append('-a')
cmd.append(str(alpha))

subprocess.run(cmd, check=True)


def unweighted(table: BIOMV210Format,
phylogeny: NewickFormat,
threads: int=1)-> skbio.DistanceMatrix:
threads: int=1,
variance_adjusted: bool=False)-> skbio.DistanceMatrix:
_sanity()

with tempfile.TemporaryDirectory() as tmp:
output_fp = os.path.join(tmp, 'foo.dm')
_run(str(table), str(phylogeny), output_fp, str(threads),
'unweighted')
'unweighted', variance_adjusted)
return skbio.DistanceMatrix.read(output_fp)


def weighted_normalized(table: BIOMV210Format,
phylogeny: NewickFormat,
threads: int=1)-> skbio.DistanceMatrix:
threads: int=1,
variance_adjusted: bool=False)-> skbio.DistanceMatrix:
_sanity()

with tempfile.TemporaryDirectory() as tmp:
output_fp = os.path.join(tmp, 'foo.dm')
_run(str(table), str(phylogeny), output_fp, str(threads),
'weighted_normalized')
'weighted_normalized', variance_adjusted)
return skbio.DistanceMatrix.read(output_fp)


def weighted_unnormalized(table: BIOMV210Format,
phylogeny: NewickFormat,
threads: int=1)-> skbio.DistanceMatrix:
threads: int=1,
variance_adjusted: bool=False) -> skbio.DistanceMatrix: # noqa
_sanity()

with tempfile.TemporaryDirectory() as tmp:
output_fp = os.path.join(tmp, 'foo.dm')
_run(str(table), str(phylogeny), output_fp, str(threads),
'weighted_unnormalized')
'weighted_unnormalized', variance_adjusted)
return skbio.DistanceMatrix.read(output_fp)


def generalized(table: BIOMV210Format,
phylogeny: NewickFormat,
threads: int=1,
alpha: float=1.0)-> skbio.DistanceMatrix:
alpha: float=1.0,
variance_adjusted: bool=False)-> skbio.DistanceMatrix:
_sanity()

with tempfile.TemporaryDirectory() as tmp:
output_fp = os.path.join(tmp, 'foo.dm')
_run(str(table), str(phylogeny), output_fp, str(threads),
'generalized')
'generalized', variance_adjusted, alpha)
return skbio.DistanceMatrix.read(output_fp)
30 changes: 23 additions & 7 deletions q2_state_unifrac/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

from qiime2.plugin import (Plugin, Properties, Int, Float)
from qiime2.plugin import (Plugin, Properties, Int, Float, Bool)
from q2_types.feature_table import FeatureTable, Frequency
from q2_types.distance_matrix import DistanceMatrix
from q2_types.tree import Phylogeny, Rooted
Expand All @@ -28,8 +28,12 @@
function=q2_state_unifrac.unweighted,
inputs={'table': FeatureTable[Frequency] % Properties('uniform-sampling'),
'phylogeny': Phylogeny[Rooted]},
parameters={'threads': Int},
parameter_descriptions={'threads': 'The number of threads to use.'},
parameters={'threads': Int,
'variance_adjusted': Bool},
parameter_descriptions={'threads': 'The number of threads to use.',
'variance_adjusted':
('Perform variance adjustment based on '
'Chang et al. BMC Bioinformatics 2011')},
input_descriptions={
'table': 'A rarefied FeatureTable',
'phylogeny': ('A rooted phylogeny which relates the observations in '
Expand All @@ -47,8 +51,12 @@
function=q2_state_unifrac.weighted_unnormalized,
inputs={'table': FeatureTable[Frequency] % Properties('uniform-sampling'),
'phylogeny': Phylogeny[Rooted]},
parameters={'threads': Int},
parameter_descriptions={'threads': 'The number of threads to use.'},
parameters={'threads': Int,
'variance_adjusted': Bool},
parameter_descriptions={'threads': 'The number of threads to use.',
'variance_adjusted':
('Perform variance adjustment based on '
'Chang et al. BMC Bioinformatics 2011')},
input_descriptions={
'table': 'A rarefied FeatureTable',
'phylogeny': ('A rooted phylogeny which relates the observations in '
Expand All @@ -66,8 +74,12 @@
function=q2_state_unifrac.weighted_normalized,
inputs={'table': FeatureTable[Frequency] % Properties('uniform-sampling'),
'phylogeny': Phylogeny[Rooted]},
parameters={'threads': Int},
parameter_descriptions={'threads': 'The number of threads to use.'},
parameters={'threads': Int,
'variance_adjusted': Bool},
parameter_descriptions={'threads': 'The number of threads to use.',
'variance_adjusted':
('Perform variance adjustment based on '
'Chang et al. BMC Bioinformatics 2011')},
input_descriptions={
'table': 'A rarefied FeatureTable',
'phylogeny': ('A rooted phylogeny which relates the observations in '
Expand All @@ -86,8 +98,12 @@
inputs={'table': FeatureTable[Frequency] % Properties('uniform-sampling'),
'phylogeny': Phylogeny[Rooted]},
parameters={'threads': Int,
'variance_adjusted': Bool,
'alpha': Float},
parameter_descriptions={'threads': 'The number of threads to use.',
'variance_adjusted':
('Perform variance adjustment based on '
'Chang et al. BMC Bioinformatics 2011'),
'alpha': ('The value of alpha controls importance '
'of sample proportions. 1.0 is '
'weighted normalized UniFrac. 0.0 is '
Expand Down
8 changes: 4 additions & 4 deletions sucpp/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,11 @@ else
endif


test: tree.o test_su.cpp biom.o unifrac.o
$(CXX) $(CPPFLAGS) -Wno-unused-parameter test_su.cpp -o test_su tree.o biom.o unifrac.o
test: tree.o test_su.cpp biom.o unifrac.o unifrac_task.o
$(CXX) $(CPPFLAGS) -Wno-unused-parameter test_su.cpp -o test_su tree.o biom.o unifrac.o unifrac_task.o

main: tree.o biom.o unifrac.o cmd.o
$(CXX) $(CPPFLAGS) su.cpp -o ssu tree.o biom.o unifrac.o cmd.o
main: tree.o biom.o unifrac.o cmd.o unifrac_task.o
$(CXX) $(CPPFLAGS) su.cpp -o ssu tree.o biom.o unifrac.o cmd.o unifrac_task.o

ocltest: opencl.o
clang++ -framework OpenCL opencl.cpp -c
Expand Down
50 changes: 36 additions & 14 deletions sucpp/su.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,26 @@
#include <thread>

void usage() {
std::cout << "usage: ssu -i <biom> -o <out.dm> -m [METHOD] -t <newick> [-n threads] [-a alpha]" << std::endl;
std::cout << "usage: ssu -i <biom> -o <out.dm> -m [METHOD] -t <newick> [-n threads] [-a alpha] [--vaw]" << std::endl;
std::cout << std::endl;
std::cout << " -i\tThe input BIOM table." << std::endl;
std::cout << " -t\tThe input phylogeny in newick." << std::endl;
std::cout << " -m\tThe method, [unweighted | weighted_normalized | weighted_unnormalized | generalized]." << std::endl;
std::cout << " -o\tThe output distance matrix." << std::endl;
std::cout << " -n\t[OPTIONAL] The number of threads, default is 1." << std::endl;
std::cout << " -a\t[OPTIONAL] Generalized UniFrac alpha, default is 1." << std::endl;
std::cout << " -i\t\tThe input BIOM table." << std::endl;
std::cout << " -t\t\tThe input phylogeny in newick." << std::endl;
std::cout << " -m\t\tThe method, [unweighted | weighted_normalized | weighted_unnormalized | generalized]." << std::endl;
std::cout << " -o\t\tThe output distance matrix." << std::endl;
std::cout << " -n\t\t[OPTIONAL] The number of threads, default is 1." << std::endl;
std::cout << " -a\t\t[OPTIONAL] Generalized UniFrac alpha, default is 1." << std::endl;
std::cout << " --vaw\t[OPTIONAL] Variance adjusted, default is to not adjust for variance." << std::endl;
std::cout << std::endl;
std::cout << "Citations: " << std::endl;
std::cout << " For UniFrac, please see:" << std::endl;
std::cout << " Lozupone and Knight Appl Environ Microbiol 2005; DOI: 10.1128/AEM.71.12.8228-8235.2005" << std::endl;
std::cout << " Lozupone et al. Appl Environ Microbiol 2007; DOI: 10.1128/AEM.01996-06" << std::endl;
std::cout << " Hamady et al. ISME 2010; DOI: 10.1038/ismej.2009.97" << std::endl;
std::cout << " Lozupone et al. ISME 2011; DOI: 10.1038/ismej.2010.133" << std::endl;
std::cout << " For Generalized UniFrac, please see: " << std::endl;
std::cout << " Chen et al. Bioinformatics 2012; DOI: 10.1093/bioinformatics/bts342" << std::endl;
std::cout << " For Variance Adjusted UniFrac, please see: " << std::endl;
std::cout << " Chang et al. BMC Bioinformatics 2011; DOI: 10.1186/1471-2105-12-118" << std::endl;
std::cout << std::endl;
}

Expand Down Expand Up @@ -97,6 +109,7 @@ int main(int argc, char **argv){
nthreads = atoi(nthreads_arg.c_str());
}

bool vaw = input.cmdOptionExists("--vaw");
double g_unifrac_alpha;
if(gunifrac_arg.empty()) {
g_unifrac_alpha = 1.0;
Expand Down Expand Up @@ -150,13 +163,22 @@ int main(int argc, char **argv){
tasks[threads.size() - 1].stop = end;

for(unsigned int tid = 0; tid < threads.size(); tid++) {
threads[tid] = std::thread(su::unifrac,
std::ref(table),
std::ref(tree_sheared),
method,
std::ref(dm_stripes),
std::ref(dm_stripes_total),
&tasks[tid]);
if(vaw)
threads[tid] = std::thread(su::unifrac_vaw,
std::ref(table),
std::ref(tree_sheared),
method,
std::ref(dm_stripes),
std::ref(dm_stripes_total),
&tasks[tid]);
else
threads[tid] = std::thread(su::unifrac,
std::ref(table),
std::ref(tree_sheared),
method,
std::ref(dm_stripes),
std::ref(dm_stripes_total),
&tasks[tid]);
}

for(unsigned int tid = 0; tid < threads.size(); tid++) {
Expand Down
49 changes: 46 additions & 3 deletions sucpp/test_su.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -743,6 +743,48 @@ void test_generalized_unifrac() {
SUITE_END();
}

void test_vaw_unifrac_weighted_normalized() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor stylistic note, I checked in vim and this test case has a mix of tabs and spaces. Up to you if you want to fix. 👍

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch

SUITE_START("test vaw weighted normalized unifrac");

std::vector<std::thread> threads(1);
su::BPTree tree = su::BPTree("(GG_OTU_1:1,(GG_OTU_2:1,GG_OTU_3:1):1,(GG_OTU_5:1,GG_OTU_4:1):1);");
su::biom table = su::biom("test.biom");

// as computed by GUniFrac, the original implementation of VAW-UniFrac
// could not be found.
// Sample1 Sample2 Sample3 Sample4 Sample5 Sample6
//Sample1 0.0000000 0.4086040 0.6240185 0.4639481 0.2857143 0.2766318
//Sample2 0.4086040 0.0000000 0.3798594 0.6884992 0.6807616 0.4735781
//Sample3 0.6240185 0.3798594 0.0000000 0.7713254 0.8812897 0.5047114
//Sample4 0.4639481 0.6884992 0.7713254 0.0000000 0.6666667 0.2709298
//Sample5 0.2857143 0.6807616 0.8812897 0.6666667 0.0000000 0.4735991
//Sample6 0.2766318 0.4735781 0.5047114 0.2709298 0.4735991 0.0000000
// weighted normalized unifrac as computed above

std::vector<double*> w_exp;
double w_stride1[] = {0.4086040, 0.3798594, 0.7713254, 0.6666667, 0.4735991, 0.2766318};
double w_stride2[] = {0.6240185, 0.6884992, 0.8812897, 0.2709298, 0.2857143, 0.4735781};
double w_stride3[] = {0.4639481, 0.6807616, 0.5047114, 0.4639481, 0.6807616, 0.5047114};
w_exp.push_back(w_stride1);
w_exp.push_back(w_stride2);
w_exp.push_back(w_stride3);
std::vector<double*> w_strides = su::make_strides(6);
std::vector<double*> w_strides_total = su::make_strides(6);
su::task_parameters w_task_p;
w_task_p.start = 0; w_task_p.stop = 3; w_task_p.tid = 0; w_task_p.n_samples = 6;
w_task_p.g_unifrac_alpha = 1.0;
su::unifrac_vaw(table, tree, su::weighted_normalized, w_strides, w_strides_total, &w_task_p);

for(unsigned int i = 0; i < 3; i++) {
for(unsigned int j = 0; j < 6; j++) {
ASSERT(fabs(w_strides[i][j] - w_exp[i][j]) < 0.000001);
}
free(w_strides[i]);
}
SUITE_END();
}


void test_make_strides() {
SUITE_START("test make stripes");
std::vector<double*> exp;
Expand Down Expand Up @@ -826,7 +868,7 @@ void test_bptree_shear_simple() {
su::BPTree tree = su::BPTree("((3:2,4:3,(6:5)5:4)2:1,7:6,((10:9,11:10)9:8)8:7)r");

// simple
std::unordered_set<std::string> to_keep = {"4", "6", "7", "10", "11"};
std::unordered_set<std::string> to_keep = {"4", "6", "7", "10", "11"};

uint32_t exp_nparens = 20;
std::vector<bool> exp_structure = {true, true, true, false, true, true, false, false, false, true,
Expand All @@ -848,7 +890,7 @@ void test_bptree_shear_deep() {
su::BPTree tree = su::BPTree("((3:2,4:3,(6:5)5:4)2:1,7:6,((10:9,11:10)9:8)8:7)r");

// deep
std::unordered_set<std::string> to_keep = {"10", "11"};
std::unordered_set<std::string> to_keep = {"10", "11"};

uint32_t exp_nparens = 10;
std::vector<bool> exp_structure = {true, true, true, true, false, true, false, false, false, false};
Expand Down Expand Up @@ -893,7 +935,7 @@ void test_bptree_collapse_edge() {
ASSERT(obs.names == exp.names);
ASSERT(vec_almost_equal(obs.lengths, exp.lengths));

SUITE_END();
SUITE_END();
}

void test_unifrac_sample_counts() {
Expand Down Expand Up @@ -945,6 +987,7 @@ int main(int argc, char** argv) {
test_unnormalized_weighted_unifrac();
test_normalized_weighted_unifrac();
test_generalized_unifrac();
test_vaw_unifrac_weighted_normalized();
test_unifrac_sample_counts();

printf("\n");
Expand Down
Loading