diff --git a/q2_state_unifrac/_methods.py b/q2_state_unifrac/_methods.py index dbe015b9..ca9d4ae9 100644 --- a/q2_state_unifrac/_methods.py +++ b/q2_state_unifrac/_methods.py @@ -23,7 +23,8 @@ def _sanity(): raise ValueError("ssu could not be located!") -def _run(table_fp, tree_fp, output_fp, threads, method): +def _run(table_fp, tree_fp, output_fp, threads, method, + variance_adjusted=False, alpha=None): cmd = [resource_filename(*ARGS), '-i', table_fp, '-t', tree_fp, @@ -31,53 +32,64 @@ def _run(table_fp, tree_fp, output_fp, threads, method): '-n', threads, '-m', method] + if variance_adjusted: + cmd.append('--vaw') + + if alpha is not None: + cmd.append('-a') + cmd.append(str(alpha)) + subprocess.run(cmd, check=True) def unweighted(table: BIOMV210Format, phylogeny: NewickFormat, - threads: int=1)-> skbio.DistanceMatrix: + threads: int=1, + variance_adjusted: bool=False)-> skbio.DistanceMatrix: _sanity() with tempfile.TemporaryDirectory() as tmp: output_fp = os.path.join(tmp, 'foo.dm') _run(str(table), str(phylogeny), output_fp, str(threads), - 'unweighted') + 'unweighted', variance_adjusted) return skbio.DistanceMatrix.read(output_fp) def weighted_normalized(table: BIOMV210Format, phylogeny: NewickFormat, - threads: int=1)-> skbio.DistanceMatrix: + threads: int=1, + variance_adjusted: bool=False)-> skbio.DistanceMatrix: _sanity() with tempfile.TemporaryDirectory() as tmp: output_fp = os.path.join(tmp, 'foo.dm') _run(str(table), str(phylogeny), output_fp, str(threads), - 'weighted_normalized') + 'weighted_normalized', variance_adjusted) return skbio.DistanceMatrix.read(output_fp) def weighted_unnormalized(table: BIOMV210Format, phylogeny: NewickFormat, - threads: int=1)-> skbio.DistanceMatrix: + threads: int=1, + variance_adjusted: bool=False) -> skbio.DistanceMatrix: # noqa _sanity() with tempfile.TemporaryDirectory() as tmp: output_fp = os.path.join(tmp, 'foo.dm') _run(str(table), str(phylogeny), output_fp, str(threads), - 'weighted_unnormalized') + 'weighted_unnormalized', variance_adjusted) return skbio.DistanceMatrix.read(output_fp) def generalized(table: BIOMV210Format, phylogeny: NewickFormat, threads: int=1, - alpha: float=1.0)-> skbio.DistanceMatrix: + alpha: float=1.0, + variance_adjusted: bool=False)-> skbio.DistanceMatrix: _sanity() with tempfile.TemporaryDirectory() as tmp: output_fp = os.path.join(tmp, 'foo.dm') _run(str(table), str(phylogeny), output_fp, str(threads), - 'generalized') + 'generalized', variance_adjusted, alpha) return skbio.DistanceMatrix.read(output_fp) diff --git a/q2_state_unifrac/plugin_setup.py b/q2_state_unifrac/plugin_setup.py index caee7c6b..302b395e 100644 --- a/q2_state_unifrac/plugin_setup.py +++ b/q2_state_unifrac/plugin_setup.py @@ -6,7 +6,7 @@ # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- -from qiime2.plugin import (Plugin, Properties, Int, Float) +from qiime2.plugin import (Plugin, Properties, Int, Float, Bool) from q2_types.feature_table import FeatureTable, Frequency from q2_types.distance_matrix import DistanceMatrix from q2_types.tree import Phylogeny, Rooted @@ -28,8 +28,12 @@ function=q2_state_unifrac.unweighted, inputs={'table': FeatureTable[Frequency] % Properties('uniform-sampling'), 'phylogeny': Phylogeny[Rooted]}, - parameters={'threads': Int}, - parameter_descriptions={'threads': 'The number of threads to use.'}, + parameters={'threads': Int, + 'variance_adjusted': Bool}, + parameter_descriptions={'threads': 'The number of threads to use.', + 'variance_adjusted': + ('Perform variance adjustment based on ' + 'Chang et al. BMC Bioinformatics 2011')}, input_descriptions={ 'table': 'A rarefied FeatureTable', 'phylogeny': ('A rooted phylogeny which relates the observations in ' @@ -47,8 +51,12 @@ function=q2_state_unifrac.weighted_unnormalized, inputs={'table': FeatureTable[Frequency] % Properties('uniform-sampling'), 'phylogeny': Phylogeny[Rooted]}, - parameters={'threads': Int}, - parameter_descriptions={'threads': 'The number of threads to use.'}, + parameters={'threads': Int, + 'variance_adjusted': Bool}, + parameter_descriptions={'threads': 'The number of threads to use.', + 'variance_adjusted': + ('Perform variance adjustment based on ' + 'Chang et al. BMC Bioinformatics 2011')}, input_descriptions={ 'table': 'A rarefied FeatureTable', 'phylogeny': ('A rooted phylogeny which relates the observations in ' @@ -66,8 +74,12 @@ function=q2_state_unifrac.weighted_normalized, inputs={'table': FeatureTable[Frequency] % Properties('uniform-sampling'), 'phylogeny': Phylogeny[Rooted]}, - parameters={'threads': Int}, - parameter_descriptions={'threads': 'The number of threads to use.'}, + parameters={'threads': Int, + 'variance_adjusted': Bool}, + parameter_descriptions={'threads': 'The number of threads to use.', + 'variance_adjusted': + ('Perform variance adjustment based on ' + 'Chang et al. BMC Bioinformatics 2011')}, input_descriptions={ 'table': 'A rarefied FeatureTable', 'phylogeny': ('A rooted phylogeny which relates the observations in ' @@ -86,8 +98,12 @@ inputs={'table': FeatureTable[Frequency] % Properties('uniform-sampling'), 'phylogeny': Phylogeny[Rooted]}, parameters={'threads': Int, + 'variance_adjusted': Bool, 'alpha': Float}, parameter_descriptions={'threads': 'The number of threads to use.', + 'variance_adjusted': + ('Perform variance adjustment based on ' + 'Chang et al. BMC Bioinformatics 2011'), 'alpha': ('The value of alpha controls importance ' 'of sample proportions. 1.0 is ' 'weighted normalized UniFrac. 0.0 is ' diff --git a/sucpp/Makefile b/sucpp/Makefile index a5be99c5..2cd5ad8d 100644 --- a/sucpp/Makefile +++ b/sucpp/Makefile @@ -29,11 +29,11 @@ else endif -test: tree.o test_su.cpp biom.o unifrac.o - $(CXX) $(CPPFLAGS) -Wno-unused-parameter test_su.cpp -o test_su tree.o biom.o unifrac.o +test: tree.o test_su.cpp biom.o unifrac.o unifrac_task.o + $(CXX) $(CPPFLAGS) -Wno-unused-parameter test_su.cpp -o test_su tree.o biom.o unifrac.o unifrac_task.o -main: tree.o biom.o unifrac.o cmd.o - $(CXX) $(CPPFLAGS) su.cpp -o ssu tree.o biom.o unifrac.o cmd.o +main: tree.o biom.o unifrac.o cmd.o unifrac_task.o + $(CXX) $(CPPFLAGS) su.cpp -o ssu tree.o biom.o unifrac.o cmd.o unifrac_task.o ocltest: opencl.o clang++ -framework OpenCL opencl.cpp -c diff --git a/sucpp/su.cpp b/sucpp/su.cpp index f59edd8d..9a5c8bfc 100644 --- a/sucpp/su.cpp +++ b/sucpp/su.cpp @@ -9,14 +9,26 @@ #include <thread> void usage() { - std::cout << "usage: ssu -i <biom> -o <out.dm> -m [METHOD] -t <newick> [-n threads] [-a alpha]" << std::endl; + std::cout << "usage: ssu -i <biom> -o <out.dm> -m [METHOD] -t <newick> [-n threads] [-a alpha] [--vaw]" << std::endl; std::cout << std::endl; - std::cout << " -i\tThe input BIOM table." << std::endl; - std::cout << " -t\tThe input phylogeny in newick." << std::endl; - std::cout << " -m\tThe method, [unweighted | weighted_normalized | weighted_unnormalized | generalized]." << std::endl; - std::cout << " -o\tThe output distance matrix." << std::endl; - std::cout << " -n\t[OPTIONAL] The number of threads, default is 1." << std::endl; - std::cout << " -a\t[OPTIONAL] Generalized UniFrac alpha, default is 1." << std::endl; + std::cout << " -i\t\tThe input BIOM table." << std::endl; + std::cout << " -t\t\tThe input phylogeny in newick." << std::endl; + std::cout << " -m\t\tThe method, [unweighted | weighted_normalized | weighted_unnormalized | generalized]." << std::endl; + std::cout << " -o\t\tThe output distance matrix." << std::endl; + std::cout << " -n\t\t[OPTIONAL] The number of threads, default is 1." << std::endl; + std::cout << " -a\t\t[OPTIONAL] Generalized UniFrac alpha, default is 1." << std::endl; + std::cout << " --vaw\t[OPTIONAL] Variance adjusted, default is to not adjust for variance." << std::endl; + std::cout << std::endl; + std::cout << "Citations: " << std::endl; + std::cout << " For UniFrac, please see:" << std::endl; + std::cout << " Lozupone and Knight Appl Environ Microbiol 2005; DOI: 10.1128/AEM.71.12.8228-8235.2005" << std::endl; + std::cout << " Lozupone et al. Appl Environ Microbiol 2007; DOI: 10.1128/AEM.01996-06" << std::endl; + std::cout << " Hamady et al. ISME 2010; DOI: 10.1038/ismej.2009.97" << std::endl; + std::cout << " Lozupone et al. ISME 2011; DOI: 10.1038/ismej.2010.133" << std::endl; + std::cout << " For Generalized UniFrac, please see: " << std::endl; + std::cout << " Chen et al. Bioinformatics 2012; DOI: 10.1093/bioinformatics/bts342" << std::endl; + std::cout << " For Variance Adjusted UniFrac, please see: " << std::endl; + std::cout << " Chang et al. BMC Bioinformatics 2011; DOI: 10.1186/1471-2105-12-118" << std::endl; std::cout << std::endl; } @@ -97,6 +109,7 @@ int main(int argc, char **argv){ nthreads = atoi(nthreads_arg.c_str()); } + bool vaw = input.cmdOptionExists("--vaw"); double g_unifrac_alpha; if(gunifrac_arg.empty()) { g_unifrac_alpha = 1.0; @@ -150,13 +163,22 @@ int main(int argc, char **argv){ tasks[threads.size() - 1].stop = end; for(unsigned int tid = 0; tid < threads.size(); tid++) { - threads[tid] = std::thread(su::unifrac, - std::ref(table), - std::ref(tree_sheared), - method, - std::ref(dm_stripes), - std::ref(dm_stripes_total), - &tasks[tid]); + if(vaw) + threads[tid] = std::thread(su::unifrac_vaw, + std::ref(table), + std::ref(tree_sheared), + method, + std::ref(dm_stripes), + std::ref(dm_stripes_total), + &tasks[tid]); + else + threads[tid] = std::thread(su::unifrac, + std::ref(table), + std::ref(tree_sheared), + method, + std::ref(dm_stripes), + std::ref(dm_stripes_total), + &tasks[tid]); } for(unsigned int tid = 0; tid < threads.size(); tid++) { diff --git a/sucpp/test_su.cpp b/sucpp/test_su.cpp index 65055bfa..446c89ff 100644 --- a/sucpp/test_su.cpp +++ b/sucpp/test_su.cpp @@ -743,6 +743,48 @@ void test_generalized_unifrac() { SUITE_END(); } +void test_vaw_unifrac_weighted_normalized() { + SUITE_START("test vaw weighted normalized unifrac"); + + std::vector<std::thread> threads(1); + su::BPTree tree = su::BPTree("(GG_OTU_1:1,(GG_OTU_2:1,GG_OTU_3:1):1,(GG_OTU_5:1,GG_OTU_4:1):1);"); + su::biom table = su::biom("test.biom"); + + // as computed by GUniFrac, the original implementation of VAW-UniFrac + // could not be found. + // Sample1 Sample2 Sample3 Sample4 Sample5 Sample6 + //Sample1 0.0000000 0.4086040 0.6240185 0.4639481 0.2857143 0.2766318 + //Sample2 0.4086040 0.0000000 0.3798594 0.6884992 0.6807616 0.4735781 + //Sample3 0.6240185 0.3798594 0.0000000 0.7713254 0.8812897 0.5047114 + //Sample4 0.4639481 0.6884992 0.7713254 0.0000000 0.6666667 0.2709298 + //Sample5 0.2857143 0.6807616 0.8812897 0.6666667 0.0000000 0.4735991 + //Sample6 0.2766318 0.4735781 0.5047114 0.2709298 0.4735991 0.0000000 + // weighted normalized unifrac as computed above + + std::vector<double*> w_exp; + double w_stride1[] = {0.4086040, 0.3798594, 0.7713254, 0.6666667, 0.4735991, 0.2766318}; + double w_stride2[] = {0.6240185, 0.6884992, 0.8812897, 0.2709298, 0.2857143, 0.4735781}; + double w_stride3[] = {0.4639481, 0.6807616, 0.5047114, 0.4639481, 0.6807616, 0.5047114}; + w_exp.push_back(w_stride1); + w_exp.push_back(w_stride2); + w_exp.push_back(w_stride3); + std::vector<double*> w_strides = su::make_strides(6); + std::vector<double*> w_strides_total = su::make_strides(6); + su::task_parameters w_task_p; + w_task_p.start = 0; w_task_p.stop = 3; w_task_p.tid = 0; w_task_p.n_samples = 6; + w_task_p.g_unifrac_alpha = 1.0; + su::unifrac_vaw(table, tree, su::weighted_normalized, w_strides, w_strides_total, &w_task_p); + + for(unsigned int i = 0; i < 3; i++) { + for(unsigned int j = 0; j < 6; j++) { + ASSERT(fabs(w_strides[i][j] - w_exp[i][j]) < 0.000001); + } + free(w_strides[i]); + } + SUITE_END(); +} + + void test_make_strides() { SUITE_START("test make stripes"); std::vector<double*> exp; @@ -826,7 +868,7 @@ void test_bptree_shear_simple() { su::BPTree tree = su::BPTree("((3:2,4:3,(6:5)5:4)2:1,7:6,((10:9,11:10)9:8)8:7)r"); // simple - std::unordered_set<std::string> to_keep = {"4", "6", "7", "10", "11"}; + std::unordered_set<std::string> to_keep = {"4", "6", "7", "10", "11"}; uint32_t exp_nparens = 20; std::vector<bool> exp_structure = {true, true, true, false, true, true, false, false, false, true, @@ -848,7 +890,7 @@ void test_bptree_shear_deep() { su::BPTree tree = su::BPTree("((3:2,4:3,(6:5)5:4)2:1,7:6,((10:9,11:10)9:8)8:7)r"); // deep - std::unordered_set<std::string> to_keep = {"10", "11"}; + std::unordered_set<std::string> to_keep = {"10", "11"}; uint32_t exp_nparens = 10; std::vector<bool> exp_structure = {true, true, true, true, false, true, false, false, false, false}; @@ -893,7 +935,7 @@ void test_bptree_collapse_edge() { ASSERT(obs.names == exp.names); ASSERT(vec_almost_equal(obs.lengths, exp.lengths)); - SUITE_END(); + SUITE_END(); } void test_unifrac_sample_counts() { @@ -945,6 +987,7 @@ int main(int argc, char** argv) { test_unnormalized_weighted_unifrac(); test_normalized_weighted_unifrac(); test_generalized_unifrac(); + test_vaw_unifrac_weighted_normalized(); test_unifrac_sample_counts(); printf("\n"); diff --git a/sucpp/unifrac.cpp b/sucpp/unifrac.cpp index 74619a3b..528647d9 100644 --- a/sucpp/unifrac.cpp +++ b/sucpp/unifrac.cpp @@ -3,7 +3,6 @@ #include "unifrac.hpp" #include <unordered_map> #include <cstdlib> -#include <math.h> #include <algorithm> #include <thread> @@ -52,11 +51,12 @@ double* PropStack::pop(uint32_t node) { * add it to our record of known vectors so we can track our mallocs */ double *vec; + int err = 0; if(prop_stack.empty()) { - posix_memalign((void **)&vec, 32, sizeof(double) * defaultsize); - if(vec == NULL) { - fprintf(stderr, "Failed to allocate %zd bytes; [%s]:%d\n", - sizeof(double) * defaultsize, __FILE__, __LINE__); + err = posix_memalign((void **)&vec, 32, sizeof(double) * defaultsize); + if(vec == NULL || err != 0) { + fprintf(stderr, "Failed to allocate %zd bytes, err %d; [%s]:%d\n", + sizeof(double) * defaultsize, err, __FILE__, __LINE__); exit(EXIT_FAILURE); } } @@ -307,10 +307,10 @@ void _unweighted_unifrac_task(std::vector<double*> &__restrict__ dm_stripes, void progressbar(float progress) { - // from http://stackoverflow.com/a/14539953 + // from http://stackoverflow.com/a/14539953 // // could encapsulate into a classs for displaying time elapsed etc - int barWidth = 70; + int barWidth = 70; std::cout << "["; int pos = barWidth * progress; for (int i = 0; i < barWidth; ++i) { @@ -322,6 +322,59 @@ void progressbar(float progress) { std::cout.flush(); } +void initialize_embedded(double*& prop, const su::task_parameters* task_p) { + int err = 0; + err = posix_memalign((void **)&prop, 32, sizeof(double) * task_p->n_samples * 2); + if(prop == NULL || err != 0) { + fprintf(stderr, "Failed to allocate %zd bytes, err %d; [%s]:%d\n", + sizeof(double) * task_p->n_samples * 2, err, __FILE__, __LINE__); + exit(EXIT_FAILURE); + } +} + +void initialize_sample_counts(double*& counts, const su::task_parameters* task_p, biom &table) { + int err = 0; + err = posix_memalign((void **)&counts, 32, sizeof(double) * task_p->n_samples * 2); + if(counts == NULL || err != 0) { + fprintf(stderr, "Failed to allocate %zd bytes, err %d; [%s]:%d\n", + sizeof(double) * task_p->n_samples, err, __FILE__, __LINE__); + exit(EXIT_FAILURE); + } + for(unsigned int i = 0; i < table.n_samples; i++) { + counts[i] = table.sample_counts[i]; + counts[i + table.n_samples] = table.sample_counts[i]; + } +} + +void initialize_stripes(std::vector<double*> &dm_stripes, + std::vector<double*> &dm_stripes_total, + Method unifrac_method, + const su::task_parameters* task_p) { + int err = 0; + for(unsigned int i = task_p->start; i < task_p->stop; i++){ + err = posix_memalign((void **)&dm_stripes[i], 32, sizeof(double) * task_p->n_samples); + if(dm_stripes[i] == NULL || err != 0) { + fprintf(stderr, "Failed to allocate %zd bytes, err %d; [%s]:%d\n", + sizeof(double) * task_p->n_samples, err, __FILE__, __LINE__); + exit(EXIT_FAILURE); + } + for(unsigned int j = 0; j < task_p->n_samples; j++) + dm_stripes[i][j] = 0.; + + if(unifrac_method == unweighted || unifrac_method == weighted_normalized) { + err = posix_memalign((void **)&dm_stripes_total[i], 32, sizeof(double) * task_p->n_samples); + if(dm_stripes_total[i] == NULL || err != 0) { + fprintf(stderr, "Failed to allocate %zd bytes err %d; [%s]:%d\n", + sizeof(double) * task_p->n_samples, err, __FILE__, __LINE__); + exit(EXIT_FAILURE); + } + for(unsigned int j = 0; j < task_p->n_samples; j++) + dm_stripes_total[i][j] = 0.; + } + } +} + + void su::unifrac(biom &table, BPTree &tree, Method unifrac_method, @@ -342,53 +395,36 @@ void su::unifrac(biom &table, switch(unifrac_method) { case unweighted: - func = &_unweighted_unifrac_task; + func = &su::_unweighted_unifrac_task; break; case weighted_normalized: - func = &_normalized_weighted_unifrac_task; + func = &su::_normalized_weighted_unifrac_task; break; case weighted_unnormalized: - func = &_unnormalized_weighted_unifrac_task; + func = &su::_unnormalized_weighted_unifrac_task; break; case generalized: - func = &_generalized_unifrac_task; + func = &su::_generalized_unifrac_task; + break; + default: + func = NULL; break; } + + if(func == NULL) { + fprintf(stderr, "Unknown unifrac task\n"); + exit(1); + } + PropStack propstack(table.n_samples); uint32_t node; double *node_proportions; double *embedded_proportions; double length; - posix_memalign((void **)&embedded_proportions, 32, sizeof(double) * table.n_samples * 2); - if(embedded_proportions == NULL) { - fprintf(stderr, "Failed to allocate %zd bytes; [%s]:%d\n", - sizeof(double) * table.n_samples, __FILE__, __LINE__); - exit(EXIT_FAILURE); - } - // thread local memory - for(unsigned int i = task_p->start; i < task_p->stop; i++){ - posix_memalign((void **)&dm_stripes[i], 32, sizeof(double) * table.n_samples); - if(dm_stripes[i] == NULL) { - fprintf(stderr, "Failed to allocate %zd bytes; [%s]:%d\n", - sizeof(double) * table.n_samples, __FILE__, __LINE__); - exit(EXIT_FAILURE); - } - for(unsigned int j = 0; j < table.n_samples; j++) - dm_stripes[i][j] = 0.; - - if(unifrac_method == unweighted || unifrac_method == weighted_normalized) { - posix_memalign((void **)&dm_stripes_total[i], 32, sizeof(double) * table.n_samples); - if(dm_stripes_total[i] == NULL) { - fprintf(stderr, "Failed to allocate %zd bytes; [%s]:%d\n", - sizeof(double) * table.n_samples, __FILE__, __LINE__); - exit(EXIT_FAILURE); - } - for(unsigned int j = 0; j < table.n_samples; j++) - dm_stripes_total[i][j] = 0.; - } - } + initialize_embedded(embedded_proportions, task_p); + initialize_stripes(std::ref(dm_stripes), std::ref(dm_stripes_total), unifrac_method, task_p); // - 1 to avoid root for(unsigned int k = 0; k < (tree.nparens / 2) - 1; k++) { @@ -447,7 +483,7 @@ void su::unifrac(biom &table, // should make this compile-time support //if((tid == 0) && ((k % 1000) == 0)) - // progressbar((float)k / (float)(tree.nparens / 2)); + // progressbar((float)k / (float)(tree.nparens / 2)); } if(unifrac_method == weighted_normalized || unifrac_method == unweighted || unifrac_method == generalized) { @@ -461,15 +497,105 @@ void su::unifrac(biom &table, free(embedded_proportions); } +void su::unifrac_vaw(biom &table, + BPTree &tree, + Method unifrac_method, + std::vector<double*> &dm_stripes, + std::vector<double*> &dm_stripes_total, + const su::task_parameters* task_p) { + + if(table.n_samples != task_p->n_samples) { + fprintf(stderr, "Task and table n_samples not equal\n"); + exit(EXIT_FAILURE); + } + + void (*func)(std::vector<double*>&, // dm_stripes + std::vector<double*>&, // dm_stripes_total + double*, // embedded_proportions + double*, // embedded_counts + double*, // sample total counts + double, // length + const su::task_parameters*); + + switch(unifrac_method) { + case unweighted: + func = &su::_vaw_unweighted_unifrac_task; + break; + case weighted_normalized: + func = &su::_vaw_normalized_weighted_unifrac_task; + break; + case weighted_unnormalized: + func = &su::_vaw_unnormalized_weighted_unifrac_task; + break; + case generalized: + func = &su::_vaw_generalized_unifrac_task; + break; + default: + func = NULL; + break; + } + if(func == NULL) { + fprintf(stderr, "Unknown unifrac task\n"); + exit(1); + } + PropStack propstack(table.n_samples); + PropStack countstack(table.n_samples); + + uint32_t node; + double *node_proportions; + double *node_counts; + double *embedded_proportions; + double *embedded_counts; + double *sample_total_counts; + double length; + + initialize_embedded(embedded_proportions, task_p); + initialize_embedded(embedded_counts, task_p); + initialize_sample_counts(sample_total_counts, task_p, table); + initialize_stripes(std::ref(dm_stripes), std::ref(dm_stripes_total), unifrac_method, task_p); + + // - 1 to avoid root + for(unsigned int k = 0; k < (tree.nparens / 2) - 1; k++) { + node = tree.postorderselect(k); + length = tree.lengths[node]; + + node_proportions = propstack.pop(node); + node_counts = countstack.pop(node); + + set_proportions(node_proportions, tree, node, table, propstack); + set_proportions(node_counts, tree, node, table, countstack, false); + + embed_proportions(embedded_proportions, node_proportions, task_p->n_samples); + embed_proportions(embedded_counts, node_counts, task_p->n_samples); + + func(dm_stripes, dm_stripes_total, embedded_proportions, embedded_counts, sample_total_counts, length, task_p); + } + + if(unifrac_method == weighted_normalized || unifrac_method == unweighted || unifrac_method == generalized) { + for(unsigned int i = task_p->start; i < task_p->stop; i++) { + for(unsigned int j = 0; j < task_p->n_samples; j++) { + dm_stripes[i][j] = dm_stripes[i][j] / dm_stripes_total[i][j]; + } + } + } + + free(embedded_proportions); + free(embedded_counts); + free(sample_total_counts); +} void su::set_proportions(double* props, BPTree &tree, uint32_t node, biom &table, - PropStack &ps) { + PropStack &ps, + bool normalize) { if(tree.isleaf(node)) { table.get_obs_data(tree.names[node], props); - for(unsigned int i = 0; i < table.n_samples; i++) - props[i] = props[i] / table.sample_counts[i]; + for(unsigned int i = 0; i < table.n_samples; i++) { + props[i] = props[i]; + if(normalize) + props[i] /= table.sample_counts[i]; + } } else { unsigned int current = tree.leftchild(node); @@ -495,12 +621,13 @@ std::vector<double*> su::make_strides(unsigned int n_samples) { uint32_t n_rotations = (n_samples + 1) / 2; std::vector<double*> dm_stripes(n_rotations); + int err = 0; for(unsigned int i = 0; i < n_rotations; i++) { double* tmp; - posix_memalign((void **)&tmp, 32, sizeof(double) * n_samples); - if(tmp == NULL) { - fprintf(stderr, "Failed to allocate %zd bytes; [%s]:%d\n", - sizeof(double) * n_samples, __FILE__, __LINE__); + err = posix_memalign((void **)&tmp, 32, sizeof(double) * n_samples); + if(tmp == NULL || err != 0) { + fprintf(stderr, "Failed to allocate %zd bytes, err %d; [%s]:%d\n", + sizeof(double) * n_samples, err, __FILE__, __LINE__); exit(EXIT_FAILURE); } for(unsigned int j = 0; j < n_samples; j++) diff --git a/sucpp/unifrac.hpp b/sucpp/unifrac.hpp index baa1fa84..d5403cf3 100644 --- a/sucpp/unifrac.hpp +++ b/sucpp/unifrac.hpp @@ -2,18 +2,11 @@ #include <vector> #include <unordered_map> #include <thread> +#include "unifrac_task.hpp" namespace su { enum Method {unweighted, weighted_normalized, weighted_unnormalized, generalized}; - struct task_parameters { - uint32_t n_samples; // number of samples - unsigned int start; // starting stripe - unsigned int stop; // stopping stripe - unsigned int tid; // thread ID - double g_unifrac_alpha; // generalized unifrac alpha - }; - class PropStack { private: std::stack<double*> prop_stack; @@ -34,11 +27,19 @@ namespace su { std::vector<double*> &dm_stripes_total, const task_parameters* task_p); + void unifrac_vaw(biom &table, + BPTree &tree, + Method unifrac_method, + std::vector<double*> &dm_stripes, + std::vector<double*> &dm_stripes_total, + const task_parameters* task_p); + double** deconvolute_stripes(std::vector<double*> &stripes, uint32_t n); void set_proportions(double* props, BPTree &tree, uint32_t node, biom &table, - PropStack &ps); + PropStack &ps, + bool normalize = true); std::vector<double*> make_strides(unsigned int n_samples); inline void embed_proportions(double* out, double* in, uint32_t n) { double val; diff --git a/sucpp/unifrac_task.cpp b/sucpp/unifrac_task.cpp new file mode 100644 index 00000000..d84a60b4 --- /dev/null +++ b/sucpp/unifrac_task.cpp @@ -0,0 +1,292 @@ +#include "unifrac_task.hpp" + + +void su::_unnormalized_weighted_unifrac_task(std::vector<double*> &__restrict__ dm_stripes, + std::vector<double*> &__restrict__ dm_stripes_total, + double* __restrict__ embedded_proportions, + double length, + const su::task_parameters* task_p) { + double *dm_stripe; + for(unsigned int stripe=task_p->start; stripe < task_p->stop; stripe++) { + dm_stripe = dm_stripes[stripe]; + + /* intrinsics yield about a 2x reduction in runtime on llvm. they + * were not effective on linux gcc 4.9.1 or 4.9.2. it is unclear + * if they would be effective on other versions of gcc. + * + * one reason they help is that these for loops are not easily + * autovectorizable. using the intrinsics effectively gets around + * this. ...although, it also appears that loop unrolling works. + * + * it may make sense to revisit the inclusion of intriniscs, however + * support must be tested at compile time, so it's rather annoying + * at the moment. basically, we can't assume the presence of avx2. + */ + for(unsigned int j = 0; j < task_p->n_samples / 4; j++) { + int k = j * 4; + double u1 = embedded_proportions[k]; + double u2 = embedded_proportions[k + 1]; + double u3 = embedded_proportions[k + 2]; + double u4 = embedded_proportions[k + 3]; + + double v1 = embedded_proportions[k + stripe + 1]; + double v2 = embedded_proportions[k + stripe + 2]; + double v3 = embedded_proportions[k + stripe + 3]; + double v4 = embedded_proportions[k + stripe + 4]; + + dm_stripe[k] += fabs(u1 - v1) * length; + dm_stripe[k + 1] += fabs(u2 - v2) * length; + dm_stripe[k + 2] += fabs(u3 - v3) * length; + dm_stripe[k + 3] += fabs(u4 - v4) * length; + } + + if((task_p->n_samples % 4) != 0) { + for(unsigned int k = task_p->n_samples - (task_p->n_samples % 4); k < task_p->n_samples; k++) { + double u = embedded_proportions[k]; + double v = embedded_proportions[k + stripe + 1]; + + dm_stripe[k] += fabs(u - v) * length; + } + } + } +} + +void su::_vaw_unnormalized_weighted_unifrac_task(std::vector<double*> &__restrict__ dm_stripes, + std::vector<double*> &__restrict__ dm_stripes_total, + double* __restrict__ embedded_proportions, + double* __restrict__ embedded_counts, + double* __restrict__ sample_total_counts, + double length, + const su::task_parameters* task_p) { + double *dm_stripe; + for(unsigned int stripe=task_p->start; stripe < task_p->stop; stripe++) { + dm_stripe = dm_stripes[stripe]; + + for(unsigned int j = 0; j < task_p->n_samples; j++) { + double u = embedded_proportions[j]; + double v = embedded_proportions[j + stripe + 1]; + + double m = sample_total_counts[j] + sample_total_counts[j + stripe + 1]; + double mi = embedded_counts[j] + embedded_counts[j + stripe + 1]; + double vaw = sqrt(mi * (m - mi)); + + if(vaw > 0) + dm_stripe[j] += (fabs(u - v) * length) / vaw; + } + } +} +void su::_normalized_weighted_unifrac_task(std::vector<double*> &__restrict__ dm_stripes, + std::vector<double*> &__restrict__ dm_stripes_total, + double* __restrict__ embedded_proportions, + double length, + const su::task_parameters* task_p) { + double *dm_stripe; + double *dm_stripe_total; + + // point of thread + for(unsigned int stripe = task_p->start; stripe < task_p->stop; stripe++) { + dm_stripe = dm_stripes[stripe]; + dm_stripe_total = dm_stripes_total[stripe]; + + for(unsigned int j = 0; j < task_p->n_samples / 4; j++) { + int k = j * 4; + double u1 = embedded_proportions[k]; + double u2 = embedded_proportions[k + 1]; + double u3 = embedded_proportions[k + 2]; + double u4 = embedded_proportions[k + 3]; + + double v1 = embedded_proportions[k + stripe + 1]; + double v2 = embedded_proportions[k + stripe + 2]; + double v3 = embedded_proportions[k + stripe + 3]; + double v4 = embedded_proportions[k + stripe + 4]; + + dm_stripe[k] += fabs(u1 - v1) * length; + dm_stripe[k + 1] += fabs(u2 - v2) * length; + dm_stripe[k + 2] += fabs(u3 - v3) * length; + dm_stripe[k + 3] += fabs(u4 - v4) * length; + + dm_stripe_total[k] += (u1 + v1) * length; + dm_stripe_total[k + 1] += (u2 + v2) * length; + dm_stripe_total[k + 2] += (u3 + v3) * length; + dm_stripe_total[k + 3] += (u4 + v4) * length; + } + + if((task_p->n_samples % 4) != 0) { + for(unsigned int k = task_p->n_samples - (task_p->n_samples % 4); k < task_p->n_samples; k++) { + double u = embedded_proportions[k]; + double v = embedded_proportions[k + stripe + 1]; + + dm_stripe[k] += fabs(u - v) * length; + dm_stripe_total[k] += (u + v) * length; + } + } + } +} + +void su::_vaw_normalized_weighted_unifrac_task(std::vector<double*> &__restrict__ dm_stripes, + std::vector<double*> &__restrict__ dm_stripes_total, + double* __restrict__ embedded_proportions, + double* __restrict__ embedded_counts, + double* __restrict__ sample_total_counts, + double length, + const su::task_parameters* task_p) { + double *dm_stripe; + double *dm_stripe_total; + + // point of thread + for(unsigned int stripe = task_p->start; stripe < task_p->stop; stripe++) { + dm_stripe = dm_stripes[stripe]; + dm_stripe_total = dm_stripes_total[stripe]; + + for(unsigned int j = 0; j < task_p->n_samples; j++) { + double u = embedded_proportions[j]; + double v = embedded_proportions[j + stripe + 1]; + + double m = sample_total_counts[j] + sample_total_counts[j + stripe + 1]; + double mi = embedded_counts[j] + embedded_counts[j + stripe + 1]; + double vaw = sqrt(mi * (m - mi)); + + if(vaw > 0) { + dm_stripe[j] += (fabs(u - v) * length) / vaw; + dm_stripe_total[j] += ((u + v) * length) / vaw; + } + } + } +} +void su::_generalized_unifrac_task(std::vector<double*> &__restrict__ dm_stripes, + std::vector<double*> &__restrict__ dm_stripes_total, + double* __restrict__ embedded_proportions, + double length, + const su::task_parameters* task_p) { + double *dm_stripe; + double *dm_stripe_total; + + // point of thread + for(unsigned int stripe = task_p->start; stripe < task_p->stop; stripe++) { + dm_stripe = dm_stripes[stripe]; + dm_stripe_total = dm_stripes_total[stripe]; + + for(unsigned int j = 0; j < task_p->n_samples; j++) { + double u1 = embedded_proportions[j]; + double v1 = embedded_proportions[j + stripe + 1]; + double sum1 = u1 + v1; + if(sum1 != 0.0) { + double sub1 = fabs(u1 - v1); + double sum_pow1 = pow(sum1, task_p->g_unifrac_alpha) * length; + dm_stripe[j] += sum_pow1 * (sub1 / sum1); + dm_stripe_total[j] += sum_pow1; + } + } + } +} + +void su::_vaw_generalized_unifrac_task(std::vector<double*> &__restrict__ dm_stripes, + std::vector<double*> &__restrict__ dm_stripes_total, + double* __restrict__ embedded_proportions, + double* __restrict__ embedded_counts, + double* __restrict__ sample_total_counts, + double length, + const su::task_parameters* task_p) { + double *dm_stripe; + double *dm_stripe_total; + + // point of thread + for(unsigned int stripe = task_p->start; stripe < task_p->stop; stripe++) { + dm_stripe = dm_stripes[stripe]; + dm_stripe_total = dm_stripes_total[stripe]; + + for(unsigned int j = 0; j < task_p->n_samples; j++) { + double m = sample_total_counts[j] + sample_total_counts[j + stripe + 1]; + double mi = embedded_counts[j] + embedded_counts[j + stripe + 1]; + double vaw = sqrt(mi * (m - mi)); + + double u1 = embedded_proportions[j]; + double v1 = embedded_proportions[j + stripe + 1]; + + if(vaw > 0.0) { + double sum1 = (u1 + v1) / vaw; + double sub1 = fabs(u1 - v1) / vaw; + double sum_pow1 = pow(sum1, task_p->g_unifrac_alpha) * length; + dm_stripe[j] += sum_pow1 * (sub1 / sum1); + dm_stripe_total[j] += sum_pow1; + } + } + } +} +void su::_unweighted_unifrac_task(std::vector<double*> &__restrict__ dm_stripes, + std::vector<double*> &__restrict__ dm_stripes_total, + double* __restrict__ embedded_proportions, + double length, + const su::task_parameters* task_p) { + double *dm_stripe; + double *dm_stripe_total; + + for(unsigned int stripe = task_p->start; stripe < task_p->stop; stripe++) { + dm_stripe = dm_stripes[stripe]; + dm_stripe_total = dm_stripes_total[stripe]; + + for(unsigned int j = 0; j < task_p->n_samples / 4; j++) { + int k = j * 4; + int32_t u1 = embedded_proportions[k] > 0; + int32_t u2 = embedded_proportions[k + 1] > 0; + int32_t u3 = embedded_proportions[k + 2] > 0; + int32_t u4 = embedded_proportions[k + 3] > 0; + + int32_t v1 = embedded_proportions[k + stripe + 1] > 0; + int32_t v2 = embedded_proportions[k + stripe + 2] > 0; + int32_t v3 = embedded_proportions[k + stripe + 3] > 0; + int32_t v4 = embedded_proportions[k + stripe + 4] > 0; + + dm_stripe[k] += (u1 ^ v1) * length; + dm_stripe[k + 1] += (u2 ^ v2) * length; + dm_stripe[k + 2] += (u3 ^ v3) * length; + dm_stripe[k + 3] += (u4 ^ v4) * length; + + dm_stripe_total[k] += (u1 | v1) * length; + dm_stripe_total[k + 1] += (u2 | v2) * length; + dm_stripe_total[k + 2] += (u3 | v3) * length; + dm_stripe_total[k + 3] += (u4 | v4) * length; + } + + if((task_p->n_samples % 4) != 0) { + for(unsigned int k = task_p->n_samples - (task_p->n_samples % 4); k < task_p->n_samples; k++) { + int32_t u = embedded_proportions[k] > 0; + int32_t v = embedded_proportions[k + stripe + 1] > 0; + + dm_stripe[k] += (u ^ v) * length; + dm_stripe_total[k] += (u | v) * length; + } + } + } +} + +void su::_vaw_unweighted_unifrac_task(std::vector<double*> &__restrict__ dm_stripes, + std::vector<double*> &__restrict__ dm_stripes_total, + double* __restrict__ embedded_proportions, + double* __restrict__ embedded_counts, + double* __restrict__ sample_total_counts, + double length, + const su::task_parameters* task_p) { + double *dm_stripe; + double *dm_stripe_total; + + for(unsigned int stripe = task_p->start; stripe < task_p->stop; stripe++) { + dm_stripe = dm_stripes[stripe]; + dm_stripe_total = dm_stripes_total[stripe]; + + for(unsigned int j = 0; j < task_p->n_samples; j++) { + int32_t u = embedded_proportions[j] > 0; + int32_t v = embedded_proportions[j + stripe + 1] > 0; + + double m = sample_total_counts[j] + sample_total_counts[j + stripe + 1]; + double mi = embedded_counts[j] + embedded_counts[j + stripe + 1]; + double vaw = sqrt(mi * (m - mi)); + + if(vaw > 0) { + dm_stripe[j] += ((u ^ v) * length) / vaw; + dm_stripe_total[j] += ((u | v) * length) / vaw; + } + } + } +} + diff --git a/sucpp/unifrac_task.hpp b/sucpp/unifrac_task.hpp new file mode 100644 index 00000000..4a2dec6c --- /dev/null +++ b/sucpp/unifrac_task.hpp @@ -0,0 +1,108 @@ +#include <math.h> +#include <vector> +#include <stdint.h> + +namespace su { + /* task specific compute parameters + * + * n_samples <int> the number of samples being processed + * start <uint> the first stride to process + * stop <uint> the last stride to process + * tid <uint> the thread identifier + * g_unifrac_alpha <double> an alpha value for generalized unifrac + */ + struct task_parameters { + uint32_t n_samples; // number of samples + unsigned int start; // starting stripe + unsigned int stop; // stopping stripe + unsigned int tid; // thread ID + + // task specific arguments below + double g_unifrac_alpha; // generalized unifrac alpha + }; + + /* void su::unifrac tasks + * + * all methods utilize the same function signature. that signature is as follows: + * + * dm_stripes vector<double> the stripes of the distance matrix being accumulated + * into for unique branch length + * dm_stripes vector<double> the stripes of the distance matrix being accumulated + * into for total branch length (e.g., to normalize unweighted unifrac) + * embedded_proportions <double*> the proportions vector for a sample, or rather + * the counts vector normalized to 1. this vector is embedded as it is + * duplicated: if A, B and C are proportions for features A, B, and C, the + * vector will look like [A B C A B C]. + * length <double> the branch length of the current node to its parent. + * task_p <task_parameters*> task specific parameters. + */ + void _unnormalized_weighted_unifrac_task(std::vector<double*> &__restrict__ dm_stripes, + std::vector<double*> &__restrict__ dm_stripes_total, + double* __restrict__ embedded_proportions, + double length, + const su::task_parameters* task_p); + void _normalized_weighted_unifrac_task(std::vector<double*> &__restrict__ dm_stripes, + std::vector<double*> &__restrict__ dm_stripes_total, + double* __restrict__ embedded_proportions, + double length, + const su::task_parameters* task_p); + void _unweighted_unifrac_task(std::vector<double*> &__restrict__ dm_stripes, + std::vector<double*> &__restrict__ dm_stripes_total, + double* __restrict__ embedded_proportions, + double length, + const su::task_parameters* task_p); + void _generalized_unifrac_task(std::vector<double*> &__restrict__ dm_stripes, + std::vector<double*> &__restrict__ dm_stripes_total, + double* __restrict__ embedded_proportions, + double length, + const su::task_parameters* task_p); + + /* void su::unifrac_vaw tasks + * + * all methods utilize the same function signature. that signature is as follows: + * + * dm_stripes vector<double> the stripes of the distance matrix being accumulated + * into for unique branch length + * dm_stripes vector<double> the stripes of the distance matrix being accumulated + * into for total branch length (e.g., to normalize unweighted unifrac) + * embedded_proportions <double*> the proportions vector for a sample, or rather + * the counts vector normalized to 1. this vector is embedded as it is + * duplicated: if A, B and C are proportions for features A, B, and C, the + * vector will look like [A B C A B C]. + * embedded_counts <double*> the counts vector embedded in the same way and order as + * embedded_proportions. the values of this array are unnormalized feature + * counts for the subtree. + * sample_total_counts <double*> the total unnormalized feature counts for all samples + * embedded in the same way and order as embedded_proportions. + * length <double> the branch length of the current node to its parent. + * task_p <task_parameters*> task specific parameters. + */ + void _vaw_unnormalized_weighted_unifrac_task(std::vector<double*> &__restrict__ dm_stripes, + std::vector<double*> &__restrict__ dm_stripes_total, + double* __restrict__ embedded_proportions, + double* __restrict__ embedded_counts, + double* __restrict__ sample_total_counts, + double length, + const su::task_parameters* task_p); + void _vaw_normalized_weighted_unifrac_task(std::vector<double*> &__restrict__ dm_stripes, + std::vector<double*> &__restrict__ dm_stripes_total, + double* __restrict__ embedded_proportions, + double* __restrict__ embedded_counts, + double* __restrict__ sample_total_counts, + double length, + const su::task_parameters* task_p); + void _vaw_unweighted_unifrac_task(std::vector<double*> &__restrict__ dm_stripes, + std::vector<double*> &__restrict__ dm_stripes_total, + double* __restrict__ embedded_proportions, + double* __restrict__ embedded_counts, + double* __restrict__ sample_total_counts, + double length, + const su::task_parameters* task_p); + void _vaw_generalized_unifrac_task(std::vector<double*> &__restrict__ dm_stripes, + std::vector<double*> &__restrict__ dm_stripes_total, + double* __restrict__ embedded_proportions, + double* __restrict__ embedded_counts, + double* __restrict__ sample_total_counts, + double length, + const su::task_parameters* task_p); +}