diff --git a/.travis.yml b/.travis.yml index 96dfbb1d..a1484d97 100644 --- a/.travis.yml +++ b/.travis.yml @@ -49,5 +49,7 @@ script: - for i in $(find ./ -maxdepth 1 -name 'core*' -print); do gdb $(pwd)/sucpp/ssu core* -ex "thread apply all bt" -ex "set pagination 0" -batch; done; - ls -lrt - qiime state-unifrac unweighted --i-table table.qza --i-phylogeny tree.qza --o-distance-matrix ssu_result.qza --verbose + - qiime state-unifrac unweighted --i-table table.qza --i-phylogeny tree.qza --o-distance-matrix ssu_result_thread.qza --verbose --p-threads 2 - qiime diversity beta-phylogenetic --p-metric unweighted_unifrac --i-table table.qza --i-phylogeny tree.qza --o-distance-matrix sk_result.qza - python compare_dms.py ssu_result.qza sk_result.qza + - python compare_dms.py ssu_result_thread.qza sk_result.qza diff --git a/q2_state_unifrac/__init__.py b/q2_state_unifrac/__init__.py index bab503ba..8772f077 100644 --- a/q2_state_unifrac/__init__.py +++ b/q2_state_unifrac/__init__.py @@ -10,8 +10,10 @@ from q2_state_unifrac._methods import (unweighted, weighted_normalized, - weighted_unnormalized) + weighted_unnormalized, + generalized) __version__ = pkg_resources.get_distribution('q2-state-unifrac').version -__all__ = ['unweighted', 'weighted_normalized', 'weighted_unnormalized'] +__all__ = ['unweighted', 'weighted_normalized', 'weighted_unnormalized', + 'generalized'] diff --git a/q2_state_unifrac/_methods.py b/q2_state_unifrac/_methods.py index f9dab867..dbe015b9 100644 --- a/q2_state_unifrac/_methods.py +++ b/q2_state_unifrac/_methods.py @@ -68,3 +68,16 @@ def weighted_unnormalized(table: BIOMV210Format, _run(str(table), str(phylogeny), output_fp, str(threads), 'weighted_unnormalized') return skbio.DistanceMatrix.read(output_fp) + + +def generalized(table: BIOMV210Format, + phylogeny: NewickFormat, + threads: int=1, + alpha: float=1.0)-> skbio.DistanceMatrix: + _sanity() + + with tempfile.TemporaryDirectory() as tmp: + output_fp = os.path.join(tmp, 'foo.dm') + _run(str(table), str(phylogeny), output_fp, str(threads), + 'generalized') + return skbio.DistanceMatrix.read(output_fp) diff --git a/q2_state_unifrac/plugin_setup.py b/q2_state_unifrac/plugin_setup.py index a60c1c33..caee7c6b 100644 --- a/q2_state_unifrac/plugin_setup.py +++ b/q2_state_unifrac/plugin_setup.py @@ -6,7 +6,7 @@ # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- -from qiime2.plugin import (Plugin, Properties, Int) +from qiime2.plugin import (Plugin, Properties, Int, Float) from q2_types.feature_table import FeatureTable, Frequency from q2_types.distance_matrix import DistanceMatrix from q2_types.tree import Phylogeny, Rooted @@ -37,7 +37,9 @@ }, outputs=[('distance_matrix', DistanceMatrix % Properties('phylogenetic'))], name='Unweighted UniFrac', - description=('This method computes Unweighted UniFrac') + description=('This method computes unweighted UniFrac as described in ' + 'Lozupone and Knight 2005 Appl Environ Microbiol; ' + 'DOI: 10.1128/AEM.71.12.8228-8235.2005') ) @@ -54,7 +56,9 @@ }, outputs=[('distance_matrix', DistanceMatrix % Properties('phylogenetic'))], name='Weighted unnormalizd UniFrac', - description=('This method computes weighted unnormalized UniFrac') + description=('This method computes weighted unnormalized UniFrac as ' + 'described in Lozupone et al. 2007 Appl Environ Microbiol; ' + 'DOI: 10.1128/AEM.01996-06') ) @@ -70,6 +74,34 @@ 'the table.') }, outputs=[('distance_matrix', DistanceMatrix % Properties('phylogenetic'))], - name='Wweighted normalized UniFrac', - description=('This method computes weighted normalized UniFrac') + name='Weighted normalized UniFrac', + description=('This method computes weighted normalized UniFrac as ' + 'described in Lozupone et al. 2007 Appl Environ Microbiol; ' + 'DOI: 10.1128/AEM.01996-06') +) + + +plugin.methods.register_function( + function=q2_state_unifrac.generalized, + inputs={'table': FeatureTable[Frequency] % Properties('uniform-sampling'), + 'phylogeny': Phylogeny[Rooted]}, + parameters={'threads': Int, + 'alpha': Float}, + parameter_descriptions={'threads': 'The number of threads to use.', + 'alpha': ('The value of alpha controls importance ' + 'of sample proportions. 1.0 is ' + 'weighted normalized UniFrac. 0.0 is ' + 'close to unweighted UniFrac, but only ' + 'if the sample proportions are ' + 'dichotomized.')}, + input_descriptions={ + 'table': 'A rarefied FeatureTable', + 'phylogeny': ('A rooted phylogeny which relates the observations in ' + 'the table.') + }, + outputs=[('distance_matrix', DistanceMatrix % Properties('phylogenetic'))], + name='Generalized UniFrac', + description=('This method computes Generalized UniFrac as described in ' + 'Chen et al. 2012 Bioinformatics; ' + 'DOI: 10.1093/bioinformatics/bts342') ) diff --git a/sucpp/su.cpp b/sucpp/su.cpp index 75fdf625..f59edd8d 100644 --- a/sucpp/su.cpp +++ b/sucpp/su.cpp @@ -9,13 +9,14 @@ #include void usage() { - std::cout << "usage: ssu -i -o -m [METHOD] -t [-n threads]" << std::endl; + std::cout << "usage: ssu -i -o -m [METHOD] -t [-n threads] [-a alpha]" << std::endl; std::cout << std::endl; std::cout << " -i\tThe input BIOM table." << std::endl; std::cout << " -t\tThe input phylogeny in newick." << std::endl; - std::cout << " -m\tThe method, [unweighted | weighted_normalized | weighted_unnormalized]." << std::endl; + std::cout << " -m\tThe method, [unweighted | weighted_normalized | weighted_unnormalized | generalized]." << std::endl; std::cout << " -o\tThe output distance matrix." << std::endl; std::cout << " -n\t[OPTIONAL] The number of threads, default is 1." << std::endl; + std::cout << " -a\t[OPTIONAL] Generalized UniFrac alpha, default is 1." << std::endl; std::cout << std::endl; } @@ -60,6 +61,7 @@ int main(int argc, char **argv){ const std::string &output_filename = input.getCmdOption("-o"); const std::string &method_string = input.getCmdOption("-m"); const std::string &nthreads_arg = input.getCmdOption("-n"); + const std::string &gunifrac_arg = input.getCmdOption("-a"); su::Method method; @@ -95,12 +97,21 @@ int main(int argc, char **argv){ nthreads = atoi(nthreads_arg.c_str()); } + double g_unifrac_alpha; + if(gunifrac_arg.empty()) { + g_unifrac_alpha = 1.0; + } else { + g_unifrac_alpha = atof(gunifrac_arg.c_str()); + } + if(method_string == "unweighted") method = su::unweighted; else if(method_string == "weighted_normalized") method = su::weighted_normalized; else if(method_string == "weighted_unnormalized") method = su::weighted_unnormalized; + else if(method_string == "generalized") + method = su::generalized; else { err("Unknown method"); return EXIT_FAILURE; @@ -114,8 +125,8 @@ int main(int argc, char **argv){ std::unordered_set to_keep(table.obs_ids.begin(), table.obs_ids.end()); su::BPTree tree_sheared = tree.shear(to_keep).collapse(); - std::vector dm_stripes; // = su::make_strides(table.n_samples); - std::vector dm_stripes_total; // = su::make_strides(table.n_samples); + std::vector dm_stripes; + std::vector dm_stripes_total; dm_stripes.resize((table.n_samples + 1) / 2); dm_stripes_total.resize((table.n_samples + 1) / 2); @@ -123,27 +134,20 @@ int main(int argc, char **argv){ unsigned int start = 0; unsigned int end = dm_stripes.size(); - unsigned int *starts = (unsigned int*)malloc(sizeof(unsigned int) * nthreads); - if(starts == NULL) { - fprintf(stderr, "Failed to allocate %zd bytes; [%s]:%d\n", - sizeof(unsigned int) * nthreads, __FILE__, __LINE__); - exit(EXIT_FAILURE); - } - unsigned int *ends = (unsigned int*)malloc(sizeof(unsigned int) * nthreads); - if(ends == NULL) { - fprintf(stderr, "Failed to allocate %zd bytes; [%s]:%d\n", - sizeof(unsigned int) * nthreads, __FILE__, __LINE__); - exit(EXIT_FAILURE); - } + std::vector tasks; + tasks.resize(nthreads); std::vector threads(nthreads); for(unsigned int tid = 0; tid < threads.size(); tid++) { - starts[tid] = start; - ends[tid] = start + chunksize; + tasks[tid].tid = tid; + tasks[tid].start = start; // stripe start + tasks[tid].stop = start + chunksize; // stripe end + tasks[tid].n_samples = table.n_samples; + tasks[tid].g_unifrac_alpha = g_unifrac_alpha; start = start + chunksize; } // the last thread gets any trailing bits - ends[threads.size() - 1] = end; + tasks[threads.size() - 1].stop = end; for(unsigned int tid = 0; tid < threads.size(); tid++) { threads[tid] = std::thread(su::unifrac, @@ -152,18 +156,13 @@ int main(int argc, char **argv){ method, std::ref(dm_stripes), std::ref(dm_stripes_total), - starts[tid], - ends[tid], - tid); + &tasks[tid]); } for(unsigned int tid = 0; tid < threads.size(); tid++) { threads[tid].join(); } - free(starts); - free(ends); - double **dm = su::deconvolute_stripes(dm_stripes, table.n_samples); std::ofstream output; diff --git a/sucpp/test_su.cpp b/sucpp/test_su.cpp index 4f91b4ea..65055bfa 100644 --- a/sucpp/test_su.cpp +++ b/sucpp/test_su.cpp @@ -651,7 +651,10 @@ void test_unnormalized_weighted_unifrac() { std::vector strides = su::make_strides(6); std::vector strides_total = su::make_strides(6); - su::unifrac(table, tree, su::weighted_unnormalized, strides, strides_total, 0, 3, 0); + su::task_parameters task_p; + task_p.start = 0; task_p.stop = 3; task_p.tid = 0; task_p.n_samples = 6; + + su::unifrac(table, tree, su::weighted_unnormalized, strides, strides_total, &task_p); for(unsigned int i = 0; i < 3; i++) { for(unsigned int j = 0; j < 6; j++) { ASSERT(fabs(strides[i][j] - exp[i][j]) < 0.000001); @@ -661,6 +664,85 @@ void test_unnormalized_weighted_unifrac() { SUITE_END(); } +void test_generalized_unifrac() { + SUITE_START("test generalized unifrac"); + + std::vector threads(1); + su::BPTree tree = su::BPTree("(GG_OTU_1:1,(GG_OTU_2:1,GG_OTU_3:1):1,(GG_OTU_5:1,GG_OTU_4:1):1);"); + su::biom table = su::biom("test.biom"); + + // weighted normalized unifrac as computed above + std::vector w_exp; + double w_stride1[] = {0.38095238, 0.33333333, 0.73333333, 0.33333333, 0.5, 0.26785714}; + double w_stride2[] = {0.58095238, 0.66666667, 0.86666667, 0.25, 0.28571429, 0.45833333}; + double w_stride3[] = {0.47619048, 0.66666667, 0.46666667, 0.47619048, 0.66666667, 0.46666667}; + w_exp.push_back(w_stride1); + w_exp.push_back(w_stride2); + w_exp.push_back(w_stride3); + std::vector w_strides = su::make_strides(6); + std::vector w_strides_total = su::make_strides(6); + su::task_parameters w_task_p; + w_task_p.start = 0; w_task_p.stop = 3; w_task_p.tid = 0; w_task_p.n_samples = 6; + w_task_p.g_unifrac_alpha = 1.0; + su::unifrac(table, tree, su::generalized, w_strides, w_strides_total, &w_task_p); + + // as computed by GUniFrac v1.0 + // Sample1 Sample2 Sample3 Sample4 Sample5 Sample6 + //Sample1 0.0000000 0.4408392 0.6886965 0.7060606 0.5833333 0.3278410 + //Sample2 0.4408392 0.0000000 0.5102041 0.7500000 0.8000000 0.5208125 + //Sample3 0.6886965 0.5102041 0.0000000 0.8649351 0.9428571 0.5952381 + //Sample4 0.7060606 0.7500000 0.8649351 0.0000000 0.5000000 0.4857143 + //Sample5 0.5833333 0.8000000 0.9428571 0.5000000 0.0000000 0.7485714 + //Sample6 0.3278410 0.5208125 0.5952381 0.4857143 0.7485714 0.0000000 + std::vector d0_exp; + double d0_stride1[] = {0.4408392, 0.5102041, 0.8649351, 0.5000000, 0.7485714, 0.3278410}; + double d0_stride2[] = {0.6886965, 0.7500000, 0.9428571, 0.4857143, 0.5833333, 0.5208125}; + double d0_stride3[] = {0.7060606, 0.8000000, 0.5952381, 0.7060606, 0.8000000, 0.5952381}; + d0_exp.push_back(d0_stride1); + d0_exp.push_back(d0_stride2); + d0_exp.push_back(d0_stride3); + std::vector d0_strides = su::make_strides(6); + std::vector d0_strides_total = su::make_strides(6); + su::task_parameters d0_task_p; + d0_task_p.start = 0; d0_task_p.stop = 3; d0_task_p.tid = 0; d0_task_p.n_samples = 6; + d0_task_p.g_unifrac_alpha = 0.0; + su::unifrac(table, tree, su::generalized, d0_strides, d0_strides_total, &d0_task_p); + + // as computed by GUniFrac v1.0 + // Sample1 Sample2 Sample3 Sample4 Sample5 Sample6 + //Sample1 0.0000000 0.4040518 0.6285560 0.5869439 0.4082483 0.2995673 + //Sample2 0.4040518 0.0000000 0.4160597 0.7071068 0.7302479 0.4860856 + //Sample3 0.6285560 0.4160597 0.0000000 0.8005220 0.9073159 0.5218198 + //Sample4 0.5869439 0.7071068 0.8005220 0.0000000 0.4117216 0.3485667 + //Sample5 0.4082483 0.7302479 0.9073159 0.4117216 0.0000000 0.6188282 + //Sample6 0.2995673 0.4860856 0.5218198 0.3485667 0.6188282 0.0000000 + std::vector d05_exp; + double d05_stride1[] = {0.4040518, 0.4160597, 0.8005220, 0.4117216, 0.6188282, 0.2995673}; + double d05_stride2[] = {0.6285560, 0.7071068, 0.9073159, 0.3485667, 0.4082483, 0.4860856}; + double d05_stride3[] = {0.5869439, 0.7302479, 0.5218198, 0.5869439, 0.7302479, 0.5218198}; + d05_exp.push_back(d05_stride1); + d05_exp.push_back(d05_stride2); + d05_exp.push_back(d05_stride3); + std::vector d05_strides = su::make_strides(6); + std::vector d05_strides_total = su::make_strides(6); + su::task_parameters d05_task_p; + d05_task_p.start = 0; d05_task_p.stop = 3; d05_task_p.tid = 0; d05_task_p.n_samples = 6; + d05_task_p.g_unifrac_alpha = 0.5; + su::unifrac(table, tree, su::generalized, d05_strides, d05_strides_total, &d05_task_p); + + for(unsigned int i = 0; i < 3; i++) { + for(unsigned int j = 0; j < 6; j++) { + ASSERT(fabs(w_strides[i][j] - w_exp[i][j]) < 0.000001); + ASSERT(fabs(d0_strides[i][j] - d0_exp[i][j]) < 0.000001); + ASSERT(fabs(d05_strides[i][j] - d05_exp[i][j]) < 0.000001); + } + free(w_strides[i]); + free(d0_strides[i]); + free(d05_strides[i]); + } + SUITE_END(); +} + void test_make_strides() { SUITE_START("test make stripes"); std::vector exp; @@ -695,7 +777,10 @@ void test_unweighted_unifrac() { std::vector strides = su::make_strides(6); std::vector strides_total = su::make_strides(6); - su::unifrac(table, tree, su::unweighted, strides, strides_total, 0, 3, 0); + su::task_parameters task_p; + task_p.start = 0; task_p.stop = 3; task_p.tid = 0; task_p.n_samples = 6; + + su::unifrac(table, tree, su::unweighted, strides, strides_total, &task_p); for(unsigned int i = 0; i < 3; i++) { for(unsigned int j = 0; j < 6; j++) { @@ -722,8 +807,11 @@ void test_normalized_weighted_unifrac() { exp.push_back(stride3); std::vector strides = su::make_strides(6); std::vector strides_total = su::make_strides(6); - - su::unifrac(table, tree, su::weighted_normalized, strides, strides_total, 0, 3, 0); + + su::task_parameters task_p; + task_p.start = 0; task_p.stop = 3; task_p.tid = 0; task_p.n_samples = 6; + + su::unifrac(table, tree, su::weighted_normalized, strides, strides_total, &task_p); for(unsigned int i = 0; i < 3; i++) { for(unsigned int j = 0; j < 6; j++) { ASSERT(fabs(strides[i][j] - exp[i][j]) < 0.000001); @@ -856,6 +944,7 @@ int main(int argc, char** argv) { test_unweighted_unifrac(); test_unnormalized_weighted_unifrac(); test_normalized_weighted_unifrac(); + test_generalized_unifrac(); test_unifrac_sample_counts(); printf("\n"); diff --git a/sucpp/unifrac.cpp b/sucpp/unifrac.cpp index 41eee88e..74619a3b 100644 --- a/sucpp/unifrac.cpp +++ b/sucpp/unifrac.cpp @@ -109,12 +109,10 @@ void _unnormalized_weighted_unifrac_task(std::vector &__restrict__ dm_s std::vector &__restrict__ dm_stripes_total, double* __restrict__ embedded_proportions, double length, - uint32_t n_samples, - unsigned int start, - unsigned int stop) { + const su::task_parameters* task_p) { double *dm_stripe; //__m256d ymm0, ymm1; - for(unsigned int stripe=start; stripe < stop; stripe++) { + for(unsigned int stripe=task_p->start; stripe < task_p->stop; stripe++) { dm_stripe = dm_stripes[stripe]; /* intrinsics yield about a 2x reduction in runtime on llvm. they @@ -156,7 +154,7 @@ void _unnormalized_weighted_unifrac_task(std::vector &__restrict__ dm_s // double v = embedded_proportions[k + stripe + 1]; // dm_stripe[k] += fabs(u - v) * length; //} - for(unsigned int j = 0; j < n_samples / 4; j++) { + for(unsigned int j = 0; j < task_p->n_samples / 4; j++) { int k = j * 4; double u1 = embedded_proportions[k]; double u2 = embedded_proportions[k + 1]; @@ -174,8 +172,8 @@ void _unnormalized_weighted_unifrac_task(std::vector &__restrict__ dm_s dm_stripe[k + 3] += fabs(u4 - v4) * length; } - if((n_samples % 4) != 0) { - for(unsigned int k = n_samples - (n_samples % 4); k < n_samples; k++) { + if((task_p->n_samples % 4) != 0) { + for(unsigned int k = task_p->n_samples - (task_p->n_samples % 4); k < task_p->n_samples; k++) { double u = embedded_proportions[k]; double v = embedded_proportions[k + stripe + 1]; @@ -189,18 +187,16 @@ void _normalized_weighted_unifrac_task(std::vector &__restrict__ dm_str std::vector &__restrict__ dm_stripes_total, double* __restrict__ embedded_proportions, double length, - uint32_t n_samples, - unsigned int start, - unsigned int stop) { + const su::task_parameters* task_p) { double *dm_stripe; double *dm_stripe_total; // point of thread - for(unsigned int stripe = start; stripe < stop; stripe++) { + for(unsigned int stripe = task_p->start; stripe < task_p->stop; stripe++) { dm_stripe = dm_stripes[stripe]; dm_stripe_total = dm_stripes_total[stripe]; - for(unsigned int j = 0; j < n_samples / 4; j++) { + for(unsigned int j = 0; j < task_p->n_samples / 4; j++) { int k = j * 4; double u1 = embedded_proportions[k]; double u2 = embedded_proportions[k + 1]; @@ -217,19 +213,46 @@ void _normalized_weighted_unifrac_task(std::vector &__restrict__ dm_str dm_stripe[k + 2] += fabs(u3 - v3) * length; dm_stripe[k + 3] += fabs(u4 - v4) * length; - dm_stripe_total[k] += fabs(u1 + v1) * length; - dm_stripe_total[k + 1] += fabs(u2 + v2) * length; - dm_stripe_total[k + 2] += fabs(u3 + v3) * length; - dm_stripe_total[k + 3] += fabs(u4 + v4) * length; + dm_stripe_total[k] += (u1 + v1) * length; + dm_stripe_total[k + 1] += (u2 + v2) * length; + dm_stripe_total[k + 2] += (u3 + v3) * length; + dm_stripe_total[k + 3] += (u4 + v4) * length; } - if((n_samples % 4) != 0) { - for(unsigned int k = n_samples - (n_samples % 4); k < n_samples; k++) { + if((task_p->n_samples % 4) != 0) { + for(unsigned int k = task_p->n_samples - (task_p->n_samples % 4); k < task_p->n_samples; k++) { double u = embedded_proportions[k]; double v = embedded_proportions[k + stripe + 1]; dm_stripe[k] += fabs(u - v) * length; - dm_stripe_total[k] += fabs(u + v) * length; + dm_stripe_total[k] += (u + v) * length; + } + } + } +} + +void _generalized_unifrac_task(std::vector &__restrict__ dm_stripes, + std::vector &__restrict__ dm_stripes_total, + double* __restrict__ embedded_proportions, + double length, + const su::task_parameters* task_p) { + double *dm_stripe; + double *dm_stripe_total; + + // point of thread + for(unsigned int stripe = task_p->start; stripe < task_p->stop; stripe++) { + dm_stripe = dm_stripes[stripe]; + dm_stripe_total = dm_stripes_total[stripe]; + + for(unsigned int j = 0; j < task_p->n_samples; j++) { + double u1 = embedded_proportions[j]; + double v1 = embedded_proportions[j + stripe + 1]; + double sum1 = u1 + v1; + if(sum1 != 0.0) { + double sub1 = fabs(u1 - v1); + double sum_pow1 = pow(sum1, task_p->g_unifrac_alpha) * length; + dm_stripe[j] += sum_pow1 * (sub1 / sum1); + dm_stripe_total[j] += sum_pow1; } } } @@ -239,17 +262,15 @@ void _unweighted_unifrac_task(std::vector &__restrict__ dm_stripes, std::vector &__restrict__ dm_stripes_total, double* __restrict__ embedded_proportions, double length, - uint32_t n_samples, - unsigned int start, - unsigned int stop) { + const su::task_parameters* task_p) { double *dm_stripe; double *dm_stripe_total; - for(unsigned int stripe = start; stripe < stop; stripe++) { + for(unsigned int stripe = task_p->start; stripe < task_p->stop; stripe++) { dm_stripe = dm_stripes[stripe]; dm_stripe_total = dm_stripes_total[stripe]; - for(unsigned int j = 0; j < n_samples / 4; j++) { + for(unsigned int j = 0; j < task_p->n_samples / 4; j++) { int k = j * 4; int32_t u1 = embedded_proportions[k] > 0; int32_t u2 = embedded_proportions[k + 1] > 0; @@ -272,8 +293,8 @@ void _unweighted_unifrac_task(std::vector &__restrict__ dm_stripes, dm_stripe_total[k + 3] += (u4 | v4) * length; } - if((n_samples % 4) != 0) { - for(unsigned int k = n_samples - (n_samples % 4); k < n_samples; k++) { + if((task_p->n_samples % 4) != 0) { + for(unsigned int k = task_p->n_samples - (task_p->n_samples % 4); k < task_p->n_samples; k++) { int32_t u = embedded_proportions[k] > 0; int32_t v = embedded_proportions[k + stripe + 1] > 0; @@ -306,17 +327,18 @@ void su::unifrac(biom &table, Method unifrac_method, std::vector &dm_stripes, std::vector &dm_stripes_total, - unsigned int start, - unsigned int end, - unsigned int tid) { + const su::task_parameters* task_p) { + + if(table.n_samples != task_p->n_samples) { + fprintf(stderr, "Task and table n_samples not equal\n"); + exit(EXIT_FAILURE); + } void (*func)(std::vector&, // dm_stripes std::vector&, // dm_stripes_total double*, // embedded_proportions double, // length - uint32_t, // number of samples - unsigned int, // stripe start - unsigned int); // stripe stop + const su::task_parameters*); switch(unifrac_method) { case unweighted: @@ -328,6 +350,9 @@ void su::unifrac(biom &table, case weighted_unnormalized: func = &_unnormalized_weighted_unifrac_task; break; + case generalized: + func = &_generalized_unifrac_task; + break; } PropStack propstack(table.n_samples); @@ -343,7 +368,7 @@ void su::unifrac(biom &table, } // thread local memory - for(unsigned int i = start; i < end; i++){ + for(unsigned int i = task_p->start; i < task_p->stop; i++){ posix_memalign((void **)&dm_stripes[i], 32, sizeof(double) * table.n_samples); if(dm_stripes[i] == NULL) { fprintf(stderr, "Failed to allocate %zd bytes; [%s]:%d\n", @@ -372,7 +397,7 @@ void su::unifrac(biom &table, node_proportions = propstack.pop(node); set_proportions(node_proportions, tree, node, table, propstack); - embed_proportions(embedded_proportions, node_proportions, table.n_samples); + embed_proportions(embedded_proportions, node_proportions, task_p->n_samples); /* * The values in the example vectors correspond to index positions of an @@ -418,17 +443,16 @@ void su::unifrac(biom &table, * We end up performing N / 2 redundant calculations on the last stripe * (see C) but that is small over large N. */ - func(dm_stripes, dm_stripes_total, embedded_proportions, length, - table.n_samples, start, end); + func(dm_stripes, dm_stripes_total, embedded_proportions, length, task_p); // should make this compile-time support //if((tid == 0) && ((k % 1000) == 0)) // progressbar((float)k / (float)(tree.nparens / 2)); } - if(unifrac_method == weighted_normalized || unifrac_method == unweighted) { - for(unsigned int i = start; i < end; i++) { - for(unsigned int j = 0; j < table.n_samples; j++) { + if(unifrac_method == weighted_normalized || unifrac_method == unweighted || unifrac_method == generalized) { + for(unsigned int i = task_p->start; i < task_p->stop; i++) { + for(unsigned int j = 0; j < task_p->n_samples; j++) { dm_stripes[i][j] = dm_stripes[i][j] / dm_stripes_total[i][j]; } } diff --git a/sucpp/unifrac.hpp b/sucpp/unifrac.hpp index ab4335d8..baa1fa84 100644 --- a/sucpp/unifrac.hpp +++ b/sucpp/unifrac.hpp @@ -4,7 +4,15 @@ #include namespace su { - enum Method {unweighted, weighted_normalized, weighted_unnormalized}; + enum Method {unweighted, weighted_normalized, weighted_unnormalized, generalized}; + + struct task_parameters { + uint32_t n_samples; // number of samples + unsigned int start; // starting stripe + unsigned int stop; // stopping stripe + unsigned int tid; // thread ID + double g_unifrac_alpha; // generalized unifrac alpha + }; class PropStack { private: @@ -24,17 +32,8 @@ namespace su { Method unifrac_method, std::vector &dm_stripes, std::vector &dm_stripes_total, - unsigned int start, - unsigned int end, - unsigned int tid); - void unweighted_unifrac(biom &table, - BPTree &tree, - Method unifrac_method, - std::vector &dm_stripes, - std::vector &dm_stripes_total, - unsigned int start, - unsigned int end, - unsigned int tid); + const task_parameters* task_p); + double** deconvolute_stripes(std::vector &stripes, uint32_t n); void set_proportions(double* props, BPTree &tree, uint32_t node, @@ -49,5 +48,5 @@ namespace su { out[i + n] = val; } } -} +}