Skip to content

Commit d065698

Browse files
authored
Improvements to cache locality and sparse vector handling (#115)
* Re-arrange loops for cleaner code.. no-op * Add const guarantees to a few biom methods, to make it cleaner * Add const guarantees to a few tree methods, to make it cleaner * Add const guarantees in unifrac.cpp, to make it cleaner * Add biom::get_obs_data_range and su::set_proportions_range * Implemented chunked embedding. Added related _range methods, and helper classes * Add OpenMP directives * Add support for float intermediate results. * Make DEF_VEC_SIZE a function of TFloat * Add restricts to improve optimization * Fix CPU alignment parameters * Add checking fo zero overlap in UnifracUnweightedTask, also switched to 64-bit packed * Move norrmalization inside biom * Use explicit data type for bool to int conversion * Default to 0, not 1 * Fix typo * Rename variable to mirror the meaning * Add spares logic to UnnormalizedWeighted * Pre-compuite sums for partial-sparse use case in UnifracUnnormalizedWeightedTask * Fix ACC typo * Pre-compuite sums for partial-sparse use case in UnifracNnormalizedWeightedTask
1 parent dc8f010 commit d065698

9 files changed

+757
-264
lines changed

sucpp/biom.cpp

+52-5
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ void biom::create_id_index(std::vector<std::string> &ids,
159159
}
160160
}
161161

162-
unsigned int biom::get_obs_data_direct(std::string id, uint32_t *& current_indices_out, double *& current_data_out) {
162+
unsigned int biom::get_obs_data_direct(const std::string &id, uint32_t *& current_indices_out, double *& current_data_out) {
163163
uint32_t idx = obs_id_index.at(id);
164164
uint32_t start = obs_indptr[idx];
165165
uint32_t end = obs_indptr[idx + 1];
@@ -198,11 +198,12 @@ unsigned int biom::get_obs_data_direct(std::string id, uint32_t *& current_indic
198198
return count[0];
199199
}
200200

201-
void biom::get_obs_data(std::string id, double* out) {
201+
template<class TFloat>
202+
void biom::get_obs_data_TT(const std::string &id, TFloat* out) const {
202203
uint32_t idx = obs_id_index.at(id);
203204
unsigned int count = obs_counts_resident[idx];
204-
uint32_t *indices = obs_indices_resident[idx];
205-
double *data = obs_data_resident[idx];
205+
const uint32_t * const indices = obs_indices_resident[idx];
206+
const double * const data = obs_data_resident[idx];
206207

207208
// reset our output buffer
208209
for(unsigned int i = 0; i < n_samples; i++)
@@ -213,7 +214,53 @@ void biom::get_obs_data(std::string id, double* out) {
213214
}
214215
}
215216

216-
unsigned int biom::get_sample_data_direct(std::string id, uint32_t *& current_indices_out, double *& current_data_out) {
217+
void biom::get_obs_data(const std::string &id, double* out) const {
218+
biom::get_obs_data_TT(id,out);
219+
}
220+
221+
void biom::get_obs_data(const std::string &id, float* out) const {
222+
biom::get_obs_data_TT(id,out);
223+
}
224+
225+
226+
// note: out is supposed to be fully filled, i.e. out[start:end]
227+
template<class TFloat>
228+
void biom::get_obs_data_range_TT(const std::string &id, unsigned int start, unsigned int end, bool normalize, TFloat* out) const {
229+
uint32_t idx = obs_id_index.at(id);
230+
unsigned int count = obs_counts_resident[idx];
231+
const uint32_t * const indices = obs_indices_resident[idx];
232+
const double * const data = obs_data_resident[idx];
233+
234+
// reset our output buffer
235+
for(unsigned int i = start; i < end; i++)
236+
out[i-start] = 0.0;
237+
238+
if (normalize) {
239+
for(unsigned int i = 0; i < count; i++) {
240+
const int32_t j = indices[i];
241+
if ((j>=start)&&(j<end)) {
242+
out[j-start] = data[i]/sample_counts[j];
243+
}
244+
}
245+
} else {
246+
for(unsigned int i = 0; i < count; i++) {
247+
const uint32_t j = indices[i];
248+
if ((j>=start)&&(j<end)) {
249+
out[j-start] = data[i];
250+
}
251+
}
252+
}
253+
}
254+
255+
void biom::get_obs_data_range(const std::string &id, unsigned int start, unsigned int end, bool normalize, double* out) const {
256+
biom::get_obs_data_range_TT(id,start,end,normalize,out);
257+
}
258+
259+
void biom::get_obs_data_range(const std::string &id, unsigned int start, unsigned int end, bool normalize, float* out) const {
260+
biom::get_obs_data_range_TT(id,start,end,normalize,out);
261+
}
262+
263+
unsigned int biom::get_sample_data_direct(const std::string &id, uint32_t *& current_indices_out, double *& current_data_out) {
217264
uint32_t idx = sample_id_index.at(id);
218265
uint32_t start = sample_indptr[idx];
219266
uint32_t end = sample_indptr[idx + 1];

sucpp/biom.hpp

+23-3
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,22 @@ namespace su {
3838
* Values of an index position [0, n_samples) which do not
3939
* have data will be zero'd.
4040
*/
41-
void get_obs_data(std::string id, double* out);
41+
void get_obs_data(const std::string &id, double* out) const;
42+
void get_obs_data(const std::string &id, float* out) const;
43+
44+
/* get a dense vector of a range of observation data
45+
*
46+
* @param id The observation ID to fetc
47+
* @param start Initial index
48+
* @param end First index past the end
49+
* @param normalize If set, divide by sample_counts
50+
* @param out An allocated array of at least size (end-start). First element will corrrectpoint to index start.
51+
* Values of an index position [0, (end-start)) which do not
52+
* have data will be zero'd.
53+
*/
54+
void get_obs_data_range(const std::string &id, unsigned int start, unsigned int end, bool normalize, double* out) const;
55+
void get_obs_data_range(const std::string &id, unsigned int start, unsigned int end, bool normalize, float* out) const;
56+
4257
private:
4358
/* retain DataSet handles within the HDF5 file */
4459
H5::DataSet obs_indices;
@@ -50,8 +65,8 @@ namespace su {
5065
double **obs_data_resident;
5166
unsigned int *obs_counts_resident;
5267

53-
unsigned int get_obs_data_direct(std::string id, uint32_t *& current_indices_out, double *& current_data_out);
54-
unsigned int get_sample_data_direct(std::string id, uint32_t *& current_indices_out, double *& current_data_out);
68+
unsigned int get_obs_data_direct(const std::string &id, uint32_t *& current_indices_out, double *& current_data_out);
69+
unsigned int get_sample_data_direct(const std::string &id, uint32_t *& current_indices_out, double *& current_data_out);
5570
double* get_sample_counts();
5671

5772
/* At construction, lookups mapping IDs -> index position within an
@@ -85,5 +100,10 @@ namespace su {
85100
*/
86101
void create_id_index(std::vector<std::string> &ids,
87102
std::unordered_map<std::string, uint32_t> &map);
103+
104+
105+
// templatized version
106+
template<class TFloat> void get_obs_data_TT(const std::string &id, TFloat* out) const;
107+
template<class TFloat> void get_obs_data_range_TT(const std::string &id, unsigned int start, unsigned int end, bool normalize, TFloat* out) const;
88108
};
89109
}

sucpp/test_su.cpp

+143-4
Original file line numberDiff line numberDiff line change
@@ -555,14 +555,14 @@ void test_bptree_rightsibling() {
555555

556556
void test_propstack_constructor() {
557557
SUITE_START("test propstack constructor");
558-
su::PropStack ps = su::PropStack(10);
558+
su::PropStack<double> ps(10);
559559
// nothing to test directly...
560560
SUITE_END();
561561
}
562562

563563
void test_propstack_push_and_pop() {
564564
SUITE_START("test propstack push and pop");
565-
su::PropStack ps = su::PropStack(10);
565+
su::PropStack<double> ps(10);
566566

567567
double *vec1 = ps.pop(1);
568568
double *vec2 = ps.pop(2);
@@ -587,7 +587,7 @@ void test_propstack_push_and_pop() {
587587

588588
void test_propstack_get() {
589589
SUITE_START("test propstack get");
590-
su::PropStack ps = su::PropStack(10);
590+
su::PropStack<double> ps(10);
591591

592592
double *vec1 = ps.pop(1);
593593
double *vec2 = ps.pop(2);
@@ -609,7 +609,7 @@ void test_unifrac_set_proportions() {
609609
// ( ( ) ( ( ) ( ) ) ( ( ) ( ) ) )
610610
su::BPTree tree = su::BPTree("(GG_OTU_1,(GG_OTU_2,GG_OTU_3),(GG_OTU_5,GG_OTU_4));");
611611
su::biom table = su::biom("test.biom");
612-
su::PropStack ps = su::PropStack(table.n_samples);
612+
su::PropStack<double> ps(table.n_samples);
613613

614614
double *obs = ps.pop(4); // GG_OTU_2
615615
double exp4[] = {0.714285714286, 0.333333333333, 0.0, 0.333333333333, 1.0, 0.25};
@@ -631,6 +631,143 @@ void test_unifrac_set_proportions() {
631631
SUITE_END();
632632
}
633633

634+
void test_unifrac_set_proportions_range() {
635+
SUITE_START("test unifrac set proportions range");
636+
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
637+
// ( ( ) ( ( ) ( ) ) ( ( ) ( ) ) )
638+
su::BPTree tree = su::BPTree("(GG_OTU_1,(GG_OTU_2,GG_OTU_3),(GG_OTU_5,GG_OTU_4));");
639+
su::biom table = su::biom("test.biom");
640+
641+
const double exp4[] = {0.714285714286, 0.333333333333, 0.0, 0.333333333333, 1.0, 0.25};
642+
const double exp6[] = {0.0, 0.0, 0.25, 0.666666666667, 0.0, 0.5};
643+
const double exp3[] = {0.71428571, 0.33333333, 0.25, 1.0, 1.0, 0.75};
644+
645+
646+
// first the whole table
647+
{
648+
su::PropStack<double> ps(table.n_samples);
649+
650+
double *obs = ps.pop(4); // GG_OTU_2
651+
set_proportions_range(obs, tree, 4, table, 0, table.n_samples, ps);
652+
for(unsigned int i = 0; i < table.n_samples; i++)
653+
ASSERT(fabs(obs[i] - exp4[i]) < 0.000001);
654+
655+
obs = ps.pop(6); // GG_OTU_3
656+
set_proportions_range(obs, tree, 6, table, 0, table.n_samples, ps);
657+
for(unsigned int i = 0; i < table.n_samples; i++)
658+
ASSERT(fabs(obs[i] - exp6[i]) < 0.000001);
659+
660+
obs = ps.pop(3); // node containing GG_OTU_2 and GG_OTU_3
661+
set_proportions_range(obs, tree, 3, table, 0, table.n_samples, ps);
662+
for(unsigned int i = 0; i < table.n_samples; i++)
663+
ASSERT(fabs(obs[i] - exp3[i]) < 0.000001);
664+
}
665+
666+
// beginning
667+
{
668+
su::PropStack<double> ps(3);
669+
670+
double *obs = ps.pop(4); // GG_OTU_2
671+
set_proportions_range(obs, tree, 4, table, 0, 3, ps);
672+
for(unsigned int i = 0; i < 3; i++)
673+
ASSERT(fabs(obs[i] - exp4[i]) < 0.000001);
674+
675+
obs = ps.pop(6); // GG_OTU_3
676+
set_proportions_range(obs, tree, 6, table, 0, 3, ps);
677+
for(unsigned int i = 0; i < 3; i++)
678+
ASSERT(fabs(obs[i] - exp6[i]) < 0.000001);
679+
680+
obs = ps.pop(3); // node containing GG_OTU_2 and GG_OTU_3
681+
set_proportions_range(obs, tree, 3, table, 0, 3, ps);
682+
for(unsigned int i = 0; i < 3; i++)
683+
ASSERT(fabs(obs[i] - exp3[i]) < 0.000001);
684+
}
685+
686+
687+
// end
688+
{
689+
su::PropStack<double> ps(4);
690+
691+
double *obs = ps.pop(4); // GG_OTU_2
692+
set_proportions_range(obs, tree, 4, table, 2, table.n_samples, ps);
693+
for(unsigned int i = 2; i < table.n_samples; i++)
694+
ASSERT(fabs(obs[i-2] - exp4[i]) < 0.000001);
695+
696+
obs = ps.pop(6); // GG_OTU_3
697+
set_proportions_range(obs, tree, 6, table, 2, table.n_samples, ps);
698+
for(unsigned int i = 2; i < table.n_samples; i++)
699+
ASSERT(fabs(obs[i-2] - exp6[i]) < 0.000001);
700+
701+
obs = ps.pop(3); // node containing GG_OTU_2 and GG_OTU_3
702+
set_proportions_range(obs, tree, 3, table, 2, table.n_samples, ps);
703+
for(unsigned int i = 2; i < table.n_samples; i++)
704+
ASSERT(fabs(obs[i-2] - exp3[i]) < 0.000001);
705+
}
706+
707+
708+
// middle
709+
{
710+
const unsigned int start = 1;
711+
const unsigned int end = 4;
712+
su::PropStack<double> ps(end-start);
713+
714+
double *obs = ps.pop(4); // GG_OTU_2
715+
set_proportions_range(obs, tree, 4, table, start, end, ps);
716+
for(unsigned int i =start; i < end; i++)
717+
ASSERT(fabs(obs[i-start] - exp4[i]) < 0.000001);
718+
719+
obs = ps.pop(6); // GG_OTU_3
720+
set_proportions_range(obs, tree, 6, table, start, end, ps);
721+
for(unsigned int i = start; i < end; i++)
722+
ASSERT(fabs(obs[i-start] - exp6[i]) < 0.000001);
723+
724+
obs = ps.pop(3); // node containing GG_OTU_2 and GG_OTU_3
725+
set_proportions_range(obs, tree, 3, table, start, end, ps);
726+
for(unsigned int i = start; i < end; i++)
727+
ASSERT(fabs(obs[i-start] - exp3[i]) < 0.000001);
728+
}
729+
730+
SUITE_END();
731+
}
732+
733+
void test_unifrac_set_proportions_range_float() {
734+
SUITE_START("test unifrac set proportions range float");
735+
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
736+
// ( ( ) ( ( ) ( ) ) ( ( ) ( ) ) )
737+
su::BPTree tree = su::BPTree("(GG_OTU_1,(GG_OTU_2,GG_OTU_3),(GG_OTU_5,GG_OTU_4));");
738+
su::biom table = su::biom("test.biom");
739+
740+
const float exp4[] = {0.714285714286, 0.333333333333, 0.0, 0.333333333333, 1.0, 0.25};
741+
const float exp6[] = {0.0, 0.0, 0.25, 0.666666666667, 0.0, 0.5};
742+
const float exp3[] = {0.71428571, 0.33333333, 0.25, 1.0, 1.0, 0.75};
743+
744+
// just midle
745+
{
746+
const unsigned int start = 1;
747+
const unsigned int end = 4;
748+
su::PropStack<float> ps(end-start);
749+
750+
float *obs = ps.pop(4); // GG_OTU_2
751+
set_proportions_range(obs, tree, 4, table, start, end, ps);
752+
for(unsigned int i =start; i < end; i++)
753+
ASSERT(fabs(obs[i-start] - exp4[i]) < 0.000001);
754+
755+
obs = ps.pop(6); // GG_OTU_3
756+
set_proportions_range(obs, tree, 6, table, start, end, ps);
757+
for(unsigned int i = start; i < end; i++)
758+
ASSERT(fabs(obs[i-start] - exp6[i]) < 0.000001);
759+
760+
obs = ps.pop(3); // node containing GG_OTU_2 and GG_OTU_3
761+
set_proportions_range(obs, tree, 3, table, start, end, ps);
762+
for(unsigned int i = start; i < end; i++)
763+
ASSERT(fabs(obs[i-start] - exp3[i]) < 0.000001);
764+
}
765+
766+
SUITE_END();
767+
}
768+
769+
770+
634771
void test_unifrac_deconvolute_stripes() {
635772
SUITE_START("test deconvolute stripes");
636773
std::vector<double*> stripes;
@@ -1703,6 +1840,8 @@ int main(int argc, char** argv) {
17031840
test_propstack_get();
17041841

17051842
test_unifrac_set_proportions();
1843+
test_unifrac_set_proportions_range();
1844+
test_unifrac_set_proportions_range_float();
17061845
test_unifrac_deconvolute_stripes();
17071846
test_unifrac_stripes_to_condensed_form_even();
17081847
test_unifrac_stripes_to_condensed_form_odd();

sucpp/tree.cpp

+12-12
Original file line numberDiff line numberDiff line change
@@ -197,43 +197,43 @@ void BPTree::index_and_cache() {
197197
}
198198
}
199199

200-
uint32_t BPTree::postorderselect(uint32_t k) {
200+
uint32_t BPTree::postorderselect(uint32_t k) const {
201201
return open(select_0_index[k]);
202202
}
203203

204-
uint32_t BPTree::preorderselect(uint32_t k) {
204+
uint32_t BPTree::preorderselect(uint32_t k) const {
205205
return select_1_index[k];
206206
}
207207

208-
inline uint32_t BPTree::open(uint32_t i) {
208+
inline uint32_t BPTree::open(uint32_t i) const {
209209
return structure[i] ? i : openclose[i];
210210
}
211211

212-
inline uint32_t BPTree::close(uint32_t i) {
212+
inline uint32_t BPTree::close(uint32_t i) const {
213213
return structure[i] ? openclose[i] : i;
214214
}
215215

216-
bool BPTree::isleaf(unsigned int idx) {
216+
bool BPTree::isleaf(unsigned int idx) const {
217217
return (structure[idx] && !structure[idx + 1]);
218218
}
219219

220-
uint32_t BPTree::leftchild(uint32_t i) {
220+
uint32_t BPTree::leftchild(uint32_t i) const {
221221
// aka fchild
222222
if(isleaf(i))
223223
return 0; // this is awkward, using 0 which is root, but a root cannot be a child. edge case
224224
else
225225
return i + 1;
226226
}
227227

228-
uint32_t BPTree::rightchild(uint32_t i) {
228+
uint32_t BPTree::rightchild(uint32_t i) const {
229229
// aka lchild
230230
if(isleaf(i))
231231
return 0; // this is awkward, using 0 which is root, but a root cannot be a child. edge case
232232
else
233233
return open(close(i) - 1);
234234
}
235235

236-
uint32_t BPTree::rightsibling(uint32_t i) {
236+
uint32_t BPTree::rightsibling(uint32_t i) const {
237237
// aka nsibling
238238
uint32_t position = close(i) + 1;
239239
if(position >= nparens)
@@ -244,18 +244,18 @@ uint32_t BPTree::rightsibling(uint32_t i) {
244244
return 0;
245245
}
246246

247-
int32_t BPTree::parent(uint32_t i) {
247+
int32_t BPTree::parent(uint32_t i) const {
248248
return enclose(i);
249249
}
250250

251-
int32_t BPTree::enclose(uint32_t i) {
251+
int32_t BPTree::enclose(uint32_t i) const {
252252
if(structure[i])
253253
return bwd(i, -2) + 1;
254254
else
255255
return bwd(i - 1, -2) + 1;
256256
}
257257

258-
int32_t BPTree::bwd(uint32_t i, int d) {
258+
int32_t BPTree::bwd(uint32_t i, int d) const {
259259
uint32_t target_excess = excess[i] + d;
260260
for(int current_idx = i - 1; current_idx >= 0; current_idx--) {
261261
if(excess[current_idx] == target_excess)
@@ -410,7 +410,7 @@ void BPTree::set_node_metadata(unsigned int open_idx, std::string &token) {
410410
lengths[open_idx] = length;
411411
}
412412

413-
inline bool BPTree::is_structure_character(char c) {
413+
inline bool BPTree::is_structure_character(char c) const {
414414
return (c == '(' || c == ')' || c == ',' || c == ';');
415415
}
416416

0 commit comments

Comments
 (0)