Skip to content

Commit a04fd98

Browse files
committed
Introduce "--samples-spr-initial" option to allow optimizing tree topology during building the initial tree
1 parent d7eb390 commit a04fd98

File tree

3 files changed

+46
-0
lines changed

3 files changed

+46
-0
lines changed

tree/tree.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -644,6 +644,7 @@ void cmaple::Tree::doPlacementTemplate(const int num_threads, std::ostream& out_
644644
nodes.reserve(num_seqs + num_seqs);
645645
std::vector<cmaple::Sequence>::size_type i = 0;
646646
std::vector<cmaple::Sequence>::size_type count_every_1K = 0;
647+
const TreeSearchType tree_search_type = cmaple::Tree::parseTreeSearchType(params->tree_search_type_str);
647648

648649
// if users don't input a tree -> create the root from the first sequence
649650
if (!from_input_tree) {
@@ -750,6 +751,8 @@ void cmaple::Tree::doPlacementTemplate(const int num_threads, std::ostream& out_
750751
const int bk_failure_limit_sample = params->failure_limit_sample;
751752
if (parallel_search)
752753
params->failure_limit_sample = 3;
754+
// perform topology optimization
755+
bool perform_topo_optimization = false;
753756
for (size_t j = 0; j < current_chunk_size; ++j)
754757
{
755758
// increase i and move the sequence pointer
@@ -759,6 +762,12 @@ void cmaple::Tree::doPlacementTemplate(const int num_threads, std::ostream& out_
759762
++sequence;
760763
}
761764

765+
// check to perform topology optimization
766+
if (params->num_samples_spr_during_inital_tree
767+
&& i % (params->num_samples_spr_during_inital_tree) == 0
768+
&& i + params->num_samples_spr_during_inital_tree <= num_seqs)
769+
perform_topo_optimization = true;
770+
762771
// show progress
763772
if (cmaple::verbose_mode >= cmaple::VB_MED) {
764773
if (i + 1 - count_every_1K >= 1000)
@@ -855,6 +864,20 @@ void cmaple::Tree::doPlacementTemplate(const int num_threads, std::ostream& out_
855864
// restore the threshold for placement search, if it has been changed
856865
if (parallel_search)
857866
params->failure_limit_sample = bk_failure_limit_sample;
867+
868+
// perform topology optimization if needed
869+
if (perform_topo_optimization)
870+
{
871+
if (cmaple::verbose_mode >= cmaple::VB_MAX) {
872+
cout << "Perform topology optimization after processing " << i << " samples" << endl;
873+
}
874+
875+
if (tree_search_type != FAST_TREE_SEARCH) {
876+
// apply short-range SPR search
877+
optimizeTreeTopology<num_states>(num_threads, tree_search_type, true);
878+
879+
}
880+
}
858881
}
859882

860883
// flag denotes whether there is any new nodes added

utils/tools.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,7 @@ cmaple::Params::Params() {
576576
mutation_update_period = 25;
577577
min_taxa_parallel_placement = 1000;
578578
num_samples_per_thread = 5;
579+
num_samples_spr_during_inital_tree = 0;
579580
upward_search_extension = 5;
580581
failure_limit_sample = 5;
581582
failure_limit_subtree = 4;
@@ -1112,6 +1113,22 @@ void cmaple::parseArg(int argc, char* argv[], Params& params) {
11121113

11131114
continue;
11141115
}
1116+
if (strcmp(argv[cnt], "--samples-spr-initial") == 0 ||
1117+
strcmp(argv[cnt], "-samples-spr-initial") == 0) {
1118+
++cnt;
1119+
1120+
try {
1121+
params.num_samples_spr_during_inital_tree = convert_int(argv[cnt]);
1122+
} catch (std::invalid_argument e) {
1123+
outError(e.what());
1124+
}
1125+
1126+
if (params.num_samples_spr_during_inital_tree < 0) {
1127+
outError("<NUMBER> must be non-negative!");
1128+
}
1129+
1130+
continue;
1131+
}
11151132
if (strcmp(argv[cnt], "--upward-search-extend") == 0 ||
11161133
strcmp(argv[cnt], "-upward-search-extend") == 0) {
11171134
++cnt;

utils/tools.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -556,6 +556,12 @@ class Params {
556556
* the first (parallel) search. Default: 5
557557
*/
558558
NumSeqsType upward_search_extension;
559+
560+
/**
561+
* The number of samples added to perform an SPR search
562+
* during the step of building an initial tree. Default: 0 (disabled)
563+
*/
564+
NumSeqsType num_samples_spr_during_inital_tree;
559565

560566
/**
561567
* Name of the output alignment

0 commit comments

Comments
 (0)