Skip to content

Commit e6d0171

Browse files
author
Farzin
committed
V1 Stable after PLDI submission
1 parent 72feded commit e6d0171

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+67433
-2048
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,5 @@ build/
4343

4444
# Custom makefile artifacts
4545
.deps/
46+
47+
wellcoordination/workload/**

2.log

-10
This file was deleted.

3.log

-10
This file was deleted.

cart-res.sh~

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#!/bin/bash
2+
3+
PERCENTAGES="5 15 25";
4+
5+
for n in $( seq 3 7 ); do
6+
for p in $PERCENTAGES; do
7+
grep -r -w "total average" wellcoordination/workload/$n-4000000-$p/cart/results/* | sort -t: -n -k2 | awk '{split($0,a); count += 1; sum += a[8]} END {print sum/count;}';
8+
done
9+
done

compile.sh~

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
source load_modules.sh
2-
./build.py crash-consensus;
2+
./build.py crash-consensus -b debug;
33
crash-consensus/libgen/export.sh gcc-release;
4-
crash-consensus/demo/using_conan_fully/build.sh gcc-release;
4+
crash-consensus/demo/using_conan_fully/build.sh gcc-debug;
55
#crash-consensus/experiments/build.sh
66

77
# fix this

crash-consensus/src/consensus.cpp

+1-5
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,6 @@
11
#include "consensus.hpp"
22

33
#include <iostream>
4-
// #include <algorithm>
5-
// #include <functional>
6-
7-
// #include "protocol/nb-protocol.cpp"
84

95
namespace dory {
106
RdmaConsensus::RdmaConsensus(int my_id, std::vector<int>& remote_ids,
@@ -20,7 +16,7 @@ RdmaConsensus::RdmaConsensus(int my_id, std::vector<int>& remote_ids,
2016
LOGGER_INIT(logger, ConsensusConfig::logger_prefix) {
2117
using namespace units;
2218

23-
allocated_size = 2_GiB;
19+
allocated_size = 4_GiB;
2420
alignment = 64;
2521

2622
run();

crash-consensus/src/consensus.hpp

+3
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ class RdmaConsensus {
5555
int propose(uint8_t *buf, size_t len);
5656

5757
inline int potentialLeader() { return potential_leader; }
58+
inline bool amILeader() { return am_I_leader.load(); }
59+
60+
void stopHeartbeatThread() {leader_election->stopHeartbreat();}
5861

5962
inline std::pair<uint64_t, uint64_t> proposedReplicatedRange() {
6063
return std::make_pair(majW->range_start, majW->range_end);

crash-consensus/src/crash-consensus.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,12 @@ ProposeError Consensus::propose(uint8_t *buf, size_t len) {
4040
}
4141

4242
int Consensus::potentialLeader() { return impl->potentialLeader(); }
43+
bool Consensus::amILeader() { return impl->amILeader(); }
4344
bool Consensus::blockedResponse() { return impl->response_blocked->load(); }
4445

4546
std::pair<uint64_t, uint64_t> Consensus::proposedReplicatedRange() {
4647
return impl->proposedReplicatedRange();
4748
}
49+
50+
void Consensus::stopHeartbeatThread() { impl->stopHeartbeatThread(); }
4851
} // namespace dory

crash-consensus/src/crash-consensus.hpp

+3
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,12 @@ class Consensus {
3737

3838
ProposeError propose(uint8_t *buf, size_t len);
3939
int potentialLeader();
40+
bool amILeader();
4041
bool blockedResponse();
4142
std::pair<uint64_t, uint64_t> proposedReplicatedRange();
4243

44+
void stopHeartbeatThread();
45+
4346
private:
4447
std::unique_ptr<RdmaConsensus> impl;
4548
};

crash-consensus/src/follower.hpp

+2-5
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,6 @@ class Follower {
101101

102102
private:
103103
void run() {
104-
std::cout << "started follower thread" << std::endl;
105104
int loops = 0;
106105
constexpr unsigned mask = (1 << 14) - 1; // Must be power of 2 minus 1
107106

@@ -123,17 +122,15 @@ class Follower {
123122
log_mutex.lock();
124123
}
125124
}
126-
std::cout << "checking queue" << std::endl;
127125
auto has_next = iter->sampleNext();
128126
if (!has_next) {
129127
continue;
130128
}
131129

132130
ParsedSlot pslot(iter->location());
133131
// std::cout << "Discovered element on position " <<
134-
// uintptr_t(iter->location()) << std::endl; std::cout << "Accepted
135-
// proposal " << pslot.acceptedProposal()
136-
// << std::endl;
132+
// uintptr_t(iter->location()) << std::endl;
133+
// std::cout << "Accepted proposal " << pslot.acceptedProposal() << std::endl;
137134
// std::cout << "First undecided offset " << pslot.firstUndecidedOffset()
138135
// << std::endl;
139136
// auto [buf, len] = pslot.payload();

crash-consensus/src/leader-switch.hpp

+58-31
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ class LeaderHeartbeat {
4444
// Careful, there is a move assignment happening!
4545
}
4646

47+
std::atomic<int> start_id;
48+
4749
void startPoller() {
4850
read_seq = 0;
4951

@@ -202,7 +204,7 @@ class LeaderHeartbeat {
202204
// std::cout << std::endl;
203205
// }
204206

205-
if (leader_pid() == ctx->cc.my_id) {
207+
if (leader_pid(start_id.load()) == ctx->cc.my_id) {
206208
want_leader.store(true);
207209
} else {
208210
std::this_thread::sleep_for(std::chrono::milliseconds(50));
@@ -242,13 +244,15 @@ class LeaderHeartbeat {
242244
};
243245

244246
private:
245-
int leader_pid() {
247+
int leader_pid(int start_index) {
246248
int leader_id = -1;
247249

248-
for (auto &pid : ids) {
250+
for (int i = 0; i < static_cast<int>(ids.size()); i++) {
251+
int pid = ids[i];
249252
// std::cout << pid << " " << status[pid].consecutive_updates <<
250253
// std::endl;
251254
if (status[pid].consecutive_updates > 2) {
255+
// leader_id = pid;
252256
leader_id = pid;
253257
break;
254258
}
@@ -533,7 +537,10 @@ class LeaderSwitcher {
533537
want_leader{&heartbeat->wantLeaderSignal()},
534538
read_slots{ctx->scratchpad.writeLeaderChangeSlots()},
535539
sz{read_slots.size()},
540+
// leader_start_index{(heartbeat->start_id == 0 ? 0 : heartbeat->start_id + 1)},
536541
permission_asker{ctx} {
542+
leader_start_index.store((heartbeat->start_id.load() == 0 ? 0 : heartbeat->start_id.load() + 1));
543+
std::cout << "inside leaderswitcher constructor: " << leader_start_index << std::endl;
537544
prepareScanner();
538545
}
539546

@@ -545,7 +552,7 @@ class LeaderSwitcher {
545552
int force_reset = 0;
546553
auto constexpr shift = 8 * sizeof(uintptr_t) - 1;
547554

548-
for (int i = 0; i < static_cast<int>(sz); i++) {
555+
for (int i = leader_start_index.load(); i < static_cast<int>(sz); i++) {
549556
reading[i] = *reinterpret_cast<uint64_t *>(read_slots[i]);
550557
force_reset = static_cast<int>(reading[i] >> shift);
551558
reading[i] &= (1UL << shift) - 1;
@@ -556,12 +563,12 @@ class LeaderSwitcher {
556563
break;
557564
}
558565
}
559-
560566
// If you discovered a new request for a leader, notify the main event loop
561567
// to give permissions to him and switch to follower.
562568
if (requester > 0) {
563-
// std::cout << "Process with pid " << requester
564-
// << " asked for permissions" << std::endl;
569+
std::cout << "index: " << leader_start_index.load() << std::endl;
570+
std::cout << "Process with pid " << requester
571+
<< " asked for permissions" << std::endl;
565572
leader.store(dory::Leader(requester, reading[requester], force_reset));
566573
want_leader->store(false);
567574
} else {
@@ -594,10 +601,10 @@ class LeaderSwitcher {
594601
Leader current_leader = leader.load();
595602
if (current_leader != prev_leader || force_permission_request) {
596603
// std::cout << "Adjusting connections to leader ("
597-
// << int(current_leader.requester) << " "
598-
// << current_leader.requester_value << ") " << (current_leader
599-
// != prev_leader) << " " << force_permission_request <<
600-
// std::endl;
604+
// << int(current_leader.requester) << " "
605+
// << current_leader.requester_value << ") " << (current_leader
606+
// != prev_leader) << " " << force_permission_request <<
607+
// std::endl;
601608

602609
auto orig_leader = prev_leader;
603610
prev_leader = current_leader;
@@ -611,13 +618,13 @@ class LeaderSwitcher {
611618

612619
// GET_TIMESTAMP(ts_start);
613620

614-
// std::cout << "Asking for permissions: " << hard_reset << std::endl;
621+
std::cout << "Asking for permissions: " << hard_reset << std::endl;
615622
// Ask for permission. Wait for everybody to reply
616623
permission_asker.askForPermissions(hard_reset);
617624

618625
// GET_TIMESTAMP(ts_mid);
619626

620-
// std::cout << "Waiting for approval" << std::endl;
627+
std::cout << "Waiting for approval" << std::endl;
621628
// In order to avoid a distributed deadlock (when two processes try
622629
// to become leaders at the same time), we bail when the leader
623630
// changes.
@@ -643,7 +650,7 @@ class LeaderSwitcher {
643650
// std::cout << "Asking for permissions: " << hard_reset << std::endl;
644651

645652
// std::cout << "I (process " << c_ctx->my_id << ") got leader "
646-
// << "approval" << std::endl;
653+
// << "approval" << std::endl;
647654

648655
// GET_TIMESTAMP(ts_start);
649656
if (hard_reset) {
@@ -828,6 +835,7 @@ class LeaderSwitcher {
828835
std::vector<uint8_t *> dummy;
829836
std::vector<uint8_t *> &read_slots;
830837
size_t sz;
838+
std::atomic<int> leader_start_index;
831839

832840
LeaderPermissionAsker permission_asker;
833841

@@ -873,6 +881,15 @@ class LeaderElection {
873881
return leader_switcher.leaderSignal();
874882
}
875883

884+
// made public
885+
void stopHeartbreat() {
886+
if (hb_started) {
887+
hb_exit_signal.set_value();
888+
heartbeat_thd.join();
889+
hb_started = false;
890+
}
891+
}
892+
876893
private:
877894
void startHeartbeat() {
878895
if (hb_started) {
@@ -881,7 +898,15 @@ class LeaderElection {
881898
hb_started = true;
882899

883900
leader_heartbeat = LeaderHeartbeat(&ctx);
901+
if(threadConfig.prefix == "Secondary-")
902+
leader_heartbeat.start_id.store(1);
903+
else
904+
leader_heartbeat.start_id.store(0);
884905
std::future<void> ftr = hb_exit_signal.get_future();
906+
uint64_t start_hb = std::chrono::duration_cast<std::chrono::microseconds>(
907+
std::chrono::high_resolution_clock::now().time_since_epoch())
908+
.count();
909+
std::cout << "started heartbeat at " << start_hb << std::endl;
885910
heartbeat_thd = std::thread([this, ftr = std::move(ftr)]() {
886911
leader_heartbeat.startPoller();
887912

@@ -947,14 +972,22 @@ class LeaderElection {
947972
response_blocked.store(false);
948973
leader_heartbeat.scanHeartbeats();
949974
} else if (prev_command == 'c') {
950-
response_blocked.store(true);
975+
response_blocked.store(true);
951976
leader_heartbeat.retract();
952977
}
953978

954979
prev_command = current_command;
955980

956981
std::this_thread::sleep_for(std::chrono::milliseconds(1));
957982

983+
// ADDED FARZIN
984+
// std::cout << "checking exit" << std::endl;
985+
if (ftr.wait_for(std::chrono::seconds(0)) ==
986+
std::future_status::ready) {
987+
std::cout << "exiting heartbeat..." << std::endl;
988+
break;
989+
}
990+
958991
// if (i == 0) {
959992
// if (ftr.wait_for(std::chrono::seconds(0)) !=
960993
// std::future_status::timeout) {
@@ -965,7 +998,8 @@ class LeaderElection {
965998
// std::this_thread::sleep_for(std::chrono::seconds(10));
966999
}
9671000

968-
file_watcher_thd.join();
1001+
// file_watcher_thd.join();
1002+
file_watcher_thd.detach();
9691003
});
9701004

9711005
if (threadConfig.pinThreads) {
@@ -977,32 +1011,25 @@ class LeaderElection {
9771011
}
9781012
}
9791013

980-
void stopHeartbreat() {
981-
if (hb_started) {
982-
hb_exit_signal.set_value();
983-
heartbeat_thd.join();
984-
hb_started = false;
985-
}
986-
}
987-
9881014
void startLeaderSwitcher() {
9891015
if (switcher_started) {
9901016
throw std::runtime_error("Already started");
9911017
}
9921018
switcher_started = true;
993-
9941019
leader_switcher = LeaderSwitcher(&ctx, &leader_heartbeat);
9951020
std::future<void> ftr = switcher_exit_signal.get_future();
9961021
switcher_thd = std::thread([this, ftr = std::move(ftr)]() {
9971022
leader_switcher.startPoller();
9981023
for (unsigned long long i = 0;; i = (i + 1) & iterations_ftr_check) {
9991024
leader_switcher.scanPermissions();
1000-
if (i == 0) {
1001-
if (ftr.wait_for(std::chrono::seconds(0)) !=
1002-
std::future_status::timeout) {
1003-
break;
1004-
}
1005-
}
1025+
// if (i == 0) {
1026+
// std::cout << "inside exit check" << std::endl;
1027+
// if (ftr.wait_for(std::chrono::seconds(0)) !=
1028+
// std::future_status::timeout) {
1029+
// std::cout << "exiting heartbeat..." << std::endl;
1030+
// break;
1031+
// }
1032+
// }
10061033
}
10071034
});
10081035

demo/src/beb.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
using namespace dory;
2626
using dory::units::operator""_GiB;
2727

28-
size_t allocated_size = 1_GiB;
28+
size_t allocated_size = 4_GiB;
2929
int alignment = 64;
3030

3131

extract-responsetime.sh

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#!/bin/bash
2+
3+
PERCENTAGES="25 15 5";
4+
5+
for p in $PERCENTAGES; do
6+
printf $p" :\n";
7+
for n in $( seq 3 7 ); do
8+
# printf $n" :\n";
9+
grep -r -w "sum_response_time:" wellcoordination/workload/$n-4000000-$p/$1/results/* | sort -t: -n -k2 | head -1 | awk '{split($0,a); print 4000000000/a[2]}';
10+
11+
done
12+
done
13+
14+
#grep -r -w "total average" wellcoordination/workload/4-4000000-50/courseware/results/hamsaz* | sort -t: -n -k2 | awk '{split($0,a); sum += a[8]; count += 1;} END{print sum/count}'
15+
# for p in $PERCENTAGES; do
16+
# printf "prec: "$p" ";
17+
# for n in $( seq 3 7 ); do
18+
# printf "node: "$n" \n";
19+
# for i in $( seq 0 4 ) ; do
20+
# grep -r -w "calls to $i" wellcoordination/workload/4-4000000-$p/$1/results/$2* | sort -t: -n -k2 | awk '{split($0,a); if(a[9] != "-nan") {sum += a[9]; count += 1;}} END{print sum/count;}'
21+
# done
22+
# printf "###\n";
23+
# done
24+
# printf "****\n";
25+
# done

0 commit comments

Comments
 (0)