Skip to content

No public description #1761

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 27 additions & 5 deletions centipede/command.cc
Original file line number Diff line number Diff line change
Expand Up @@ -333,9 +333,10 @@ int Command::Execute() {
struct pollfd poll_fd = {};
int poll_ret = -1;
auto poll_deadline = absl::Now() + options_.timeout;
// The `poll()` syscall can get interrupted: it sets errno==EINTR in that
// case. We should tolerate that.
bool sigterm_sent = false;
bool try_again = false;
do {
try_again = false;
// NOTE: `poll_fd` has to be reset every time.
poll_fd = {
/*fd=*/fork_server_->pipe_[1], // The file descriptor to wait for.
Expand All @@ -344,15 +345,36 @@ int Command::Execute() {
const int poll_timeout_ms = static_cast<int>(absl::ToInt64Milliseconds(
std::max(poll_deadline - absl::Now(), absl::Milliseconds(1))));
poll_ret = poll(&poll_fd, 1, poll_timeout_ms);
} while (poll_ret < 0 && errno == EINTR);
// The `poll()` syscall can get interrupted: it sets errno==EINTR in that
// case. We should tolerate that.
if (poll_ret < 0 && errno == EINTR) {
try_again = true;
continue;
}
if (poll_ret == 0 && !sigterm_sent) {
LogProblemInfo(
absl::StrCat("Timeout while waiting for fork server: timeout is ",
absl::FormatDuration(options_.timeout)));
LOG(INFO) << "Sending SIGTERM to the fork server PID "
<< fork_server_->pid_ << " and wait for 60s";
CHECK_NE(fork_server_->pid_, -1);
kill(fork_server_->pid_, SIGTERM);
sigterm_sent = true;
poll_deadline += absl::Seconds(60);
try_again = true;
continue;
}
} while (try_again);

if (poll_ret != 1 || (poll_fd.revents & POLLIN) == 0) {
// The fork server errored out or timed out, or some other error occurred,
// e.g. the syscall was interrupted.
if (poll_ret == 0) {
CHECK(sigterm_sent);
LogProblemInfo(
absl::StrCat("Timeout while waiting for fork server: timeout is ",
absl::FormatDuration(options_.timeout)));
"Fork server did not respond within 60s after SIGTERM was sent");
// TODO: xinhaoyuan - the right thing to do is to either properly
// recover or request early exit.
} else {
LogProblemInfo(absl::StrCat(
"Error while waiting for fork server: poll() returned ", poll_ret));
Expand Down
2 changes: 1 addition & 1 deletion centipede/command_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ TEST(CommandTest, ForkServer) {
cmd_options.timeout = absl::Seconds(2);
Command cmd{helper, std::move(cmd_options)};
ASSERT_TRUE(cmd.StartForkServer(test_tmpdir, "ForkServer"));
EXPECT_EQ(cmd.Execute(), EXIT_FAILURE);
EXPECT_EQ(cmd.Execute(), SIGTERM);
std::string log_contents;
ReadFromLocalFile(log, log_contents);
EXPECT_EQ(log_contents, absl::Substitute("Got input: $0", input));
Expand Down
96 changes: 94 additions & 2 deletions centipede/runner_fork_server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
#else // __APPLE__
#include <linux/limits.h> // ARG_MAX
#endif // __APPLE__
#include <signal.h>
#include <sys/wait.h>
#include <unistd.h>

Expand Down Expand Up @@ -205,6 +206,48 @@ __attribute__((constructor(150))) void ForkServerCallMeVeryEarly() {
if (pipe1 < 0) Exit("###open pipe1 failed\n");
Log("###Centipede fork server ready\n");

struct sigaction old_sigterm_act;
struct sigaction sigterm_act;
std::memset(&sigterm_act, 0, sizeof(struct sigaction));
sigterm_act.sa_handler = *[](int _sig) {};
if (sigaction(SIGTERM, &sigterm_act, &old_sigterm_act) != 0) {
Exit("###sigaction failed on SIGTERM for the fork server");
}

struct sigaction old_sigchld_act;
struct sigaction sigchld_act;
std::memset(&sigchld_act, 0, sizeof(struct sigaction));
sigchld_act.sa_handler = *[](int _sig) {};
if (sigaction(SIGCHLD, &sigchld_act, &old_sigchld_act) != 0) {
Exit("###sigaction failed on SIGCHLD for the fork server");
}

sigset_t old_sigset;
sigset_t server_sigset;
if (sigprocmask(SIG_SETMASK, nullptr, &server_sigset) != 0) {
Exit("###sigprocmask() failed to get the existing sigset\n");
}
if (sigaddset(&server_sigset, SIGTERM) != 0) {
Exit("###sigaddset() failed to add SIGTERM\n");
}
if (sigaddset(&server_sigset, SIGCHLD) != 0) {
Exit("###sigaddset() failed to add SIGCHLD\n");
}
if (sigprocmask(SIG_SETMASK, &server_sigset, &old_sigset) != 0) {
Exit("###sigprocmask() failed to set the fork server sigset\n");
}

sigset_t wait_sigset;
if (sigemptyset(&wait_sigset) != 0) {
Exit("###sigemptyset() failed\n");
}
if (sigaddset(&wait_sigset, SIGTERM) != 0) {
Exit("###sigaddset() failed to add SIGTERM to the wait sigset\n");
}
if (sigaddset(&wait_sigset, SIGCHLD) != 0) {
Exit("###sigaddset() failed to add SIGCHLD to the wait sigset\n");
}

// Loop.
while (true) {
Log("###Centipede fork server blocking on pipe0\n");
Expand All @@ -216,6 +259,15 @@ __attribute__((constructor(150))) void ForkServerCallMeVeryEarly() {
if (pid < 0) {
Exit("###fork failed\n");
} else if (pid == 0) {
if (sigaction(SIGTERM, &old_sigterm_act, nullptr) != 0) {
Exit("###sigaction failed on SIGTERM for the child");
}
if (sigaction(SIGCHLD, &old_sigchld_act, nullptr) != 0) {
Exit("###sigaction failed on SIGCHLD for the child");
}
if (sigprocmask(SIG_SETMASK, &old_sigset, nullptr) != 0) {
Exit("###sigprocmask() failed to restore the previous sigset\n");
}
// Child process. Reset stdout/stderr and let it run normally.
for (int fd = 1; fd <= 2; fd++) {
lseek(fd, 0, SEEK_SET);
Expand All @@ -227,7 +279,27 @@ __attribute__((constructor(150))) void ForkServerCallMeVeryEarly() {
} else {
// Parent process.
int status = -1;
if (waitpid(pid, &status, 0) < 0) Exit("###waitpid failed\n");
while (true) {
int sig = -1;
if (sigwait(&wait_sigset, &sig) != 0) {
Exit("###sigwait() failed\n");
}
if (sig == SIGCHLD) {
Log("###Got SIGCHLD\n");
if (waitpid(pid, &status, WNOHANG) < 0) {
Exit("###waitpid failed\n");
}
if (WIFEXITED(status) || WIFSIGNALED(status)) {
Log("###Got exit status\n");
break;
}
} else if (sig == SIGTERM) {
Log("###Got SIGTERM\n");
kill(pid, SIGTERM);
} else {
Exit("###Unknown signal from sigwait\n");
}
}
if (WIFEXITED(status)) {
if (WEXITSTATUS(status) == EXIT_SUCCESS)
Log("###Centipede fork returned EXIT_SUCCESS\n");
Expand All @@ -239,8 +311,28 @@ __attribute__((constructor(150))) void ForkServerCallMeVeryEarly() {
Log("###Centipede fork crashed\n");
}
Log("###Centipede fork writing status to pipe1\n");
if (write(pipe1, &status, sizeof(status)) == -1)
if (write(pipe1, &status, sizeof(status)) == -1) {
Exit("###write to pipe1 failed\n");
}
// Drain any signals that are still pending before the next execution. The
// controller won't send more signals once the write has succeeded.
{
sigset_t pending;
while (true) {
if (sigpending(&pending) != 0) {
Exit("###sigpending() failed\n");
}
if (sigismember(&pending, SIGTERM) ||
sigismember(&pending, SIGCHLD)) {
int unused_sig;
if (sigwait(&wait_sigset, &unused_sig) != 0) {
Exit("###sigwait() failed\n");
}
} else {
break;
}
}
}
}
}
// The only way out of the loop is via Exit() or return.
Expand Down