Skip to content

Commit 565abde

Browse files
xinhaoyuan authored and copybara-github committed
No public description
PiperOrigin-RevId: 775821600
1 parent 45a1c3a commit 565abde

File tree

3 files changed

+122
-8
lines changed

3 files changed

+122
-8
lines changed

centipede/command.cc

Lines changed: 27 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -333,9 +333,10 @@ int Command::Execute() {
333333
struct pollfd poll_fd = {};
334334
int poll_ret = -1;
335335
auto poll_deadline = absl::Now() + options_.timeout;
336-
// The `poll()` syscall can get interrupted: it sets errno==EINTR in that
337-
// case. We should tolerate that.
336+
bool sigterm_sent = false;
337+
bool try_again = false;
338338
do {
339+
try_again = false;
339340
// NOTE: `poll_fd` has to be reset every time.
340341
poll_fd = {
341342
/*fd=*/fork_server_->pipe_[1], // The file descriptor to wait for.
@@ -344,15 +345,36 @@ int Command::Execute() {
344345
const int poll_timeout_ms = static_cast<int>(absl::ToInt64Milliseconds(
345346
std::max(poll_deadline - absl::Now(), absl::Milliseconds(1))));
346347
poll_ret = poll(&poll_fd, 1, poll_timeout_ms);
347-
} while (poll_ret < 0 && errno == EINTR);
348+
// The `poll()` syscall can get interrupted: it sets errno==EINTR in that
349+
// case. We should tolerate that.
350+
if (poll_ret < 0 && errno == EINTR) {
351+
try_again = true;
352+
continue;
353+
}
354+
if (poll_ret == 0 && !sigterm_sent) {
355+
LogProblemInfo(
356+
absl::StrCat("Timeout while waiting for fork server: timeout is ",
357+
absl::FormatDuration(options_.timeout)));
358+
LOG(INFO) << "Sending SIGTERM to the fork server PID "
359+
<< fork_server_->pid_ << " and wait for 60s";
360+
CHECK_NE(fork_server_->pid_, -1);
361+
kill(fork_server_->pid_, SIGTERM);
362+
sigterm_sent = true;
363+
poll_deadline += absl::Seconds(60);
364+
try_again = true;
365+
continue;
366+
}
367+
} while (try_again);
348368

349369
if (poll_ret != 1 || (poll_fd.revents & POLLIN) == 0) {
350370
// The fork server errored out or timed out, or some other error occurred,
351371
// e.g. the syscall was interrupted.
352372
if (poll_ret == 0) {
373+
CHECK(sigterm_sent);
353374
LogProblemInfo(
354-
absl::StrCat("Timeout while waiting for fork server: timeout is ",
355-
absl::FormatDuration(options_.timeout)));
375+
"Fork server did not respond within 60s after SIGTERM was sent");
376+
// TODO: xinhaoyuan - the right thing to do is to either properly
377+
// recover or request early exit.
356378
} else {
357379
LogProblemInfo(absl::StrCat(
358380
"Error while waiting for fork server: poll() returned ", poll_ret));

centipede/command_test.cc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -190,7 +190,7 @@ TEST(CommandTest, ForkServer) {
190190
cmd_options.timeout = absl::Seconds(2);
191191
Command cmd{helper, std::move(cmd_options)};
192192
ASSERT_TRUE(cmd.StartForkServer(test_tmpdir, "ForkServer"));
193-
EXPECT_EQ(cmd.Execute(), EXIT_FAILURE);
193+
EXPECT_EQ(cmd.Execute(), SIGTERM);
194194
std::string log_contents;
195195
ReadFromLocalFile(log, log_contents);
196196
EXPECT_EQ(log_contents, absl::Substitute("Got input: $0", input));

centipede/runner_fork_server.cc

Lines changed: 94 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,7 @@
5757
#else // __APPLE__
5858
#include <linux/limits.h> // ARG_MAX
5959
#endif // __APPLE__
60+
#include <signal.h>
6061
#include <sys/wait.h>
6162
#include <unistd.h>
6263

@@ -205,6 +206,48 @@ __attribute__((constructor(150))) void ForkServerCallMeVeryEarly() {
205206
if (pipe1 < 0) Exit("###open pipe1 failed\n");
206207
Log("###Centipede fork server ready\n");
207208

209+
struct sigaction old_sigterm_act;
210+
struct sigaction sigterm_act;
211+
std::memset(&sigterm_act, 0, sizeof(struct sigaction));
212+
sigterm_act.sa_handler = *[](int _sig) {};
213+
if (sigaction(SIGTERM, &sigterm_act, &old_sigterm_act) != 0) {
214+
Exit("###sigaction failed on SIGTERM for the fork server");
215+
}
216+
217+
struct sigaction old_sigchld_act;
218+
struct sigaction sigchld_act;
219+
std::memset(&sigchld_act, 0, sizeof(struct sigaction));
220+
sigchld_act.sa_handler = *[](int _sig) {};
221+
if (sigaction(SIGCHLD, &sigchld_act, &old_sigchld_act) != 0) {
222+
Exit("###sigaction failed on SIGCHLD for the fork server");
223+
}
224+
225+
sigset_t old_sigset;
226+
sigset_t server_sigset;
227+
if (sigprocmask(SIG_SETMASK, nullptr, &server_sigset) != 0) {
228+
Exit("###sigprocmask() failed to get the existing sigset\n");
229+
}
230+
if (sigaddset(&server_sigset, SIGTERM) != 0) {
231+
Exit("###sigaddset() failed to add SIGTERM\n");
232+
}
233+
if (sigaddset(&server_sigset, SIGCHLD) != 0) {
234+
Exit("###sigaddset() failed to add SIGCHLD\n");
235+
}
236+
if (sigprocmask(SIG_SETMASK, &server_sigset, &old_sigset) != 0) {
237+
Exit("###sigprocmask() failed to set the fork server sigset\n");
238+
}
239+
240+
sigset_t wait_sigset;
241+
if (sigemptyset(&wait_sigset) != 0) {
242+
Exit("###sigemptyset() failed\n");
243+
}
244+
if (sigaddset(&wait_sigset, SIGTERM) != 0) {
245+
Exit("###sigaddset() failed to add SIGTERM to the wait sigset\n");
246+
}
247+
if (sigaddset(&wait_sigset, SIGCHLD) != 0) {
248+
Exit("###sigaddset() failed to add SIGCHLD to the wait sigset\n");
249+
}
250+
208251
// Loop.
209252
while (true) {
210253
Log("###Centipede fork server blocking on pipe0\n");
@@ -216,6 +259,15 @@ __attribute__((constructor(150))) void ForkServerCallMeVeryEarly() {
216259
if (pid < 0) {
217260
Exit("###fork failed\n");
218261
} else if (pid == 0) {
262+
if (sigaction(SIGTERM, &old_sigterm_act, nullptr) != 0) {
263+
Exit("###sigaction failed on SIGTERM for the child");
264+
}
265+
if (sigaction(SIGCHLD, &old_sigchld_act, nullptr) != 0) {
266+
Exit("###sigaction failed on SIGCHLD for the child");
267+
}
268+
if (sigprocmask(SIG_SETMASK, &old_sigset, nullptr) != 0) {
269+
Exit("###sigprocmask() failed to restore the previous sigset\n");
270+
}
219271
// Child process. Reset stdout/stderr and let it run normally.
220272
for (int fd = 1; fd <= 2; fd++) {
221273
lseek(fd, 0, SEEK_SET);
@@ -227,7 +279,27 @@ __attribute__((constructor(150))) void ForkServerCallMeVeryEarly() {
227279
} else {
228280
// Parent process.
229281
int status = -1;
230-
if (waitpid(pid, &status, 0) < 0) Exit("###waitpid failed\n");
282+
while (true) {
283+
int sig = -1;
284+
if (sigwait(&wait_sigset, &sig) != 0) {
285+
Exit("###sigwait() failed\n");
286+
}
287+
if (sig == SIGCHLD) {
288+
Log("###Got SIGCHLD\n");
289+
if (waitpid(pid, &status, WNOHANG) < 0) {
290+
Exit("###waitpid failed\n");
291+
}
292+
if (WIFEXITED(status) || WIFSIGNALED(status)) {
293+
Log("###Got exit status\n");
294+
break;
295+
}
296+
} else if (sig == SIGTERM) {
297+
Log("###Got SIGTERM\n");
298+
kill(pid, SIGTERM);
299+
} else {
300+
Exit("###Unknown signal from sigwait\n");
301+
}
302+
}
231303
if (WIFEXITED(status)) {
232304
if (WEXITSTATUS(status) == EXIT_SUCCESS)
233305
Log("###Centipede fork returned EXIT_SUCCESS\n");
@@ -239,8 +311,28 @@ __attribute__((constructor(150))) void ForkServerCallMeVeryEarly() {
239311
Log("###Centipede fork crashed\n");
240312
}
241313
Log("###Centipede fork writing status to pipe1\n");
242-
if (write(pipe1, &status, sizeof(status)) == -1)
314+
if (write(pipe1, &status, sizeof(status)) == -1) {
243315
Exit("###write to pipe1 failed\n");
316+
}
317+
// Deplete any remaining signals before the next execution. Controller
318+
// won't send more signals after write succeeded.
319+
{
320+
sigset_t pending;
321+
while (true) {
322+
if (sigpending(&pending) != 0) {
323+
Exit("###sigpending() failed\n");
324+
}
325+
if (sigismember(&pending, SIGTERM) ||
326+
sigismember(&pending, SIGCHLD)) {
327+
int unused_sig;
328+
if (sigwait(&wait_sigset, &unused_sig) != 0) {
329+
Exit("###sigwait() failed\n");
330+
}
331+
} else {
332+
break;
333+
}
334+
}
335+
}
244336
}
245337
}
246338
// The only way out of the loop is via Exit() or return.

0 commit comments

Comments
 (0)