Skip to content

Commit 9f93188

Browse files
author
Samuel Archambault
committed
healthcheck for non-systemd
1 parent 8900d8e commit 9f93188

File tree

1 file changed

+157
-5
lines changed

1 file changed

+157
-5
lines changed

libpod/healthcheck_nosystemd_linux.go

Lines changed: 157 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,172 @@ package libpod
44

55
import (
66
"context"
7+
"fmt"
8+
"time"
9+
10+
"github.com/containers/podman/v5/libpod/define"
11+
"github.com/sirupsen/logrus"
712
)
813

9-
// createTimer systemd timers for healthchecks of a container
14+
// healthcheckTimer manages the background goroutine for healthchecks
15+
type healthcheckTimer struct {
16+
container *Container
17+
interval time.Duration
18+
ctx context.Context
19+
cancel context.CancelFunc
20+
done chan struct{}
21+
}
22+
23+
// Global map to track active timers (in a real implementation, this would be part of the runtime)
24+
var activeTimers = make(map[string]*healthcheckTimer)
25+
26+
// disableHealthCheckSystemd returns true if healthcheck should be disabled
27+
// For non-systemd builds, we only disable if interval is 0
28+
func (c *Container) disableHealthCheckSystemd(isStartup bool) bool {
29+
if isStartup {
30+
if c.config.StartupHealthCheckConfig != nil && c.config.StartupHealthCheckConfig.Interval == 0 {
31+
return true
32+
}
33+
}
34+
if c.config.HealthCheckConfig != nil && c.config.HealthCheckConfig.Interval == 0 {
35+
return true
36+
}
37+
return false
38+
}
39+
40+
// createTimer creates a goroutine-based timer for healthchecks of a container
1041
func (c *Container) createTimer(interval string, isStartup bool) error {
42+
if c.disableHealthCheckSystemd(isStartup) {
43+
return nil
44+
}
45+
46+
// Parse the interval duration
47+
duration, err := time.ParseDuration(interval)
48+
if err != nil {
49+
return err
50+
}
51+
52+
// Stop any existing timer
53+
if c.state.HCUnitName != "" {
54+
c.stopHealthCheckTimer()
55+
}
56+
57+
// Create context for cancellation
58+
ctx, cancel := context.WithCancel(context.Background())
59+
60+
// Create timer struct
61+
timer := &healthcheckTimer{
62+
container: c,
63+
interval: duration,
64+
ctx: ctx,
65+
cancel: cancel,
66+
done: make(chan struct{}),
67+
}
68+
69+
// Store timer reference globally and in container state
70+
activeTimers[c.ID()] = timer
71+
c.state.HCUnitName = "goroutine-timer"
72+
73+
if err := c.save(); err != nil {
74+
cancel()
75+
delete(activeTimers, c.ID())
76+
return fmt.Errorf("saving container %s healthcheck timer: %w", c.ID(), err)
77+
}
78+
79+
// Start the background goroutine
80+
go timer.run()
81+
82+
logrus.Debugf("Created goroutine-based healthcheck timer for container %s with interval %s", c.ID(), interval)
1183
return nil
1284
}
1385

14-
// startTimer starts a systemd timer for the healthchecks
86+
// startTimer starts the goroutine-based timer for healthchecks
1587
func (c *Container) startTimer(isStartup bool) error {
88+
// Timer is already started in createTimer, nothing to do
1689
return nil
1790
}
1891

19-
// removeTransientFiles removes the systemd timer and unit files
20-
// for the container
92+
// removeTransientFiles stops the goroutine-based timer
2193
func (c *Container) removeTransientFiles(ctx context.Context, isStartup bool, unitName string) error {
22-
return nil
94+
return c.stopHealthCheckTimer()
95+
}
96+
97+
// stopHealthCheckTimer stops the background healthcheck goroutine
98+
func (c *Container) stopHealthCheckTimer() error {
99+
timer, exists := activeTimers[c.ID()]
100+
if !exists {
101+
logrus.Debugf("No active healthcheck timer found for container %s", c.ID())
102+
return nil
103+
}
104+
105+
logrus.Debugf("Stopping healthcheck timer for container %s", c.ID())
106+
107+
// Cancel the context to stop the goroutine
108+
timer.cancel()
109+
110+
// Wait for the goroutine to finish (with timeout)
111+
select {
112+
case <-timer.done:
113+
logrus.Debugf("Healthcheck timer for container %s stopped gracefully", c.ID())
114+
case <-time.After(5 * time.Second):
115+
logrus.Warnf("Healthcheck timer for container %s did not stop within timeout", c.ID())
116+
}
117+
118+
// Remove from active timers
119+
delete(activeTimers, c.ID())
120+
121+
// Clear the unit name
122+
c.state.HCUnitName = ""
123+
return c.save()
124+
}
125+
126+
// run executes the healthcheck in a loop with the specified interval
127+
func (t *healthcheckTimer) run() {
128+
defer close(t.done)
129+
130+
ticker := time.NewTicker(t.interval)
131+
defer ticker.Stop()
132+
133+
logrus.Debugf("Starting healthcheck timer for container %s with interval %s", t.container.ID(), t.interval)
134+
135+
for {
136+
select {
137+
case <-t.ctx.Done():
138+
logrus.Debugf("Healthcheck timer for container %s stopped", t.container.ID())
139+
return
140+
case <-ticker.C:
141+
// Run the healthcheck
142+
if err := t.runHealthCheck(); err != nil {
143+
logrus.Errorf("Healthcheck failed for container %s: %v", t.container.ID(), err)
144+
}
145+
}
146+
}
147+
}
148+
149+
// runHealthCheck executes a single healthcheck
150+
func (t *healthcheckTimer) runHealthCheck() error {
151+
// Check if container is still running (without holding lock to avoid deadlock)
152+
state, err := t.container.State()
153+
if err != nil {
154+
return err
155+
}
156+
157+
if state != define.ContainerStateRunning {
158+
logrus.Debugf("Container %s is not running (state: %v), skipping healthcheck", t.container.ID(), state)
159+
return nil
160+
}
161+
162+
// Get healthcheck config (without holding lock)
163+
healthConfig := t.container.HealthCheckConfig()
164+
if healthConfig == nil {
165+
logrus.Debugf("No healthcheck config found for container %s, skipping healthcheck", t.container.ID())
166+
return nil
167+
}
168+
169+
// Run the healthcheck - let runHealthCheck handle its own locking internally
170+
ctx, cancel := context.WithTimeout(context.Background(), healthConfig.Timeout)
171+
defer cancel()
172+
173+
_, _, err = t.container.runHealthCheck(ctx, false)
174+
return err
23175
}

0 commit comments

Comments
 (0)