@@ -4,20 +4,172 @@ package libpod
4
4
5
5
import (
6
6
"context"
7
+ "fmt"
8
+ "time"
9
+
10
+ "github.com/containers/podman/v5/libpod/define"
11
+ "github.com/sirupsen/logrus"
7
12
)
8
13
9
- // createTimer systemd timers for healthchecks of a container
14
+ // healthcheckTimer manages the background goroutine for healthchecks
15
+ type healthcheckTimer struct {
16
+ container * Container
17
+ interval time.Duration
18
+ ctx context.Context
19
+ cancel context.CancelFunc
20
+ done chan struct {}
21
+ }
22
+
23
+ // Global map to track active timers (in a real implementation, this would be part of the runtime)
24
+ var activeTimers = make (map [string ]* healthcheckTimer )
25
+
26
+ // disableHealthCheckSystemd returns true if healthcheck should be disabled
27
+ // For non-systemd builds, we only disable if interval is 0
28
+ func (c * Container ) disableHealthCheckSystemd (isStartup bool ) bool {
29
+ if isStartup {
30
+ if c .config .StartupHealthCheckConfig != nil && c .config .StartupHealthCheckConfig .Interval == 0 {
31
+ return true
32
+ }
33
+ }
34
+ if c .config .HealthCheckConfig != nil && c .config .HealthCheckConfig .Interval == 0 {
35
+ return true
36
+ }
37
+ return false
38
+ }
39
+
40
+ // createTimer creates a goroutine-based timer for healthchecks of a container
10
41
func (c * Container ) createTimer (interval string , isStartup bool ) error {
42
+ if c .disableHealthCheckSystemd (isStartup ) {
43
+ return nil
44
+ }
45
+
46
+ // Parse the interval duration
47
+ duration , err := time .ParseDuration (interval )
48
+ if err != nil {
49
+ return err
50
+ }
51
+
52
+ // Stop any existing timer
53
+ if c .state .HCUnitName != "" {
54
+ c .stopHealthCheckTimer ()
55
+ }
56
+
57
+ // Create context for cancellation
58
+ ctx , cancel := context .WithCancel (context .Background ())
59
+
60
+ // Create timer struct
61
+ timer := & healthcheckTimer {
62
+ container : c ,
63
+ interval : duration ,
64
+ ctx : ctx ,
65
+ cancel : cancel ,
66
+ done : make (chan struct {}),
67
+ }
68
+
69
+ // Store timer reference globally and in container state
70
+ activeTimers [c .ID ()] = timer
71
+ c .state .HCUnitName = "goroutine-timer"
72
+
73
+ if err := c .save (); err != nil {
74
+ cancel ()
75
+ delete (activeTimers , c .ID ())
76
+ return fmt .Errorf ("saving container %s healthcheck timer: %w" , c .ID (), err )
77
+ }
78
+
79
+ // Start the background goroutine
80
+ go timer .run ()
81
+
82
+ logrus .Debugf ("Created goroutine-based healthcheck timer for container %s with interval %s" , c .ID (), interval )
11
83
return nil
12
84
}
13
85
14
- // startTimer starts a systemd timer for the healthchecks
86
+ // startTimer starts the goroutine-based timer for healthchecks
15
87
func (c * Container ) startTimer (isStartup bool ) error {
88
+ // Timer is already started in createTimer, nothing to do
16
89
return nil
17
90
}
18
91
19
- // removeTransientFiles removes the systemd timer and unit files
20
- // for the container
92
+ // removeTransientFiles stops the goroutine-based timer
21
93
func (c * Container ) removeTransientFiles (ctx context.Context , isStartup bool , unitName string ) error {
22
- return nil
94
+ return c .stopHealthCheckTimer ()
95
+ }
96
+
97
+ // stopHealthCheckTimer stops the background healthcheck goroutine
98
+ func (c * Container ) stopHealthCheckTimer () error {
99
+ timer , exists := activeTimers [c .ID ()]
100
+ if ! exists {
101
+ logrus .Debugf ("No active healthcheck timer found for container %s" , c .ID ())
102
+ return nil
103
+ }
104
+
105
+ logrus .Debugf ("Stopping healthcheck timer for container %s" , c .ID ())
106
+
107
+ // Cancel the context to stop the goroutine
108
+ timer .cancel ()
109
+
110
+ // Wait for the goroutine to finish (with timeout)
111
+ select {
112
+ case <- timer .done :
113
+ logrus .Debugf ("Healthcheck timer for container %s stopped gracefully" , c .ID ())
114
+ case <- time .After (5 * time .Second ):
115
+ logrus .Warnf ("Healthcheck timer for container %s did not stop within timeout" , c .ID ())
116
+ }
117
+
118
+ // Remove from active timers
119
+ delete (activeTimers , c .ID ())
120
+
121
+ // Clear the unit name
122
+ c .state .HCUnitName = ""
123
+ return c .save ()
124
+ }
125
+
126
+ // run executes the healthcheck in a loop with the specified interval
127
+ func (t * healthcheckTimer ) run () {
128
+ defer close (t .done )
129
+
130
+ ticker := time .NewTicker (t .interval )
131
+ defer ticker .Stop ()
132
+
133
+ logrus .Debugf ("Starting healthcheck timer for container %s with interval %s" , t .container .ID (), t .interval )
134
+
135
+ for {
136
+ select {
137
+ case <- t .ctx .Done ():
138
+ logrus .Debugf ("Healthcheck timer for container %s stopped" , t .container .ID ())
139
+ return
140
+ case <- ticker .C :
141
+ // Run the healthcheck
142
+ if err := t .runHealthCheck (); err != nil {
143
+ logrus .Errorf ("Healthcheck failed for container %s: %v" , t .container .ID (), err )
144
+ }
145
+ }
146
+ }
147
+ }
148
+
149
+ // runHealthCheck executes a single healthcheck
150
+ func (t * healthcheckTimer ) runHealthCheck () error {
151
+ // Check if container is still running (without holding lock to avoid deadlock)
152
+ state , err := t .container .State ()
153
+ if err != nil {
154
+ return err
155
+ }
156
+
157
+ if state != define .ContainerStateRunning {
158
+ logrus .Debugf ("Container %s is not running (state: %v), skipping healthcheck" , t .container .ID (), state )
159
+ return nil
160
+ }
161
+
162
+ // Get healthcheck config (without holding lock)
163
+ healthConfig := t .container .HealthCheckConfig ()
164
+ if healthConfig == nil {
165
+ logrus .Debugf ("No healthcheck config found for container %s, skipping healthcheck" , t .container .ID ())
166
+ return nil
167
+ }
168
+
169
+ // Run the healthcheck - let runHealthCheck handle its own locking internally
170
+ ctx , cancel := context .WithTimeout (context .Background (), healthConfig .Timeout )
171
+ defer cancel ()
172
+
173
+ _ , _ , err = t .container .runHealthCheck (ctx , false )
174
+ return err
23
175
}
0 commit comments