File tree Expand file tree Collapse file tree 7 files changed +73
-4
lines changed Expand file tree Collapse file tree 7 files changed +73
-4
lines changed Original file line number Diff line number Diff line change @@ -17,7 +17,7 @@ concourse_group: "{{ concourse_user }}"
1717concourse_gid : " {{ concourse_uid }}"
1818concourse_force_restart : no
1919concourse_service_enabled : yes
20-
20+ concourse_service_watchdog_enabled : yes
2121
2222# Concourse source variables
2323
Original file line number Diff line number Diff line change 2424 dest : " {{ concourse_worker_launcher_path }}"
2525 - src : concourse-retire-worker.j2
2626 dest : " {{ concourse_retire_worker_path }}"
27+ - src : concourse-worker-watchdog.j2
28+ dest : " {{ concourse_install_dir }}/concourse-worker-watchdog"
2729
# Render the systemd unit files for the concourse worker and for its
# watchdog. Each item maps a template (src) to the unit file it produces
# (dest). The templated values carry no padding inside the quotes, so the
# rendered paths contain no stray leading or trailing whitespace.
- name: create worker service | concourse
  template:
    src: "{{ item['src'] }}"
    dest: "{{ item['dest'] }}"
    owner: root
    force: yes
  become: yes
  become_user: root
  with_items:
    - src: concourse-worker.service.j2
      dest: /etc/systemd/system/concourse-worker.service
    - src: concourse-worker-watchdog.service.j2
      dest: /etc/systemd/system/concourse-worker-watchdog.service
  notify:
    - restart concourse worker
Original file line number Diff line number Diff line change 1212 become : yes
1313 when : concourse_worker
1414
# Set the boot-time enablement of the concourse-worker-watchdog unit from
# concourse_service_watchdog_enabled. Runs only when concourse_worker is set.
- name: configure worker watchdog service | concourse
  when: concourse_worker
  become: yes
  service:
    enabled: "{{ concourse_service_watchdog_enabled }}"
    name: concourse-worker-watchdog
21+
1522- name : start web service | concourse
1623 service :
1724 name : concourse-web
2532 state : started
2633 become : yes
2734 when : concourse_worker and concourse_service_enabled
35+
# Start the watchdog unit. All three conditions must hold (a list under
# `when` is AND-ed): the host is a worker, services are managed by this role,
# and the watchdog has not been switched off via
# concourse_service_watchdog_enabled. Without the last condition a disabled
# watchdog would still be started on every run.
- name: start worker watchdog service | concourse
  service:
    name: concourse-worker-watchdog
    state: started
  become: yes
  when:
    - concourse_worker
    - concourse_service_enabled
    - concourse_service_watchdog_enabled
Original file line number Diff line number Diff line change @@ -9,7 +9,7 @@ export {{ key }}="{{ value }}"
99# If $1 PID of concourse worker is provided, do a kill instead of an api call
1010# Mostly used by systemd for concourse compatibility issues https://github.com/concourse/concourse/pull/3929
1111
12- until ! curl --fail 127.0.0.1:7777/ping; do
12+ until ! curl --silent --fail 127.0.0.1:7777/ping; do
1313
1414 if [[ -z " $1 " ]]; then
1515 {{ concourse_binary_path }} retire-worker \
Original file line number Diff line number Diff line change 1+ #! /bin/bash
2+
# watchdog: poll the local health endpoint forever and restart the
# concourse-worker unit after repeated failed probes.
#
# Every 15 seconds the endpoint at 127.0.0.1:8888 is probed. A failed probe
# consumes one of 3 retries; once none are left, the next failed probe runs
# `systemctl restart concourse-worker` and resets the retry budget. A
# successful probe also resets the budget. Takes no arguments and never
# returns.
watchdog () {
  local retries_left=3

  while true; do
    # --fail:     an HTTP error status counts as a failed probe, not only a
    #             refused connection.
    # --max-time: a hung endpoint must not block the watchdog indefinitely.
    # --output:   discard the response body so it does not fill the log.
    if curl --silent --fail --max-time 10 --output /dev/null 127.0.0.1:8888; then
      echo " watchdog: concourse-worker healthcheck ok"
      retries_left=3
    elif [[ $retries_left -ne 0 ]]; then
      echo " retry $retries_left "
      retries_left=$(( retries_left - 1 ))
    else
      echo " restart worker"
      /bin/systemctl restart concourse-worker
      retries_left=3
    fi
    sleep 15
  done
}
31+
32+ watchdog
Original file line number Diff line number Diff line change 1+ # {{ ansible_managed }}
2+
[Unit]
Description=concourse-worker-watchdog
Requires=network-online.target
# Requires= alone does not order start-up; After= makes the watchdog wait for
# both the network and the worker it probes.
After=network-online.target concourse-worker.service

[Service]
ExecStart={{ concourse_install_dir }}/concourse-worker-watchdog
ExecStop=/bin/kill $MAINPID
# No ExecReload: the watchdog script installs no SIGHUP handler, so a HUP-based
# reload would terminate it, and systemd treats SIGHUP as a clean exit, which
# Restart=on-failure does not restart.
Restart=on-failure

[Install]
WantedBy=multi-user.target
Original file line number Diff line number Diff line change 44Description=concourse-worker
55Requires=network-online.target
66After=network-online.target
7+ Before=concourse-worker-watchdog.service
78
89[Service]
910ExecStart={{ concourse_worker_launcher_path }}
You can’t perform that action at this time.
0 commit comments