Skip to content

Commit 04b7489

Browse files
committed
fix(health): force alert notifications on health command runs
1 parent d725230 commit 04b7489

6 files changed

Lines changed: 110 additions & 12 deletions

File tree

src/__tests__/alerts.test.ts

Lines changed: 96 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,96 @@
1+
import { describe, it, expect, vi, beforeEach } from 'vitest';
2+
import { triggerAlert } from '../monitor/alerts';
3+
import { logger } from '../utils/logger';
4+
import { getConfig } from '../utils/config';
5+
6+
// Mock config
7+
vi.mock('../utils/config', () => ({
8+
getConfig: vi.fn(() => ({
9+
alert_cooldown: 300,
10+
discord_webhook: 'https://discord.com/api/webhooks/dummy',
11+
})),
12+
getSMTPSettings: vi.fn(() => ({
13+
host: '',
14+
port: 587,
15+
auth: { user: '', pass: '' },
16+
to: '',
17+
})),
18+
}));
19+
20+
// Mock logger
21+
vi.mock('../utils/logger', () => ({
22+
logger: {
23+
info: vi.fn(),
24+
error: vi.fn(),
25+
},
26+
}));
27+
28+
// Mock fetch globally
29+
const mockFetch = vi.fn().mockResolvedValue({ ok: true });
30+
global.fetch = mockFetch;
31+
32+
describe('alerts monitoring', () => {
33+
beforeEach(() => {
34+
vi.clearAllMocks();
35+
});
36+
37+
it('should trigger alert on first invocation', async () => {
38+
const alertId = 'container:test-first:failure';
39+
await triggerAlert({
40+
id: alertId,
41+
message: 'Container crashed',
42+
type: 'container',
43+
severity: 'critical',
44+
});
45+
46+
expect(logger.info).toHaveBeenCalledWith(expect.stringContaining(`Triggering alert for ${alertId}`));
47+
expect(mockFetch).toHaveBeenCalledTimes(1);
48+
});
49+
50+
it('should respect cooldown and suppress alert on subsequent invocations within cooldown window', async () => {
51+
const alertId = 'container:test-cooldown:failure';
52+
53+
// First trigger - should send
54+
await triggerAlert({
55+
id: alertId,
56+
message: 'Container crashed first time',
57+
type: 'container',
58+
severity: 'critical',
59+
});
60+
expect(mockFetch).toHaveBeenCalledTimes(1);
61+
62+
// Second trigger within cooldown - should be suppressed
63+
await triggerAlert({
64+
id: alertId,
65+
message: 'Container crashed second time',
66+
type: 'container',
67+
severity: 'critical',
68+
});
69+
// Call count should still be 1
70+
expect(mockFetch).toHaveBeenCalledTimes(1);
71+
});
72+
73+
it('should bypass cooldown and trigger alert on subsequent invocations if force option is true', async () => {
74+
const alertId = 'container:test-force:failure';
75+
76+
// First trigger - should send
77+
await triggerAlert({
78+
id: alertId,
79+
message: 'Container crashed first time',
80+
type: 'container',
81+
severity: 'critical',
82+
});
83+
expect(mockFetch).toHaveBeenCalledTimes(1);
84+
85+
// Second trigger with force: true - should send regardless of cooldown
86+
await triggerAlert({
87+
id: alertId,
88+
message: 'Container crashed second time',
89+
type: 'container',
90+
severity: 'critical',
91+
}, { force: true });
92+
93+
// Call count should be 2 now
94+
expect(mockFetch).toHaveBeenCalledTimes(2);
95+
});
96+
});

src/__tests__/health.test.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -85,6 +85,8 @@ describe('health command', () => {
8585

8686
await program.parseAsync(['node', 'test', 'health', 'all']);
8787

88+
expect(getRunningContainers).toHaveBeenCalledWith({ forceAlert: true });
89+
expect(getRunningPods).toHaveBeenCalledWith({ forceAlert: true });
8890
expect(logger.info).toHaveBeenCalledWith('Showing health for all...');
8991
expect(tableUtils.renderTable).toHaveBeenCalledWith(
9092
expect.objectContaining({

src/commands/health.ts

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -48,14 +48,14 @@ const healthColor = (status: string): string => {
4848
return chalk.yellow(status);
4949
};
5050

51-
const fetchHealthRows = async (target: string): Promise<(string | number)[][]> => {
51+
const fetchHealthRows = async (target: string, options?: { forceAlert?: boolean }): Promise<(string | number)[][]> => {
5252
const rows: (string | number)[][] = [];
5353
const shouldFetchContainers = target === 'all' || target === 'containers';
5454
const shouldFetchPods = target === 'all' || target === 'pods';
5555

5656
const [containerResult, podResult] = await Promise.allSettled([
57-
shouldFetchContainers ? getRunningContainers() : Promise.resolve([]),
58-
shouldFetchPods ? getRunningPods() : Promise.resolve([]),
57+
shouldFetchContainers ? getRunningContainers({ forceAlert: options?.forceAlert }) : Promise.resolve([]),
58+
shouldFetchPods ? getRunningPods({ forceAlert: options?.forceAlert }) : Promise.resolve([]),
5959
]);
6060

6161
if (shouldFetchContainers) {
@@ -150,7 +150,7 @@ export const showHealth = async (target: string, options: HealthOptions = {}): P
150150
return;
151151
}
152152

153-
const rows = await fetchHealthRows(target);
153+
const rows = await fetchHealthRows(target, { forceAlert: true });
154154

155155
// Clear terminal screen
156156
process.stdout.write('\x1Bc');
@@ -180,7 +180,7 @@ export const showHealth = async (target: string, options: HealthOptions = {}): P
180180
await poll();
181181
} else {
182182
const spinner = createSpinner(`Checking ${target} health...`).start();
183-
const rows = await fetchHealthRows(target);
183+
const rows = await fetchHealthRows(target, { forceAlert: true });
184184
spinner.stop();
185185

186186
if (rows.length === 0) {

src/docker/containers.ts

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ export interface ContainerData {
1010
status: string;
1111
}
1212

13-
export const getRunningContainers = async (): Promise<ContainerData[]> => {
13+
export const getRunningContainers = async (options?: { forceAlert?: boolean }): Promise<ContainerData[]> => {
1414
const docker = getDockerClient();
1515
try {
1616
// Try to list containers, use a timeout if possible or just catch common connection errors
@@ -27,7 +27,7 @@ export const getRunningContainers = async (): Promise<ContainerData[]> => {
2727
type: 'container',
2828
severity: 'warning',
2929
message: `Docker container ${name} (${id}) is restarting.`,
30-
});
30+
}, { force: options?.forceAlert });
3131
} else if (c.State === 'exited') {
3232
const match = c.Status.match(/Exited \((\d+)\)/);
3333
const exitCode = match ? parseInt(match[1], 10) : 0;
@@ -38,7 +38,7 @@ export const getRunningContainers = async (): Promise<ContainerData[]> => {
3838
type: 'container',
3939
severity: 'critical',
4040
message: `Docker container ${name} (${id}) exited with code ${exitCode}.`,
41-
});
41+
}, { force: options?.forceAlert });
4242
}
4343
}
4444

src/kubernetes/pods.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ export interface PodData {
1111
node: string;
1212
}
1313

14-
export const getRunningPods = async (): Promise<PodData[]> => {
14+
export const getRunningPods = async (options?: { forceAlert?: boolean }): Promise<PodData[]> => {
1515
const api = getK8sApi();
1616
try {
1717
const res = await api.listPodForAllNamespaces();
@@ -43,7 +43,7 @@ export const getRunningPods = async (): Promise<PodData[]> => {
4343
type: 'pod',
4444
severity: 'critical',
4545
message: `Pod ${name} in namespace ${pod.metadata?.namespace} failed: ${failureReason}`,
46-
});
46+
}, { force: options?.forceAlert });
4747
}
4848

4949
return {

src/monitor/alerts.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -70,13 +70,13 @@ const sendEmailNotification = async (alert: Alert) => {
7070
}
7171
};
7272

73-
export const triggerAlert = async (alert: Alert) => {
73+
export const triggerAlert = async (alert: Alert, options?: { force?: boolean }) => {
7474
const now = Date.now();
7575
const { alert_cooldown = 300 } = getConfig(); // Default 5 minutes
7676
const cooldownMs = alert_cooldown * 1000;
7777

7878
const lastAlert = cooldownTracker.get(alert.id);
79-
if (lastAlert && (now - lastAlert) < cooldownMs) {
79+
if (!options?.force && lastAlert && (now - lastAlert) < cooldownMs) {
8080
return; // Still in cooldown
8181
}
8282

0 commit comments

Comments
 (0)