Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 38 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,38 @@ Each of these status conditions can have any number of rules associated with the

Rules for conditions are [Rhai](https://rhai.rs) expressions and are evaluated in order. The first matching rule sets the status for the case in question. Valid variables for the conditions are:

- `stratum0_servers`: The number of stratum0 servers successfully scraped
- `stratum1_servers`: The number of stratum1 servers successfully scraped
- `sync_servers`: The number of sync servers successfully scraped
- `repos_out_of_sync`: The number of repositories out of sync across all servers scraped
### Repository related

- `repos_out_of_sync`: The number of unique repositories out of sync across all servers scraped
- `repos_total`: The total number of unique repositories scraped across all servers

### Server counts, legacy variables

- `stratum0_servers`: The number of stratum0 servers successfully scraped with a state of OK
- `stratum1_servers`: The number of stratum1 servers successfully scraped with a state of OK
- `sync_servers`: The number of sync servers successfully scraped with a state of OK

### Server counts, detailed variables

- `stratum0_ok`: The number of stratum0 servers with status OK (legacy: `stratum0_servers`)
- `stratum0_degraded`: The number of stratum0 servers with status DEGRADED
- `stratum0_warning`: The number of stratum0 servers with status WARNING
- `stratum0_failed`: The number of stratum0 servers with status FAILED
- `stratum0_total`: The total number of stratum0 servers scraped (should equal stratum0_ok + stratum0_degraded + stratum0_warning + stratum0_failed)

- `stratum1_ok`: The number of stratum1 servers with status OK (legacy: `stratum1_servers`)
- `stratum1_degraded`: The number of stratum1 servers with status DEGRADED
- `stratum1_warning`: The number of stratum1 servers with status WARNING
- `stratum1_failed`: The number of stratum1 servers with status FAILED
- `stratum1_total`: The total number of stratum1 servers scraped (should equal stratum1_ok + stratum1_degraded + stratum1_warning + stratum1_failed)

- `syncserver_ok`: The number of sync servers with status OK (legacy: `sync_servers`)
- `syncserver_degraded`: The number of sync servers with status DEGRADED
- `syncserver_warning`: The number of sync servers with status WARNING
- `syncserver_failed`: The number of sync servers with status FAILED
- `syncserver_total`: The total number of sync servers scraped (should equal syncserver_ok + syncserver_degraded + syncserver_warning + syncserver_failed)

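Note: The prefix for the detailed sync server variables is `syncserver` (singular, no underscore), in line with the `stratum0` and `stratum1` prefixes. Only the legacy variable `sync_servers` uses an underscore and the plural form.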

### Example of rules

Expand All @@ -100,27 +128,27 @@ Imagine these conditions for the overall status, `eessi_status`:
"conditions": [
{
"status": "FAILED",
"when": "stratum1_servers == 0"
"when": "stratum1_ok == 0"
},
{
"status": "WARNING",
"when": "stratum0_servers == 0 && stratum1_servers > 1"
"when": "stratum0_ok == 0 && stratum1_ok > 1"
},
{
"status": "WARNING",
"when": "sync_servers == 0 && stratum1_servers > 1"
"when": "syncserver_ok == 0 && stratum1_ok > 1"
},
{
"status": "DEGRADED",
"when": "stratum0_servers == 1 && stratum1_servers == 1"
"when": "stratum0_ok == 1 && ( stratum1_warning + stratum1_degraded + stratum1_failed ) > 0"
},
{
"status": "DEGRADED",
"when": "repos_out_of_sync > 1"
},
{
"status": "OK",
"when": "stratum0_servers > 0 && stratum1_servers > 1 && sync_servers > 0"
"when": "stratum0_ok > 0 && stratum1_ok > 1 && syncserver_ok > 0"
}
]
}
Expand All @@ -131,7 +159,7 @@ In this example, as the rules are applied in order, the engine will check, in or
1. If there are no stratum1 servers online, the status is set to `FAILED`.
2. If there are no stratum0 servers online and more than one stratum1 server, the status is set to `WARNING`.
3. If there are no sync servers online and more than one stratum1 server, the status is set to `WARNING`.
4. If the stratum0 server is online and only one stratum1 server was found, the status is set to `DEGRADED`.
4. If exactly one stratum0 server has the status OK and at least one stratum1 server has the status degraded, warning, or failed, the status is set to `DEGRADED`.
5. If more than one repository is out of sync, the status is set to `DEGRADED`.
6. If there is at least one stratum0 server, more than one stratum1 server, and at least one sync server, the status is set to `OK`.

Expand Down
204 changes: 201 additions & 3 deletions src/models.rs
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,37 @@ impl StatusManager {
let mut scope = Scope::new();
let engine = Engine::new();

for server_type in vec![
ServerType::Stratum0,
ServerType::Stratum1,
ServerType::SyncServer,
] {
let mut total = 0;

for status in Status::iter() {
let count = self
.get_by_status(status)
.iter()
.filter(|s| s.server_type == server_type)
.count() as i64;
let key = format!(
"{}_{}",
server_type.to_label(),
status.as_ref().to_lowercase()
);
total += count;
println!("Adding to scope: {} = {}", key, count);
scope.push(&key, count);
}

scope.push(&format!("{}_total", server_type.to_label()), total);
println!(
"Adding to scope: {} = {}",
format!("{}_total", server_type.to_label()),
total
);
}

scope.push(
"stratum0_servers",
self.get_by_type_ok(ServerType::Stratum0).len() as i64,
Expand All @@ -493,6 +524,11 @@ impl StatusManager {
self.get_by_type_ok(ServerType::SyncServer).len() as i64,
);

scope.push(
"repos_total",
self.get_status_per_unique_repo().len() as i64,
);

let not_ok_repos = self
.get_status_per_unique_repo()
.iter()
Expand All @@ -503,6 +539,7 @@ impl StatusManager {

for condition in conditions {
debug!("Evaluating condition: {:?}", condition);

if evaluate_condition(&condition, &mut scope, &engine) {
return condition.status;
}
Expand Down Expand Up @@ -555,9 +592,13 @@ fn compare_with_stratum0(
}

/// Evaluates the `when` expression of `condition` against the variables in `scope`.
///
/// Returns the boolean result of the expression. If the engine fails to
/// evaluate the expression, the error is logged at debug level together with
/// the expression text, and the condition is treated as not matching (`false`)
/// so that callers can move on to the next condition.
fn evaluate_condition(condition: &Condition, scope: &mut Scope, engine: &Engine) -> bool {
    engine
        .eval_expression_with_scope::<bool>(scope, &condition.when)
        .unwrap_or_else(|e| {
            // Keep the failure visible in the logs instead of discarding it silently.
            debug!("Failed to evaluate condition '{}': {}", condition.when, e);
            false
        })
}

fn evaluate_conditions_with_key_value(
Expand All @@ -581,3 +622,160 @@ fn evaluate_conditions_with_key_value(
.find(|&condition| evaluate_condition(condition, &mut scope, &engine))
.map_or(Status::FAILED, |condition| condition.status)
}

#[cfg(test)]
mod tests {
use std::str::FromStr;

use super::*;
use yare::parameterized;

use cvmfs_server_scraper::Hostname;

fn create_status_manager() -> StatusManager {
let servers = vec![
Server {
server_type: ServerType::Stratum0,
backend_type: ServerBackendType::CVMFS,
backend_detected: Some(ServerBackendType::CVMFS),
hostname: Hostname::from_str("stratum0.example.com").unwrap(),
repositories: vec![],
status: Status::OK,
metadata: None,
},
Server {
server_type: ServerType::Stratum1,
backend_type: ServerBackendType::AutoDetect,
backend_detected: Some(ServerBackendType::CVMFS),
hostname: Hostname::from_str("stratum1-auto-cvmfs-degraded.example.com").unwrap(),
repositories: vec![],
status: Status::DEGRADED,
metadata: None,
},
Server {
server_type: ServerType::Stratum1,
backend_type: ServerBackendType::CVMFS,
backend_detected: Some(ServerBackendType::CVMFS),
hostname: Hostname::from_str("stratum1-cvmfs-cvmfs-ok.example.com").unwrap(),
repositories: vec![],
status: Status::OK,
metadata: None,
},
Server {
server_type: ServerType::SyncServer,
backend_type: ServerBackendType::CVMFS,
backend_detected: Some(ServerBackendType::CVMFS),
hostname: Hostname::from_str("syncserver.example.com").unwrap(),
repositories: vec![],
status: Status::OK,
metadata: None,
},
];

StatusManager { servers }
}

/// Returns overall-status conditions written with the legacy `*_servers` variables.
///
/// The rules are listed in evaluation order: OK, then DEGRADED, then WARNING.
fn create_conditions_overall_legacy() -> Vec<Condition> {
    let rules = vec![
        ("stratum0_servers >= 1 && stratum1_servers >= 2", Status::OK),
        ("stratum0_servers >= 1 && stratum1_servers >= 1", Status::DEGRADED),
        ("stratum0_servers >= 1", Status::WARNING),
    ];

    rules
        .into_iter()
        .map(|(expression, status)| Condition {
            when: expression.to_string(),
            status,
        })
        .collect()
}

/// Returns overall-status conditions written with the detailed `*_ok` variables.
///
/// The rules are listed in evaluation order: OK, then DEGRADED, then WARNING.
fn create_conditions_overall_new() -> Vec<Condition> {
    let rules = vec![
        ("stratum0_ok >= 1 && stratum1_ok >= 2", Status::OK),
        ("stratum0_ok >= 1 && stratum1_ok >= 1", Status::DEGRADED),
        ("stratum0_ok >= 1", Status::WARNING),
    ];

    rules
        .into_iter()
        .map(|(expression, status)| Condition {
            when: expression.to_string(),
            status,
        })
        .collect()
}

/// Checks that statuses compare in the order
/// OK < DEGRADED < WARNING < FAILED < MAINTENANCE.
#[test]
fn test_status_ordering() {
    let ascending = [
        Status::OK,
        Status::DEGRADED,
        Status::WARNING,
        Status::FAILED,
        Status::MAINTENANCE,
    ];

    // Each status must compare strictly less than its successor in the list.
    for pair in ascending.windows(2) {
        assert!(pair[0] < pair[1]);
    }
}

/// With the fixture (one OK stratum0, one OK and one DEGRADED stratum1), the
/// legacy-variable rules are expected to yield DEGRADED.
#[test]
fn test_conditions_overall_legacy() {
    let overall_status =
        create_status_manager().evaluate_overall_conditions(create_conditions_overall_legacy());

    assert_eq!(overall_status, Status::DEGRADED);
}

/// With the fixture (one OK stratum0, one OK and one DEGRADED stratum1), the
/// detailed-variable rules are expected to yield DEGRADED.
#[test]
fn test_conditions_overall_new() {
    let overall_status =
        create_status_manager().evaluate_overall_conditions(create_conditions_overall_new());

    assert_eq!(overall_status, Status::DEGRADED);
}

/// A condition that references an unknown variable must not match; evaluation
/// is expected to fall through to the next condition.
#[test]
fn test_conditions_invalid_key_is_ignored() {
    // First rule uses a variable that is never pushed to the scope.
    let unknown_variable = Condition {
        when: "invalid_key >= 1".to_string(),
        status: Status::OK,
    };
    // Second rule is expected to match for the fixture and decide the result.
    let fallback = Condition {
        when: "stratum0_ok <= 1".to_string(),
        status: Status::DEGRADED,
    };

    let overall_status =
        create_status_manager().evaluate_overall_conditions(vec![unknown_variable, fallback]);

    assert_eq!(overall_status, Status::DEGRADED);
}

/// For each server type prefix, checks that `<prefix>_total` equals the sum of
/// the ok/degraded/warning/failed counters and also equals the number of
/// fixture servers of that type (`count`).
#[parameterized(
    stratum0 = { "stratum0", 1 },
    stratum1 = { "stratum1", 2 },
    syncserver = { "syncserver", 1 }
)]
fn test_conditions_totals_equals_all_others(server_type: &str, count: usize) {
    let status_manager = create_status_manager();

    let expressions = [
        format!(
            "{server_type}_ok + {server_type}_degraded + {server_type}_warning + {server_type}_failed == {server_type}_total"
        ),
        format!("{count} == {server_type}_total"),
    ];

    // Each expression is the only rule, so the expected OK result means it matched.
    for when in expressions {
        let single_rule = vec![Condition {
            when,
            status: Status::OK,
        }];
        let overall_status = status_manager.evaluate_overall_conditions(single_rule);
        assert_eq!(overall_status, Status::OK);
    }
}
}