|
/*
 * Monitors interface link status, detects interface errors, and
 * notifies when anomalies are found.
 *
 * Two input variables control detection:
 *
 * 1) input-interface is a regular expression that matches the
 *    interfaces that you would like to monitor. By default it is '.*',
 *    which matches all interfaces. Use something like 'ge.*' to
 *    match only gigabit ethernet interfaces.
 *
 * 2) error-threshold is the threshold that causes the rule to report
 *    an anomaly. By default it is 1. This rule sets the dashboard
 *    color to red when *all* the error increases are at least
 *    'error-threshold' for 9m. If it sees any error increase for a
 *    period of less than 9m, it turns the color to yellow;
 *    otherwise the color is set to green.
 */
healthbot {
    topic interface.statistics {
        /*
         * check-nexus-interface-stats
         *
         * Reads per-interface rows from the CiscoNexusInterfaceTable iAgent
         * table every 3m and evaluates three triggers per interface-name key:
         *   - in-errors        : input error counter growth
         *   - interface-status : link status up / not up
         *   - out-errors       : output error counter growth
         *
         * Tunables are the 'error-threshold' and 'input-interface' variables
         * declared at the bottom of the rule.
         */
        rule check-nexus-interface-stats {
            keys interface-name;
            synopsis "Interface statistics analyzer";
            description "Monitors and notify interface statistics i.e. link state, input errors and output errors";

            /* Data source: iAgent table polled every 3m. */
            sensor interface {
                iAgent {
                    file CiscoNexusInterfaceTable.yml;
                    table CiscoNexusInterfaceTable;
                    frequency 3m;
                }
            }

            /* Admin status; also referenced by the 'where' filter on interface-name. */
            field admin-status {
                sensor interface {
                    path admin-status;
                }
                type string;
                description "Interface admin status";
            }

            /* Exposes the 'error-threshold' variable as a field so triggers can use $error-threshold. */
            field error-threshold {
                constant {
                    value "{{error-threshold}}";
                }
                type integer;
                description "Interface error threshold";
            }

            /* Input error counter; 0 is substituted when the sensor has no value. */
            field in-errors {
                sensor interface {
                    path in-errors;
                    data-if-missing {
                        value 0;
                    }
                }
                type integer;
                description "Interface input errors";
            }

            /* Input packet counter; not referenced by any trigger in this rule. */
            field in-packets {
                sensor interface {
                    path in-packets;
                    data-if-missing {
                        value 0;
                    }
                }
                type integer;
                description "Interface input packets";
            }

            /*
             * Rule key. Only rows whose admin-status matches /up/ and whose
             * name matches the 'input-interface' regex are kept.
             */
            field interface-name {
                sensor interface {
                    where "admin-status =~ /up/";
                    where "interface-name =~ /{{input-interface}}/";
                    path interface-name;
                }
                type string;
                description "Interface name";
            }

            /* Link status string evaluated by the interface-status trigger. */
            field link-status {
                sensor interface {
                    path link-status;
                }
                type string;
                description "Interface link status";
            }

            /* Output error counter; 0 is substituted when the sensor has no value. */
            field out-errors {
                sensor interface {
                    path out-errors;
                    data-if-missing {
                        value 0;
                    }
                }
                type integer;
                description "Interface output errors";
            }

            /* Output packet counter; not referenced by any trigger in this rule. */
            field out-packets {
                sensor interface {
                    path out-packets;
                    data-if-missing {
                        value 0;
                    }
                }
                type integer;
                description "Interface output packets";
            }

            /*
             * Input error trigger. Terms are listed red, yellow, green; the
             * last term has no 'when' clause and acts as the default.
             */
            trigger in-errors {
                frequency 1o;
                /* Red: in-errors grew by at least $error-threshold across the 3o range. */
                term is-error-count-increasing {
                    when {
                        increasing-at-least-by-value "$in-errors" {
                            value "$error-threshold";
                            time-range 3o;
                        }
                    }
                    then {
                        status {
                            color red;
                            message "In-error count $in-errors continuously increasing on $interface-name";
                        }
                    }
                }
                /*
                 * Yellow: 'any' increase of at least $error-threshold in the range.
                 * NOTE(review): the range is 3m here while the red term uses 3o;
                 * confirm the intended window for the intermittent check.
                 */
                term is-error-count-intermittent {
                    when {
                        increasing-at-least-by-value "$in-errors" {
                            value "$error-threshold";
                            time-range 3m;
                            any;
                        }
                    }
                    then {
                        status {
                            color yellow;
                            message "In-error count $in-errors is intermittent on $interface-name";
                        }
                    }
                }
                /* Green: default when neither term above matched. */
                term no-errors {
                    then {
                        status {
                            color green;
                            message "In-error count $in-errors is normal on $interface-name";
                        }
                    }
                }
            }

            /* Link status trigger: green when link-status matches "up", red otherwise. */
            trigger interface-status {
                frequency 1o;
                term is-interface-up {
                    when {
                        matches-with "$link-status" up;
                    }
                    then {
                        status {
                            color green;
                            message "$interface-name link status is $link-status";
                        }
                    }
                }
                /* Default term: anything other than "up" is reported red. */
                term interface-down {
                    then {
                        status {
                            color red;
                            message "$interface-name link status is $link-status";
                        }
                    }
                }
            }

            /*
             * Output error trigger. Same structure as the in-errors trigger,
             * applied to $out-errors.
             */
            trigger out-errors {
                frequency 1o;
                /* Red: out-errors grew by at least $error-threshold across the 3o range. */
                term is-error-count-increasing {
                    when {
                        increasing-at-least-by-value "$out-errors" {
                            value "$error-threshold";
                            time-range 3o;
                        }
                    }
                    then {
                        status {
                            color red;
                            message "Out-error count $out-errors continuously increasing on $interface-name";
                        }
                    }
                }
                /*
                 * Yellow: 'any' increase of at least $error-threshold in the range.
                 * The time-range and 'any' statements were missing here; they
                 * are set to the same values as the in-errors trigger so both
                 * directions are evaluated the same way.
                 * NOTE(review): confirm 3m is the intended window (see in-errors).
                 */
                term is-error-count-intermittent {
                    when {
                        increasing-at-least-by-value "$out-errors" {
                            value "$error-threshold";
                            time-range 3m;
                            any;
                        }
                    }
                    then {
                        status {
                            color yellow;
                            message "Out-error count $out-errors is intermittent on $interface-name";
                        }
                    }
                }
                /* Green: default when neither term above matched. */
                term no-errors {
                    then {
                        status {
                            color green;
                            message "Out-error count $out-errors is normal on $interface-name";
                        }
                    }
                }
            }

            /* Threshold used by both error triggers; defaults to 1. */
            variable error-threshold {
                value 1;
                description "Static error threshold value for input and output errors";
                type int;
            }

            /* Regex selecting which interfaces are monitored; defaults to all. */
            variable input-interface {
                value .*;
                description "Enter interface name in regex i.e. ge-.*";
                type string;
            }

            /* Declares the device family this rule applies to. */
            rule-properties {
                supported-devices {
                    other-vendor cisco {
                        vendor-name cisco;
                        operating-system nexus;
                    }
                }
            }
        }
    }
}
0 commit comments