From 98b2cfc7453f101e65eadfc491cde0a6873c623e Mon Sep 17 00:00:00 2001 From: Feng_Qi Date: Mon, 4 Sep 2017 11:04:52 +0800 Subject: [PATCH] 4.0.6.2 --- ChangeLog.md | 10 ++++++++++ README.md | 31 +++++++++++++++++++++++++++---- cfg.example.json | 3 ++- funcs/custmetric.go | 10 +++++++++- funcs/swsystem.go | 5 +++-- g/cfg.go | 13 +++---------- g/const.go | 3 ++- g/var.go | 33 +++++++++++++++++++++++++++++---- http/admin.go | 35 ++++++++++++++++++++++++++++++++++- 9 files changed, 119 insertions(+), 24 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index dea234b..de0e3ad 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -1,4 +1,14 @@ # Changelog # +## 4.0.6.2 ## +#### 新功能 #### +1. 增加了动态重载配置的功能,详见 [README](https://github.com/gaochao1/swcollector/blob/master/README.md) +#### bug修复 #### +1. 现在当 tag 为空时,debug 的时候应该不会重复的打印日志了 +#### 改进 #### +1. 现在自定义 Oid 采集时,可以支持 string 类型的返回了。系统会强制转换成 float64 上报,如果转换出错则抛出错误 +2. 现在对交换机类型的判断时,也会采取重试(由配置中的 retry 决定重试次数)来规避偶发性的异常了。 + + ## 4.0.6.1 ## #### bug修复 #### 1. 
现在当采集异常 Channel 关闭时,应该会正常的抛弃而不会给 transfer 上报一个空的 endpoint 了 diff --git a/README.md b/README.md index b67c89d..f11ce0d 100644 --- a/README.md +++ b/README.md @@ -125,11 +125,11 @@ swcollector需要部署到有交换机SNMP访问权限的服务器上。 "limitCon": 4 #对于单台交换机上,多个指标采集的并发限制 }, "switchhosts":{ - "enabled":true, + "enabled":false, "hosts":"./hosts.json" #自定义的host与Ip地址对应表,如果配置,则上报时会用这里的host替换ip地址 }, "customMetrics":{ - "enabled":true, + "enabled":false, "template":"./custom.json" #自定义的metric列表,如果配置,会根据这个配置文件,采集额外的自定义metric }, "transfer": { @@ -139,8 +139,9 @@ swcollector需要部署到有交换机SNMP访问权限的服务器上。 "timeout": 1000 }, "http": { - "enabled": true, - "listen": ":1989" + "enabled": false, + "listen": ":1989", + "trustIps":["192.168.0.1","192.168.0.2"] } } @@ -199,6 +200,28 @@ swcollector需要部署到有交换机SNMP访问权限的服务器上。 } ``` +#### 配置的热重载 +4.0.6.2 版本起,支持配置的热重载。修改配置后无需重启 swcollector 了。 +开启配置热重载需要开启 swcollector 的 http 模块。然后使用以下接口重载配置。 +``` +# curl http://127.0.0.1:1989/config/reload +``` +注意对于 transfer 的 interval 修改,热重载无效,还是需要重启 swcollector + +同时也可以使用下列接口来查看 swcollector 的相关信息(类似于 Open-Falcon 的官方的 agent) +``` +# curl http://127.0.0.1:1989/ips +查看当前 trustIp 的列表 +# curl http://127.0.0.1:1989/workdir +查看当前的工作目录 +# curl http://127.0.0.1:1989/exit +远程退出进程 +# curl http://127.0.0.1:1989/health +查看当前状态 +# curl http://127.0.0.1:1989/version +查看当前版本 +``` + #### 部署说明 由于是并发采集,因此每个周期的采集耗时,主要取决于被采集的交换机中,最慢的那个。 因此我们可以在 debug 模式下观察每个交换机的采集耗时。 diff --git a/cfg.example.json b/cfg.example.json index 5b709ba..ad2a9e1 100644 --- a/cfg.example.json +++ b/cfg.example.json @@ -53,6 +53,7 @@ }, "http": { "enabled": false, - "listen": ":1989" + "listen": ":1989", + "trustIps":["192.168.0.1","192.168.0.2"] } } diff --git a/funcs/custmetric.go b/funcs/custmetric.go index b825908..0b28845 100644 --- a/funcs/custmetric.go +++ b/funcs/custmetric.go @@ -3,6 +3,7 @@ package funcs import ( "errors" "log" + "strconv" "time" @@ -154,8 +155,15 @@ func interfaceTofloat64(v interface{}) (float64, error) { return float64(value), nil case float64: 
return value, nil + case string: + value_parsed, err := strconv.ParseFloat(value, 64) + if err != nil { + return 0, err + } else { + return value_parsed, nil + } default: - err = errors.New("value is not digital") + err = errors.New("value cannot not Parse to digital") return 0, err } } diff --git a/funcs/swsystem.go b/funcs/swsystem.go index 5897340..084d0ac 100644 --- a/funcs/swsystem.go +++ b/funcs/swsystem.go @@ -1,9 +1,10 @@ package funcs import ( + "log" + "github.com/gaochao1/sw" "github.com/gaochao1/swcollector/g" - "log" ) type SwSystem struct { @@ -71,7 +72,7 @@ func swSystemInfo(ip string, ch chan SwSystem) { swSystem.Mem = memUtili } - swModel, err := sw.SysModel(ip, g.Config().Switch.Community, timeout) + swModel, err := sw.SysModel(ip, g.Config().Switch.Community, timeout, 1) if err != nil { log.Println(err) } else { diff --git a/g/cfg.go b/g/cfg.go index 3980bd4..671e247 100644 --- a/g/cfg.go +++ b/g/cfg.go @@ -48,13 +48,6 @@ type SwitchConfig struct { FastPingMode bool `json:"fastPingMode"` } -type HeartbeatConfig struct { - Enabled bool `json:"enabled"` - Addr string `json:"addr"` - Interval int `json:"interval"` - Timeout int `json:"timeout"` -} - type TransferConfig struct { Enabled bool `json:"enabled"` Addr string `json:"addr"` @@ -63,8 +56,9 @@ type TransferConfig struct { } type HttpConfig struct { - Enabled bool `json:"enabled"` - Listen string `json:"listen"` + Enabled bool `json:"enabled"` + Listen string `json:"listen"` + TrustIps []string `json:trustIps` } type SwitchHostsConfig struct { @@ -81,7 +75,6 @@ type GlobalConfig struct { Debug bool `json:"debug"` Debugmetric *DebugmetricConfig `json:"debugmetric` Switch *SwitchConfig `json:"switch"` - Heartbeat *HeartbeatConfig `json:"heartbeat"` Transfer *TransferConfig `json:"transfer"` SwitchHosts *SwitchHostsConfig `json:switchhosts` CustomMetrics *CustomMetricsConfig `json:customMetrics` diff --git a/g/const.go b/g/const.go index 018cce6..3836932 100644 --- a/g/const.go +++ b/g/const.go 
@@ -19,7 +19,8 @@ import ( // 4.0.4 add lock on map;add limconn for switch snmp request // 4.0.5 add custom metric,custom host // 4.0.6.1 fix channal closed bug +// 4.0.6.2 fix Vendor bug;add remote config api const ( - VERSION = "4.0.6.1" + VERSION = "4.0.6.2" COLLECT_INTERVAL = time.Second ) diff --git a/g/var.go b/g/var.go index f6e691f..f976496 100644 --- a/g/var.go +++ b/g/var.go @@ -4,6 +4,9 @@ import ( "log" "os" "strings" + "sync" + + "github.com/toolkits/slice" "time" @@ -71,10 +74,6 @@ func SendToTransfer(metrics []*model.MetricValue) { if array_include(debug_Tags, metric_tags) { log.Printf("=> %v\n", len(metrics), metric) } - if debug_tags == "" { - log.Printf("=> %v\n", len(metrics), metric) - } - } } } @@ -113,3 +112,29 @@ func in_array(a string, array []string) bool { } return false } + +var ( + ips []string + ipsLock = new(sync.Mutex) +) + +func TrustableIps() []string { + ipsLock.Lock() + defer ipsLock.Unlock() + ips := Config().Http.TrustIps + return ips +} + +func IsTrustable(remoteAddr string) bool { + ip := remoteAddr + idx := strings.LastIndex(remoteAddr, ":") + if idx > 0 { + ip = remoteAddr[0:idx] + } + + if ip == "127.0.0.1" { + return true + } + + return slice.ContainsString(TrustableIps(), ip) +} diff --git a/http/admin.go b/http/admin.go index a9c5e90..157ce75 100644 --- a/http/admin.go +++ b/http/admin.go @@ -2,6 +2,10 @@ package http import ( "net/http" + "os" + "time" + + "github.com/gaochao1/swcollector/g" "github.com/toolkits/file" ) @@ -11,5 +15,34 @@ func configAdminRoutes() { http.HandleFunc("/workdir", func(w http.ResponseWriter, r *http.Request) { RenderDataJson(w, file.SelfDir()) }) - + http.HandleFunc("/ips", func(w http.ResponseWriter, r *http.Request) { + RenderDataJson(w, g.TrustableIps()) + }) + http.HandleFunc("/exit", func(w http.ResponseWriter, r *http.Request) { + if g.IsTrustable(r.RemoteAddr) { + w.Write([]byte("exiting...")) + go func() { + time.Sleep(time.Second) + os.Exit(0) + }() + } else { + w.Write([]byte("no 
privilege")) + } + }) + http.HandleFunc("/config/reload", func(w http.ResponseWriter, r *http.Request) { + if g.IsTrustable(r.RemoteAddr) { + g.ParseConfig(g.ConfigFile) + if g.Config().SwitchHosts.Enabled { + hostcfg := g.Config().SwitchHosts.Hosts + g.ParseHostConfig(hostcfg) + } + if g.Config().CustomMetrics.Enabled { + custMetrics := g.Config().CustomMetrics.Template + g.ParseCustConfig(custMetrics) + } + RenderDataJson(w, g.Config()) + } else { + w.Write([]byte("no privilege")) + } + }) }