diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c9040d9fa9..df38bb5bba 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -37,7 +37,7 @@ jobs: - name: Setup Go id: go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: stable cache: false diff --git a/.github/workflows/check-licenses.yml b/.github/workflows/check-licenses.yml index 583b55a0a1..c93a15c052 100644 --- a/.github/workflows/check-licenses.yml +++ b/.github/workflows/check-licenses.yml @@ -19,7 +19,7 @@ jobs: - name: Setup Go id: go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: stable cache: false diff --git a/.github/workflows/codeql-analysis-go.yml b/.github/workflows/codeql-analysis-go.yml index 5b42aa1f61..c647ca9e40 100644 --- a/.github/workflows/codeql-analysis-go.yml +++ b/.github/workflows/codeql-analysis-go.yml @@ -31,7 +31,7 @@ jobs: - name: Set up Go id: go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: cache: false go-version: stable diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 868d9116e4..c2beb1e4c2 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -33,7 +33,7 @@ jobs: - name: Setup Go id: go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: cache: false go-version: stable @@ -71,7 +71,7 @@ jobs: - name: Setup Go id: go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: cache: false go-version: stable @@ -105,7 +105,7 @@ jobs: - name: Setup Go id: go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: cache: false go-version: stable diff --git a/.github/workflows/vmui.yml b/.github/workflows/vmui.yml index 683d3d0903..04c023bef6 100644 --- a/.github/workflows/vmui.yml +++ b/.github/workflows/vmui.yml @@ -33,7 +33,7 @@ jobs: uses: actions/checkout@v5 - name: Setup Node - uses: actions/setup-node@v4 + uses: actions/setup-node@v5 with: node-version: '24.x' diff --git 
a/app/vlinsert/elasticsearch/elasticsearch.go b/app/vlinsert/elasticsearch/elasticsearch.go index bcf1390711..56b6d3993c 100644 --- a/app/vlinsert/elasticsearch/elasticsearch.go +++ b/app/vlinsert/elasticsearch/elasticsearch.go @@ -187,7 +187,10 @@ func readBulkLine(lr *insertutil.LineReader, timeFields, msgFields []string, lmp // Continue parsing next lines. return true, nil } + p := logstorage.GetJSONParser() + defer logstorage.PutJSONParser(p) + if err := p.ParseLogMessage(line); err != nil { return false, fmt.Errorf("cannot parse json-encoded log entry: %w", err) } @@ -201,7 +204,6 @@ func readBulkLine(lr *insertutil.LineReader, timeFields, msgFields []string, lmp } logstorage.RenameField(p.Fields, msgFields, "_msg") lmp.AddRow(ts, p.Fields, nil) - logstorage.PutJSONParser(p) return true, nil } diff --git a/app/vlinsert/opentelemetry/opentelemetry.go b/app/vlinsert/opentelemetry/opentelemetry.go index ebee79c51d..ad1db77f88 100644 --- a/app/vlinsert/opentelemetry/opentelemetry.go +++ b/app/vlinsert/opentelemetry/opentelemetry.go @@ -50,7 +50,7 @@ func handleProtobuf(r *http.Request, w http.ResponseWriter) { encoding := r.Header.Get("Content-Encoding") err = protoparserutil.ReadUncompressedData(r.Body, encoding, maxRequestSize, func(data []byte) error { - lmp := cp.NewLogMessageProcessor("opentelelemtry_protobuf", false) + lmp := cp.NewLogMessageProcessor("opentelemetry_protobuf", false) useDefaultStreamFields := len(cp.StreamFields) == 0 err := pushProtobufRequest(data, lmp, cp.MsgFields, useDefaultStreamFields) lmp.MustClose() diff --git a/app/vlinsert/syslog/syslog.go b/app/vlinsert/syslog/syslog.go index 46b2df8db3..3fefd4241f 100644 --- a/app/vlinsert/syslog/syslog.go +++ b/app/vlinsert/syslog/syslog.go @@ -35,65 +35,84 @@ var ( syslogTimezone = flag.String("syslog.timezone", "Local", "Timezone to use when parsing timestamps in RFC3164 syslog messages. Timezone must be a valid IANA Time Zone. "+ "For example: America/New_York, Europe/Berlin, Etc/GMT+3 . 
See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/") + listenAddrTCP = flagutil.NewArrayString("syslog.listenAddr.tcp", "Comma-separated list of TCP addresses to listen to for Syslog messages. "+ + "See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/") + listenAddrUDP = flagutil.NewArrayString("syslog.listenAddr.udp", "Comma-separated list of UDP addresses to listen to for Syslog messages. "+ + "See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/") + listenAddrUnix = flagutil.NewArrayString("syslog.listenAddr.unix", "Comma-separated list of Unix socket filepaths to listen to for Syslog messages. "+ + "Filepaths may be prepended with 'unixgram:' for listening for SOCK_DGRAM sockets. By default SOCK_STREAM sockets are used. "+ + "See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/") + + tlsEnable = flagutil.NewArrayBool("syslog.tls", "Whether to enable TLS for receiving syslog messages at the corresponding -syslog.listenAddr.tcp. "+ + "The corresponding -syslog.tlsCertFile and -syslog.tlsKeyFile must be set if -syslog.tls is set. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#security") + tlsCertFile = flagutil.NewArrayString("syslog.tlsCertFile", "Path to file with TLS certificate for the corresponding -syslog.listenAddr.tcp if the corresponding -syslog.tls is set. "+ + "Prefer ECDSA certs instead of RSA certs as RSA certs are slower. The provided certificate file is automatically re-read every second, so it can be dynamically updated. "+ + "See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#security") + tlsKeyFile = flagutil.NewArrayString("syslog.tlsKeyFile", "Path to file with TLS key for the corresponding -syslog.listenAddr.tcp if the corresponding -syslog.tls is set. "+ + "The provided key file is automatically re-read every second, so it can be dynamically updated. 
"+ + "See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#security") + tlsCipherSuites = flagutil.NewArrayString("syslog.tlsCipherSuites", "Optional list of TLS cipher suites for -syslog.listenAddr.tcp if -syslog.tls is set. "+ + "See the list of supported cipher suites at https://pkg.go.dev/crypto/tls#pkg-constants . "+ + "See also https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#security") + tlsMinVersion = flag.String("syslog.tlsMinVersion", "TLS13", "The minimum TLS version to use for -syslog.listenAddr.tcp if -syslog.tls is set. "+ + "Supported values: TLS10, TLS11, TLS12, TLS13. "+ + "See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#security") + streamFieldsTCP = flagutil.NewArrayString("syslog.streamFields.tcp", "Fields to use as log stream labels for logs ingested via the corresponding -syslog.listenAddr.tcp. "+ `See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#stream-fields`) streamFieldsUDP = flagutil.NewArrayString("syslog.streamFields.udp", "Fields to use as log stream labels for logs ingested via the corresponding -syslog.listenAddr.udp. "+ `See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#stream-fields`) + streamFieldsUnix = flagutil.NewArrayString("syslog.streamFields.unix", "Fields to use as log stream labels for logs ingested via the corresponding -syslog.listenAddr.unix. "+ + `See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#stream-fields`) ignoreFieldsTCP = flagutil.NewArrayString("syslog.ignoreFields.tcp", "Fields to ignore at logs ingested via the corresponding -syslog.listenAddr.tcp. "+ `See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#dropping-fields`) ignoreFieldsUDP = flagutil.NewArrayString("syslog.ignoreFields.udp", "Fields to ignore at logs ingested via the corresponding -syslog.listenAddr.udp. 
"+ `See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#dropping-fields`) + ignoreFieldsUnix = flagutil.NewArrayString("syslog.ignoreFields.unix", "Fields to ignore at logs ingested via the corresponding -syslog.listenAddr.unix. "+ + `See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#dropping-fields`) decolorizeFieldsTCP = flagutil.NewArrayString("syslog.decolorizeFields.tcp", "Fields to remove ANSI color codes across logs ingested via the corresponding -syslog.listenAddr.tcp. "+ `See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#decolorizing-fields`) decolorizeFieldsUDP = flagutil.NewArrayString("syslog.decolorizeFields.udp", "Fields to remove ANSI color codes across logs ingested via the corresponding -syslog.listenAddr.udp. "+ `See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#decolorizing-fields`) + decolorizeFieldsUnix = flagutil.NewArrayString("syslog.decolorizeFields.unix", "Fields to remove ANSI color codes across logs ingested via the corresponding -syslog.listenAddr.unix. "+ + `See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#decolorizing-fields`) extraFieldsTCP = flagutil.NewArrayString("syslog.extraFields.tcp", "Fields to add to logs ingested via the corresponding -syslog.listenAddr.tcp. "+ `See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#adding-extra-fields`) extraFieldsUDP = flagutil.NewArrayString("syslog.extraFields.udp", "Fields to add to logs ingested via the corresponding -syslog.listenAddr.udp. "+ `See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#adding-extra-fields`) + extraFieldsUnix = flagutil.NewArrayString("syslog.extraFields.unix", "Fields to add to logs ingested via the corresponding -syslog.listenAddr.unix. 
"+ + `See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#adding-extra-fields`) tenantIDTCP = flagutil.NewArrayString("syslog.tenantID.tcp", "TenantID for logs ingested via the corresponding -syslog.listenAddr.tcp. "+ "See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#multitenancy") tenantIDUDP = flagutil.NewArrayString("syslog.tenantID.udp", "TenantID for logs ingested via the corresponding -syslog.listenAddr.udp. "+ "See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#multitenancy") - - listenAddrTCP = flagutil.NewArrayString("syslog.listenAddr.tcp", "Comma-separated list of TCP addresses to listen to for Syslog messages. "+ - "See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/") - listenAddrUDP = flagutil.NewArrayString("syslog.listenAddr.udp", "Comma-separated list of UDP address to listen to for Syslog messages. "+ - "See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/") - - tlsEnable = flagutil.NewArrayBool("syslog.tls", "Whether to enable TLS for receiving syslog messages at the corresponding -syslog.listenAddr.tcp. "+ - "The corresponding -syslog.tlsCertFile and -syslog.tlsKeyFile must be set if -syslog.tls is set. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#security") - tlsCertFile = flagutil.NewArrayString("syslog.tlsCertFile", "Path to file with TLS certificate for the corresponding -syslog.listenAddr.tcp if the corresponding -syslog.tls is set. "+ - "Prefer ECDSA certs instead of RSA certs as RSA certs are slower. The provided certificate file is automatically re-read every second, so it can be dynamically updated. "+ - "See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#security") - tlsKeyFile = flagutil.NewArrayString("syslog.tlsKeyFile", "Path to file with TLS key for the corresponding -syslog.listenAddr.tcp if the corresponding -syslog.tls is set. 
"+ - "The provided key file is automatically re-read every second, so it can be dynamically updated. "+ - "See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#security") - tlsCipherSuites = flagutil.NewArrayString("syslog.tlsCipherSuites", "Optional list of TLS cipher suites for -syslog.listenAddr.tcp if -syslog.tls is set. "+ - "See the list of supported cipher suites at https://pkg.go.dev/crypto/tls#pkg-constants . "+ - "See also https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#security") - tlsMinVersion = flag.String("syslog.tlsMinVersion", "TLS13", "The minimum TLS version to use for -syslog.listenAddr.tcp if -syslog.tls is set. "+ - "Supported values: TLS10, TLS11, TLS12, TLS13. "+ - "See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#security") + tenantIDUnix = flagutil.NewArrayString("syslog.tenantID.unix", "TenantID for logs ingested via the corresponding -syslog.listenAddr.unix. "+ + "See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#multitenancy") compressMethodTCP = flagutil.NewArrayString("syslog.compressMethod.tcp", "Compression method for syslog messages received at the corresponding -syslog.listenAddr.tcp. "+ "Supported values: none, gzip, deflate. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#compression") compressMethodUDP = flagutil.NewArrayString("syslog.compressMethod.udp", "Compression method for syslog messages received at the corresponding -syslog.listenAddr.udp. "+ "Supported values: none, gzip, deflate. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#compression") + compressMethodUnix = flagutil.NewArrayString("syslog.compressMethod.unix", "Compression method for syslog messages received at the corresponding -syslog.listenAddr.unix. "+ + "Supported values: none, gzip, deflate. 
See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#compression") useLocalTimestampTCP = flagutil.NewArrayBool("syslog.useLocalTimestamp.tcp", "Whether to use local timestamp instead of the original timestamp for the ingested syslog messages "+ "at the corresponding -syslog.listenAddr.tcp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#log-timestamps") useLocalTimestampUDP = flagutil.NewArrayBool("syslog.useLocalTimestamp.udp", "Whether to use local timestamp instead of the original timestamp for the ingested syslog messages "+ "at the corresponding -syslog.listenAddr.udp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#log-timestamps") + useLocalTimestampUnix = flagutil.NewArrayBool("syslog.useLocalTimestamp.unix", "Whether to use local timestamp instead of the original timestamp for the ingested syslog messages "+ + "at the corresponding -syslog.listenAddr.unix. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#log-timestamps") useRemoteIPTCP = flagutil.NewArrayBool("syslog.useRemoteIP.tcp", "Whether to add remote ip address as 'remote_ip' log field for syslog messages ingested "+ "via the corresponding -syslog.listenAddr.tcp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#capturing-remote-ip-address") useRemoteIPUDP = flagutil.NewArrayBool("syslog.useRemoteIP.udp", "Whether to add remote ip address as 'remote_ip' log field for syslog messages ingested "+ "via the corresponding -syslog.listenAddr.udp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#capturing-remote-ip-address") + useRemoteIPUnix = flagutil.NewArrayBool("syslog.useRemoteIP.unix", "Whether to add remote ip address as 'remote_ip' log field for syslog messages ingested "+ + "via the corresponding -syslog.listenAddr.unix. 
See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#capturing-remote-ip-address") ) // MustInit initializes syslog parser at the given -syslog.listenAddr.tcp and -syslog.listenAddr.udp ports @@ -123,6 +142,14 @@ func MustInit() { }(addr, argIdx) } + for argIdx, addr := range *listenAddrUnix { + workersWG.Add(1) + go func(addr string, argIdx int) { + runUnixListener(addr, argIdx) + workersWG.Done() + }(addr, argIdx) + } + currentYear := time.Now().Year() globalCurrentYear.Store(int64(currentYear)) workersWG.Add(1) @@ -169,51 +196,91 @@ func MustStop() { workersStopCh = nil } -func runUDPListener(addr string, argIdx int) { - ln, err := net.ListenPacket(netutil.GetUDPNetwork(), addr) +func runUnixListener(addr string, argIdx int) { + cfg, err := getConfigs("unix", argIdx, streamFieldsUnix, ignoreFieldsUnix, decolorizeFieldsUnix, extraFieldsUnix, tenantIDUnix, compressMethodUnix, useLocalTimestampUnix, useRemoteIPUnix) if err != nil { - logger.Fatalf("cannot start UDP syslog server at %q: %s", addr, err) + logger.Fatalf("cannot parse configs for -syslog.listenAddr.unix=%q: %s", addr, err) } - tenantIDStr := tenantIDUDP.GetOptionalArg(argIdx) - tenantID, err := logstorage.ParseTenantID(tenantIDStr) + laddr := getUnixSocketNetworkAndPath(addr) + if laddr.Net == "unix" { + runUnixStreamListener(laddr, cfg) + } else { + runUnixPacketListener(laddr, cfg) + } +} + +func runUnixStreamListener(laddr *net.UnixAddr, cfg *configs) { + ln, err := net.ListenUnix(laddr.Net, laddr) if err != nil { - logger.Fatalf("cannot parse -syslog.tenantID.udp=%q for -syslog.listenAddr.udp=%q: %s", tenantIDStr, addr, err) + logger.Fatalf("cannot start Unix socket syslog server at %q: %s", laddr, err) } - compressMethod := compressMethodUDP.GetOptionalArg(argIdx) - checkCompressMethod(compressMethod, addr, "udp") + doneCh := make(chan struct{}) + go func() { + serveStreamListener(ln, cfg) + close(doneCh) + }() - useLocalTimestamp := useLocalTimestampUDP.GetOptionalArg(argIdx) 
- useRemoteIP := useRemoteIPUDP.GetOptionalArg(argIdx) + logger.Infof("started accepting syslog messages at %q", laddr) + <-workersStopCh + if err := ln.Close(); err != nil { + logger.Fatalf("syslog: cannot close UDP listener at %s: %s", laddr, err) + } + <-doneCh + logger.Infof("finished accepting syslog messages at -syslog.listenAddr.unix=%q", laddr) +} - streamFieldsStr := streamFieldsUDP.GetOptionalArg(argIdx) - streamFields, err := parseFieldsList(streamFieldsStr) +func runUnixPacketListener(laddr *net.UnixAddr, cfg *configs) { + ln, err := net.ListenUnixgram(laddr.Net, laddr) if err != nil { - logger.Fatalf("cannot parse -syslog.streamFields.udp=%q for -syslog.listenAddr.udp=%q: %s", streamFieldsStr, addr, err) + logger.Fatalf("cannot start Unix socket syslog server at %q: %s", laddr, err) } - ignoreFieldsStr := ignoreFieldsUDP.GetOptionalArg(argIdx) - ignoreFields, err := parseFieldsList(ignoreFieldsStr) - if err != nil { - logger.Fatalf("cannot parse -syslog.ignoreFields.udp=%q for -syslog.listenAddr.udp=%q: %s", ignoreFieldsStr, addr, err) + doneCh := make(chan struct{}) + go func() { + servePacketListener(ln, cfg) + close(doneCh) + }() + + logger.Infof("started accepting syslog messages at %q", laddr) + <-workersStopCh + if err := ln.Close(); err != nil { + logger.Fatalf("syslog: cannot close UDP listener at %s: %s", laddr, err) } + <-doneCh + logger.Infof("finished accepting syslog messages at %q", laddr) +} - decolorizeFieldsStr := decolorizeFieldsUDP.GetOptionalArg(argIdx) - decolorizeFields, err := parseFieldsList(decolorizeFieldsStr) +func getUnixSocketNetworkAndPath(addr string) *net.UnixAddr { + // An optional network such as unix or unixgram can be specified in front of addr and followed by ':' + n := strings.IndexByte(addr, ':') + if n < 0 { + return &net.UnixAddr{ + Net: "unix", + Name: addr, + } + } + return &net.UnixAddr{ + Net: addr[:n], + Name: addr[n+1:], + } +} + +func runUDPListener(addr string, argIdx int) { + ln, err := 
net.ListenPacket(netutil.GetUDPNetwork(), addr) if err != nil { - logger.Fatalf("cannot parse -syslog.decolorizeFields.udp=%q for -syslog.listenAddr.udp=%q: %s", decolorizeFieldsStr, addr, err) + logger.Fatalf("cannot start UDP syslog server at %q: %s", addr, err) } - extraFieldsStr := extraFieldsUDP.GetOptionalArg(argIdx) - extraFields, err := parseExtraFields(extraFieldsStr) + cfg, err := getConfigs("udp", argIdx, streamFieldsUDP, ignoreFieldsUDP, decolorizeFieldsUDP, extraFieldsUDP, tenantIDUDP, compressMethodUDP, useLocalTimestampUDP, useRemoteIPUDP) if err != nil { - logger.Fatalf("cannot parse -syslog.extraFields.udp=%q for -syslog.listenAddr.udp=%q: %s", extraFieldsStr, addr, err) + logger.Fatalf("cannot parse configs for -syslog.listenAddr.udp=%q: %s", addr, err) } doneCh := make(chan struct{}) go func() { - serveUDP(ln, tenantID, compressMethod, useLocalTimestamp, useRemoteIP, streamFields, ignoreFields, decolorizeFields, extraFields) + servePacketListener(ln, cfg) close(doneCh) }() @@ -243,45 +310,14 @@ func runTCPListener(addr string, argIdx int) { logger.Fatalf("syslog: cannot start TCP listener at %s: %s", addr, err) } - tenantIDStr := tenantIDTCP.GetOptionalArg(argIdx) - tenantID, err := logstorage.ParseTenantID(tenantIDStr) - if err != nil { - logger.Fatalf("cannot parse -syslog.tenantID.tcp=%q for -syslog.listenAddr.tcp=%q: %s", tenantIDStr, addr, err) - } - - compressMethod := compressMethodTCP.GetOptionalArg(argIdx) - checkCompressMethod(compressMethod, addr, "tcp") - - useLocalTimestamp := useLocalTimestampTCP.GetOptionalArg(argIdx) - useRemoteIP := useRemoteIPTCP.GetOptionalArg(argIdx) - - streamFieldsStr := streamFieldsTCP.GetOptionalArg(argIdx) - streamFields, err := parseFieldsList(streamFieldsStr) - if err != nil { - logger.Fatalf("cannot parse -syslog.streamFields.tcp=%q for -syslog.listenAddr.tcp=%q: %s", streamFieldsStr, addr, err) - } - - ignoreFieldsStr := ignoreFieldsTCP.GetOptionalArg(argIdx) - ignoreFields, err := 
parseFieldsList(ignoreFieldsStr) - if err != nil { - logger.Fatalf("cannot parse -syslog.ignoreFields.tcp=%q for -syslog.listenAddr.tcp=%q: %s", ignoreFieldsStr, addr, err) - } - - decolorizeFieldsStr := decolorizeFieldsTCP.GetOptionalArg(argIdx) - decolorizeFields, err := parseFieldsList(decolorizeFieldsStr) - if err != nil { - logger.Fatalf("cannot parse -syslog.decolorizeFields.tcp=%q for -syslog.listenAddr.tcp=%q: %s", decolorizeFieldsStr, addr, err) - } - - extraFieldsStr := extraFieldsTCP.GetOptionalArg(argIdx) - extraFields, err := parseExtraFields(extraFieldsStr) + cfg, err := getConfigs("tcp", argIdx, streamFieldsTCP, ignoreFieldsTCP, decolorizeFieldsTCP, extraFieldsTCP, tenantIDTCP, compressMethodTCP, useLocalTimestampTCP, useRemoteIPTCP) if err != nil { - logger.Fatalf("cannot parse -syslog.extraFields.tcp=%q for -syslog.listenAddr.tcp=%q: %s", extraFieldsStr, addr, err) + logger.Fatalf("cannot parse configs for -syslog.listenAddr.tcp=%q: %s", addr, err) } doneCh := make(chan struct{}) go func() { - serveTCP(ln, tenantID, compressMethod, useLocalTimestamp, useRemoteIP, streamFields, ignoreFields, decolorizeFields, extraFields) + serveStreamListener(ln, cfg) close(doneCh) }() @@ -294,16 +330,7 @@ func runTCPListener(addr string, argIdx int) { logger.Infof("finished accepting syslog messages at -syslog.listenAddr.tcp=%q", addr) } -func checkCompressMethod(compressMethod, addr, protocol string) { - switch compressMethod { - case "", "none", "zstd", "gzip", "deflate": - return - default: - logger.Fatalf("unsupported -syslog.compressMethod.%s=%q for -syslog.listenAddr.%s=%q; supported values: 'none', 'zstd', 'gzip', 'deflate'", protocol, compressMethod, protocol, addr) - } -} - -func serveUDP(ln net.PacketConn, tenantID logstorage.TenantID, encoding string, useLocalTimestamp bool, useRemoteIP bool, streamFields, ignoreFields, decolorizeFields []string, extraFields []logstorage.Field) { +func servePacketListener(ln net.PacketConn, cfg *configs) { gomaxprocs := 
cgroup.AvailableCPUs() var wg sync.WaitGroup localAddr := ln.LocalAddr() @@ -311,7 +338,7 @@ func serveUDP(ln net.PacketConn, tenantID logstorage.TenantID, encoding string, wg.Add(1) go func() { defer wg.Done() - cp := insertutil.GetCommonParamsForSyslog(tenantID, streamFields, ignoreFields, decolorizeFields, extraFields) + cp := insertutil.GetCommonParamsForSyslog(cfg.tenantID, cfg.streamFields, cfg.ignoreFields, cfg.decolorizeFields, cfg.extraFields) var bb bytesutil.ByteBuffer bb.B = bytesutil.ResizeNoCopyNoOverallocate(bb.B, 64*1024) for { @@ -323,7 +350,7 @@ func serveUDP(ln net.PacketConn, tenantID logstorage.TenantID, encoding string, var ne net.Error if errors.As(err, &ne) { if ne.Temporary() { - logger.Errorf("syslog: temporary error when listening for UDP at %q: %s", localAddr, err) + logger.Errorf("syslog: temporary error when listening for %s at %q: %s", cfg.typ, localAddr, err) time.Sleep(time.Second) continue } @@ -331,16 +358,16 @@ func serveUDP(ln net.PacketConn, tenantID logstorage.TenantID, encoding string, break } } - logger.Errorf("syslog: cannot read UDP data from %s at %s: %s", remoteAddr, localAddr, err) + logger.Errorf("syslog: cannot read %s data from %s at %s: %s", cfg.typ, remoteAddr, localAddr, err) continue } bb.B = bb.B[:n] udpRequestsTotal.Inc() - remoteIP := getRemoteIP(remoteAddr, useRemoteIP) + remoteIP := getRemoteIP(remoteAddr, cfg.useRemoteIP) - if err := processStream("udp", bb.NewReader(), encoding, useLocalTimestamp, remoteIP, cp); err != nil { - logger.Errorf("syslog: cannot process UDP data from %s at %s: %s", remoteAddr, localAddr, err) + if err := processStream(cfg.typ, bb.NewReader(), cfg.compressMethod, cfg.useLocalTimestamp, remoteIP, cp); err != nil { + logger.Errorf("syslog: cannot process %s data from %s at %s: %s", cfg.typ, remoteAddr, localAddr, err) } } }() @@ -348,7 +375,7 @@ func serveUDP(ln net.PacketConn, tenantID logstorage.TenantID, encoding string, wg.Wait() } -func serveTCP(ln net.Listener, tenantID 
logstorage.TenantID, encoding string, useLocalTimestamp bool, useRemoteIP bool, streamFields, ignoreFields, decolorizeFields []string, extraFields []logstorage.Field) { +func serveStreamListener(ln net.Listener, cfg *configs) { var cm ingestserver.ConnsMap cm.Init("syslog") @@ -360,16 +387,16 @@ func serveTCP(ln net.Listener, tenantID logstorage.TenantID, encoding string, us var ne net.Error if errors.As(err, &ne) { if ne.Temporary() { - logger.Errorf("syslog: temporary error when listening for TCP addr %q: %s", addr, err) + logger.Errorf("syslog: temporary error when listening for %s addr %q: %s", cfg.typ, addr, err) time.Sleep(time.Second) continue } if strings.Contains(err.Error(), "use of closed network connection") { break } - logger.Fatalf("syslog: unrecoverable error when accepting TCP connections at %q: %s", addr, err) + logger.Fatalf("syslog: unrecoverable error when accepting %s connections at %q: %s", cfg.typ, addr, err) } - logger.Fatalf("syslog: unexpected error when accepting TCP connections at %q: %s", addr, err) + logger.Fatalf("syslog: unexpected error when accepting %s connections at %q: %s", cfg.typ, addr, err) } if !cm.Add(c) { _ = c.Close() @@ -378,12 +405,12 @@ func serveTCP(ln net.Listener, tenantID logstorage.TenantID, encoding string, us wg.Add(1) go func() { - cp := insertutil.GetCommonParamsForSyslog(tenantID, streamFields, ignoreFields, decolorizeFields, extraFields) + cp := insertutil.GetCommonParamsForSyslog(cfg.tenantID, cfg.streamFields, cfg.ignoreFields, cfg.decolorizeFields, cfg.extraFields) remoteAddr := c.RemoteAddr() - remoteIP := getRemoteIP(remoteAddr, useRemoteIP) - if err := processStream("tcp", c, encoding, useLocalTimestamp, remoteIP, cp); err != nil { - logger.Errorf("syslog: cannot process TCP data at %q: %s", addr, err) + remoteIP := getRemoteIP(remoteAddr, cfg.useRemoteIP) + if err := processStream(cfg.typ, c, cfg.compressMethod, cfg.useLocalTimestamp, remoteIP, cp); err != nil { + logger.Errorf("syslog: cannot process 
%s data at %q: %s", cfg.typ, addr, err) } cm.Delete(c) @@ -397,20 +424,20 @@ func serveTCP(ln net.Listener, tenantID logstorage.TenantID, encoding string, us } // processStream parses a stream of syslog messages from r and ingests them into vlstorage. -func processStream(protocol string, r io.Reader, encoding string, useLocalTimestamp bool, remoteIP string, cp *insertutil.CommonParams) error { +func processStream(protocol string, r io.Reader, compressMethod string, useLocalTimestamp bool, remoteIP string, cp *insertutil.CommonParams) error { if err := insertutil.CanWriteData(); err != nil { return err } lmp := cp.NewLogMessageProcessor("syslog_"+protocol, true) - err := processStreamInternal(r, encoding, useLocalTimestamp, remoteIP, lmp) + err := processStreamInternal(r, compressMethod, useLocalTimestamp, remoteIP, lmp) lmp.MustClose() return err } -func processStreamInternal(r io.Reader, encoding string, useLocalTimestamp bool, remoteIP string, lmp insertutil.LogMessageProcessor) error { - reader, err := protoparserutil.GetUncompressedReader(r, encoding) +func processStreamInternal(r io.Reader, compressMethod string, useLocalTimestamp bool, remoteIP string, lmp insertutil.LogMessageProcessor) error { + reader, err := protoparserutil.GetUncompressedReader(r, compressMethod) if err != nil { return fmt.Errorf("cannot decode syslog data: %w", err) } @@ -647,3 +674,73 @@ func parseExtraFields(s string) ([]logstorage.Field, error) { }) return fields, nil } + +type configs struct { + typ string + + streamFields []string + ignoreFields []string + decolorizeFields []string + extraFields []logstorage.Field + tenantID logstorage.TenantID + compressMethod string + useLocalTimestamp bool + useRemoteIP bool +} + +func getConfigs(typ string, argIdx int, streamFieldsArg, ignoreFieldsArg, decolorizeFieldsArg, extraFieldsArg, tenantIDArg, compressMethodArg *flagutil.ArrayString, + useLocalTimestampArg, useRemoteIPArg *flagutil.ArrayBool) (*configs, error) { + + streamFieldsStr := 
streamFieldsArg.GetOptionalArg(argIdx) + streamFields, err := parseFieldsList(streamFieldsStr) + if err != nil { + return nil, fmt.Errorf("cannot parse -syslog.streamFields.%s=%q: %w", typ, streamFieldsStr, err) + } + + ignoreFieldsStr := ignoreFieldsArg.GetOptionalArg(argIdx) + ignoreFields, err := parseFieldsList(ignoreFieldsStr) + if err != nil { + return nil, fmt.Errorf("cannot parse -syslog.ignoreFields.%s=%q: %w", typ, ignoreFieldsStr, err) + } + + decolorizeFieldsStr := decolorizeFieldsArg.GetOptionalArg(argIdx) + decolorizeFields, err := parseFieldsList(decolorizeFieldsStr) + if err != nil { + return nil, fmt.Errorf("cannot parse -syslog.decolorizeFields.%s=%q: %w", typ, decolorizeFieldsStr, err) + } + + extraFieldsStr := extraFieldsArg.GetOptionalArg(argIdx) + extraFields, err := parseExtraFields(extraFieldsStr) + if err != nil { + return nil, fmt.Errorf("cannot parse -syslog.extraFields.%s=%q: %w", typ, extraFieldsStr, err) + } + + tenantIDStr := tenantIDArg.GetOptionalArg(argIdx) + tenantID, err := logstorage.ParseTenantID(tenantIDStr) + if err != nil { + return nil, fmt.Errorf("cannot parse -syslog.tenantID.%s=%q: %w", typ, tenantIDStr, err) + } + + compressMethod := compressMethodArg.GetOptionalArg(argIdx) + switch compressMethod { + case "", "none", "zstd", "gzip", "deflate": + // These methods are supported + default: + return nil, fmt.Errorf("unsupported -syslog.compressMethod.%s=%q; supported values: 'none', 'zstd', 'gzip', 'deflate'", typ, compressMethod) + } + + useLocalTimestamp := useLocalTimestampArg.GetOptionalArg(argIdx) + useRemoteIP := useRemoteIPArg.GetOptionalArg(argIdx) + + return &configs{ + typ: typ, + streamFields: streamFields, + ignoreFields: ignoreFields, + decolorizeFields: decolorizeFields, + extraFields: extraFields, + tenantID: tenantID, + compressMethod: compressMethod, + useLocalTimestamp: useLocalTimestamp, + useRemoteIP: useRemoteIP, + }, nil +} diff --git a/app/vlselect/internalselect/internalselect.go 
b/app/vlselect/internalselect/internalselect.go index d0d10b294d..75f23863df 100644 --- a/app/vlselect/internalselect/internalselect.go +++ b/app/vlselect/internalselect/internalselect.go @@ -2,6 +2,7 @@ package internalselect import ( "context" + "flag" "fmt" "net/http" "strconv" @@ -21,10 +22,42 @@ import ( "github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage" ) +var maxConcurrentRequests = flag.Int("internalselect.maxConcurrentRequests", 100, "The limit on the number of concurrent requests to /internal/select/* endpoints; "+ + "other requests are put into the wait queue; see https://docs.victoriametrics.com/victorialogs/cluster/") + // RequestHandler processes requests to /internal/select/* func RequestHandler(ctx context.Context, w http.ResponseWriter, r *http.Request) { startTime := time.Now() + select { + case concurrencyLimitCh <- struct{}{}: + if d := time.Since(startTime); d > 100*time.Millisecond { + // Measure the wait duration for requests, which hit the concurrency limit and waited for more than 100 milliseconds to be executed. + concurrentRequestsWaitDuration.Update(d.Seconds()) + } + requestHandler(ctx, w, r, startTime) + <-concurrencyLimitCh + case <-ctx.Done(): + // Unconditionally measure the wait time until the request is canceled by the client. + concurrentRequestsWaitDuration.UpdateDuration(startTime) + } +} + +// Init initializes internalselect package. 
+func Init() { + concurrencyLimitCh = make(chan struct{}, *maxConcurrentRequests) +} + +// Stop stops internalselect package. +func Stop() { + concurrencyLimitCh = nil +} + +var concurrencyLimitCh chan struct{} + +var concurrentRequestsWaitDuration = metrics.NewSummary(`vl_concurrent_internalselect_requests_wait_duration`) + +func requestHandler(ctx context.Context, w http.ResponseWriter, r *http.Request, startTime time.Time) { path := r.URL.Path rh := requestHandlers[path] if rh == nil { @@ -277,14 +310,18 @@ type commonParams struct { TenantIDs []logstorage.TenantID Query *logstorage.Query + // Whether to disable compression of the response sent to the vlselect. DisableCompression bool + // Whether to allow partial response when some of vlstorage nodes are unavailable. + AllowPartialResponse bool + // qs contains execution statistics for the Query. qs logstorage.QueryStats } func (cp *commonParams) NewQueryContext(ctx context.Context) *logstorage.QueryContext { - return logstorage.NewQueryContext(ctx, &cp.qs, cp.TenantIDs, cp.Query) + return logstorage.NewQueryContext(ctx, &cp.qs, cp.TenantIDs, cp.Query, cp.AllowPartialResponse) } func (cp *commonParams) UpdatePerQueryStatsMetrics() { @@ -314,10 +351,14 @@ func getCommonParams(r *http.Request, expectedProtocolVersion string) (*commonPa return nil, fmt.Errorf("cannot unmarshal query=%q: %w", qStr, err) } - s := r.FormValue("disable_compression") - disableCompression, err := strconv.ParseBool(s) - if err != nil { - return nil, fmt.Errorf("cannot parse disable_compression=%q: %w", s, err) + disableCompression := false + if err := getBoolFromRequest(&disableCompression, r, "disable_compression"); err != nil { + return nil, err + } + + allowPartialResponse := false + if err := getBoolFromRequest(&allowPartialResponse, r, "allow_partial_response"); err != nil { + return nil, err } cp := &commonParams{ @@ -325,6 +366,8 @@ func getCommonParams(r *http.Request, expectedProtocolVersion string) (*commonPa Query: q, DisableCompression: 
disableCompression, + + AllowPartialResponse: allowPartialResponse, } return cp, nil } @@ -369,3 +412,16 @@ func getInt64FromRequest(r *http.Request, argName string) (int64, error) { } return n, nil } + +func getBoolFromRequest(dst *bool, r *http.Request, argName string) error { + s := r.FormValue(argName) + if s == "" { + return nil + } + b, err := strconv.ParseBool(s) + if err != nil { + return fmt.Errorf("cannot parse %s=%q as bool: %w", argName, s, err) + } + *dst = b + return nil +} diff --git a/app/vlselect/logsql/logsql.go b/app/vlselect/logsql/logsql.go index 0c4ebb656f..57bc8e3170 100644 --- a/app/vlselect/logsql/logsql.go +++ b/app/vlselect/logsql/logsql.go @@ -17,6 +17,7 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/lib/atomicutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver" "github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" @@ -29,8 +30,12 @@ import ( ) var ( - maxQueryTimeRange = flag.Duration("search.maxQueryTimeRange", 0, "The maximum time range, which can be set in the query sent to querying APIs. "+ + maxQueryTimeRange = flagutil.NewExtendedDuration("search.maxQueryTimeRange", "0", "The maximum time range, which can be set in the query sent to querying APIs. "+ "Queries with bigger time ranges are rejected. See https://docs.victoriametrics.com/victorialogs/querying/#resource-usage-limits") + + allowPartialResponseFlag = flag.Bool("search.allowPartialResponse", false, "Whether to allow returning partial responses when some of vlstorage nodes "+ + "from the -storageNode list are unavailable for querying. This flag works only for cluster setup of VictoriaLogs. "+ + "See https://docs.victoriametrics.com/victorialogs/querying/#partial-responses") ) // ProcessFacetsRequest handles /select/logsql/facets request. 
@@ -79,9 +84,19 @@ func ProcessFacetsRequest(ctx context.Context, w http.ResponseWriter, r *http.Re logger.Panicf("BUG: expecting 3 columns; got %d columns", len(columns)) } - fieldNames := columns[0].Values - fieldValues := columns[1].Values - hits := columns[2].Values + // Fetch columns by name to avoid relying on column ordering at VictoriaLogs cluster. + // See https://github.com/VictoriaMetrics/VictoriaLogs/issues/648 + cFieldName := db.GetColumnByName("field_name") + cFieldValue := db.GetColumnByName("field_value") + cHits := db.GetColumnByName("hits") + if cFieldName == nil || cFieldValue == nil || cHits == nil { + logger.Panicf("BUG: missing expected columns for facets response: field_name=%v, field_value=%v, hits=%v", + cFieldName != nil, cFieldValue != nil, cHits != nil) + } + + fieldNames := cFieldName.Values + fieldValues := cFieldValue.Values + hits := cHits.Values bb := blockResultPool.Get() for i := range fieldNames { @@ -1093,6 +1108,10 @@ type commonArgs struct { // tenantIDs is the list of tenantIDs to query. tenantIDs []logstorage.TenantID + // Whether to allow partial response when some of vlstorage nodes are unavailable for querying. + // This option makes sense only for cluster setup when vlselect queries vlstorage nodes. + allowPartialResponse bool + // minTimestamp and maxTimestamp is the time range specified in the original query, // without taking into account extra_filters and (start, end) query args. 
minTimestamp int64 @@ -1103,7 +1122,7 @@ type commonArgs struct { } func (ca *commonArgs) newQueryContext(ctx context.Context) *logstorage.QueryContext { - return logstorage.NewQueryContext(ctx, &ca.qs, ca.tenantIDs, ca.q) + return logstorage.NewQueryContext(ctx, &ca.qs, ca.tenantIDs, ca.q, ca.allowPartialResponse) } func (ca *commonArgs) updatePerQueryStatsMetrics() { @@ -1200,22 +1219,29 @@ func parseCommonArgs(r *http.Request) (*commonArgs, error) { } } - if *maxQueryTimeRange > 0 { + if maxRange := maxQueryTimeRange.Duration(); maxRange > 0 { start, end := q.GetFilterTimeRange() if end > start { queryTimeRange := end - start - if queryTimeRange < 0 || queryTimeRange > maxQueryTimeRange.Nanoseconds() { + if queryTimeRange < 0 || queryTimeRange > maxRange.Nanoseconds() { return nil, fmt.Errorf("too big time range selected: [%s, %s]; it cannot exceed -search.maxQueryTimeRange=%s; "+ "see https://docs.victoriametrics.com/victorialogs/querying/#resource-usage-limits", - timestampToString(start), timestampToString(end), *maxQueryTimeRange) + timestampToString(start), timestampToString(end), maxRange) } } } + allowPartialResponse := *allowPartialResponseFlag + if err := getBoolFromRequest(&allowPartialResponse, r, "allow_partial_response"); err != nil { + return nil, err + } + ca := &commonArgs{ q: q, tenantIDs: tenantIDs, + allowPartialResponse: allowPartialResponse, + minTimestamp: minTimestamp, maxTimestamp: maxTimestamp, } @@ -1363,6 +1389,19 @@ func getPositiveInt(r *http.Request, argName string) (int, error) { return n, nil } +func getBoolFromRequest(dst *bool, r *http.Request, argName string) error { + s := r.FormValue(argName) + if s == "" { + return nil + } + b, err := strconv.ParseBool(s) + if err != nil { + return fmt.Errorf("cannot parse %s=%q as bool: %w", argName, s, err) + } + *dst = b + return nil +} + func writeRequestDuration(h http.Header, startTime time.Time) { h.Set("Access-Control-Expose-Headers", "VL-Request-Duration-Seconds") 
h.Set("VL-Request-Duration-Seconds", fmt.Sprintf("%.3f", time.Since(startTime).Seconds())) diff --git a/app/vlselect/main.go b/app/vlselect/main.go index 4670d8bf3e..7b52ff23f1 100644 --- a/app/vlselect/main.go +++ b/app/vlselect/main.go @@ -48,10 +48,15 @@ func getDefaultMaxConcurrentRequests() int { // Init initializes vlselect func Init() { concurrencyLimitCh = make(chan struct{}, *maxConcurrentRequests) + + internalselect.Init() } // Stop stops vlselect func Stop() { + internalselect.Stop() + + concurrencyLimitCh = nil } var concurrencyLimitCh chan struct{} diff --git a/app/vlstorage/lastnoptimization.go b/app/vlstorage/lastnoptimization.go index 7cee3a6832..88bf5100b8 100644 --- a/app/vlstorage/lastnoptimization.go +++ b/app/vlstorage/lastnoptimization.go @@ -38,10 +38,9 @@ func runOptimizedLastNResultsQuery(qctx *logstorage.QueryContext, offset, limit } func getLastNQueryResults(qctx *logstorage.QueryContext, limit uint64) ([]logRow, error) { - qOrig := qctx.Query - timestamp := qOrig.GetTimestamp() + timestamp := qctx.Query.GetTimestamp() - q := qOrig.Clone(timestamp) + q := qctx.Query.Clone(timestamp) q.AddPipeOffsetLimit(0, 2*limit) qctxLocal := qctx.WithQuery(q) rows, err := getQueryResults(qctxLocal) @@ -56,15 +55,14 @@ func getLastNQueryResults(qctx *logstorage.QueryContext, limit uint64) ([]logRow // Slow path - use binary search for adjusting time range for selecting up to 2*limit rows. start, end := q.GetFilterTimeRange() - d := end/2 - start/2 - start += d + start += end/2 - start/2 n := limit var rowsFound []logRow var lastNonEmptyRows []logRow for { - q = qOrig.CloneWithTimeFilter(timestamp, start, end) + q = qctx.Query.CloneWithTimeFilter(timestamp, start, end) q.AddPipeOffsetLimit(0, 2*n) qctxLocal := qctx.WithQuery(q) rows, err := getQueryResults(qctxLocal) @@ -72,45 +70,61 @@ func getLastNQueryResults(qctx *logstorage.QueryContext, limit uint64) ([]logRow return nil, err } - if d == 0 || start >= end { - // The [start ... 
end] time range equals to one nanosecond, e.g. it cannot be adjusted more. Return up to limit rows - // from the found rows and the last non-empty rows. - rowsFound = append(rowsFound, rows...) + if end/2-start/2 <= 0 { + // The [start ... end] time range doesn't exceed a nanosecond, e.g. it cannot be adjusted more. + // Return up to limit rows from the found rows and the last non-empty rows. rowsFound = append(rowsFound, lastNonEmptyRows...) + rowsFound = append(rowsFound, rows...) rowsFound = getLastNRows(rowsFound, limit) return rowsFound, nil } - dLastBit := d & 1 - d /= 2 - if uint64(len(rows)) >= 2*n { // The number of found rows on the [start ... end] time range exceeds 2*n, - // so reduce the time range to further to [start+d ... end]. - start += d + // so search for the rows on the adjusted time range [start+(end/2-start/2) ... end]. + if !logstorage.CanApplyLastNResultsOptimization(start, end) { + // It is faster obtaining the last N logs as is on such a small time range instead of using binary search. + rows, err := getLogRowsLastN(qctx, start, end, n) + if err != nil { + return nil, err + } + rowsFound = append(rowsFound, rows...) + rowsFound = getLastNRows(rowsFound, limit) + return rowsFound, nil + } + start += end/2 - start/2 lastNonEmptyRows = rows continue } - if uint64(len(rows)) >= n { - // The number of found rows is in the range [n ... 2*n). - // This means that found rows contains the needed limit rows with the biggest timestamps. + if uint64(len(rowsFound)+len(rows)) >= limit { + // The found rows contains the needed limit rows with the biggest timestamps. rowsFound = append(rowsFound, rows...) rowsFound = getLastNRows(rowsFound, limit) return rowsFound, nil } - // The number of found rows on [start ... end] time range is below the limit. - // This means the time range doesn't cover the needed logs, so it must be extended. 
- // Append the found rows to rowsFound, adjust the limit, so it doesn't take into account already found rows - // and adjust the time range to search logs to [start-d ... start). + // The number of found rows is below the limit. This means the [start ... end] time range + // doesn't cover the needed logs, so it must be extended. + // Append the found rows to rowsFound, adjust n, so it doesn't take into account already found rows + // and adjust the time range to search logs at [start-(end/2-start/2) ... start). rowsFound = append(rowsFound, rows...) n -= uint64(len(rows)) + d := end/2 - start/2 end = start - 1 - start -= d + dLastBit + start -= d } } +func getLogRowsLastN(qctx *logstorage.QueryContext, start, end int64, n uint64) ([]logRow, error) { + timestamp := qctx.Query.GetTimestamp() + q := qctx.Query.CloneWithTimeFilter(timestamp, start, end) + q.AddPipeSortByTimeDesc() + q.AddPipeOffsetLimit(0, n) + qctxLocal := qctx.WithQuery(q) + return getQueryResults(qctxLocal) +} + func getQueryResults(qctx *logstorage.QueryContext) ([]logRow, error) { var rowsLock sync.Mutex var rows []logRow diff --git a/app/vlstorage/main.go b/app/vlstorage/main.go index 81035c7e59..9abf343245 100644 --- a/app/vlstorage/main.go +++ b/app/vlstorage/main.go @@ -8,6 +8,7 @@ import ( "net/http" "time" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup" "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs" "github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver" @@ -24,6 +25,11 @@ var ( retentionPeriod = flagutil.NewRetentionDuration("retentionPeriod", "7d", "Log entries with timestamps older than now-retentionPeriod are automatically deleted; "+ "log entries with timestamps outside the retention are also rejected during data ingestion; the minimum supported retention is 1d (one day); "+ "see https://docs.victoriametrics.com/victorialogs/#retention ; see also -retention.maxDiskSpaceUsageBytes and 
-retention.maxDiskUsagePercent") + + defaultParallelReaders = flag.Int("defaultParallelReaders", 2*cgroup.AvailableCPUs(), "Default number of parallel data readers to use for executing every query; "+ + "higher number of readers may help increasing query performance on high-latency storage such as NFS or S3 at the cost of higher RAM usage; "+ + "see https://docs.victoriametrics.com/victorialogs/logsql/#parallel_readers-query-option") + maxDiskSpaceUsageBytes = flagutil.NewBytes("retention.maxDiskSpaceUsageBytes", 0, "The maximum disk space usage at -storageDataPath before older per-day "+ "partitions are automatically dropped; see https://docs.victoriametrics.com/victorialogs/#retention-by-disk-space-usage ; see also -retentionPeriod") maxDiskUsagePercent = flag.Int("retention.maxDiskUsagePercent", 0, "The maximum allowed disk usage percentage (1-100) for the filesystem that contains -storageDataPath before older per-day partitions are automatically dropped; mutually exclusive with -retention.maxDiskSpaceUsageBytes; see https://docs.victoriametrics.com/victorialogs/#retention-by-disk-space-usage-percent") @@ -59,6 +65,8 @@ var ( "Disabled compression reduces CPU usage at the cost of higher network usage") storageNodeUsername = flagutil.NewArrayString("storageNode.username", "Optional basic auth username to use for the corresponding -storageNode") + storageNodeUsernameFile = flagutil.NewArrayString("storageNode.usernameFile", "Optional path to basic auth username to use for the corresponding -storageNode. "+ + "The file is re-read every second") storageNodePassword = flagutil.NewArrayString("storageNode.password", "Optional basic auth password to use for the corresponding -storageNode") storageNodePasswordFile = flagutil.NewArrayString("storageNode.passwordFile", "Optional path to basic auth password to use for the corresponding -storageNode. 
"+ "The file is re-read every second") @@ -113,6 +121,7 @@ func initLocalStorage() { } cfg := &logstorage.StorageConfig{ Retention: retentionPeriod.Duration(), + DefaultParallelReaders: *defaultParallelReaders, MaxDiskSpaceUsageBytes: maxDiskSpaceUsageBytes.N, MaxDiskUsagePercent: *maxDiskUsagePercent, FlushInterval: *inmemoryDataFlushInterval, @@ -163,12 +172,14 @@ func initNetworkStorage() { func newAuthConfigForStorageNode(argIdx int) *promauth.Config { username := storageNodeUsername.GetOptionalArg(argIdx) + usernameFile := storageNodeUsernameFile.GetOptionalArg(argIdx) password := storageNodePassword.GetOptionalArg(argIdx) passwordFile := storageNodePasswordFile.GetOptionalArg(argIdx) var basicAuthCfg *promauth.BasicAuthConfig - if username != "" || password != "" || passwordFile != "" { + if username != "" || usernameFile != "" || password != "" || passwordFile != "" { basicAuthCfg = &promauth.BasicAuthConfig{ Username: username, + UsernameFile: usernameFile, Password: promauth.NewSecret(password), PasswordFile: passwordFile, } diff --git a/app/vlstorage/netinsert/netinsert.go b/app/vlstorage/netinsert/netinsert.go index 03bef8ce14..91d4cc5865 100644 --- a/app/vlstorage/netinsert/netinsert.go +++ b/app/vlstorage/netinsert/netinsert.go @@ -243,7 +243,7 @@ func (sn *storageNode) sendInsertRequest(pendingData *bytesutil.ByteBuffer) erro reqURL := sn.getRequestURL("/internal/insert") req, err := http.NewRequestWithContext(ctx, "POST", reqURL, body) if err != nil { - logger.Panicf("BUG: unexpected error when creating an http request: %s", err) + return fmt.Errorf("cannot create an http request for %q: %w", reqURL, err) } req.Header.Set("Content-Type", "application/octet-stream") if !sn.s.disableCompression { diff --git a/app/vlstorage/netselect/netselect.go b/app/vlstorage/netselect/netselect.go index 6c52751f1c..0f2117e7ae 100644 --- a/app/vlstorage/netselect/netselect.go +++ b/app/vlstorage/netselect/netselect.go @@ -138,7 +138,7 @@ func (sn *storageNode) 
runQuery(qctx *logstorage.QueryContext, processBlock func // The end of response stream return nil } - return fmt.Errorf("cannot read block size from %q: %w", path, err) + return fmt.Errorf("cannot read block size from %q: %w", reqURL, err) } blockLen := encoding.UnmarshalUint64(dataLenBuf[:]) if blockLen > math.MaxInt { @@ -237,6 +237,7 @@ func (sn *storageNode) getCommonArgs(version string, qctx *logstorage.QueryConte args.Set("query", qctx.Query.String()) args.Set("timestamp", fmt.Sprintf("%d", qctx.Query.GetTimestamp())) args.Set("disable_compression", fmt.Sprintf("%v", sn.s.disableCompression)) + args.Set("allow_partial_response", fmt.Sprintf("%v", qctx.AllowPartialResponse)) return args } @@ -278,7 +279,7 @@ func (sn *storageNode) getResponseBodyForPathAndArgs(ctx context.Context, path s reqBody := strings.NewReader(args.Encode()) req, err := http.NewRequestWithContext(ctx, "POST", reqURL, reqBody) if err != nil { - logger.Panicf("BUG: unexpected error when creating a request for %q: %s", reqURL, err) + return nil, "", fmt.Errorf("cannot create a request for %q: %w", reqURL, err) } req.Header.Set("Content-Type", "application/x-www-form-urlencoded") if err := sn.ac.SetHeaders(req, true); err != nil { @@ -288,6 +289,11 @@ func (sn *storageNode) getResponseBodyForPathAndArgs(ctx context.Context, path s // send the request to the storage node resp, err := sn.c.Do(req) if err != nil { + // Wrap the error into httpserver.ErrorWithStatusCode in order to return the proper status code to the client. + // See https://github.com/VictoriaMetrics/VictoriaLogs/issues/576 + // + // This is also used by isUnavailableBackendError() function in order to differentiate unavailable backend errors + // from improper configuration errors. 
return nil, "", &httpserver.ErrorWithStatusCode{ Err: fmt.Errorf("cannot connect to storage node at %q: %w", reqURL, err), StatusCode: http.StatusBadGateway, @@ -368,21 +374,12 @@ func (s *Storage) runQuery(stopCh <-chan struct{}, qctx *logstorage.QueryContext err := sn.runQuery(qctxLocal, func(db *logstorage.DataBlock) { writeBlock(uint(nodeIdx), db) }) - if err != nil { - if !errors.Is(err, context.Canceled) { - sn.sendErrors.Inc() - } - - // Cancel the remaining parallel queries - cancel() - } - - errs[nodeIdx] = err + errs[nodeIdx] = sn.handleError(ctxWithCancel, cancel, err, qctx.AllowPartialResponse) }(i) } wg.Wait() - return getFirstNonCancelError(errs) + return getFirstError(errs, qctx.AllowPartialResponse) } // GetFieldNames executes qctx and returns field names seen in results. @@ -459,21 +456,12 @@ func (s *Storage) getValuesWithHits(qctx *logstorage.QueryContext, limit uint64, sn := s.sns[nodeIdx] vhs, err := callback(ctxWithCancel, sn) results[nodeIdx] = vhs - errs[nodeIdx] = err - - if err != nil { - if !errors.Is(err, context.Canceled) { - sn.sendErrors.Inc() - } - - // Cancel the remaining parallel requests - cancel() - } + errs[nodeIdx] = sn.handleError(ctxWithCancel, cancel, err, qctx.AllowPartialResponse) }(i) } wg.Wait() - if err := getFirstNonCancelError(errs); err != nil { + if err := getFirstError(errs, qctx.AllowPartialResponse); err != nil { return nil, err } @@ -482,13 +470,64 @@ func (s *Storage) getValuesWithHits(qctx *logstorage.QueryContext, limit uint64, return vhs, nil } -func getFirstNonCancelError(errs []error) error { +func (sn *storageNode) handleError(ctx context.Context, cancel func(), err error, allowPartialResponse bool) error { + if err == nil { + // Nothing to handle. + return nil + } + if err := ctx.Err(); err != nil { + // The context error overrides all the other errors. + // It must be handled separately by the caller. 
+ return nil + } + + sn.sendErrors.Inc() + + if !allowPartialResponse || !isUnavailableBackendError(err) { + // Cancel the remaining parallel queries, since the error must be returned to the client ASAP + // without waiting for the remaining parallel queries to other backends. + cancel() + } + + return err +} + +func getFirstError(errs []error, allowPartialResponse bool) error { + if len(errs) == 0 { + logger.Panicf("BUG: len(errs) must be bigger than 0") + } + + if !allowPartialResponse { + for _, err := range errs { + if err != nil { + return err + } + } + return nil + } + + // allowPartialResponse == true. Return the error only if all the backends are unavailable + // or if some of the backends are improperly configured. for _, err := range errs { - if err != nil && !errors.Is(err, context.Canceled) { - return err + if err == nil { + // At least a single vlstorage returned full response. + return nil + } + if !isUnavailableBackendError(err) { + // Return the first error, which isn't related to the backend unavailability, to the client, + // since this error may point to configuration issues, which must be fixed ASAP. + // Hiding this error would complicate troubleshooting of improperly configured system. + return fmt.Errorf("the vlstorage node is available, but it returns an error, which may point to configuration issues: %w", err) } } - return nil + + return fmt.Errorf("all the vlstorage nodes are unavailable for querying; a sample error: %w", errs[0]) +} + +func isUnavailableBackendError(err error) bool { + // It is expected that unavailable backend errors are wrapped into httpserver.ErrorWithStatusCode. 
+ var es *httpserver.ErrorWithStatusCode + return errors.As(err, &es) } func unmarshalValuesWithHits(qctx *logstorage.QueryContext, src []byte) ([]logstorage.ValueWithHits, error) { diff --git a/app/vmui/packages/vmui/package-lock.json b/app/vmui/packages/vmui/package-lock.json index b57e39a9aa..66f672a907 100644 --- a/app/vmui/packages/vmui/package-lock.json +++ b/app/vmui/packages/vmui/package-lock.json @@ -19,7 +19,7 @@ "react-input-mask": "^2.0.4", "react-router-dom": "^7.6.3", "uplot": "^1.6.32", - "vite": "^7.0.0", + "vite": "^7.1.7", "web-vitals": "^5.0.3" }, "devDependencies": { @@ -6811,13 +6811,13 @@ "license": "MIT" }, "node_modules/tinyglobby": { - "version": "0.2.14", - "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.14.tgz", - "integrity": "sha512-tX5e7OM1HnYr2+a2C/4V0htOcSQcoSTH9KgJnVvNm5zm/cyEWKJ7j7YutsH9CxMdtOkkLFy2AHrMci9IM8IPZQ==", + "version": "0.2.15", + "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz", + "integrity": "sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==", "license": "MIT", "dependencies": { - "fdir": "^6.4.4", - "picomatch": "^4.0.2" + "fdir": "^6.5.0", + "picomatch": "^4.0.3" }, "engines": { "node": ">=12.0.0" @@ -6827,10 +6827,13 @@ } }, "node_modules/tinyglobby/node_modules/fdir": { - "version": "6.4.6", - "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.4.6.tgz", - "integrity": "sha512-hiFoqpyZcfNm1yc4u8oWCf9A2c4D3QjCrks3zmoVKVxpQRzmPNar1hUJcBG2RQHvEVGDN+Jm81ZheVLAQMK6+w==", + "version": "6.5.0", + "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", + "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==", "license": "MIT", + "engines": { + "node": ">=12.0.0" + }, "peerDependencies": { "picomatch": "^3 || ^4" }, @@ -6841,9 +6844,9 @@ } }, "node_modules/tinyglobby/node_modules/picomatch": { - "version": "4.0.2", - "resolved": 
"https://registry.npmjs.org/picomatch/-/picomatch-4.0.2.tgz", - "integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==", + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", + "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", "engines": { "node": ">=12" @@ -7147,17 +7150,17 @@ "license": "MIT" }, "node_modules/vite": { - "version": "7.0.4", - "resolved": "https://registry.npmjs.org/vite/-/vite-7.0.4.tgz", - "integrity": "sha512-SkaSguuS7nnmV7mfJ8l81JGBFV7Gvzp8IzgE8A8t23+AxuNX61Q5H1Tpz5efduSN7NHC8nQXD3sKQKZAu5mNEA==", + "version": "7.1.7", + "resolved": "https://registry.npmjs.org/vite/-/vite-7.1.7.tgz", + "integrity": "sha512-VbA8ScMvAISJNJVbRDTJdCwqQoAareR/wutevKanhR2/1EkoXVZVkkORaYm/tNVCjP/UDTKtcw3bAkwOUdedmA==", "license": "MIT", "dependencies": { "esbuild": "^0.25.0", - "fdir": "^6.4.6", - "picomatch": "^4.0.2", + "fdir": "^6.5.0", + "picomatch": "^4.0.3", "postcss": "^8.5.6", - "rollup": "^4.40.0", - "tinyglobby": "^0.2.14" + "rollup": "^4.43.0", + "tinyglobby": "^0.2.15" }, "bin": { "vite": "bin/vite.js" @@ -7262,10 +7265,13 @@ } }, "node_modules/vite/node_modules/fdir": { - "version": "6.4.6", - "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.4.6.tgz", - "integrity": "sha512-hiFoqpyZcfNm1yc4u8oWCf9A2c4D3QjCrks3zmoVKVxpQRzmPNar1hUJcBG2RQHvEVGDN+Jm81ZheVLAQMK6+w==", + "version": "6.5.0", + "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", + "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==", "license": "MIT", + "engines": { + "node": ">=12.0.0" + }, "peerDependencies": { "picomatch": "^3 || ^4" }, @@ -7276,9 +7282,9 @@ } }, "node_modules/vite/node_modules/picomatch": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.2.tgz", - "integrity": 
"sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==", + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", + "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", "engines": { "node": ">=12" diff --git a/app/vmui/packages/vmui/package.json b/app/vmui/packages/vmui/package.json index a4f260fb15..1f367a3d44 100644 --- a/app/vmui/packages/vmui/package.json +++ b/app/vmui/packages/vmui/package.json @@ -27,7 +27,7 @@ "react-input-mask": "^2.0.4", "react-router-dom": "^7.6.3", "uplot": "^1.6.32", - "vite": "^7.0.0", + "vite": "^7.1.7", "web-vitals": "^5.0.3" }, "devDependencies": { diff --git a/deployment/docker/compose-vl-cluster.yml b/deployment/docker/compose-vl-cluster.yml index c77455f43a..7766ff3e35 100644 --- a/deployment/docker/compose-vl-cluster.yml +++ b/deployment/docker/compose-vl-cluster.yml @@ -34,30 +34,30 @@ services: user: root vlinsert: - image: victoriametrics/victoria-logs:v1.32.0 + image: victoriametrics/victoria-logs:v1.34.0 command: - "--storageNode=vlstorage-1:9428" - "--storageNode=vlstorage-2:9428" vlselect-1: - image: victoriametrics/victoria-logs:v1.32.0 + image: victoriametrics/victoria-logs:v1.34.0 command: - "--storageNode=vlstorage-1:9428" - "--storageNode=vlstorage-2:9428" vlselect-2: - image: victoriametrics/victoria-logs:v1.32.0 + image: victoriametrics/victoria-logs:v1.34.0 command: - "--storageNode=vlstorage-1:9428" - "--storageNode=vlstorage-2:9428" vlstorage-1: - image: victoriametrics/victoria-logs:v1.32.0 + image: victoriametrics/victoria-logs:v1.34.0 command: - "--storageDataPath=/vlogs" volumes: - vldata-1:/vlogs vlstorage-2: - image: victoriametrics/victoria-logs:v1.32.0 + image: victoriametrics/victoria-logs:v1.34.0 command: - "--storageDataPath=/vlogs" volumes: diff --git a/deployment/docker/compose-vl-single.yml b/deployment/docker/compose-vl-single.yml index 
a3795b0190..5414b43264 100644 --- a/deployment/docker/compose-vl-single.yml +++ b/deployment/docker/compose-vl-single.yml @@ -36,7 +36,7 @@ services: # VictoriaLogs instance, a single process responsible for # storing logs and serving read queries. victorialogs: - image: victoriametrics/victoria-logs:v1.32.0 + image: victoriametrics/victoria-logs:v1.34.0 ports: - "9428:9428" command: diff --git a/deployment/docker/victorialogs/compose-base.yml b/deployment/docker/victorialogs/compose-base.yml index add0b058fc..1995742e6b 100644 --- a/deployment/docker/victorialogs/compose-base.yml +++ b/deployment/docker/victorialogs/compose-base.yml @@ -1,7 +1,7 @@ services: # meta service will be ignored by compose .victorialogs: - image: docker.io/victoriametrics/victoria-logs:v1.32.0 + image: docker.io/victoriametrics/victoria-logs:v1.34.0 command: - -storageDataPath=/vlogs - -loggerFormat=json diff --git a/deployment/logs-benchmark/docker-compose.yml b/deployment/logs-benchmark/docker-compose.yml index db567db952..63ed340aec 100644 --- a/deployment/logs-benchmark/docker-compose.yml +++ b/deployment/logs-benchmark/docker-compose.yml @@ -3,7 +3,7 @@ version: "3" services: # Run `make package-victoria-logs` to build victoria-logs image vlogs: - image: docker.io/victoriametrics/victoria-logs:v1.27.0 + image: docker.io/victoriametrics/victoria-logs:v1.34.0 volumes: - vlogs:/vlogs ports: @@ -46,7 +46,7 @@ services: - "--config=/config.yml" vmsingle: - image: victoriametrics/victoria-metrics:v1.109.0 + image: victoriametrics/victoria-metrics:v1.125.1 ports: - "8428:8428" command: @@ -58,7 +58,7 @@ services: - ./vmsingle/promscrape.yml:/promscrape.yml grafana: - image: grafana/grafana:12.0.2 + image: grafana/grafana:12.1.1 depends_on: [vmsingle] ports: - 3000:3000 diff --git a/deployment/logs-benchmark/readme.md b/deployment/logs-benchmark/readme.md index 72e4f45cc3..4dfe1eb555 100644 --- a/deployment/logs-benchmark/readme.md +++ b/deployment/logs-benchmark/readme.md @@ -24,7 +24,7 @@ 
For Grafana Loki suite it uses: - VictoriaLogs instance - vmsingle - port forwarded to `localhost:8428` to see UI -- exporters for system metris +- exporters for system metrics ELK suite uses [docker-compose-elk.yml](./docker-compose-elk.yml) with the following services: @@ -54,7 +54,7 @@ Each filebeat than writes logs to elastic and VictoriaLogs via elasticsearch-com 1. Download and unarchive logs by running: ```shell -cd source_logs +cd source_logs bash download.sh ``` @@ -98,15 +98,14 @@ output.elasticsearch: 4. Choose a suite to run. In order to run ELK suite use the following command: - ``` + ```sh make docker-up-elk ``` In order to run Loki suite use the following command: - ``` + ```sh make docker-up-loki ``` - 5. Navigate to `http://localhost:3000/` to see Grafana dashboards with resource usage comparison. diff --git a/docs/victorialogs/Articles.md b/docs/victorialogs/Articles.md index 423338ebdf..6d27055230 100644 --- a/docs/victorialogs/Articles.md +++ b/docs/victorialogs/Articles.md @@ -1,11 +1,11 @@ --- -weight: 29 +weight: 70 title: Articles menu: docs: parent: 'victorialogs' identifier: "victorialogs-articles" - weight: 29 + weight: 70 tags: [] --- diff --git a/docs/victorialogs/CHANGELOG.md b/docs/victorialogs/CHANGELOG.md index e1eb3801a6..ea35bfc6a6 100644 --- a/docs/victorialogs/CHANGELOG.md +++ b/docs/victorialogs/CHANGELOG.md @@ -1,11 +1,11 @@ --- -weight: 7 +weight: 101 title: CHANGELOG menu: docs: identifier: "victorialogs-changelog" parent: "victorialogs" - weight: 7 + weight: 101 title: CHANGELOG tags: - logs @@ -18,7 +18,52 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta ## tip +* FEATURE: [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/): add [`equals_common_case` filter](https://docs.victoriametrics.com/victorialogs/logsql/#equals_common_case-filter) and [`contains_common_case` filter](https://docs.victoriametrics.com/victorialogs/logsql/#contains_common_case-filter). 
These filters can be used as faster alternatives to [`i(...)` filter](https://docs.victoriametrics.com/victorialogs/logsql/#case-insensitive-filter). For example, `_msg:contains_common_case("VictoriaMetrics")` matches logs with the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) containing at least one of the following [words](https://docs.victoriametrics.com/victorialogs/logsql/#word): `VictoriaMetrics`, `victoriametrics`, `VICTORIAMETRICS`, `Victoriametrics` or `victoriaMetrics`. + +* BUGFIX: [`facets` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#facets-pipe): properly skip field names with more than `max_values_per_field` unique values. Previously these fields could be returned with incomplete number of hits. + +## [v1.34.0](https://github.com/VictoriaMetrics/VictoriaLogs/releases/tag/v1.34.0) + +Released at 2025-09-22 + +* FEATURE: [querying](https://docs.victoriametrics.com/victorialogs/querying/): add an ability to return partial responses in [VictoriaLogs cluster setup](https://docs.victoriametrics.com/victorialogs/cluster/) when some of `vlstorage` nodes are unavailable. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#partial-responses) for details. See [#72](https://github.com/VictoriaMetrics/VictoriaLogs/issues/72). +* FEATURE: [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/): add an ability to configure the number of parallel readers to use during query execution for reading the queried data from storage. This may help improving query performance for high-latency storage systems such as NFS, Ceph or S3. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#parallel_readers-query-option) for details. +* FEATURE: [vlselect](https://docs.victoriametrics.com/victorialogs/cluster/): add `-storageNode.usernameFile` command-line flag for dynamically reloading basic auth username for the corresponding `-storageNode` from the given file. 
See [#459](https://github.com/VictoriaMetrics/VictoriaLogs/issues/459). + +* BUGFIX: [`/select/logsql/query` endpoint](https://docs.victoriametrics.com/victorialogs/querying/#querying-logs) at [VictoriaLogs cluster](https://docs.victoriametrics.com/victorialogs/cluster/): properly execute the query with the `limit=N` query arg. Such queries are issued by [the built-in web UI for VictoriaLogs](https://docs.victoriametrics.com/victorialogs/querying/#web-ui) and by [Grafana plugin for VictoriaLogs](https://docs.victoriametrics.com/victorialogs/victorialogs-datasource/) for returning the last N logs for the given query on the given time range. Previously it could return unexpected results. The issue has been introduced in [v1.30.0](#v1300) while fixing [#587](https://github.com/VictoriaMetrics/VictoriaLogs/issues/587). +* BUGFIX: [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/): disallow incorrectly written filters such as `foo = bar`, `foo != bar`, `foo > bar`, etc. They must be written as `foo:=bar`, `foo:!=bar`, `foo:>bar`. See [#590](https://github.com/VictoriaMetrics/VictoriaLogs/issues/590). +* BUGFIX: [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/): allow using unquoted [pipe names](https://docs.victoriametrics.com/victorialogs/logsql/#pipes) inside [LogsQL filters](https://docs.victoriametrics.com/victorialogs/logsql/#filters). For example, `fields.foo:bar` is allowed now, while previously it should be written as `"fields.foo":bar`. See [#669](https://github.com/VictoriaMetrics/VictoriaLogs/issues/669). 
+* BUGFIX: [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/): properly account for `result_prefix` in [unpack_json](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_json-pipe), [unpack_logfmt](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_logfmt-pipe) and [unpack_syslog](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_syslog-pipe) when determining the list of log fields to select. See [#671](https://github.com/VictoriaMetrics/VictoriaLogs/issues/671). +* BUGFIX: [vlselect](https://docs.victoriametrics.com/victorialogs/cluster/): avoid `unexpected number of imported shards` error in [`count_uniq`](https://docs.victoriametrics.com/victorialogs/logsql/#count_uniq-stats) and [`count_uniq_hash`](https://docs.victoriametrics.com/victorialogs/logsql/#count_uniq_hash-stats) stats functions when `vlselect` and `vlstorage` instances have different number of CPU cores. See [#682](https://github.com/VictoriaMetrics/VictoriaLogs/issues/682). +* BUGFIX: properly delete unneeded directories at [Ossfs2 filesystem](https://www.alibabacloud.com/help/en/oss/developer-reference/ossfs-2-0/). See [#649](https://github.com/VictoriaMetrics/VictoriaLogs/issues/649). Thanks to @xiaozongyang for [the initial pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/9709). +* BUGFIX: all [Enterprise version of VictoriaLogs](https://docs.victoriametrics.com/victoriametrics/enterprise/): fix support for automatic issuing of TLS certificates for HTTPS server via Let's Encrypt service using TLS-ALPN-01 challenge. See [Automatic issuing of TLS certificates](https://docs.victoriametrics.com/victorialogs/#automatic-issuing-of-tls-certificates) for more info. +* BUGFIX: all components: lower severity of the log message for unavailable [Pressure Stall Information (PSI) metrics](https://docs.kernel.org/accounting/psi.html) from ERROR to INFO level. See [this issue](https://github.com/VictoriaMetrics/metrics/pull/101) for details. 
+* BUGFIX: all components: properly expose metadata for summaries and histograms in VictoriaMetrics components with enabled `-metrics.exposeMetadata` cmd-line flag. See [metrics#98](https://github.com/VictoriaMetrics/metrics/issues/98) for details. +* BUGFIX: [OpenTelemetry ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/opentelemetry/): fix log processor name from "opentelelemtry_protobuf" to "opentelemetry_protobuf". This changes the `type` label value for ingestion metrics such as `vl_rows_ingested_total`, `vl_bytes_ingested_total`, and `vl_insert_flush_duration_seconds` for OpenTelemetry data ingestion. + +## [v1.33.1](https://github.com/VictoriaMetrics/VictoriaLogs/releases/tag/v1.33.1) + +Released at 2025-09-11 + +* FEATURE: [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/): optimize queries containing `| offset X | limit Y` for VictoriaLogs cluster. It also improves performance of queries in vmui and Grafana when retrieving sorted logs. See [#620](https://github.com/VictoriaMetrics/VictoriaLogs/issues/620). + +* BUGFIX: [`/select/logsql/query` endpoint](https://docs.victoriametrics.com/victorialogs/querying/#querying-logs): properly optimize the query execution when the `limit` query arg is set, while the `offset` query arg isn't set. It wasn't addressed properly in [v1.33.0](#v1330). See [#620](https://github.com/VictoriaMetrics/VictoriaLogs/issues/620). +* BUGFIX: [`/select/logsql/hits` endpoint](https://docs.victoriametrics.com/victorialogs/querying/#querying-hits-stats): take into account results from the [`union` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#union-pipe) while calculating log hits. See [#641](https://github.com/VictoriaMetrics/VictoriaLogs/issues/641). 
+* BUGFIX: [`/select/logsql/hits` endpoint](https://docs.victoriametrics.com/victorialogs/querying/#querying-hits-stats): take into account results from the [`join` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#join-pipe) while calculating log hits. +* BUGFIX: [Syslog data ingesting](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/): keep nanosecond precision when parsing RFC3339 timestamp. See [#303](https://github.com/VictoriaMetrics/VictoriaLogs/issues/303). + +## [v1.33.0](https://github.com/VictoriaMetrics/VictoriaLogs/releases/tag/v1.33.0) + +Released at 2025-09-10 + +* FEATURE: [`extract` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe): the `<...>` placeholder now matches quoted strings in single quotes additionally to strings in double quotes and backticks. For example, the `<username>` placeholder at the `... | extract "login=<username>,"` now matches `foo,bar` for the log message with the text `login='foo,bar'`. * FEATURE: [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/): add [pattern match filter](https://docs.victoriametrics.com/victorialogs/logsql/#pattern-match-filter) for searching logs by the given patterns such as `: user_id=, ip=, trace_id=`. These filters are needed for [#518](https://github.com/VictoriaMetrics/VictoriaLogs/issues/518). +* FEATURE: [Syslog data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/): support for receiving Syslog messages from Unix sockets of `SOCK_STREAM` and `SOCK_DGRAM` types via `-syslog.listenAddr.unix=/path/to/socket` and `-syslog.listenAddr.unix=unixgram:/path/to/socket` command-line flags. See [#570](https://github.com/VictoriaMetrics/VictoriaLogs/issues/570). + +* BUGFIX: [querying](https://docs.victoriametrics.com/victorialogs/querying/): `-search.maxQueryTimeRange` command-line flag now supports day (`d`), week (`w`) and year (`y`) suffixes additionally to the supported hour (`h`), minute (`m`) and second (`s`) suffixes. 
See [#50](https://github.com/VictoriaMetrics/VictoriaLogs/issues/50#issuecomment-3244097676). +* BUGFIX: [`/select/logsql/query` endpoint](https://docs.victoriametrics.com/victorialogs/querying/#querying-logs): improve query performance when the `limit` query arg is set, while the `offset` query arg is missing. The issue has been introduced in [v1.28.0](#v1280). See [#620](https://github.com/VictoriaMetrics/VictoriaLogs/issues/620). +* BUGFIX: [`/select/logsql/facets` endpoint](https://docs.victoriametrics.com/victorialogs/querying/#querying-facets): prevent from incorrect swap of `hits` and `field_values` fields in [VictoriaLogs cluster](https://docs.victoriametrics.com/victorialogs/cluster/). See [#648](https://github.com/VictoriaMetrics/VictoriaLogs/issues/648). +* BUGFIX: [querying](https://docs.victoriametrics.com/victorialogs/querying/) in [VictoriaLogs cluster](https://docs.victoriametrics.com/victorialogs/cluster/): properly handle queries with [pipe](https://docs.victoriametrics.com/victorialogs/logsql/#pipes) executed only at the `vlstorage` side without the need to post-process at the `vlselect` side. The issue has been introduced in [v1.31.0](#v1310) while working on [#52](https://github.com/VictoriaMetrics/VictoriaLogs/issues/52). ## [v1.32.0](https://github.com/VictoriaMetrics/VictoriaLogs/releases/tag/v1.32.0) @@ -30,7 +75,7 @@ Released at 2025-09-03 * FEATURE: [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/): add an ability to sort logs returned by [`json_values` stats function](https://docs.victoriametrics.com/victorialogs/logsql/#json_values-stats) via `... | stats json_values() sort by (...)` syntax. The sorting can be combined with the `limit N` in order to get top N logs. For example, the following query returns up to 3 the most recent logs per every `host` over the last 5 minutes: `_time:5m | stats json_values() sort by (_time desc) limit 3 as last_3_host_logs`. 
* FEATURE: [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/): return errors on typical incorrectly written queries. For example, `foo=~"bar"`, `foo~="bar"`, `foo=bar`, `foo!=bar`, `foo>bar`, `foo==bar`, etc. See [#590](https://github.com/VictoriaMetrics/VictoriaLogs/issues/590). * FEATURE: [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/): add [substring filter](https://docs.victoriametrics.com/victorialogs/logsql/#substring-filter), which searches for logs containing the given substring. Previously the [regexp filter](https://docs.victoriametrics.com/victorialogs/logsql/#regexp-filter) was used instead of substring filter. The downside of the regexp filter is that it needs proper escaping of special regexp chars. -* FEATURE: [querying](https://docs.victoriametrics.com/victorialogs/querying/): expose `vl_storage_per_query_found_rows` [histogram](https://docs.victoriametrics.com/keyconcepts/#histogram) at [`/metrics`](https://docs.victoriametrics.com/victorialogs/#monitoring) page, which shows the number of log entries found per each query. +* FEATURE: [querying](https://docs.victoriametrics.com/victorialogs/querying/): expose `vl_storage_per_query_found_rows` [histogram](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#histogram) at [`/metrics`](https://docs.victoriametrics.com/victorialogs/#monitoring) page, which shows the number of log entries found per each query. ## [v1.31.0](https://github.com/VictoriaMetrics/VictoriaLogs/releases/tag/v1.31.0) @@ -38,10 +83,10 @@ Released at 2025-08-31 * FEATURE: [querying](https://docs.victoriametrics.com/victorialogs/querying/): add `-search.maxQueryTimeRange` command-line flag, which can be used for limiting resource usage by queries with too broad [time range filters](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter). See [resource usage limits docs](https://docs.victoriametrics.com/victorialogs/querying/#resource-usage-limits) for details. 
See [#50](https://github.com/VictoriaMetrics/VictoriaLogs/issues/50#issuecomment-3043590508) and [#611](https://github.com/VictoriaMetrics/VictoriaLogs/issues/611). * FEATURE: [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/): add [`query_stats` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#query_stats-pipe) for analyzing query execution statistics. This pipe may help understanding why the given query is slow and may help optimizing slow queries. See [#52](https://github.com/VictoriaMetrics/VictoriaLogs/issues/52). -* FEATURE: [querying](https://docs.victoriametrics.com/victorialogs/querying/): expose `vl_storage_per_query_read_values` [histogram](https://docs.victoriametrics.com/keyconcepts/#histogram) at [`/metrics`](https://docs.victoriametrics.com/victorialogs/#monitoring) page, which shows the number of log field values read per each query. -* FEATURE: [querying](https://docs.victoriametrics.com/victorialogs/querying/): expose `vl_storage_per_query_read_timestamps` [histogram](https://docs.victoriametrics.com/keyconcepts/#histogram) at [`/metrics`](https://docs.victoriametrics.com/victorialogs/#monitoring) page, which shows the number of log timestamps read per each query. -* FEATURE: [querying](https://docs.victoriametrics.com/victorialogs/querying/): expose `vl_storage_per_query_uncompressed_values_processed_bytes` [histogram](https://docs.victoriametrics.com/keyconcepts/#histogram) at [`/metrics`](https://docs.victoriametrics.com/victorialogs/#monitoring) page, which shows the amounts of bytes processed for uncompressed field values per each query. -* FEATURE: [querying](https://docs.victoriametrics.com/victorialogs/querying/): expose `vl_storage_per_query_processed_rows` [histogram](https://docs.victoriametrics.com/keyconcepts/#histogram) at [`/metrics`](https://docs.victoriametrics.com/victorialogs/#monitoring) page, which shows the number of log entries processed per each query. 
+* FEATURE: [querying](https://docs.victoriametrics.com/victorialogs/querying/): expose `vl_storage_per_query_read_values` [histogram](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#histogram) at [`/metrics`](https://docs.victoriametrics.com/victorialogs/#monitoring) page, which shows the number of log field values read per each query. +* FEATURE: [querying](https://docs.victoriametrics.com/victorialogs/querying/): expose `vl_storage_per_query_read_timestamps` [histogram](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#histogram) at [`/metrics`](https://docs.victoriametrics.com/victorialogs/#monitoring) page, which shows the number of log timestamps read per each query. +* FEATURE: [querying](https://docs.victoriametrics.com/victorialogs/querying/): expose `vl_storage_per_query_uncompressed_values_processed_bytes` [histogram](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#histogram) at [`/metrics`](https://docs.victoriametrics.com/victorialogs/#monitoring) page, which shows the amounts of bytes processed for uncompressed field values per each query. +* FEATURE: [querying](https://docs.victoriametrics.com/victorialogs/querying/): expose `vl_storage_per_query_processed_rows` [histogram](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#histogram) at [`/metrics`](https://docs.victoriametrics.com/victorialogs/#monitoring) page, which shows the number of log entries processed per each query. * FEATURE: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): always sort field names in alphabetical order at JSON view. This simplifies exploring logs with big number of fields (such as [wide events](https://jeremymorrell.dev/blog/a-practitioners-guide-to-wide-events/)). See [#87](https://github.com/VictoriaMetrics/VictoriaLogs/issues/87). 
* FEATURE: add `/internal/partition/snapshot/create` HTTP endpoint for creating [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282) for per-day partitions. See [these docs](https://docs.victoriametrics.com/victorialogs/#partitions-lifecycle) for details. * FEATURE: add `/internal/partition/snapshot/list` HTTP endpoint, which returns the list of absolute paths to snapshots created via `/internal/partition/snapshot/create`. See [these docs](https://docs.victoriametrics.com/victorialogs/#partitions-lifecycle) for details. @@ -106,8 +151,8 @@ Released at 2025-08-08 * FEATURE: [Syslog data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/): add an ability to record the remote IP address from the received syslog messages into the `remote_ip` log field. See [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#capturing-remote-ip-address) for details. Thanks to @biancalana for [the pull request](https://github.com/VictoriaMetrics/VictoriaLogs/pull/527). See [#40](https://github.com/VictoriaMetrics/VictoriaLogs/issues/40). * FEATURE: [retention](https://docs.victoriametrics.com/victorialogs/#retention): support disk space percentage-based retention (`-retention.maxDiskUsagePercent`), which helps dynamically manage total disk space usage. Only one of `-retention.maxDiskSpaceUsageBytes` or `-retention.maxDiskUsagePercent` can be set; otherwise, the application will panic. See [#513](https://github.com/VictoriaMetrics/VictoriaLogs/issues/513). * FEATURE: add an ability to dynamically attach and detach per-day partitions. This simplifies creating multi-tier storage schemes when recently ingested logs are stored on a fast storage (such as NVMe or SSD), while historical logs are gradually migrated to less expensive storage with bigger capacity (such as HDD). 
See [these docs](https://docs.victoriametrics.com/victorialogs/#partitions-lifecycle) and [#432](https://github.com/VictoriaMetrics/VictoriaLogs/issues/432). -* FEATURE: [querying](https://docs.victoriametrics.com/victorialogs/querying/): expose `vl_storage_per_query_processed_blocks` [histogram](https://docs.victoriametrics.com/keyconcepts/#histogram), which shows the number of data blocks processed per every query. This histogram can be used for analysing query performance issues. See [#45](https://github.com/VictoriaMetrics/VictoriaLogs/issues/45). -* FEATURE: [querying](https://docs.victoriametrics.com/victorialogs/querying/): expose [histograms](https://docs.victoriametrics.com/keyconcepts/#histogram) on the number of bytes read from disk for various data types per each query (see [#45](https://github.com/VictoriaMetrics/VictoriaLogs/issues/45)): +* FEATURE: [querying](https://docs.victoriametrics.com/victorialogs/querying/): expose `vl_storage_per_query_processed_blocks` [histogram](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#histogram), which shows the number of data blocks processed per every query. This histogram can be used for analysing query performance issues. See [#45](https://github.com/VictoriaMetrics/VictoriaLogs/issues/45). +* FEATURE: [querying](https://docs.victoriametrics.com/victorialogs/querying/): expose [histograms](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#histogram) on the number of bytes read from disk for various data types per each query (see [#45](https://github.com/VictoriaMetrics/VictoriaLogs/issues/45)): * `vl_storage_per_query_total_read_bytes` - the total number of bytes read during query execution. * `vl_storage_per_query_values_read_bytes` - the number of bytes read for [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) values. 
* `vl_storage_per_query_timestamps_read_bytes` - the number of bytes read for the [`_time` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field). @@ -607,7 +652,7 @@ Released at 2024-11-08 * FEATURE: [`join` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#join-pipe): add an ability to add prefix to all the log field names from the joined query, by using `| join by () () prefix "some_prefix"` syntax. * FEATURE: [`_time` filter](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter): allow specifying offset without time range. For example, `_time:offset 1d` matches all the logs until `now-1d` in the [`_time` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field). This is useful when building graphs for time ranges with some offset in the past. -* FEATURE: [`/select/logsql/tail` HTTP endpoint](): support for `offset` query arg, which can be used for delayed emission of matching logs during live tailing. Thanks to @Fusl for the initial idea and implementation in [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/7428). +* FEATURE: [`/select/logsql/tail` HTTP endpoint](https://docs.victoriametrics.com/victorialogs/querying/#http-api): support for `offset` query arg, which can be used for delayed emission of matching logs during live tailing. Thanks to @Fusl for the initial idea and implementation in [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/7428). * FEATURE: [vlogscli](https://docs.victoriametrics.com/victorialogs/querying/vlogscli/): allow enabling and disabling wrapping of long lines, which do not fit screen width, with `\wrap_long_lines` command. 
* FEATURE: [syslog data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/): allow overriding default [stream fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) with the given [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) during data ingestion. See [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#stream-fields). See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7480). * FEATURE: [syslog data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/): allow adding arbitrary [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) via `[label1=value1 ... labelN=valueN]` syntax inside Syslog messages. For example, `<165>1 2024-06-03T17:42:00.000Z example.com appname 12345 ID47 [field1=value1 field2=value2] some message`. @@ -958,7 +1003,7 @@ Released at 2024-05-28 * FEATURE: add [`math` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#math-pipe) for mathematical calculations over [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). * FEATURE: add [`field_values` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#field_values-pipe), which returns unique values for the given [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). * FEATURE: allow omitting `stats` prefix in [`stats` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe). For example, `_time:5m | count() rows` is a valid query now. It is equivalent to `_time:5m | stats count() as rows`. -* FEATURE: allow omitting `filter` prefix in [`filter` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#filter-pipe) if the filter doesn't clash with [pipe names](#https://docs.victoriametrics.com/victorialogs/logsql/#pipes). 
For example, `_time:5m | stats by (host) count() rows | rows:>1000` is a valid query now. It is equivalent to `_time:5m | stats by (host) count() rows | filter rows:>1000`. +* FEATURE: allow omitting `filter` prefix in [`filter` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#filter-pipe) if the filter doesn't clash with [pipe names](https://docs.victoriametrics.com/victorialogs/logsql/#pipes). For example, `_time:5m | stats by (host) count() rows | rows:>1000` is a valid query now. It is equivalent to `_time:5m | stats by (host) count() rows | filter rows:>1000`. * FEATURE: allow [`head` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#limit-pipe) without number. For example, `error | head`. In this case 10 first values are returned as `head` Unix command does by default. * FEATURE: allow using [comparison filters](https://docs.victoriametrics.com/victorialogs/logsql/#range-comparison-filter) with strings. For example, `some_text_field:>="foo"` matches [log entries](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) with `some_text_field` field values bigger or equal to `foo`. @@ -1060,7 +1105,7 @@ Released at 2024-05-20 Released at 2024-05-15 * FEATURE: add support for optional `start` and `end` query args to [HTTP querying API](https://docs.victoriametrics.com/victorialogs/querying/#http-api), which can be used for limiting the time range for [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/). -* FEATURE: add ability to return the first `N` results from [`sort` pipe](#https://docs.victoriametrics.com/victorialogs/logsql/#sort-pipe). This is useful when `N` biggest or `N` smallest values must be returned from large amounts of logs. +* FEATURE: add ability to return the first `N` results from [`sort` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#sort-pipe). This is useful when `N` biggest or `N` smallest values must be returned from large amounts of logs. 
* FEATURE: add [`quantile`](https://docs.victoriametrics.com/victorialogs/logsql/#quantile-stats) and [`median`](https://docs.victoriametrics.com/victorialogs/logsql/#median-stats) [stats functions](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe). ## [v0.6.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.6.1-victorialogs) diff --git a/docs/victorialogs/FAQ.md b/docs/victorialogs/FAQ.md index 1ddecd4e2d..0b685cf8bd 100644 --- a/docs/victorialogs/FAQ.md +++ b/docs/victorialogs/FAQ.md @@ -1,11 +1,11 @@ --- -weight: 6 +weight: 13 title: FAQ menu: docs: identifier: "victorialogs-faq" parent: "victorialogs" - weight: 6 + weight: 13 title: FAQ tags: - logs @@ -79,7 +79,7 @@ See [this article](https://itnext.io/why-victorialogs-is-a-better-alternative-to ## What is the difference between VictoriaLogs and ClickHouse? ClickHouse is an extremely fast and efficient analytical database. It can be used for logs storage, analysis and processing. -VictoriaLogs is designed solely for logs. VictoriaLogs uses [similar design ideas as ClickHouse](#how-does-victorialogs-work) for achieving high performance. +VictoriaLogs is designed solely for logs. VictoriaLogs uses [similar design ideas as ClickHouse](https://docs.victoriametrics.com/victorialogs/faq/#how-does-victorialogs-work) for achieving high performance. - ClickHouse is good for logs if you know the set of [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) and the expected query types beforehand. Then you can create a table with a column per each log field, and use the most optimal settings for the table - @@ -243,11 +243,11 @@ or [Grafana plugin for VictoriaLogs](https://docs.victoriametrics.com/victorialo then make sure the selected time range covers the last day. Otherwise, the query above returns results on the intersection of the last day and the selected time range. 
-See [why the log field occupies a lot of disk space](#why-the-log-field-occupies-a-lot-of-disk-space). +See [why the log field occupies a lot of disk space](https://docs.victoriametrics.com/victorialogs/faq/#why-the-log-field-occupies-a-lot-of-disk-space). ## Why the log field occupies a lot of disk space? -See [how to determine which log fields occupy the most of disk space](#how-to-determine-which-log-fields-occupy-the-most-of-disk-space). +See [how to determine which log fields occupy the most of disk space](https://docs.victoriametrics.com/victorialogs/faq/#how-to-determine-which-log-fields-occupy-the-most-of-disk-space). Log field may occupy a lot of disk space if it contains values with many unique parts (aka "random" values). Such values do not compress well, so they occupy a lot of disk space. If you want reducing the amounts of occupied disk space, then either remove the given log field from the [ingested](https://docs.victoriametrics.com/victorialogs/data-ingestion/) logs diff --git a/docs/victorialogs/QuickStart.md b/docs/victorialogs/QuickStart.md index 925f10e362..fe7d239bd7 100644 --- a/docs/victorialogs/QuickStart.md +++ b/docs/victorialogs/QuickStart.md @@ -23,10 +23,10 @@ before you start working with VictoriaLogs. 
There are the following options exist: -- [To run pre-built binaries](#pre-built-binaries) -- [To run Docker image](#docker-image) -- [To run in Kubernetes with Helm charts](#helm-charts) -- [To build VictoriaLogs from source code](#building-from-source-code) +- [To run pre-built binaries](https://docs.victoriametrics.com/victorialogs/quickstart/#pre-built-binaries) +- [To run Docker image](https://docs.victoriametrics.com/victorialogs/quickstart/#docker-image) +- [To run in Kubernetes with Helm charts](https://docs.victoriametrics.com/victorialogs/quickstart/#helm-charts) +- [To build VictoriaLogs from source code](https://docs.victoriametrics.com/victorialogs/quickstart/#building-from-source-code) ### Pre-built binaries @@ -36,8 +36,8 @@ Just download archive for the needed Operating system and architecture, unpack i For example, the following commands download VictoriaLogs archive for Linux/amd64, unpack and run it: ```sh -curl -L -O https://github.com/VictoriaMetrics/VictoriaLogs/releases/download/v1.32.0/victoria-logs-linux-amd64-v1.32.0.tar.gz -tar xzf victoria-logs-linux-amd64-v1.32.0.tar.gz +curl -L -O https://github.com/VictoriaMetrics/VictoriaLogs/releases/download/v1.34.0/victoria-logs-linux-amd64-v1.34.0.tar.gz +tar xzf victoria-logs-linux-amd64-v1.34.0.tar.gz ./victoria-logs-prod -storageDataPath=victoria-logs-data ``` @@ -49,7 +49,7 @@ the needed indexes during [data ingestion](https://docs.victoriametrics.com/vict See also: -- [How to configure VictoriaLogs](#how-to-configure-victorialogs) +- [How to configure VictoriaLogs](https://docs.victoriametrics.com/victorialogs/quickstart/#how-to-configure-victorialogs) - [How to ingest logs into VictoriaLogs](https://docs.victoriametrics.com/victorialogs/data-ingestion/) - [How to query VictoriaLogs](https://docs.victoriametrics.com/victorialogs/querying/) @@ -60,20 +60,20 @@ Here is the command to run VictoriaLogs in a Docker container: ```sh docker run --rm -it -p 9428:9428 -v 
./victoria-logs-data:/victoria-logs-data \ - docker.io/victoriametrics/victoria-logs:v1.32.0 -storageDataPath=victoria-logs-data + docker.io/victoriametrics/victoria-logs:v1.34.0 -storageDataPath=victoria-logs-data ``` See also: -- [How to configure VictoriaLogs](#how-to-configure-victorialogs) +- [How to configure VictoriaLogs](https://docs.victoriametrics.com/victorialogs/quickstart/#how-to-configure-victorialogs) - [How to ingest logs into VictoriaLogs](https://docs.victoriametrics.com/victorialogs/data-ingestion/) - [How to query VictoriaLogs](https://docs.victoriametrics.com/victorialogs/querying/) ### Helm charts You can run VictoriaLogs in Kubernetes environment -with [VictoriaLogs single](https://docs.victoriametrics.com/helm/victorialogs-single/) -or [cluster](https://docs.victoriametrics.com/helm/victorialogs-cluster) helm charts. +with [VictoriaLogs single](https://docs.victoriametrics.com/helm/victoria-logs-single/) +or [cluster](https://docs.victoriametrics.com/helm/victoria-logs-cluster/) helm charts. ### Building from source code @@ -121,7 +121,7 @@ This will build `victoria-logs-prod` executable inside the `bin` folder. 
See also: -- [How to configure VictoriaLogs](#how-to-configure-victorialogs) +- [How to configure VictoriaLogs](https://docs.victoriametrics.com/victorialogs/quickstart/#how-to-configure-victorialogs) - [How to ingest logs into VictoriaLogs](https://docs.victoriametrics.com/victorialogs/data-ingestion/) - [How to query VictoriaLogs](https://docs.victoriametrics.com/victorialogs/querying/) @@ -165,6 +165,6 @@ Docker-compose demos that integrate VictoriaLogs and various log collectors: - [Vector demo](https://github.com/VictoriaMetrics/VictoriaLogs/tree/master/deployment/docker/victorialogs/vector) - [Promtail demo](https://github.com/VictoriaMetrics/VictoriaLogs/tree/master/deployment/docker/victorialogs/promtail) -You can use [VictoriaLogs single](https://docs.victoriametrics.com/helm/victorialogs-single/) -or [cluster](https://docs.victoriametrics.com/helm/victorialogs-cluster) helm charts as a demo for running Vector +You can use [VictoriaLogs single](https://docs.victoriametrics.com/helm/victoria-logs-single/) +or [cluster](https://docs.victoriametrics.com/helm/victoria-logs-cluster/) helm charts as a demo for running Vector in Kubernetes with VictoriaLogs. diff --git a/docs/victorialogs/README.md b/docs/victorialogs/README.md index 75caa8d4e0..9aacc850c0 100644 --- a/docs/victorialogs/README.md +++ b/docs/victorialogs/README.md @@ -9,19 +9,22 @@ sitemap: VictoriaLogs is [open source](https://github.com/VictoriaMetrics/VictoriaLogs/) user-friendly database for logs from [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/). +## Features + VictoriaLogs provides the following features: - It is resource-efficient and fast. It uses up to 30x less RAM and up to 15x less disk space than other solutions such as Elasticsearch and Grafana Loki. - See [these benchmarks](#benchmarks) and [this article](https://itnext.io/how-do-open-source-solutions-for-logs-work-elasticsearch-loki-and-victorialogs-9f7097ecbc2f) for details. 
- See also [the post from a happy user, who replaced 27-node Elasticsearch with a single-node VictoriaLogs](https://aus.social/@phs/114583927679254536). + See [these benchmarks](https://docs.victoriametrics.com/victorialogs/#benchmarks) and [this article](https://itnext.io/how-do-open-source-solutions-for-logs-work-elasticsearch-loki-and-victorialogs-9f7097ecbc2f) for details. + See also [the post from a happy user, who replaced 27-node Elasticsearch with a single-node VictoriaLogs](https://aus.social/@phs/114583927679254536), + [this post from happy users, who replaced Loki with VictoriaLogs](https://www.truefoundry.com/blog/victorialogs-vs-loki) + and [this post from a happy user who replaced grep with VictoriaLogs](https://chronicles.mad-scientist.club/tales/grepping-logs-remains-terrible/). - VictoriaLogs' capacity and performance scales linearly with the available resources (CPU, RAM, disk IO, disk space). It runs smoothly on Raspberry PI and on servers with hundreds of CPU cores and terabytes of RAM. - It can scale horizontally to many nodes in [cluster mode](https://docs.victoriametrics.com/victorialogs/cluster/). + It can scale horizontally to hundreds of nodes in [cluster mode](https://docs.victoriametrics.com/victorialogs/cluster/). - It can accept logs from popular log collectors. See [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/). -- It is much easier to set up and operate compared to Elasticsearch and Grafana Loki, since it is basically zero-config. +- It is much easier to set up and operate compared to Elasticsearch and Grafana Loki, since it is a single zero-config executable. See [these docs](https://docs.victoriametrics.com/victorialogs/quickstart/). -- It provides easy yet powerful query language with full-text search capabilities across - all the [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). 
+- It provides an easy yet powerful query language, which supports fast full-text search, fast advanced analytics and fast data extraction and transformation at query time. See [LogsQL docs](https://docs.victoriametrics.com/victorialogs/logsql/). - It provides [built-in web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui) for logs' exploration. - It provides [Grafana plugin](https://docs.victoriametrics.com/victorialogs/victorialogs-datasource/) for building arbitrary dashboards in Grafana. @@ -30,11 +33,12 @@ VictoriaLogs provides the following features: See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#command-line) for details. - It support [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) with high cardinality (e.g. high number of unique values) such as `trace_id`, `user_id` and `ip`. - It is optimized for logs with hundreds of fields (aka [`wide events`](https://jeremymorrell.dev/blog/a-practitioners-guide-to-wide-events/)). -- It supports multitenancy - see [these docs](#multitenancy). +- It supports multitenancy - see [these docs](https://docs.victoriametrics.com/victorialogs/#multitenancy). - It supports out-of-order logs' ingestion aka backfilling. - It supports live tailing for newly ingested logs. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#live-tailing). - It supports selecting surrounding logs in front and after the selected logs. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#stream_context-pipe). - It supports alerting - see [these docs](https://docs.victoriametrics.com/victorialogs/vmalert/). +- It is a good fit for [RUM](https://en.wikipedia.org/wiki/Real_user_monitoring) and [SIEM](https://en.wikipedia.org/wiki/Security_information_and_event_management) use cases. See also [articles about VictoriaLogs](https://docs.victoriametrics.com/victorialogs/articles/). 
@@ -45,7 +49,8 @@ you can join it via [Slack Inviter](https://slack.victoriametrics.com/). See [quick start docs](https://docs.victoriametrics.com/victorialogs/quickstart/) for start working with VictoriaLogs. If you want playing with VictoriaLogs web UI and [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/) query language, -then go to [VictoriaLogs demo playground](https://play-vmlogs.victoriametrics.com/) and to [Grafana plugin playground for VictoriaLogs](https://play-grafana.victoriametrics.com/d/be5zidev72m80f/k8s-logs-via-victorialogs). +then go to [VictoriaLogs demo playground](https://play-vmlogs.victoriametrics.com/) and +to [Grafana plugin playground for VictoriaLogs](https://play-grafana.victoriametrics.com/d/be5zidev72m80f/k8s-logs-via-victorialogs). ## Tuning @@ -105,7 +110,7 @@ For example, the following command starts VictoriaLogs with the retention of 8 w /path/to/victoria-logs -retentionPeriod=8w ``` -See also [retention by disk space usage](#retention-by-disk-space-usage). +See also [retention by disk space usage](https://docs.victoriametrics.com/victorialogs/#retention-by-disk-space-usage). VictoriaLogs stores the [ingested](https://docs.victoriametrics.com/victorialogs/data-ingestion/) logs in per-day partition directories. It automatically drops partition directories outside the configured retention. @@ -138,8 +143,8 @@ VictoriaLogs can be configured to automatically drop older per-day partitions ba ### Absolute disk space limit Use the `-retention.maxDiskSpaceUsageBytes` command-line flag to set a fixed threshold. VictoriaLogs will drop old per-day partitions -if the total size of data at [`-storageDataPath` directory](#storage) becomes bigger than the specified limit. 
-For example, the following command starts VictoriaLogs, which drops old per-day partitions if the total [storage](#storage) size becomes bigger than `100GiB`: +if the total size of data at [`-storageDataPath` directory](https://docs.victoriametrics.com/victorialogs/#storage) becomes bigger than the specified limit. +For example, the following command starts VictoriaLogs, which drops old per-day partitions if the total [storage](https://docs.victoriametrics.com/victorialogs/#storage) size becomes bigger than `100GiB`: ```sh /path/to/victoria-logs -retention.maxDiskSpaceUsageBytes=100GiB @@ -148,7 +153,7 @@ For example, the following command starts VictoriaLogs, which drops old per-day ### Percentage-based disk space limit Use the `-retention.maxDiskUsagePercent` command-line flag to set a dynamic threshold based on the filesystem's total capacity. -VictoriaLogs will drop old per-day partitions if the filesystem containing the [`-storageDataPath` directory](#storage) exceeds the specified percentage usage. +VictoriaLogs will drop old per-day partitions if the filesystem containing the [`-storageDataPath` directory](https://docs.victoriametrics.com/victorialogs/#storage) exceeds the specified percentage usage. For example, the following command starts VictoriaLogs, which drops old per-day partitions if the filesystem usage exceeds 80%: ```sh @@ -168,7 +173,7 @@ VictoriaLogs keeps at least two last days of data in order to guarantee that the This means that the total disk space usage may exceed the configured threshold if the size of the last two days of data exceeds the limit. -The [`-retentionPeriod`](#retention) is applied independently to the disk space usage limits. This means that +The [`-retentionPeriod`](https://docs.victoriametrics.com/victorialogs/#retention) is applied independently to the disk space usage limits. This means that VictoriaLogs automatically drops logs older than 7 days by default if only a disk space usage flag is set. 
Set the `-retentionPeriod` to some big value (e.g. `100y` - 100 years) if logs shouldn't be dropped because of time-based retention. For example: @@ -193,7 +198,7 @@ For example, the following command starts VictoriaLogs, which stores the data at ``` VictoriaLogs automatically creates the `-storageDataPath` directory on the first run if it is missing. VictoriaLogs stores logs -per every day into a spearate subdirectory (aka per-day partition). See [partitions lifecycle](#partitions-lifecycle) for details. +per every day into a separate subdirectory (aka per-day partition). See [partitions lifecycle](https://docs.victoriametrics.com/victorialogs/#partitions-lifecycle) for details. VictoriaLogs switches to cluster mode if `-storageNode` command-line flag is specified: @@ -208,7 +213,7 @@ The ingested logs are stored in per-day subdirectories (partitions) at the `<-st For example, the directory with the name `20250418` contains logs with [`_time` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field) values at April 18, 2025 UTC. This allows flexible data management. -For example, old per-day data is automatically and quickly deleted according to the provided [retention policy](#retention) by removing the corresponding per-day subdirectory (partition). +For example, old per-day data is automatically and quickly deleted according to the provided [retention policy](https://docs.victoriametrics.com/victorialogs/#retention) by removing the corresponding per-day subdirectory (partition). VictoriaLogs supports the following HTTP API endpoints at `victoria-logs:9428` address for managing partitions: @@ -223,11 +228,11 @@ VictoriaLogs supports the following HTTP API endpoints at `victoria-logs:9428` a - `/internal/partition/list` - returns JSON-encoded list of currently active partitions, which can be passed to `/internal/partition/detach` endpoint via `name` query arg. 
- `/internal/partition/snapshot/create?name=YYYYMMDD` - creates a [snapshot](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282) for the partition for the given day `YYYYMMDD`. The endpoint returns a JSON string with the absolute filesystem path to the created snapshot. It is safe to make backups from - the created snapshots according to [these instructions](#backup-and-restore). It is safe removing the created snapshots with `rm -rf` command. + the created snapshots according to [these instructions](https://docs.victoriametrics.com/victorialogs/#backup-and-restore). It is safe removing the created snapshots with `rm -rf` command. It is recommended removing unneeded snapshots on a regular basis in order to free up storage space occupied by these snapshots. - `/internal/partition/snapshot/list` - returns JSON-encoded list of absolute paths to per-day partition snapshots created via `/internal/partition/snapshot/create`. -These endpoints can be protected from unauthorized access via `-partitionManageAuthKey` [command-line flag](#list-of-command-line-flags). +These endpoints can be protected from unauthorized access via `-partitionManageAuthKey` [command-line flag](https://docs.victoriametrics.com/victorialogs/#list-of-command-line-flags). These endpoints can be used also for setting up automated multi-tier storage schemes where recently ingested logs are stored to VictoriaLogs instances with fast NVMe (SSD) disks, while historical logs are gradully migrated to VictoriaLogs instances with slower, but bigger and less expensive HDD disks. @@ -255,7 +260,7 @@ merge for September 21, 2024 partition. The call to `/internal/force_merge` retu Forced merges may require additional CPU, disk IO and storage space resources. It is unnecessary to run forced merge under normal conditions, since VictoriaLogs automatically performs optimal merges in background when new data is ingested into it. 
-The `/internal/force_merge` endpoint can be protected from unauthorized access via `-forceMergeAuthKey` [command-line flag](#list-of-command-line-flags). +The `/internal/force_merge` endpoint can be protected from unauthorized access via `-forceMergeAuthKey` [command-line flag](https://docs.victoriametrics.com/victorialogs/#list-of-command-line-flags). ## Forced flush @@ -268,7 +273,7 @@ It isn't recommended requesting the `/internal/force_flush` HTTP endpoint on a r and slows down data ingestion. It is expected that the `/internal/force_flush` is requested in automated tests, which need querying the recently ingested data. -The `/internal/force_flush` endpoint can be protected from unauthorized access via `-forceFlushAuthKey` [command-line flag](#list-of-command-line-flags). +The `/internal/force_flush` endpoint can be protected from unauthorized access via `-forceFlushAuthKey` [command-line flag](https://docs.victoriametrics.com/victorialogs/#list-of-command-line-flags). ## High Availability @@ -302,7 +307,7 @@ VictoriaLogs stores data into independent per-day partitions. Every partition is The following steps must be performed to make a backup of the given `YYYYMMDD` partition: -1. To create a snapshot for the given per-day partition via `/internal/partition/snapshot/create?name=YYYYMMDD` HTTP endpoint (see [partitions lifecycle](#partitions-lifecycle) docs). +1. To create a snapshot for the given per-day partition via `/internal/partition/snapshot/create?name=YYYYMMDD` HTTP endpoint (see [partitions lifecycle](https://docs.victoriametrics.com/victorialogs/#partitions-lifecycle) docs). This endpoint returns an absolute filesystem path to the created snapshot - ``. 1. To backup the created snapshot with [`rsync`](https://en.wikipedia.org/wiki/Rsync): @@ -319,7 +324,7 @@ The following steps must be performed to make a backup of the given `YYYYMMDD` p The following steps must be performed for restoring the partition data from backup: 1. 
To stop VictoriaLogs instance or to detach the `YYYYMMDD` partition, which is going to be restored from backup, - from the running VictoriaLogs via `/internal/partition/detach?name=YYYYMMDD` HTTP endpoint according to [these docs](#partitions-lifecycle). + from the running VictoriaLogs via `/internal/partition/detach?name=YYYYMMDD` HTTP endpoint according to [these docs](https://docs.victoriametrics.com/victorialogs/#partitions-lifecycle). 1. To copy the partition from backup with `rsync`: @@ -330,7 +335,7 @@ The following steps must be performed for restoring the partition data from back The `--delete` option is required in the command above in order to ensure that the partition contains the full copy of the backup without superfluous files. 1. To start VictoriaLogs instance or to attach the restored partition to the running VictoriaLogs instance via `/internal/partition/attach?name=YYYYMMDD` HTTP endpoint - according to [these docs](#partitions-lifecycle). + according to [these docs](https://docs.victoriametrics.com/victorialogs/#partitions-lifecycle). It is also possible to use **the disk snapshot** feature provided by the operating system or cloud provider in order to perform a backup. @@ -389,9 +394,9 @@ or similar authorization proxies. See [Security and Load balancing docs](https:/ It is recommended protecting internal HTTP endpoints from unauthorized access: -- [`/internal/force_flush`](#forced-flush) - via `-forceFlushAuthKey` [command-line flag](#list-of-command-line-flags). -- [`/internal/force_merge`](#forced-merge) - via `-forceMergeAuthKey` [command-line flag](#list-of-command-line-flags). -- [`/internal/partition/*`](#partitions-lifecycle) - via `-partitionManageAuthKey` [command-line flag](#list-of-command-line-flags). +- [`/internal/force_flush`](https://docs.victoriametrics.com/victorialogs/#forced-flush) - via `-forceFlushAuthKey` [command-line flag](https://docs.victoriametrics.com/victorialogs/#list-of-command-line-flags). 
+- [`/internal/force_merge`](https://docs.victoriametrics.com/victorialogs/#forced-merge) - via `-forceMergeAuthKey` [command-line flag](https://docs.victoriametrics.com/victorialogs/#list-of-command-line-flags). +- [`/internal/partition/*`](https://docs.victoriametrics.com/victorialogs/#partitions-lifecycle) - via `-partitionManageAuthKey` [command-line flag](https://docs.victoriametrics.com/victorialogs/#list-of-command-line-flags). ### mTLS @@ -422,7 +427,7 @@ The following command-line flags must be set in order to enable automatic issuin This functionality can be evaluated for free according to [these docs](https://docs.victoriametrics.com/victoriametrics/enterprise/). -See also [security recommendations](#security). +See also [security recommendations](https://docs.victoriametrics.com/victorialogs/#security). ## Benchmarks @@ -488,6 +493,8 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces. 
-defaultMsgValue string Default value for _msg field if the ingested log entry doesn't contain it; see https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field (default "missing _msg field; see https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field") + -defaultParallelReaders int + Default number of parallel data readers to use for executing every query; higher number of readers may help increasing query performance on high-latency storage such as NFS or S3 at the cost of higher RAM usage; see https://docs.victoriametrics.com/victorialogs/logsql/#parallel_readers-query-option (default 32) -elasticsearch.version string Elasticsearch version to report to client (default "8.9.0") -enableTCP6 @@ -581,6 +588,8 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864) -internalselect.disable Whether to disable /internal/select/* HTTP endpoints + -internalselect.maxConcurrentRequests int + The limit on the number of concurrent requests to /internal/select/* endpoints; other requests are put into the wait queue; see https://docs.victoriametrics.com/victorialogs/cluster/ (default 100) -journald.ignoreFields array Comma-separated list of fields to ignore for logs ingested over journald protocol. See https://docs.victoriametrics.com/victorialogs/data-ingestion/journald/#dropping-fields Supports an array of values separated by comma or specified via multiple flags. 
@@ -686,12 +695,14 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line -retentionPeriod value Log entries with timestamps older than now-retentionPeriod are automatically deleted; log entries with timestamps outside the retention are also rejected during data ingestion; the minimum supported retention is 1d (one day); see https://docs.victoriametrics.com/victorialogs/#retention ; see also -retention.maxDiskSpaceUsageBytes and -retention.maxDiskUsagePercent The following optional suffixes are supported: s (second), h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 7d) + -search.allowPartialResponse + Whether to allow returning partial responses when some of vlstorage nodes from the -storageNode list are unavailable for querying. This flag works only for cluster setup of VictoriaLogs. See https://docs.victoriametrics.com/victorialogs/querying/#partial-responses -search.maxConcurrentRequests int The maximum number of concurrent search requests. It shouldn't be high, since a single request can saturate all the CPU cores, while many concurrently executed requests may require high amounts of memory. See also -search.maxQueryDuration (default 16) -search.maxQueryDuration duration The maximum duration for query execution. It can be overridden to a smaller value on a per-query basis via 'timeout' query arg (default 30s) -search.maxQueryTimeRange duration - The maximum time range, which can be set in the query sent to querying APIs. Queries with bigger time ranges are rejected. See https://docs.victoriametrics.com/victorialogs/querying/#http-api + The maximum time range, which can be set in the query sent to querying APIs. Queries with bigger time ranges are rejected. 
See https://docs.victoriametrics.com/victorialogs/querying/#resource-usage-limits -search.maxQueueDuration duration The maximum time the search request waits for execution when -search.maxConcurrentRequests limit is reached; see also -search.maxQueryDuration (default 10s) -select.disable @@ -751,6 +762,10 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line Optional basic auth username to use for the corresponding -storageNode Supports an array of values separated by comma or specified via multiple flags. Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces. + -storageNode.usernameFile array + Optional path to basic auth username to use for the corresponding -storageNode. The file is re-read every second + Supports an array of values separated by comma or specified via multiple flags. + Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces. -syslog.compressMethod.tcp array Compression method for syslog messages received at the corresponding -syslog.listenAddr.tcp. Supported values: none, gzip, deflate. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#compression Supports an array of values separated by comma or specified via multiple flags. @@ -759,6 +774,10 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line Compression method for syslog messages received at the corresponding -syslog.listenAddr.udp. Supported values: none, gzip, deflate. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#compression Supports an array of values separated by comma or specified via multiple flags. Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces. + -syslog.compressMethod.unix array + Compression method for syslog messages received at the corresponding -syslog.listenAddr.unix. Supported values: none, gzip, deflate. 
See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#compression + Supports an array of values separated by comma or specified via multiple flags. + Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces. -syslog.decolorizeFields.tcp array Fields to remove ANSI color codes across logs ingested via the corresponding -syslog.listenAddr.tcp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#decolorizing-fields Supports an array of values separated by comma or specified via multiple flags. @@ -767,6 +786,10 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line Fields to remove ANSI color codes across logs ingested via the corresponding -syslog.listenAddr.udp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#decolorizing-fields Supports an array of values separated by comma or specified via multiple flags. Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces. + -syslog.decolorizeFields.unix array + Fields to remove ANSI color codes across logs ingested via the corresponding -syslog.listenAddr.unix. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#decolorizing-fields + Supports an array of values separated by comma or specified via multiple flags. + Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces. -syslog.extraFields.tcp array Fields to add to logs ingested via the corresponding -syslog.listenAddr.tcp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#adding-extra-fields Supports an array of values separated by comma or specified via multiple flags. @@ -775,6 +798,10 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line Fields to add to logs ingested via the corresponding -syslog.listenAddr.udp. 
See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#adding-extra-fields Supports an array of values separated by comma or specified via multiple flags. Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces. + -syslog.extraFields.unix array + Fields to add to logs ingested via the corresponding -syslog.listenAddr.unix. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#adding-extra-fields + Supports an array of values separated by comma or specified via multiple flags. + Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces. -syslog.ignoreFields.tcp array Fields to ignore at logs ingested via the corresponding -syslog.listenAddr.tcp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#dropping-fields Supports an array of values separated by comma or specified via multiple flags. @@ -783,12 +810,20 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line Fields to ignore at logs ingested via the corresponding -syslog.listenAddr.udp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#dropping-fields Supports an array of values separated by comma or specified via multiple flags. Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces. + -syslog.ignoreFields.unix array + Fields to ignore at logs ingested via the corresponding -syslog.listenAddr.unix. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#dropping-fields + Supports an array of values separated by comma or specified via multiple flags. + Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces. -syslog.listenAddr.tcp array Comma-separated list of TCP addresses to listen to for Syslog messages. 
See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/ Supports an array of values separated by comma or specified via multiple flags. Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces. -syslog.listenAddr.udp array - Comma-separated list of UDP address to listen to for Syslog messages. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/ + Comma-separated list of UDP addresses to listen to for Syslog messages. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/ + Supports an array of values separated by comma or specified via multiple flags. + Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces. + -syslog.listenAddr.unix array + Comma-separated list of Unix socket filepaths to listen to for Syslog messages. Filepaths may be prepended with 'unixgram:' for listening for SOCK_DGRAM sockets. By default SOCK_STREAM sockets are used. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/ Supports an array of values separated by comma or specified via multiple flags. Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces. -syslog.mtls array @@ -807,6 +842,10 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line Fields to use as log stream labels for logs ingested via the corresponding -syslog.listenAddr.udp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#stream-fields Supports an array of values separated by comma or specified via multiple flags. Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces. + -syslog.streamFields.unix array + Fields to use as log stream labels for logs ingested via the corresponding -syslog.listenAddr.unix. 
See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#stream-fields + Supports an array of values separated by comma or specified via multiple flags. + Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces. -syslog.tenantID.tcp array TenantID for logs ingested via the corresponding -syslog.listenAddr.tcp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#multitenancy Supports an array of values separated by comma or specified via multiple flags. @@ -815,6 +854,10 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line TenantID for logs ingested via the corresponding -syslog.listenAddr.udp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#multitenancy Supports an array of values separated by comma or specified via multiple flags. Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces. + -syslog.tenantID.unix array + TenantID for logs ingested via the corresponding -syslog.listenAddr.unix. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#multitenancy + Supports an array of values separated by comma or specified via multiple flags. + Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces. -syslog.timezone string Timezone to use when parsing timestamps in RFC3164 syslog messages. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 . See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/ (default "Local") -syslog.tls array @@ -843,6 +886,10 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line Whether to use local timestamp instead of the original timestamp for the ingested syslog messages at the corresponding -syslog.listenAddr.udp. 
See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#log-timestamps Supports array of values separated by comma or specified via multiple flags. Empty values are set to false. + -syslog.useLocalTimestamp.unix array + Whether to use local timestamp instead of the original timestamp for the ingested syslog messages at the corresponding -syslog.listenAddr.unix. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#log-timestamps + Supports array of values separated by comma or specified via multiple flags. + Empty values are set to false. -syslog.useRemoteIP.tcp array Whether to add remote ip address as 'remote_ip' log field for syslog messages ingested via the corresponding -syslog.listenAddr.tcp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#capturing-remote-ip-address Supports array of values separated by comma or specified via multiple flags. @@ -851,6 +898,10 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line Whether to add remote ip address as 'remote_ip' log field for syslog messages ingested via the corresponding -syslog.listenAddr.udp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#capturing-remote-ip-address Supports array of values separated by comma or specified via multiple flags. Empty values are set to false. + -syslog.useRemoteIP.unix array + Whether to add remote ip address as 'remote_ip' log field for syslog messages ingested via the corresponding -syslog.listenAddr.unix. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#capturing-remote-ip-address + Supports array of values separated by comma or specified via multiple flags. + Empty values are set to false. -tls array Whether to enable TLS for incoming HTTP requests at the given -httpListenAddr (aka https). -tlsCertFile and -tlsKeyFile must be set if -tls is set. See also -mtls Supports array of values separated by comma or specified via multiple flags. 
diff --git a/docs/victorialogs/Release-Guide.md b/docs/victorialogs/Release-Guide.md index 7cb8ed3025..fee046d97c 100644 --- a/docs/victorialogs/Release-Guide.md +++ b/docs/victorialogs/Release-Guide.md @@ -1,11 +1,11 @@ --- -weight: 501 -title: Release process guidance for VictoriaLogs +weight: 100 +title: Release Process Guidance for VictoriaLogs menu: docs: parent: victorialogs identifier: victorialogs-release-process-guidance - weight: 501 + weight: 100 tags: [] aliases: - /victorialogs/release-guide/index.html diff --git a/docs/victorialogs/Roadmap.md b/docs/victorialogs/Roadmap.md index 372812b2b1..982c8945e5 100644 --- a/docs/victorialogs/Roadmap.md +++ b/docs/victorialogs/Roadmap.md @@ -1,11 +1,11 @@ --- -weight: 8 +weight: 102 title: Roadmap disableToc: true menu: docs: parent: "victorialogs" - weight: 8 + weight: 102 title: Roadmap tags: - logs diff --git a/docs/victorialogs/cluster.md b/docs/victorialogs/cluster.md index 1cb43b07cd..8859e92607 100644 --- a/docs/victorialogs/cluster.md +++ b/docs/victorialogs/cluster.md @@ -1,11 +1,11 @@ --- -weight: 20 -title: VictoriaLogs cluster +weight: 3 +title: VictoriaLogs Cluster menu: docs: parent: victorialogs identifier: vl-cluster - weight: 20 + weight: 3 title: VictoriaLogs cluster tags: - logs @@ -24,10 +24,10 @@ then it is preferred to do this instead of switching to cluster mode, since a si The migration path from a single-node VictoriaLogs to cluster mode is very easy - just [upgrade](https://docs.victoriametrics.com/victorialogs/#upgrading) a single-node VictoriaLogs executable to the [latest available release](https://docs.victoriametrics.com/victorialogs/changelog/) and add it to the list of `vlstorage` nodes -passed via `-storageNode` command-line flag to `vlinsert` and `vlselect` components of the cluster mode. See [cluster architecture](#architecture) +passed via `-storageNode` command-line flag to `vlinsert` and `vlselect` components of the cluster mode. 
See [cluster architecture](https://docs.victoriametrics.com/victorialogs/cluster/#architecture) for more details about VictoriaLogs cluster components. -See [quick start guide](#quick-start) on how to start working with VictoriaLogs cluster. +See [quick start guide](https://docs.victoriametrics.com/victorialogs/cluster/#quick-start) on how to start working with VictoriaLogs cluster. ## Architecture @@ -69,7 +69,7 @@ sequenceDiagram - It handles queries from `vlselect` by retrieving and transforming the requested data locally before returning results. Each `vlstorage` node operates as a self-contained VictoriaLogs instance. -Refer to the [single-node and cluster mode duality](#single-node-and-cluster-mode-duality) documentation for more information. +Refer to the [single-node and cluster mode duality](https://docs.victoriametrics.com/victorialogs/cluster/#single-node-and-cluster-mode-duality) documentation for more information. This design allows you to reuse existing single-node VictoriaLogs instances by listing them in the `-storageNode` flag for `vlselect`, enabling unified querying across all nodes. All VictoriaLogs components are horizontally scalable and can be deployed on hardware best suited to their respective workloads. @@ -85,30 +85,43 @@ This HTTP-based communication model allows you to use reverse proxies for author Use of [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/) is recommended for managing access control. See [Security and Load balancing docs](https://docs.victoriametrics.com/victorialogs/security-and-lb/) for details. -For advanced setups, refer to the [multi-level cluster setup](#multi-level-cluster-setup) documentation. +For advanced setups, refer to the [multi-level cluster setup](https://docs.victoriametrics.com/victorialogs/cluster/#multi-level-cluster-setup) documentation. 
## High availability -In the cluster setup, the following rules apply: - -- The `vlselect` component requires **all relevant vlstorage nodes to be available** in order to return complete and correct query results. - - - If even one of the vlstorage nodes is temporarily unavailable, `vlselect` cannot safely return a full response, since some of the required data may reside on the missing node. Rather than risk delivering partial or misleading query results, which can cause confusion, trigger false alerts, or produce incorrect metrics, VictoriaLogs chooses to return an error instead. - -- The `vlinsert` component continues to function normally when some vlstorage nodes are unavailable. It automatically routes new logs to the remaining available nodes to ensure that data ingestion remains uninterrupted and newly received logs are not lost. - -> [!NOTE] Insight -> In most real-world cases, `vlstorage` nodes become unavailable during planned maintenance such as upgrades, config changes, or rolling restarts. These are typically infrequent (weekly or monthly) and brief (a few minutes). -> A short period of query downtime during such events is acceptable and fits well within most SLAs. For example, 60 minutes of downtime per month still provides around 99.86% availability, which often outperforms complex HA setups that rely on opaque auto-recovery and may fail unpredictably. - -VictoriaLogs itself does not handle replication at the storage level. Instead, it relies on an external log shipper, such as [vector](https://docs.victoriametrics.com/victorialogs/data-ingestion/vector/) or [vlagent](https://docs.victoriametrics.com/victorialogs/vlagent/), to send the same log stream to multiple independent VictoriaLogs instances: +VictoriaLogs cluster provides high availability for [data ingestion path](https://docs.victoriametrics.com/victorialogs/data-ingestion/). +It continues accepting incoming logs if some of `vlstorage` nodes are temporarily unavailable. 
+`vlinsert` evenly spreads new logs among the remaining available `vlstorage` nodes in this case, so newly ingested logs are properly stored and are available for querying +without any delays. This allows performing maintenance tasks for `vlstorage` nodes (such as upgrades, configuration updates, etc.) without worrying about data loss. +Make sure that the remaining `vlstorage` nodes have enough capacity for the increased data ingestion workload, in order to avoid availability problems. + +VictoriaLogs cluster returns `502 Bad Gateway` errors for [incoming queries](https://docs.victoriametrics.com/victorialogs/querying/) +if some of `vlstorage` nodes are unavailable. This guarantees consistent query responses +(e.g. all the stored logs are taken into account during the query) during maintenance tasks at `vlstorage` nodes. Note that all the newly incoming logs are properly stored +to the remaining `vlstorage` nodes - see the paragraph above, so they become available for querying immediately after all the `vlstorage` nodes return to the cluster. + +There are practical cases when it is preferred to return partial responses instead of `502 Bad Gateway` errors if some of `vlstorage` nodes are unavailable. +See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#partial-responses) on how to achieve this. + +> [!NOTE] Insight +> In most real-world cases, `vlstorage` nodes become unavailable during planned maintenance such as upgrades, config changes, or rolling restarts. +> These are typically infrequent (weekly or monthly) and brief (a few minutes) events. +> A short period of query downtime during maintenance tasks is acceptable and fits well within most SLAs. For example, 43 minutes of downtime per month during maintenance tasks +> provides ~99.9% cluster availability.
This is better in practice compared to "magic" HA schemes with opaque auto-recovery - if these schemes fail, +> then it is impossible to debug and fix them in a timely manner, so this will likely result in a long outage, which violates SLAs. + +The real HA scheme for both data ingestion and querying can be built only when copies of logs are sent into independent VictoriaLogs instances (or clusters) +located in fully independent availability zones (datacenters). If an AZ becomes unavailable, then new logs continue to be written to the remaining AZ, +while queries return full responses from the remaining AZ. When the AZ becomes available, then the pending buffered logs can be written to it, so the AZ +can be used for querying full responses. This HA scheme can be built with the help of [vlagent](https://docs.victoriametrics.com/victorialogs/vlagent/) +for data replication and buffering, and [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/) for data querying: ```mermaid graph TD subgraph "HA Solution" subgraph "Ingestion Layer" LS["Log Sources
(Applications)"] - VECTOR["Log Collector
• Buffering
• Replication
• Delivery Guarantees"] + VLAGENT["Log Collector
• Buffering
• Replication
• Delivery Guarantees"] end subgraph "Storage Layer" @@ -126,15 +139,15 @@ graph TD QC["Query Clients
(Grafana, API)"] end - LS --> VECTOR - VECTOR -->|"Replicate logs to
Zone A cluster"| VLA - VECTOR -->|"Replicate logs to
Zone B cluster"| VLB + LS --> VLAGENT + VLAGENT -->|"Replicate logs to
Zone A cluster"| VLA + VLAGENT -->|"Replicate logs to
Zone B cluster"| VLB VLA -->|"Serve queries from
Zone A cluster"| LB VLB -->|"Serve queries from
Zone B cluster"| LB LB --> QC - style VECTOR fill:#e8f5e8 + style VLAGENT fill:#e8f5e8 style VLA fill:#d5e8d4 style VLB fill:#d5e8d4 style LB fill:#e1f5fe @@ -143,18 +156,17 @@ graph TD end ``` -In this HA solution: - -- A log shipper at the top receives logs and replicates them in parallel to two VictoriaLogs clusters. - - If one cluster fails completely (i.e., **all** of its storage nodes become unavailable), the log shipper continues to send logs to the remaining healthy cluster and buffers any logs that cannot be delivered. When the failed cluster becomes available again, the log shipper resumes sending both buffered and new logs to it. -- On the read path, a load balancer (e.g., [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/)) sits in front of the VictoriaLogs clusters and routes query requests to any healthy cluster. - - If one cluster fails (i.e., **at least one** of its storage nodes is unavailable), the load balancer detects this and automatically redirects all query traffic to the remaining healthy cluster. +- [vlagent](https://docs.victoriametrics.com/victorialogs/vlagent/) receives and replicates logs to two VictoriaLogs clusters. + If one cluster becomes unavailable, the log shipper continues sending logs to the remaining healthy cluster. It also buffers logs that cannot be delivered to the unavailable cluster. + When the failed cluster becomes available again, the log shipper sends the buffered logs and then resumes sending new logs to it. This guarantees that both clusters have full copies + of all the ingested logs. +- [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/) routes query requests to healthy VictoriaLogs clusters. + If one cluster becomes unavailable, `vmauth` detects this and automatically redirects all query traffic to the remaining healthy cluster. -There's no hidden coordination logic or consensus algorithm. 
You can scale it horizontally and operate it safely, even in bare-metal Kubernetes clusters using local PVs, -as long as the log shipper handles reliable replication and buffering. +There is no magic coordination logic or consensus algorithms in this scheme. This simplifies managing and troubleshooting this HA scheme. See also [Security and Load balancing docs](https://docs.victoriametrics.com/victorialogs/security-and-lb/). - + ## Single-node and cluster mode duality Every `vlstorage` node can be used as a single-node VictoriaLogs instance: @@ -167,7 +179,7 @@ A single-node VictoriaLogs instance can be used as `vlstorage` node in VictoriaL - It accepts data ingestion requests from `vlinsert` via `/internal/insert` HTTP endpoint at the TCP port specified via `-httpListenAddr` command-line flag. - It accepts queries from `vlselect` via `/internal/select/*` HTTP endpoints at the TCP port specified via `-httpListenAddr` command-line flags. -See also [security docs](#security). +See also [security docs](https://docs.victoriametrics.com/victorialogs/cluster/#security). ## Multi-level cluster setup @@ -177,7 +189,7 @@ See also [security docs](#security). - `vlselect` can send queries to other `vlselect` nodes if they are specified via `-storageNode` command-line flag. This allows building multi-level cluster schemes when top-level `vlselect` queries multiple lower-level clusters of VictoriaLogs. -See [security docs](#security) on how to protect communications between multiple levels of `vlinsert` and `vlselect` nodes. +See [security docs](https://docs.victoriametrics.com/victorialogs/cluster/#security) on how to protect communications between multiple levels of `vlinsert` and `vlselect` nodes. 
## Security @@ -239,13 +251,13 @@ It is recommended to disable select endpoints on `vlinsert` and insert endpoints This helps prevent sending select requests to `vlinsert` nodes or insert requests to `vlselect` nodes in case of misconfiguration in the authorization proxy in front of the `vlinsert` and `vlselect` nodes. -See also [mTLS](#mtls). +See also [mTLS](https://docs.victoriametrics.com/victorialogs/cluster/#mtls). ### mTLS [Enterprise version of VictoriaLogs](https://docs.victoriametrics.com/victoriametrics/enterprise/) supports the ability to verify client TLS certificates at the `vlstorage` side for TLS connections established from `vlinsert` and `vlselect` nodes (aka [mTLS](https://en.wikipedia.org/wiki/Mutual_authentication#mTLS)). -See [TLS docs](#tls) for details on how to set up TLS communications between VictoriaLogs cluster nodes. +See [TLS docs](https://docs.victoriametrics.com/victorialogs/cluster/#tls) for details on how to set up TLS communications between VictoriaLogs cluster nodes. mTLS authentication can be enabled by passing the `-mtls` command-line flag to `vlstorage` node additionally to the `-tls` command-line flag. In this case it verifies TLS client certificates for connections from `vlinsert` and `vlselect` at the address specified via `-httpListenAddr` command-line flag. @@ -266,18 +278,18 @@ The following guide covers the following topics for Linux host: - How to download VictoriaLogs executable. - How to start VictoriaLogs cluster, which consists of two `vlstorage` nodes, a single `vlinsert` node and a single `vlselect` node - running on a localhost according to [cluster architecture](#architecture). + running on a localhost according to [cluster architecture](https://docs.victoriametrics.com/victorialogs/cluster/#architecture). - How to ingest logs into the cluster. - How to query the ingested logs. 
Download and unpack the latest VictoriaLogs release: ```sh -curl -L -O https://github.com/VictoriaMetrics/VictoriaLogs/releases/download/v1.32.0/victoria-logs-linux-amd64-v1.32.0.tar.gz -tar xzf victoria-logs-linux-amd64-v1.32.0.tar.gz +curl -L -O https://github.com/VictoriaMetrics/VictoriaLogs/releases/download/v1.34.0/victoria-logs-linux-amd64-v1.34.0.tar.gz +tar xzf victoria-logs-linux-amd64-v1.34.0.tar.gz ``` -Start the first [`vlstorage` node](#architecture), which accepts incoming requests at the port `9491` and stores the ingested logs at `victoria-logs-data-1` directory: +Start the first [`vlstorage` node](https://docs.victoriametrics.com/victorialogs/cluster/#architecture), which accepts incoming requests at the port `9491` and stores the ingested logs at `victoria-logs-data-1` directory: ```sh ./victoria-logs-prod -httpListenAddr=:9491 -storageDataPath=victoria-logs-data-1 & @@ -330,7 +342,7 @@ Logs also can be explored and queried via [built-in Web UI](https://docs.victori Open `http://localhost:9471/select/vmui/` in the web browser, select `last 7 days` time range in the top right corner and explore the ingested logs. See [LogsQL docs](https://docs.victoriametrics.com/victorialogs/logsql/) to familiarize yourself with the query language. -Every `vlstorage` node can be queried individually because [it is equivalent to a single-node VictoriaLogs](#single-node-and-cluster-mode-duality). +Every `vlstorage` node can be queried individually because [it is equivalent to a single-node VictoriaLogs](https://docs.victoriametrics.com/victorialogs/cluster/#single-node-and-cluster-mode-duality). For example, the following command returns the number of stored logs at the first `vlstorage` node started above: ```sh @@ -339,7 +351,7 @@ curl http://localhost:9491/select/logsql/query -d 'query=* | count()' It is recommended reading [key concepts](https://docs.victoriametrics.com/victorialogs/keyconcepts/) before you start working with VictoriaLogs. 
-See also [security docs](#security). +See also [security docs](https://docs.victoriametrics.com/victorialogs/cluster/#security). ## Performance tuning diff --git a/docs/victorialogs/data-ingestion/DataDogAgent.md b/docs/victorialogs/data-ingestion/DataDogAgent.md index 625eb69632..8b0bbdd573 100644 --- a/docs/victorialogs/data-ingestion/DataDogAgent.md +++ b/docs/victorialogs/data-ingestion/DataDogAgent.md @@ -1,6 +1,6 @@ --- weight: 5 -title: DataDog Agent setup +title: DataDog Agent Setup disableToc: true menu: docs: diff --git a/docs/victorialogs/data-ingestion/Filebeat.md b/docs/victorialogs/data-ingestion/Filebeat.md index 8c8b489392..4782409e5f 100644 --- a/docs/victorialogs/data-ingestion/Filebeat.md +++ b/docs/victorialogs/data-ingestion/Filebeat.md @@ -1,6 +1,6 @@ --- weight: 1 -title: Filebeat setup +title: Filebeat Setup disableToc: true menu: docs: diff --git a/docs/victorialogs/data-ingestion/Fluentbit.md b/docs/victorialogs/data-ingestion/Fluentbit.md index 11d587595d..e3db992155 100644 --- a/docs/victorialogs/data-ingestion/Fluentbit.md +++ b/docs/victorialogs/data-ingestion/Fluentbit.md @@ -1,6 +1,6 @@ --- weight: 2 -title: Fluentbit setup +title: Fluentbit Setup disableToc: true menu: docs: diff --git a/docs/victorialogs/data-ingestion/Fluentd.md b/docs/victorialogs/data-ingestion/Fluentd.md index 556eab676a..353205ee69 100644 --- a/docs/victorialogs/data-ingestion/Fluentd.md +++ b/docs/victorialogs/data-ingestion/Fluentd.md @@ -1,6 +1,6 @@ --- weight: 2 -title: Fluentd setup +title: Fluentd Setup disableToc: true menu: docs: diff --git a/docs/victorialogs/data-ingestion/Journald.md b/docs/victorialogs/data-ingestion/Journald.md index 708f65d0fc..60f27c3635 100644 --- a/docs/victorialogs/data-ingestion/Journald.md +++ b/docs/victorialogs/data-ingestion/Journald.md @@ -1,6 +1,6 @@ --- weight: 10 -title: Journald setup +title: Journald Setup disableToc: true menu: docs: diff --git a/docs/victorialogs/data-ingestion/Logstash.md 
b/docs/victorialogs/data-ingestion/Logstash.md index e65985070c..85cf8761ad 100644 --- a/docs/victorialogs/data-ingestion/Logstash.md +++ b/docs/victorialogs/data-ingestion/Logstash.md @@ -1,6 +1,6 @@ --- weight: 3 -title: Logstash setup +title: Logstash Setup disableToc: true menu: docs: @@ -14,8 +14,8 @@ aliases: --- VictoriaLogs supports given below Logstash outputs: -- [Elasticsearch](#elasticsearch) -- [HTTP JSON](#http) +- [Elasticsearch](https://docs.victoriametrics.com/victorialogs/data-ingestion/logstash/#elasticsearch) +- [HTTP JSON](https://docs.victoriametrics.com/victorialogs/data-ingestion/logstash/#http) ## Elasticsearch diff --git a/docs/victorialogs/data-ingestion/Promtail.md b/docs/victorialogs/data-ingestion/Promtail.md index 39660cabc1..772291bbab 100644 --- a/docs/victorialogs/data-ingestion/Promtail.md +++ b/docs/victorialogs/data-ingestion/Promtail.md @@ -1,6 +1,6 @@ --- weight: 4 -title: Promtail setup +title: Promtail Setup disableToc: true menu: docs: diff --git a/docs/victorialogs/data-ingestion/README.md b/docs/victorialogs/data-ingestion/README.md index 40b6b03017..f6179bb562 100644 --- a/docs/victorialogs/data-ingestion/README.md +++ b/docs/victorialogs/data-ingestion/README.md @@ -24,20 +24,20 @@ The ingested logs can be queried according to [these docs](https://docs.victoria See also: -- [Log collectors and data ingestion formats](#log-collectors-and-data-ingestion-formats). -- [Data ingestion troubleshooting](#troubleshooting). +- [Log collectors and data ingestion formats](https://docs.victoriametrics.com/victorialogs/data-ingestion/#log-collectors-and-data-ingestion-formats). +- [Data ingestion troubleshooting](https://docs.victoriametrics.com/victorialogs/data-ingestion/#troubleshooting). ## HTTP APIs VictoriaLogs supports the following data ingestion HTTP APIs: -- Elasticsearch bulk API. See [these docs](#elasticsearch-bulk-api). -- JSON stream API aka [ndjson](https://jsonlines.org/). See [these docs](#json-stream-api). 
-- Loki JSON API. See [these docs](#loki-json-api). -- OpenTelemetry API. See [these docs](#opentelemetry-api). +- Elasticsearch bulk API. See [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/#elasticsearch-bulk-api). +- JSON stream API aka [ndjson](https://jsonlines.org/). See [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/#json-stream-api). +- Loki JSON API. See [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/#loki-json-api). +- OpenTelemetry API. See [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/#opentelemetry-api). - Journald export format. -VictoriaLogs accepts optional [HTTP parameters](#http-parameters) at data ingestion HTTP APIs. +VictoriaLogs accepts optional [HTTP parameters](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters) at data ingestion HTTP APIs. ### Elasticsearch bulk API @@ -70,7 +70,7 @@ Otherwise the timestamp field must be in one of the following formats: See [these docs](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) for details on fields, which must be present in the ingested log messages. -The API accepts various http parameters, which can change the data ingestion behavior - [these docs](#http-parameters) for details. +The API accepts various http parameters, which can change the data ingestion behavior - [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters) for details. The following command verifies that the data has been successfully ingested to VictoriaLogs by [querying](https://docs.victoriametrics.com/victorialogs/querying/) it: @@ -91,8 +91,8 @@ The duration of requests to `/insert/elasticsearch/_bulk` can be monitored with See also: -- [How to debug data ingestion](#troubleshooting). -- [HTTP parameters, which can be passed to the API](#http-parameters). 
+- [How to debug data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/#troubleshooting). +- [HTTP parameters, which can be passed to the API](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters). - [How to query VictoriaLogs](https://docs.victoriametrics.com/victorialogs/querying/). ### JSON stream API @@ -130,7 +130,7 @@ Otherwise the timestamp field must be in one of the following formats: See [these docs](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) for details on fields, which must be present in the ingested log messages. -The API accepts various http parameters, which can change the data ingestion behavior - [these docs](#http-parameters) for details. +The API accepts various http parameters, which can change the data ingestion behavior - [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters) for details. The following command verifies that the data has been successfully ingested into VictoriaLogs by [querying](https://docs.victoriametrics.com/victorialogs/querying/) it: @@ -153,8 +153,8 @@ The duration of requests to `/insert/jsonline` can be monitored with [`vl_http_r See also: -- [How to debug data ingestion](#troubleshooting). -- [HTTP parameters, which can be passed to the API](#http-parameters). +- [How to debug data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/#troubleshooting). +- [HTTP parameters, which can be passed to the API](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters). - [How to query VictoriaLogs](https://docs.victoriametrics.com/victorialogs/querying/). ### Loki JSON API @@ -185,7 +185,7 @@ The command should return the following response: The response by default contains all the [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). 
See [how to query specific fields](https://docs.victoriametrics.com/victorialogs/logsql/#querying-specific-fields). -The `/insert/loki/api/v1/push` accepts various http parameters, which can change the data ingestion behavior - [these docs](#http-parameters) for details. +The `/insert/loki/api/v1/push` accepts various http parameters, which can change the data ingestion behavior - [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters) for details. There is no need in specifying `_msg_field` and `_time_field` query args, since VictoriaLogs automatically extracts log message and timestamp from the ingested Loki data. The `_stream_fields` arg is optional. If it isn't set, then all the labels inside the `"stream":{...}` are treated @@ -202,8 +202,8 @@ The duration of requests to `/insert/loki/api/v1/push` can be monitored with [`v See also: -- [How to debug data ingestion](#troubleshooting). -- [HTTP parameters, which can be passed to the API](#http-parameters). +- [How to debug data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/#troubleshooting). +- [HTTP parameters, which can be passed to the API](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters). - [How to query VictoriaLogs](https://docs.victoriametrics.com/victorialogs/querying/). ### Opentelemetry API @@ -214,12 +214,12 @@ See more details [in these docs](https://docs.victoriametrics.com/victorialogs/d ### HTTP parameters VictoriaLogs accepts the following configuration parameters via [HTTP headers](https://en.wikipedia.org/wiki/List_of_HTTP_header_fields) -or via [HTTP query string args](https://en.wikipedia.org/wiki/Query_string) at [data ingestion HTTP APIs](#http-apis). +or via [HTTP query string args](https://en.wikipedia.org/wiki/Query_string) at [data ingestion HTTP APIs](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-apis). HTTP query string parameters have priority over HTTP Headers. 
#### HTTP Query string parameters -All the [HTTP-based data ingestion protocols](#http-apis) support the following [HTTP query string](https://en.wikipedia.org/wiki/Query_string) args: +All the [HTTP-based data ingestion protocols](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-apis) support the following [HTTP query string](https://en.wikipedia.org/wiki/Query_string) args: - `_msg_field` - the name of the [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) containing [log message](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field). @@ -260,12 +260,12 @@ All the [HTTP-based data ingestion protocols](#http-apis) support the following - `debug` - if this arg is set to `1`, then the ingested logs aren't stored in VictoriaLogs. Instead, the ingested data is logged by VictoriaLogs, so it can be investigated later. -See also [HTTP headers](#http-headers). +See also [HTTP headers](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-headers). #### HTTP headers -All the [HTTP-based data ingestion protocols](#http-apis) support the following [HTTP Headers](https://en.wikipedia.org/wiki/List_of_HTTP_header_fields) -additionally to [HTTP query args](#http-query-string-parameters): +All the [HTTP-based data ingestion protocols](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-apis) support the following [HTTP Headers](https://en.wikipedia.org/wiki/List_of_HTTP_header_fields) +additionally to [HTTP query args](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-query-string-parameters): - `AccountID` - accountID of the tenant to ingest data to. See [multitenancy docs](https://docs.victoriametrics.com/victorialogs/#multitenancy) for details. @@ -310,7 +310,7 @@ additionally to [HTTP query args](#http-query-string-parameters): - `VL-Debug` - if this parameter is set to `1`, then the ingested logs aren't stored in VictoriaLogs. 
Instead, the ingested data is logged by VictoriaLogs, so it can be investigated later. -See also [HTTP Query string parameters](#http-query-string-parameters). +See also [HTTP Query string parameters](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-query-string-parameters). ## Decolorizing @@ -318,7 +318,7 @@ If the ingested logs contain [ANSI color codes](https://en.wikipedia.org/wiki/AN storing the logs in VictoriaLogs. This simplifies further querying and analysis of such logs. Decolorizing can be done either at the log collector / shipper side or at the VictoriaLogs side with `decolorize_fields` HTTP query arg -and `VL-Decolorize-Fields` HTTP request header according to [these docs](#http-parameters). +and `VL-Decolorize-Fields` HTTP request header according to [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters). ## Troubleshooting @@ -343,14 +343,14 @@ VictoriaLogs provides the following command-line flags, which can help debugging This may help debugging [high cardinality issues](https://docs.victoriametrics.com/victorialogs/keyconcepts/#high-cardinality). - `-logIngestedRows` - if this flag is passed to VictoriaLogs, then it logs all the ingested [log entries](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). - See also `debug` [parameter](#http-parameters). + See also `debug` [parameter](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters). VictoriaLogs exposes various [metrics](https://docs.victoriametrics.com/victorialogs/metrics/), which may help debugging data ingestion issues: - [`vl_rows_ingested_total`](https://docs.victoriametrics.com/victorialogs/metrics/#vl_rows_ingested_total) - the number of ingested [log entries](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) since the last VictoriaLogs restart. If this number increases over time, then logs are successfully ingested into VictoriaLogs. 
The ingested logs can be inspected in the following ways: - - By passing `debug=1` parameter to every request to [data ingestion APIs](#http-apis). The ingested rows aren't stored in VictoriaLogs + - By passing `debug=1` parameter to every request to [data ingestion APIs](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-apis). The ingested rows aren't stored in VictoriaLogs in this case. Instead, they are logged, so they can be investigated later. The [`vl_rows_dropped_total`](https://docs.victoriametrics.com/victorialogs/metrics/#vl_rows_dropped_total) metric is incremented for each logged row. - By passing `-logIngestedRows` command-line flag to VictoriaLogs. In this case it logs all the ingested data, so it can be investigated later. @@ -363,17 +363,17 @@ VictoriaLogs exposes various [metrics](https://docs.victoriametrics.com/victoria Here is the list of log collectors and their ingestion formats supported by VictoriaLogs: -| How to setup the collector | Format: Elasticsearch | Format: JSON Stream | Format: Loki | Format: syslog | Format: OpenTelemetry | Format: Journald | Format: DataDog | -|----------------------------|-----------------------|---------------------|--------------|----------------|-----------------------|------------------|-----------------| -| [Rsyslog](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/) | [Yes](https://www.rsyslog.com/doc/configuration/modules/omelasticsearch.html) | No | No | [Yes](https://www.rsyslog.com/doc/configuration/modules/omfwd.html) | No | No | No | -| [Syslog-ng](https://docs.victoriametrics.com/victorialogs/data-ingestion/filebeat/) | Yes, [v1](https://support.oneidentity.com/technical-documents/syslog-ng-open-source-edition/3.16/administration-guide/28#TOPIC-956489), [v2](https://support.oneidentity.com/technical-documents/syslog-ng-open-source-edition/3.16/administration-guide/29#TOPIC-956494) | No | No | 
[Yes](https://support.oneidentity.com/technical-documents/syslog-ng-open-source-edition/3.16/administration-guide/44#TOPIC-956553) | No | No | No | -| [Filebeat](https://docs.victoriametrics.com/victorialogs/data-ingestion/filebeat/) | [Yes](https://www.elastic.co/guide/en/beats/filebeat/current/elasticsearch-output.html) | No | No | No | No | No | No | -| [Fluentbit](https://docs.victoriametrics.com/victorialogs/data-ingestion/fluentbit/) | No | [Yes](https://docs.fluentbit.io/manual/pipeline/outputs/http) | [Yes](https://docs.fluentbit.io/manual/pipeline/outputs/loki) | [Yes](https://docs.fluentbit.io/manual/pipeline/outputs/syslog) | [Yes](https://docs.fluentbit.io/manual/pipeline/outputs/opentelemetry) | No | [Yes](https://docs.fluentbit.io/manual/pipeline/outputs/datadog) | -| [Logstash](https://docs.victoriametrics.com/victorialogs/data-ingestion/logstash/) | [Yes](https://www.elastic.co/guide/en/logstash/current/plugins-outputs-elasticsearch.html) | No | No | [Yes](https://www.elastic.co/guide/en/logstash/current/plugins-outputs-syslog.html) | [Yes](https://github.com/paulgrav/logstash-output-opentelemetry) | No | [Yes](https://www.elastic.co/guide/en/logstash/current/plugins-outputs-datadog.html) | -| [Vector](https://docs.victoriametrics.com/victorialogs/data-ingestion/vector/) | [Yes](https://vector.dev/docs/reference/configuration/sinks/elasticsearch/) | [Yes](https://vector.dev/docs/reference/configuration/sinks/http/) | [Yes](https://vector.dev/docs/reference/configuration/sinks/loki/) | No | No | No | [Yes](https://vector.dev/docs/reference/configuration/sinks/datadog_logs/) | -| [Promtail](https://docs.victoriametrics.com/victorialogs/data-ingestion/promtail/) | No | No | [Yes](https://grafana.com/docs/loki/latest/clients/promtail/configuration/#clients) | No | No | No | No | -| [OpenTelemetry Collector](https://opentelemetry.io/docs/collector/) | 
[Yes](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/exporter/elasticsearchexporter) | No | [Yes](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/exporter/lokiexporter) | [Yes](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/exporter/syslogexporter) | [Yes](https://github.com/open-telemetry/opentelemetry-collector/tree/main/exporter/otlphttpexporter) | No | [Yes](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/exporter/datadogexporter) | -| [Telegraf](https://docs.victoriametrics.com/victorialogs/data-ingestion/telegraf/) | [Yes](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/elasticsearch) | [Yes](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/http) | [Yes](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/loki) | [Yes](https://github.com/influxdata/telegraf/blob/master/plugins/outputs/syslog) | Yes | No | No | -| [Fluentd](https://docs.victoriametrics.com/victorialogs/data-ingestion/fluentd/) | [Yes](https://github.com/uken/fluent-plugin-elasticsearch) | [Yes](https://docs.fluentd.org/output/http) | [Yes](https://grafana.com/docs/loki/latest/send-data/fluentd/) | [Yes](https://github.com/fluent-plugins-nursery/fluent-plugin-remote_syslog) | No | No | No | -| [Journald](https://docs.victoriametrics.com/victorialogs/data-ingestion/journald/) | No | No | No | No | No | Yes | No | -| [DataDog Agent](https://docs.victoriametrics.com/victorialogs/data-ingestion/datadog-agent) | No | No | No | No | No | No | Yes | +| How to setup the collector | Format: Elasticsearch | Format: JSON Stream | Format: Loki | Format: syslog | Format: OpenTelemetry | Format: Journald | Format: DataDog | 
+|----------------------------------------------------------------------------------------------|-----------------------|---------------------|--------------|----------------|-----------------------|------------------|-----------------| +| [Rsyslog](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/) | [Yes](https://www.rsyslog.com/doc/configuration/modules/omelasticsearch.html) | No | No | [Yes](https://www.rsyslog.com/doc/configuration/modules/omfwd.html) | No | No | No | +| [Syslog-ng](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#syslog-ng) | Yes, [v1](https://support.oneidentity.com/technical-documents/syslog-ng-open-source-edition/3.16/administration-guide/28#TOPIC-956489), [v2](https://support.oneidentity.com/technical-documents/syslog-ng-open-source-edition/3.16/administration-guide/29#TOPIC-956494) | No | No | [Yes](https://support.oneidentity.com/technical-documents/syslog-ng-open-source-edition/3.16/administration-guide/44#TOPIC-956553) | No | No | No | +| [Filebeat](https://docs.victoriametrics.com/victorialogs/data-ingestion/filebeat/) | [Yes](https://www.elastic.co/guide/en/beats/filebeat/current/elasticsearch-output.html) | No | No | No | No | No | No | +| [Fluentbit](https://docs.victoriametrics.com/victorialogs/data-ingestion/fluentbit/) | No | [Yes](https://docs.fluentbit.io/manual/pipeline/outputs/http) | [Yes](https://docs.fluentbit.io/manual/pipeline/outputs/loki) | [Yes](https://docs.fluentbit.io/manual/pipeline/outputs/syslog) | [Yes](https://docs.fluentbit.io/manual/pipeline/outputs/opentelemetry) | No | [Yes](https://docs.fluentbit.io/manual/pipeline/outputs/datadog) | +| [Logstash](https://docs.victoriametrics.com/victorialogs/data-ingestion/logstash/) | [Yes](https://www.elastic.co/guide/en/logstash/current/plugins-outputs-elasticsearch.html) | No | No | [Yes](https://www.elastic.co/guide/en/logstash/current/plugins-outputs-syslog.html) | [Yes](https://github.com/paulgrav/logstash-output-opentelemetry) |
No | [Yes](https://www.elastic.co/guide/en/logstash/current/plugins-outputs-datadog.html) | +| [Vector](https://docs.victoriametrics.com/victorialogs/data-ingestion/vector/) | [Yes](https://vector.dev/docs/reference/configuration/sinks/elasticsearch/) | [Yes](https://vector.dev/docs/reference/configuration/sinks/http/) | [Yes](https://vector.dev/docs/reference/configuration/sinks/loki/) | No | No | No | [Yes](https://vector.dev/docs/reference/configuration/sinks/datadog_logs/) | +| [Promtail](https://docs.victoriametrics.com/victorialogs/data-ingestion/promtail/) | No | No | [Yes](https://grafana.com/docs/loki/latest/clients/promtail/configuration/#clients) | No | No | No | No | +| [OpenTelemetry Collector](https://opentelemetry.io/docs/collector/) | [Yes](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/exporter/elasticsearchexporter) | No | [Yes](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/exporter/lokiexporter) | [Yes](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/exporter/syslogexporter) | [Yes](https://github.com/open-telemetry/opentelemetry-collector/tree/main/exporter/otlphttpexporter) | No | [Yes](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/exporter/datadogexporter) | +| [Telegraf](https://docs.victoriametrics.com/victorialogs/data-ingestion/telegraf/) | [Yes](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/elasticsearch) | [Yes](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/http) | [Yes](https://github.com/influxdata/telegraf/tree/master/plugins/outputs/loki) | [Yes](https://github.com/influxdata/telegraf/blob/master/plugins/outputs/syslog) | Yes | No | No | +| [Fluentd](https://docs.victoriametrics.com/victorialogs/data-ingestion/fluentd/) | [Yes](https://github.com/uken/fluent-plugin-elasticsearch) | [Yes](https://docs.fluentd.org/output/http) | 
[Yes](https://grafana.com/docs/loki/latest/send-data/fluentd/) | [Yes](https://github.com/fluent-plugins-nursery/fluent-plugin-remote_syslog) | No | No | No | +| [Journald](https://docs.victoriametrics.com/victorialogs/data-ingestion/journald/) | No | No | No | No | No | Yes | No | +| [DataDog Agent](https://docs.victoriametrics.com/victorialogs/data-ingestion/datadog-agent/) | No | No | No | No | No | No | Yes | diff --git a/docs/victorialogs/data-ingestion/Telegraf.md b/docs/victorialogs/data-ingestion/Telegraf.md index d8f377ebf9..32d70a221c 100644 --- a/docs/victorialogs/data-ingestion/Telegraf.md +++ b/docs/victorialogs/data-ingestion/Telegraf.md @@ -1,6 +1,6 @@ --- weight: 5 -title: Telegraf setup +title: Telegraf Setup disableToc: true menu: docs: @@ -13,8 +13,8 @@ aliases: --- VictoriaLogs supports given below Telegraf outputs: -- [Elasticsearch](#elasticsearch) -- [HTTP JSON](#http) +- [Elasticsearch](https://docs.victoriametrics.com/victorialogs/data-ingestion/telegraf/#elasticsearch) +- [HTTP JSON](https://docs.victoriametrics.com/victorialogs/data-ingestion/telegraf/#http) ## Elasticsearch diff --git a/docs/victorialogs/data-ingestion/Vector.md b/docs/victorialogs/data-ingestion/Vector.md index 36529dfa0d..29d3fcb32d 100644 --- a/docs/victorialogs/data-ingestion/Vector.md +++ b/docs/victorialogs/data-ingestion/Vector.md @@ -1,6 +1,6 @@ --- weight: 20 -title: Vector setup +title: Vector Setup disableToc: true menu: docs: @@ -15,8 +15,8 @@ aliases: VictoriaLogs can accept logs from [Vector](https://vector.dev/) via the following protocols: -- Elasticsearch - see [these docs](#elasticsearch) -- HTTP JSON - see [these docs](#http) +- Elasticsearch - see [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/vector/#elasticsearch) +- HTTP JSON - see [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/vector/#http) ## Elasticsearch diff --git a/docs/victorialogs/data-ingestion/_index.md 
b/docs/victorialogs/data-ingestion/_index.md index 2ba77e5fe4..e92019fd14 100644 --- a/docs/victorialogs/data-ingestion/_index.md +++ b/docs/victorialogs/data-ingestion/_index.md @@ -1,11 +1,11 @@ --- -title: Data ingestion -weight: 3 +title: Data Ingestion +weight: 5 menu: docs: identifier: victorialogs-data-ingestion parent: "victorialogs" - weight: 3 + weight: 5 tags: - logs aliases: diff --git a/docs/victorialogs/data-ingestion/opentelemetry.md b/docs/victorialogs/data-ingestion/opentelemetry.md index 0614a986e2..96ad930c38 100644 --- a/docs/victorialogs/data-ingestion/opentelemetry.md +++ b/docs/victorialogs/data-ingestion/opentelemetry.md @@ -1,6 +1,6 @@ --- weight: 4 -title: OpenTelemetry setup +title: OpenTelemetry Setup disableToc: true menu: docs: @@ -46,8 +46,8 @@ The ingested log entries can be queried according to [these docs](https://docs.v VictoriaLogs supports receiving logs from the following OpenTelemetry collectors: -* [Elasticsearch](#elasticsearch) -* [OpenTelemetry](#opentelemetry) +* [Elasticsearch](https://docs.victoriametrics.com/victorialogs/data-ingestion/opentelemetry/#elasticsearch) +* [OpenTelemetry](https://docs.victoriametrics.com/victorialogs/data-ingestion/opentelemetry/#opentelemetry) ### Elasticsearch diff --git a/docs/victorialogs/data-ingestion/syslog.md b/docs/victorialogs/data-ingestion/syslog.md index e52c9bb2c5..6396802b3a 100644 --- a/docs/victorialogs/data-ingestion/syslog.md +++ b/docs/victorialogs/data-ingestion/syslog.md @@ -1,6 +1,6 @@ --- weight: 10 -title: Syslog setup +title: Syslog Setup disableToc: true menu: docs: @@ -11,8 +11,12 @@ tags: aliases: - /victorialogs/data-ingestion/syslog.html --- -[VictoriaLogs](https://docs.victoriametrics.com/victorialogs/) can accept logs in [Syslog formats](https://en.wikipedia.org/wiki/Syslog) at the specified TCP and UDP addresses -via `-syslog.listenAddr.tcp` and `-syslog.listenAddr.udp` command-line flags. 
The following syslog formats are supported: + +[VictoriaLogs](https://docs.victoriametrics.com/victorialogs/) can accept logs in [Syslog formats](https://en.wikipedia.org/wiki/Syslog) at the specified TCP, UDP or Unix socket addresses +via `-syslog.listenAddr.tcp`, `-syslog.listenAddr.udp` and `-syslog.listenAddr.unix` command-line flags. VictoriaLogs listens for `SOCK_STREAM` unix sockets by default. +Prepend the unix socket path passed to `-syslog.listenAddr.unix` with `unixgram:` for `SOCK_DGRAM` sockets. + +The following syslog formats are supported: - [RFC3164](https://datatracker.ietf.org/doc/html/rfc3164) aka `MMM DD hh:mm:ss HOSTNAME APP-NAME[PROCID]: MESSAGE` - [RFC5424](https://datatracker.ietf.org/doc/html/rfc5424) aka `1 TIMESTAMP HOSTNAME APP-NAME PROCID MSGID [STRUCTURED-DATA] MESSAGE` @@ -34,8 +38,8 @@ The following command starts VictoriaLogs, which accepts logs in Syslog format a VictoriaLogs can accept logs from the following syslog collectors: -- [Rsyslog](https://www.rsyslog.com/). See [these docs](#rsyslog). -- [Syslog-ng](https://www.syslog-ng.com/). See [these docs](#syslog-ng). +- [Rsyslog](https://www.rsyslog.com/). See [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#rsyslog). +- [Syslog-ng](https://www.syslog-ng.com/). See [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#syslog-ng). Multiple logs in Syslog format can be ingested via a single TCP connection or via a single UDP packet - just put every log on a separate line and delimit them with `\n` char. @@ -43,10 +47,10 @@ and delimit them with `\n` char. VictoriaLogs automatically extracts the following [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) from the received Syslog lines: -- [`_time`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field) - log timestamp. 
See also [log timestamps](#log-timestamps) +- [`_time`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field) - log timestamp. See also [log timestamps](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#log-timestamps) - [`_msg`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) - the `MESSAGE` field from the supported syslog formats above - `hostname`, `app_name` and `proc_id` - for unique identification of [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields). - It is possible to change the list of fields for log streams - see [these docs](#stream-fields). + It is possible to change the list of fields for log streams - see [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#stream-fields). - `level` - string representation of the log level according to the `<PRI>` field value - `priority`, `facility` and `severity` - these fields are extracted from `<PRI>` field - `facility_keyword` - string representation of the `facility` field according to [these docs](https://en.wikipedia.org/wiki/Syslog#Facility) @@ -73,15 +77,15 @@ curl http://localhost:9428/select/logsql/query -d 'query=_time:5m' See also: -- [Log timestamps](#log-timestamps) -- [Security](#security) -- [Compression](#compression) -- [Multitenancy](#multitenancy) -- [Stream fields](#stream-fields) -- [Dropping fields](#dropping-fields) -- [Decolorizing fields](#decolorizing-fields) -- [Adding extra fields](#adding-extra-fields) -- [Capturing remote ip address](#capturing-remote-ip-address) +- [Log timestamps](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#log-timestamps) +- [Security](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#security) +- [Compression](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#compression) +- [Multitenancy](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#multitenancy) +- [Stream
fields](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#stream-fields) +- [Dropping fields](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#dropping-fields) +- [Decolorizing fields](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#decolorizing-fields) +- [Adding extra fields](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#adding-extra-fields) +- [Capturing remote ip address](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#capturing-remote-ip-address) - [Data ingestion troubleshooting](https://docs.victoriametrics.com/victorialogs/data-ingestion/#troubleshooting). - [How to query VictoriaLogs](https://docs.victoriametrics.com/victorialogs/querying/). @@ -105,6 +109,13 @@ via the corresponding `-syslog.listenAddr.udp` address: ./victoria-logs -syslog.listenAddr.udp=:514 -syslog.useLocalTimestamp.udp ``` +The `-syslog.useLocalTimestamp.unix` command-line flag can be used for instructing VictoriaLogs to use local timestamps for the ingested logs +via the corresponding `-syslog.listenAddr.unix` address: + +```sh +./victoria-logs -syslog.listenAddr.unix=/dev/log -syslog.useLocalTimestamp.unix +``` + ## Security By default VictoriaLogs accepts plaintext data at `-syslog.listenAddr.tcp` address. Run VictoriaLogs with `-syslog.tls` command-line flag @@ -116,7 +127,7 @@ starts VictoriaLogs, which accepts TLS-encrypted syslog messages at TCP port 651 ./victoria-logs -syslog.listenAddr.tcp=:6514 -syslog.tls -syslog.tlsCertFile=/path/to/tls/cert -syslog.tlsKeyFile=/path/to/tls/key ``` -See also [mTLS docs](#mtls). +See also [mTLS docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#mtls). ### mTLS @@ -133,8 +144,8 @@ from [the releases page](https://github.com/VictoriaMetrics/VictoriaLogs/release ## Compression -By default VictoriaLogs accepts uncompressed log messages in Syslog format at `-syslog.listenAddr.tcp` and `-syslog.listenAddr.udp` addresses. 
-It is possible configuring VictoriaLogs to accept compressed log messages via `-syslog.compressMethod.tcp` and `-syslog.compressMethod.udp` command-line flags. +By default VictoriaLogs accepts uncompressed log messages in Syslog format at `-syslog.listenAddr.tcp`, `-syslog.listenAddr.udp` and `-syslog.listenAddr.unix` addresses. +It is possible to configure VictoriaLogs to accept compressed log messages via `-syslog.compressMethod.tcp`, `-syslog.compressMethod.udp` and `-syslog.compressMethod.unix` command-line flags. The following compression methods are supported: - `none` - no compression @@ -151,8 +162,8 @@ For example, the following command starts VictoriaLogs, which accepts gzip-compr ## Multitenancy By default, the ingested logs are stored in the `(AccountID=0, ProjectID=0)` [tenant](https://docs.victoriametrics.com/victorialogs/#multitenancy). -If you need storing logs in other tenant, then specify the needed tenant via `-syslog.tenantID.tcp` or `-syslog.tenantID.udp` command-line flags -depending on whether TCP or UDP ports are listened for syslog messages. +If you need to store logs in another tenant, then specify the needed tenant via `-syslog.tenantID.tcp`, `-syslog.tenantID.udp` or `-syslog.tenantID.unix` command-line flags +depending on whether syslog messages are received via TCP, UDP or Unix sockets. For example, the following command starts VictoriaLogs, which writes syslog messages received at TCP port 514, to `(AccountID=12, ProjectID=34)` tenant: ```sh @@ -162,8 +173,8 @@ For example, the following command starts VictoriaLogs, which writes syslog mess ## Stream fields VictoriaLogs uses `(hostname, app_name, proc_id)` fields as labels for [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) by default.
-It is possible setting other set of labels via `-syslog.streamFields.tcp` and `-syslog.streamFields.udp` command-line flags -for logs instead via the corresponding `-syslog.listenAddr.tcp` and `-syslog.listenAddr.dup` addresses. +It is possible to set another set of labels via `-syslog.streamFields.tcp`, `-syslog.streamFields.udp` and `-syslog.streamFields.unix` command-line flags +for logs ingested via the corresponding `-syslog.listenAddr.tcp`, `-syslog.listenAddr.udp` and `-syslog.listenAddr.unix` addresses. For example, the following command starts VictoriaLogs, which uses `(hostname, app_name)` fields as log stream labels for logs received at TCP port 514: @@ -173,9 +184,9 @@ for logs received at TCP port 514: ## Dropping fields -VictoriaLogs supports `-syslog.ignoreFields.tcp` and `-syslog.ignoreFields.udp` command-line flags for skipping +VictoriaLogs supports `-syslog.ignoreFields.tcp`, `-syslog.ignoreFields.udp` and `-syslog.ignoreFields.unix` command-line flags for skipping the given [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) during ingestion -of Syslog logs into `-syslog.listenAddr.tcp` and `-syslog.listenAddr.udp` addresses. +of Syslog logs into `-syslog.listenAddr.tcp`, `-syslog.listenAddr.udp` and `-syslog.listenAddr.unix` addresses. For example, the following command starts VictoriaLogs, which drops `proc_id` and `msg_id` fields from logs received at TCP port 514: ```sh @@ -187,9 +198,9 @@ are ignored during data ingestion. ## Decolorizing fields -VictoriaLogs supports `-syslog.decolorizeFields.tcp` and `-syslog.decolorizeFields.udp` command-line flags, +VictoriaLogs supports `-syslog.decolorizeFields.tcp`, `-syslog.decolorizeFields.udp` and `-syslog.decolorizeFields.unix` command-line flags, which can be used for removing ANSI color codes from the provided list fields during ingestion of Syslog logs -into `-syslog.listenAddr.tcp` and `-syslog.listenAddr.upd` addresses.
+into `-syslog.listenAddr.tcp`, `-syslog.listenAddr.udp` and `-syslog.listenAddr.unix` addresses. For example, the following command starts VictoriaLogs, which removes ANSI color codes from [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) at logs received via TCP port 514: @@ -199,9 +210,9 @@ at logs received via TCP port 514: ## Adding extra fields -VictoriaLogs supports -`syslog.extraFields.tcp` and `-syslog.extraFields.udp` command-line flags for adding +VictoriaLogs supports `-syslog.extraFields.tcp`, `-syslog.extraFields.udp` and `-syslog.extraFields.unix` command-line flags for adding the given [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) during data ingestion -of Syslog logs into `-syslog.listenAddr.tcp` and `-syslog.listenAddr.udp` addresses. +of Syslog logs into `-syslog.listenAddr.tcp`, `-syslog.listenAddr.udp` and `-syslog.listenAddr.unix` addresses. For example, the following command starts VictoriaLogs, which adds `source=foo` and `abc=def` fields to logs received at TCP port 514: ```sh @@ -223,8 +234,8 @@ For example, the following command starts VictoriaLogs, which captures remote IP ## Multiple configs -VictoriaLogs can accept syslog messages via multiple TCP and UDP ports with individual configurations for [log timestamps](#log-timestamps), [compression](#compression), [security](#security) -and [multitenancy](#multitenancy). Specify multiple command-line flags for this.
For example, the following command starts VictoriaLogs, +VictoriaLogs can accept syslog messages via multiple TCP and UDP ports with individual configurations for [log timestamps](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#log-timestamps), [compression](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#compression), [security](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#security) +and [multitenancy](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#multitenancy). Specify multiple command-line flags for this. For example, the following command starts VictoriaLogs, which accepts gzip-compressed syslog messages via TCP port 514 at localhost interface and stores them to [tenant](https://docs.victoriametrics.com/victorialogs/#multitenancy) `123:0`, plus it accepts TLS-encrypted syslog messages via TCP port 6514 and stores them to [tenant](https://docs.victoriametrics.com/victorialogs/#multitenancy) `567:0`: diff --git a/docs/victorialogs/keyConcepts.md b/docs/victorialogs/keyConcepts.md index 63bc6af416..1443e9312a 100644 --- a/docs/victorialogs/keyConcepts.md +++ b/docs/victorialogs/keyConcepts.md @@ -1,12 +1,12 @@ --- weight: 2 -title: Key concepts +title: Key Concepts menu: docs: identifier: vl-key-concepts parent: victorialogs weight: 2 - title: Key concepts + title: Key Concepts tags: - logs aliases: @@ -16,7 +16,7 @@ aliases: ## Data model [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/) works with both structured and unstructured logs. -Every log entry must contain at least [log message field](#message-field). Arbitrary number of additional `key=value` fields can be added to the log entry. +Every log entry must contain at least [log message field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field). Arbitrary number of additional `key=value` fields can be added to the log entry. 
A single log entry can be expressed as a single-level [JSON](https://www.json.org/json-en.html) object with string keys and string values. For example: @@ -32,7 +32,7 @@ For example: ``` Empty values are treated the same as non-existing values. For example, the following log entries are equivalent, -since they have only one identical non-empty field - [`_msg`](#message-field): +since they have only one identical non-empty field - [`_msg`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field): ```json { @@ -113,15 +113,15 @@ Unicode chars must be encoded with [UTF-8](https://en.wikipedia.org/wiki/UTF-8) VictoriaLogs automatically indexes all the fields in all the [ingested](https://docs.victoriametrics.com/victorialogs/data-ingestion/) logs. This enables [full-text search](https://docs.victoriametrics.com/victorialogs/logsql/) across all the fields. -VictoriaLogs supports the following special fields additionally to arbitrary [other fields](#other-fields): +VictoriaLogs supports the following special fields additionally to arbitrary [other fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#other-fields): -- [`_msg` field](#message-field) -- [`_time` field](#time-field) -- [`_stream` and `_stream_id` fields](#stream-fields) +- [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) +- [`_time` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field) +- [`_stream` and `_stream_id` fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) ### Message field -Every ingested [log entry](#data-model) must contain at least a `_msg` field with the actual log message. For example, this is the minimal +Every ingested [log entry](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) must contain at least a `_msg` field with the actual log message. 
For example, this is the minimal log entry for VictoriaLogs: ```json @@ -140,7 +140,7 @@ via `-defaultMsgValue` command-line flag. ### Time field -The ingested [log entries](#data-model) may contain `_time` field with the timestamp of the ingested log entry. +The ingested [log entries](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) may contain `_time` field with the timestamp of the ingested log entry. This field must be in one of the following formats: - [ISO8601](https://en.wikipedia.org/wiki/ISO_8601) or [RFC3339](https://www.rfc-editor.org/rfc/rfc3339). @@ -151,7 +151,7 @@ This field must be in one of the following formats: - Unix timestamp in seconds, milliseconds, microseconds or nanoseconds. For example, `1686026893` (seconds), `1686026893735` (milliseconds), `1686026893735321` (microseconds) or `1686026893735321098` (nanoseconds). -For example, the following [log entry](#data-model) contains valid timestamp with millisecond precision in the `_time` field: +For example, the following [log entry](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) contains valid timestamp with millisecond precision in the `_time` field: ```json { @@ -172,7 +172,7 @@ the search to the selected time range. ### Stream fields -Some [structured logging](#data-model) fields may uniquely identify the application instance, which generates logs. +Some [structured logging](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) fields may uniquely identify the application instance, which generates logs. This may be either a single field such as `instance="host123:456"` or a set of fields such as `{datacenter="...", env="...", job="...", instance="..."}` or `{kubernetes.namespace="...", kubernetes.node.name="...", kubernetes.pod.name="...", kubernetes.container.name="..."}`. @@ -224,7 +224,7 @@ per-container logs into distinct streams. #### How to determine which fields must be associated with log streams? 
-[Log streams](#stream-fields) must contain [fields](#data-model), which uniquely identify the application instance, which generates logs. +[Log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) must contain [fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model), which uniquely identify the application instance, which generates logs. For example, `container`, `instance` and `host` are good candidates for stream fields. Additional fields may be added to log streams if they **remain constant during application instance lifetime**. @@ -234,15 +234,15 @@ makes sense if you are going to use these fields during search and want speeding There is **no need to add all the constant fields to log streams**, since this may increase resource usage during data ingestion and querying. **Never add non-constant fields to streams if these fields may change with every log entry of the same stream**. -For example, `ip`, `user_id` and `trace_id` **must never be associated with log streams**, since this may lead to [high cardinality issues](#high-cardinality). +For example, `ip`, `user_id` and `trace_id` **must never be associated with log streams**, since this may lead to [high cardinality issues](https://docs.victoriametrics.com/victorialogs/keyconcepts/#high-cardinality). #### High cardinality -Some fields in the [ingested logs](#data-model) may contain big number of unique values across log entries. +Some fields in the [ingested logs](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) may contain big number of unique values across log entries. For example, fields with names such as `ip`, `user_id` or `trace_id` tend to contain big number of unique values. -VictoriaLogs works perfectly with such fields unless they are associated with [log streams](#stream-fields). 
+VictoriaLogs works perfectly with such fields unless they are associated with [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields). -**Never** associate high-cardinality fields with [log streams](#stream-fields), since this may lead to the following issues: +**Never** associate high-cardinality fields with [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields), since this may lead to the following issues: - Performance degradation during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/) and [querying](https://docs.victoriametrics.com/victorialogs/querying/) @@ -255,13 +255,13 @@ VictoriaLogs exposes `vl_streams_created_total` [metric](https://docs.victoriame which shows the number of created streams since the last VictoriaLogs restart. If this metric grows at a rapid rate during long period of time, then there are high chances of high cardinality issues mentioned above. VictoriaLogs can log all the newly registered streams when `-logNewStreams` command-line flag is passed to it. -This can help narrowing down and eliminating high-cardinality fields from [log streams](#stream-fields). +This can help narrowing down and eliminating high-cardinality fields from [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields). ### Other fields -Every ingested log entry may contain arbitrary number of [fields](#data-model) additionally to [`_msg`](#message-field) and [`_time`](#time-field). +Every ingested log entry may contain arbitrary number of [fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) additionally to [`_msg`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) and [`_time`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field). For example, `level`, `ip`, `user_id`, `trace_id`, etc. 
Such fields can be used for simplifying and optimizing [search queries](https://docs.victoriametrics.com/victorialogs/logsql/). -It is usually faster to search over a dedicated `trace_id` field instead of searching for the `trace_id` inside long [log message](#message-field). +It is usually faster to search over a dedicated `trace_id` field instead of searching for the `trace_id` inside a long [log message](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field). E.g. the `trace_id:="XXXX-YYYY-ZZZZ"` query usually works faster than the `_msg:"trace_id=XXXX-YYYY-ZZZZ"` query. See [LogsQL docs](https://docs.victoriametrics.com/victorialogs/logsql/) for more details. diff --git a/docs/victorialogs/logql-to-logsql.md b/docs/victorialogs/logql-to-logsql.md index 4af48c2024..47ef33073c 100644 --- a/docs/victorialogs/logql-to-logsql.md +++ b/docs/victorialogs/logql-to-logsql.md @@ -1,10 +1,10 @@ --- -weight: 130 -title: How to convert Loki queries to VictoriaLogs queries +weight: 51 +title: How To Convert Loki Queries to VictoriaLogs Queries menu: docs: parent: "victorialogs" - weight: 120 + weight: 51 tags: - logs - guide @@ -16,7 +16,7 @@ query language. Both languages are optimized for querying logs. The docs below s ## Data model Both Loki and VictoriaLogs support log streams - these are timestamp-ordered streams of logs, where every stream may have its own set of labels. These labels can be used -in [log stream selectors](#log-stream-selector) for quickly narrowing down the amounts of logs for further processing by the query. +in [log stream selectors](https://docs.victoriametrics.com/victorialogs/logql-to-logsql/#log-stream-selector) for quickly narrowing down the amount of logs for further processing by the query. The main difference is that VictoriaLogs is optimized for structured logs with big number of labels (aka [wide events](https://jeremymorrell.dev/blog/a-practitioners-guide-to-wide-events/)).
Hundreds of labels per every log entry is OK for VictoriaLogs. @@ -160,7 +160,7 @@ Such a query can be replaced with `{...} | unpack_logfmt` at VictoriaLogs. See [t It is recommended parsing logfmt-formatted structured logs before ingesting them into VictoriaLogs, so log labels are stored separately. VictoriaLogs is optimized for storing logs with big number of labels (fields), and every such field may contain arbitrary big number of unique values (e.g. VictoriaLogs works great with high-cardinality labels). -See [JSON parser](#json-parser) docs for more details. +See [JSON parser](https://docs.victoriametrics.com/victorialogs/logql-to-logsql/#json-parser) docs for more details. ## Pattern parser @@ -177,14 +177,14 @@ Such a query can be replaced with `{...} | extract_regexp "..."` at VictoriaLogs Loki provides the ability to format log lines with the `{...} | line_format "..."` syntax according to [these docs](https://grafana.com/docs/loki/latest/query/log_queries/#line-format-expression). Such a query can be replaced with `{...} | format "..."` at VictoriaLogs. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#format-pipe). -Note that VictoriaLogs uses `