diff --git a/deps/libcircllhist/Makefile b/deps/libcircllhist/Makefile new file mode 100644 index 0000000..1293edd --- /dev/null +++ b/deps/libcircllhist/Makefile @@ -0,0 +1,7 @@ +CC = gcc +CFLAGS = -Wall -std=c99 + +all: circllhist.o + +circllhist.o: circllhist.c + $(CC) $(CFLAGS) -o $@ -c $< diff --git a/deps/libcircllhist/circllhist.c b/deps/libcircllhist/circllhist.c new file mode 100644 index 0000000..af193f7 --- /dev/null +++ b/deps/libcircllhist/circllhist.c @@ -0,0 +1,1294 @@ +/* + * Copyright (c) 2012-2018, Circonus, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name Circonus, Inc. nor the names of its contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> + +#include <stdlib.h> +#include <string.h> +#include <math.h> +#include <assert.h> +#include <ctype.h> +#include <stdint.h> +#include <inttypes.h> + +#if !defined(WIN32) +#include <arpa/inet.h> +#include <netinet/in.h> +#endif + +#include "circllhist.h" + +hist_allocator_t default_allocator = { + .malloc = malloc, + .calloc = calloc, + .free = free +}; + +static union { + uint64_t private_nan_internal_rep; + double private_nan_double_rep; +} private_nan_union = { .private_nan_internal_rep = 0x7fffffffffffffff }; + +static const hist_bucket_t hbnan = { (int8_t)0xff, 0 }; + +#define MAX_HIST_BINS (2 + 2 * 90 * 256) +#ifndef NDEBUG +#define unlikely(x) (x) +#define ASSERT_GOOD_HIST(h) do { \ + if(h) { \ + assert(h->allocd <= MAX_HIST_BINS); \ + assert(h->used <= h->allocd); \ + } \ +} while(0) +#define ASSERT_GOOD_BUCKET(hb) assert(hist_bucket_is_valid(hb)) +#else +#define unlikely(x) __builtin_expect(!!(x), 0) +#define ASSERT_GOOD_HIST(h) +#define ASSERT_GOOD_BUCKET(hb) +#endif +#define private_nan private_nan_union.private_nan_double_rep +#define HIST_POSITIVE_MIN_I 1e-128 +#define HIST_NEGATIVE_MAX_I -1e-128 + +static double power_of_ten[256] = { + 1, 10, 100, 1000, 10000, 100000, 1e+06, 1e+07, 1e+08, 1e+09, 1e+10, + 1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, 1e+17, 1e+18, 1e+19, 1e+20, + 1e+21, 1e+22, 1e+23, 1e+24, 1e+25, 1e+26, 1e+27, 1e+28, 1e+29, 1e+30, + 1e+31, 1e+32, 1e+33, 1e+34, 1e+35, 1e+36, 1e+37, 1e+38, 1e+39, 1e+40, + 1e+41, 1e+42, 1e+43, 1e+44, 1e+45, 1e+46, 1e+47, 1e+48, 1e+49, 1e+50, + 1e+51, 1e+52,
1e+53, 1e+54, 1e+55, 1e+56, 1e+57, 1e+58, 1e+59, 1e+60, + 1e+61, 1e+62, 1e+63, 1e+64, 1e+65, 1e+66, 1e+67, 1e+68, 1e+69, 1e+70, + 1e+71, 1e+72, 1e+73, 1e+74, 1e+75, 1e+76, 1e+77, 1e+78, 1e+79, 1e+80, + 1e+81, 1e+82, 1e+83, 1e+84, 1e+85, 1e+86, 1e+87, 1e+88, 1e+89, 1e+90, + 1e+91, 1e+92, 1e+93, 1e+94, 1e+95, 1e+96, 1e+97, 1e+98, 1e+99, 1e+100, + 1e+101, 1e+102, 1e+103, 1e+104, 1e+105, 1e+106, 1e+107, 1e+108, 1e+109, + 1e+110, 1e+111, 1e+112, 1e+113, 1e+114, 1e+115, 1e+116, 1e+117, 1e+118, + 1e+119, 1e+120, 1e+121, 1e+122, 1e+123, 1e+124, 1e+125, 1e+126, 1e+127, + 1e-128, 1e-127, 1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121, 1e-120, + 1e-119, 1e-118, 1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111, + 1e-110, 1e-109, 1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102, + 1e-101, 1e-100, 1e-99, 1e-98, 1e-97, 1e-96, + 1e-95, 1e-94, 1e-93, 1e-92, 1e-91, 1e-90, 1e-89, 1e-88, 1e-87, 1e-86, + 1e-85, 1e-84, 1e-83, 1e-82, 1e-81, 1e-80, 1e-79, 1e-78, 1e-77, 1e-76, + 1e-75, 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, + 1e-65, 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, 1e-56, + 1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46, + 1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40, 1e-39, 1e-38, 1e-37, 1e-36, + 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28, 1e-27, 1e-26, + 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17, 1e-16, + 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-09, 1e-08, 1e-07, 1e-06, + 1e-05, 0.0001, 0.001, 0.01, 0.1 +}; + +static const char __b64[] = { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', 0x00 }; + +struct hist_flevel { + uint8_t l2; + uint8_t l1; +}; + +//! 
A bucket-count pair +#if defined(WIN32) +#pragma pack(push, 1) +struct hist_bv_pair { + hist_bucket_t bucket; + uint64_t count; +}; +#pragma pack(pop) +#else +struct hist_bv_pair { + hist_bucket_t bucket; + uint64_t count; +}__attribute__((packed)); +#endif + +//! The histogram structure +//! Internals are regarded private and might change with version. +//! Only use the public methods to operate on this structure. +struct histogram { + uint16_t allocd; //!< number of allocated bv pairs + uint16_t used; //!< number of used bv pairs + uint32_t fast: 1; + hist_allocator_t *allocator; + struct hist_bv_pair *bvs; //!< pointer to bv-pairs +}; + +struct histogram_fast { + struct histogram internal; + uint16_t *faster[256]; +}; +uint64_t bvl_limits[7] = { + 0x00000000000000ffULL, 0x000000000000ffffULL, + 0x0000000000ffffffULL, 0x00000000ffffffffULL, + 0x000000ffffffffffULL, 0x0000ffffffffffffULL, + 0x00ffffffffffffffULL +}; +typedef enum { + BVL1 = 0, + BVL2 = 1, + BVL3 = 2, + BVL4 = 3, + BVL5 = 4, + BVL6 = 5, + BVL7 = 6, + BVL8 = 7 +} bvdatum_t; + +static inline int +hist_bucket_isnan(hist_bucket_t hb) { + int aval = abs(hb.val); + if (99 < aval) return 1; // in [100...
]: nan + if ( 9 < aval) return 0; // in [10 - 99]: valid range + if ( 0 < aval) return 1; // in [1 - 9 ]: nan + if ( 0 == aval) return 0; // in [0]: zero bucket + assert(0); + return 0; +} + +/* It's either not NaN, or exactly matches the one, true NaN */ +static inline int +hist_bucket_is_valid(hist_bucket_t hb) { + return !hist_bucket_isnan(hb) || (hb.val == hbnan.val && hb.exp == hbnan.exp); +} + +static ssize_t +bv_size(const histogram_t *h, int idx) { + int i; + for(i=0; ibvs[idx].count <= bvl_limits[i]) return 3 + i + 1; + return 3+8; +} + +static ssize_t +bv_write(const histogram_t *h, int idx, void *buff, ssize_t size) { + int i; + uint8_t *cp; + ssize_t needed; + bvdatum_t tgt_type = BVL8; + for(i=0; ibvs[idx].count <= bvl_limits[i]) { + tgt_type = i; + break; + } + needed = 3 + tgt_type + 1; + if(needed > size) return -1; + cp = buff; + cp[0] = h->bvs[idx].bucket.val; + cp[1] = h->bvs[idx].bucket.exp; + cp[2] = tgt_type; + for(i=tgt_type;i>=0;i--) + cp[i+3] = ((h->bvs[idx].count >> (i * 8)) & 0xff); + return needed; +} +static ssize_t +bv_read(histogram_t *h, int idx, const void *buff, ssize_t len) { + const uint8_t *cp; + uint64_t count = 0; + bvdatum_t tgt_type; + int i; + + assert(idx == h->used); + if(len < 3) return -1; + cp = buff; + tgt_type = cp[2]; + if(tgt_type > BVL8) return -1; + if(len < 3 + tgt_type + 1) return -1; + for(i=tgt_type;i>=0;i--) + count |= ((uint64_t)cp[i+3]) << (i * 8); + if(count != 0) { + h->bvs[idx].bucket.val = cp[0]; + h->bvs[idx].bucket.exp = cp[1]; + if(hist_bucket_is_valid(h->bvs[idx].bucket)) { + /* Protect against reading invalid/corrupt buckets */ + h->bvs[idx].count = count; + h->used++; + } + } + return 3 + tgt_type + 1; +} + +ssize_t +hist_serialize_estimate(const histogram_t *h) { + /* worst case if 2 for the length + 3+8 * used */ + int i; + ssize_t len = 2; + if(h == NULL) return len; + for(i=0;iused;i++) { + if(h->bvs[i].count != 0) { + len += bv_size(h, i); + } + } + return len; +} + +#ifndef SKIP_LIBMTEV 
+ssize_t +hist_serialize_b64_estimate(const histogram_t *h) { + ssize_t len = hist_serialize_estimate(h); + // base 64 <=> 1 char == 6 bit <=> 4 chars = 3 Byte ==> n Bytpe = 4*ceil(len/3.) chars + return 4*(len/3+1); +} +#endif + +#define ADVANCE(tracker, n) cp += (n), tracker += (n), len -= (n) +ssize_t +hist_serialize(const histogram_t *h, void *buff, ssize_t len) { + ssize_t written = 0; + uint8_t *cp = buff; + uint16_t nlen = 0; + int i; + + if(len < 2) return -1; + ADVANCE(written, 2); + for(i=0;h && iused;i++) { + ssize_t incr_written; + if(h->bvs[i].count) { + incr_written = bv_write(h, i, cp, len); + if(incr_written < 0) return -1; + nlen++; + ADVANCE(written, incr_written); + } + } + nlen = htons(nlen); + memcpy(buff, &nlen, sizeof(nlen)); + + return written; +} + +static int +copy_of_mtev_b64_encode(const unsigned char *src, size_t src_len, + char *dest, size_t dest_len) { + const unsigned char *bptr = src; + char *eptr = dest; + int len = src_len; + int n = (((src_len + 2) / 3) * 4); + + if(dest_len < n) return 0; + + while(len > 2) { + *eptr++ = __b64[bptr[0] >> 2]; + *eptr++ = __b64[((bptr[0] & 0x03) << 4) + (bptr[1] >> 4)]; + *eptr++ = __b64[((bptr[1] & 0x0f) << 2) + (bptr[2] >> 6)]; + *eptr++ = __b64[bptr[2] & 0x3f]; + bptr += 3; + len -= 3; + } + if(len != 0) { + *eptr++ = __b64[bptr[0] >> 2]; + if(len > 1) { + *eptr++ = __b64[((bptr[0] & 0x03) << 4) + (bptr[1] >> 4)]; + *eptr++ = __b64[(bptr[1] & 0x0f) << 2]; + *eptr = '='; + } else { + *eptr++ = __b64[(bptr[0] & 0x03) << 4]; + *eptr++ = '='; + *eptr = '='; + } + } + return n; +} + +ssize_t +hist_serialize_b64(const histogram_t *h, char *b64_serialized_histo_buff, ssize_t buff_len) { + ssize_t serialize_buff_length = hist_serialize_estimate(h); + uint8_t serialize_buff_static[8192]; + void *serialize_buff = (void *)serialize_buff_static; + if(serialize_buff_length > sizeof(serialize_buff_static)) { + serialize_buff = malloc(serialize_buff_length); + if(!serialize_buff) return -1; + } + ssize_t 
serialized_length = hist_serialize(h, serialize_buff, serialize_buff_length); + if (serialized_length > 0) { + serialized_length = copy_of_mtev_b64_encode(serialize_buff, serialized_length, b64_serialized_histo_buff, buff_len); + } + if(serialize_buff != (void *)serialize_buff_static) free(serialize_buff); + return serialized_length; +} + +ssize_t +hist_deserialize(histogram_t *h, const void *buff, ssize_t len) { + const uint8_t *cp = buff; + ssize_t bytes_read = 0; + uint16_t nlen, cnt; + if(len < 2) goto bad_read; + if(h->bvs) h->allocator->free(h->bvs); + h->bvs = NULL; + memcpy(&nlen, cp, sizeof(nlen)); + ADVANCE(bytes_read, 2); + h->used = 0; + cnt = ntohs(nlen); + h->allocd = cnt; + if(h->allocd == 0) return bytes_read; + h->bvs = h->allocator->calloc(h->allocd, sizeof(*h->bvs)); + if(!h->bvs) goto bad_read; /* yeah, yeah... bad label name */ + while(len > 0 && cnt > 0) { + ssize_t incr_read = 0; + incr_read = bv_read(h, h->used, cp, len); + if(incr_read < 0) goto bad_read; + ADVANCE(bytes_read, incr_read); + cnt--; + } + return bytes_read; + + bad_read: + if(h->bvs) h->allocator->free(h->bvs); + h->bvs = NULL; + h->used = h->allocd = 0; + return -1; +} + +static int +copy_of_mtev_b64_decode(const char *src, size_t src_len, + unsigned char *dest, size_t dest_len) { + const unsigned char *cp = (unsigned char *)src; + unsigned char *dcp = dest; + unsigned char ch, in[4], out[3]; + int ib = 0, ob = 3, needed = (((src_len / 4) * 3) - 2); + + if(dest_len < needed) return 0; + while(cp <= ((unsigned char *)src+src_len)) { + if((*cp >= 'A') && (*cp <= 'Z')) ch = *cp - 'A'; + else if((*cp >= 'a') && (*cp <= 'z')) ch = *cp - 'a' + 26; + else if((*cp >= '0') && (*cp <= '9')) ch = *cp - '0' + 52; + else if(*cp == '+') ch = 62; + else if(*cp == '/') ch = 63; + else if(*cp == '=') ch = 0xff; + else if(isspace((int)*cp)) { cp++; continue; } + else break; + cp++; + if(ch == 0xff) { + if(ib == 0) break; + if(ib == 1 || ib == 2) ob = 1; + else ob = 2; + while (ib < 3) + 
in[ib++] = '\0'; + } + in[ib++] = ch; + if(ib == 4) { + out[0] = (in[0] << 2) | ((in[1] & 0x30) >> 4); + out[1] = ((in[1] & 0x0f) << 4) | ((in[2] & 0x3c) >> 2); + out[2] = ((in[2] & 0x03) << 6) | (in[3] & 0x3f); + for(ib = 0; ib < ob; ib++) + *dcp++ = out[ib]; + ib = 0; + } + } + return dcp - (unsigned char *)dest; +} + +ssize_t hist_deserialize_b64(histogram_t *h, const void *b64_string, ssize_t b64_string_len) { + int decoded_hist_len; + unsigned char decoded_hist_static[8192]; + unsigned char* decoded_hist = decoded_hist_static; + if(b64_string_len > sizeof(decoded_hist_static)) { + decoded_hist = malloc(b64_string_len); + if(!decoded_hist) return -1; + } + + decoded_hist_len = copy_of_mtev_b64_decode(b64_string, b64_string_len, decoded_hist, b64_string_len); + + ssize_t bytes_read = -1; + if (decoded_hist_len >= 2) { + bytes_read = hist_deserialize(h, decoded_hist, decoded_hist_len); + if (bytes_read != decoded_hist_len) { + bytes_read = -1; + } + } + if(decoded_hist != decoded_hist_static) free(decoded_hist); + return bytes_read; +} + +static inline +int hist_bucket_cmp(hist_bucket_t h1, hist_bucket_t h2) { + ASSERT_GOOD_BUCKET(h1); + ASSERT_GOOD_BUCKET(h2); + // checks if h1 < h2 on the real axis. + if(*(uint16_t *)&h1 == *(uint16_t *)&h2) return 0; + /* place NaNs at the beginning always */ + if(hist_bucket_isnan(h1)) return 1; + if(hist_bucket_isnan(h2)) return -1; + /* zero values need special treatment */ + if(h1.val == 0) return (h2.val > 0) ? 1 : -1; + if(h2.val == 0) return (h1.val < 0) ? 1 : -1; + /* opposite signs? */ + if(h1.val < 0 && h2.val > 0) return 1; + if(h1.val > 0 && h2.val < 0) return -1; + /* here they are either both positive or both negative */ + if(h1.exp == h2.exp) return (h1.val < h2.val) ? 1 : -1; + if(h1.exp > h2.exp) return (h1.val < 0) ? 1 : -1; + if(h1.exp < h2.exp) return (h1.val < 0) ? 
-1 : 1; + /* unreachable */ + return 0; +} + +double +hist_bucket_to_double(hist_bucket_t hb) { + uint8_t *pidx; + assert(private_nan != 0); + if(hist_bucket_isnan(hb)) return private_nan; + if(hb.val == 0) return 0.0; + pidx = (uint8_t *)&hb.exp; + return (((double)hb.val)/10.0) * power_of_ten[*pidx]; +} + +double +hist_bucket_to_double_bin_width(hist_bucket_t hb) { + if(hist_bucket_isnan(hb)) return private_nan; + if(hb.val == 0) return 0; + uint8_t *pidx; + pidx = (uint8_t *)&hb.exp; + return power_of_ten[*pidx]/10.0; +} + +/* + * A midpoint in a bin should a minimum error midpoint, not a linear midpoint. Let + * us choose an M such that M * bin-width finds our our placement from the bottom + * of a bin + * + * Take the [B0,B1) bin, with a bin-width of B1-B0... + * as a sample S approaches B1, we see error ((B1-B0)(1-M))/B1 + * and as S approaches B0, we see error ((B1-B0)M)/B0. + * + * M should be chosen such that: + * + * ((B1-B0)(1-M))/B1 = ((B1-B0)M)/B0 + * + * (B0)(B1-B0)(1-M) = (B1)(B1-B0)(M) + * + * B0 - B0(M) = B1(M) + * + * B0 = (B0 + B1)(M) + * + * M = (B1)/(B0 + B1) + */ + +double +hist_bucket_midpoint(hist_bucket_t in) { + double bottom, top, interval, ratio; + if(hist_bucket_isnan(in)) return private_nan; + if(in.val == 0) return 0; + bottom = hist_bucket_to_double(in); + interval = hist_bucket_to_double_bin_width(in); + if(bottom < 0) interval *= -1.0; + top = bottom + interval; + ratio = (bottom)/(bottom + top); + return bottom + interval * ratio; +} + +/* This is used for quantile calculation, + * where we want the side of the bucket closest to -inf */ +static double +hist_bucket_left(hist_bucket_t in) { + double out, interval; + if(hist_bucket_isnan(in)) return private_nan; + if(in.val == 0) return 0; + out = hist_bucket_to_double(in); + if(out > 0) return out; + /* out < 0 */ + interval = hist_bucket_to_double_bin_width(in); + return out - interval; +} + +double +hist_approx_mean(const histogram_t *hist) { + int i; + double divisor = 0.0; + 
double sum = 0.0; + if(!hist) return private_nan; + ASSERT_GOOD_HIST(hist); + for(i=0; iused; i++) { + if(hist_bucket_isnan(hist->bvs[i].bucket)) continue; + double midpoint = hist_bucket_midpoint(hist->bvs[i].bucket); + double cardinality = (double)hist->bvs[i].count; + divisor += cardinality; + sum += midpoint * cardinality; + } + if(divisor == 0.0) return private_nan; + return sum/divisor; +} + +double +hist_approx_sum(const histogram_t *hist) { + int i; + double sum = 0.0; + if(!hist) return 0.0; + ASSERT_GOOD_HIST(hist); + for(i=0; iused; i++) { + if(hist_bucket_isnan(hist->bvs[i].bucket)) continue; + double value = hist_bucket_midpoint(hist->bvs[i].bucket); + double cardinality = (double)hist->bvs[i].count; + sum += value * cardinality; + } + return sum; +} + +double +hist_approx_stddev(const histogram_t *hist) { + int i; + double total_count = 0.0; + double s1 = 0.0; + double s2 = 0.0; + if(!hist) return private_nan; + ASSERT_GOOD_HIST(hist); + if(hist->used == 0) return 0.0; + for(i=0; iused; i++) { + if(hist_bucket_isnan(hist->bvs[i].bucket)) continue; + double midpoint = hist_bucket_midpoint(hist->bvs[i].bucket); + double count = hist->bvs[i].count; + total_count += count; + s1 += midpoint * count; + s2 += pow(midpoint, 2.0) * count; + } + if(total_count == 0.0) return private_nan; + return sqrt(s2 / total_count - pow(s1 / total_count, 2.0)); +} + +double +hist_approx_moment(const histogram_t *hist, double k) { + int i; + double total_count = 0.0; + double sk = 0.0; + if(!hist) return private_nan; + ASSERT_GOOD_HIST(hist); + for(i=0; iused; i++) { + if(hist_bucket_isnan(hist->bvs[i].bucket)) continue; + double midpoint = hist_bucket_midpoint(hist->bvs[i].bucket); + double count = hist->bvs[i].count; + total_count += count; + sk += pow(midpoint, k) * count; + } + if(total_count == 0.0) return private_nan; + return sk / pow(total_count, k); +} + +uint64_t +hist_approx_count_below(const histogram_t *hist, double threshold) { + int i; + uint64_t running_count 
= 0; + if(!hist) return 0; + ASSERT_GOOD_HIST(hist); + for(i=0; iused; i++) { + if(hist_bucket_isnan(hist->bvs[i].bucket)) continue; + double bucket_bound = hist_bucket_to_double(hist->bvs[i].bucket); + double bucket_upper; + if(bucket_bound < 0.0) + bucket_upper = bucket_bound; + else + bucket_upper = bucket_bound + hist_bucket_to_double_bin_width(hist->bvs[i].bucket); + if(bucket_upper <= threshold) + running_count += hist->bvs[i].count; + else + break; + } + return running_count; +} + +uint64_t +hist_approx_count_above(const histogram_t *hist, double threshold) { + int i; + if(!hist) return 0; + ASSERT_GOOD_HIST(hist); + uint64_t running_count = hist_sample_count(hist); + for(i=0; iused; i++) { + if(hist_bucket_isnan(hist->bvs[i].bucket)) continue; + double bucket_bound = hist_bucket_to_double(hist->bvs[i].bucket); + double bucket_lower; + if(bucket_bound < 0.0) + bucket_lower = bucket_bound - hist_bucket_to_double_bin_width(hist->bvs[i].bucket); + else + bucket_lower = bucket_bound; + if(bucket_lower < threshold) + running_count -= hist->bvs[i].count; + else + break; + } + return running_count; +} + +uint64_t +hist_approx_count_nearby(const histogram_t *hist, double value) { + int i; + if(!hist) return 0; + ASSERT_GOOD_HIST(hist); + for(i=0; iused; i++) { + if(hist_bucket_isnan(hist->bvs[i].bucket)) continue; + double bucket_bound = hist_bucket_to_double(hist->bvs[i].bucket); + double bucket_lower, bucket_upper; + if(bucket_bound < 0.0) { + bucket_lower = bucket_bound - hist_bucket_to_double_bin_width(hist->bvs[i].bucket); + bucket_upper = bucket_bound; + if(bucket_lower < value && value <= bucket_upper) + return hist->bvs[i].count; + } + else if(bucket_bound == 0.0) { + if(HIST_NEGATIVE_MAX_I < value && value < HIST_POSITIVE_MIN_I) + return hist->bvs[i].count; + } + else { + bucket_lower = bucket_bound; + bucket_upper = bucket_bound + hist_bucket_to_double_bin_width(hist->bvs[i].bucket); + if(bucket_lower <= value && value < bucket_upper) + return 
hist->bvs[i].count; + } + } + return 0; +} + +/* 0 success, + * -1 (empty histogram), + * -2 (out of order quantile request) + * -3 (out of bound quantile) + */ +int +hist_approx_quantile(const histogram_t *hist, const double *q_in, int nq, double *q_out) { + int i_q, i_b; + double total_cnt = 0.0, bucket_width = 0.0, + bucket_left = 0.0, lower_cnt = 0.0, upper_cnt = 0.0; + + if(nq < 1) return 0; /* nothing requested, easy to satisfy successfully */ + + if(!hist) { + for(i_q=0;i_qused;i_b++) { + /* ignore NaN */ + if(hist_bucket_isnan(hist->bvs[i_b].bucket)) + continue; + total_cnt += (double)hist->bvs[i_b].count; + } + + /* Run through the quantiles and make sure they are in order */ + for (i_q=1;i_q q_in[i_q]) return -2; + + if(total_cnt == 0) { + for(i_q=0;i_q 1.0) return -3; + q_out[i_q] = total_cnt * q_in[i_q]; + } + + +#define TRACK_VARS(idx) do { \ + bucket_width = hist_bucket_to_double_bin_width(hist->bvs[idx].bucket); \ + bucket_left = hist_bucket_left(hist->bvs[idx].bucket); \ + lower_cnt = upper_cnt; \ + upper_cnt = lower_cnt + hist->bvs[idx].count; \ +} while(0) + + /* Find the least bin (first) */ + for(i_b=0;i_bused;i_b++) { + /* We don't include NaNs */ + if(hist_bucket_isnan(hist->bvs[i_b].bucket)) + continue; + if(hist->bvs[i_b].count == 0) + continue; + TRACK_VARS(i_b); + break; + } + + /* Next walk the bins and the quantiles together */ + for(i_q=0;i_qused-1) && upper_cnt < q_out[i_q]) { + i_b++; + TRACK_VARS(i_b); + } + if(lower_cnt == q_out[i_q]) { + q_out[i_q] = bucket_left; + } + else if(upper_cnt == q_out[i_q]) { + q_out[i_q] = bucket_left + bucket_width; + } + else { + if(bucket_width == 0) q_out[i_q] = bucket_left; + else q_out[i_q] = bucket_left + + (q_out[i_q] - lower_cnt) / (upper_cnt - lower_cnt) * bucket_width; + } + } + return 0; +} + +hist_bucket_t +int_scale_to_hist_bucket(int64_t value, int scale) { + hist_bucket_t hb = { 0, 0 }; + int sign = 1; + if(unlikely(value == 0)) return hb; + scale++; + if(unlikely(value < 0)) { + 
if(unlikely(value == INT64_MIN)) value = INT64_MAX; + else value = 0 - value; + sign = -1; + } + if(unlikely(value < 10)) { + value *= 10; + scale -= 1; + } + while(unlikely(value >= 100)) { + value /= 10; + scale++; + } + if(unlikely(scale < -128)) return hb; + if(unlikely(scale > 127)) return hbnan; + hb.val = sign * value; + hb.exp = scale; + ASSERT_GOOD_BUCKET(hb); + return hb; +} + +hist_bucket_t +double_to_hist_bucket(double d) { + hist_bucket_t hb = { (int8_t)0xff, 0 }; // NaN + assert(private_nan != 0); + if(unlikely(isnan(d))) return hb; + if(unlikely(isinf(d))) return hb; + else if(unlikely(d==0)) hb.val = 0; + else { + int big_exp; + uint8_t *pidx; + int sign = (d < 0) ? -1 : 1; + d = fabs(d); + big_exp = (int32_t)floor(log10(d)); + hb.exp = (int8_t)big_exp; + if(unlikely(hb.exp != big_exp)) { /* we rolled */ + if(unlikely(big_exp >= 0)) return hbnan; + hb.val = 0; + hb.exp = 0; + return hb; + } + pidx = (uint8_t *)&hb.exp; + d /= power_of_ten[*pidx]; + d *= 10; + // avoid rounding problem at the bucket boundary + // e.g. d=0.11 results in hb.val = 10 (should be 11) + // by allowing an error margin (in the order or magnitude + // of the expected rounding errors of the above transformations) + hb.val = sign * (int)floor(d + 1e-13); + if(unlikely(hb.val == 100 || hb.val == -100)) { + if (hb.exp < 127) { + hb.val /= 10; + hb.exp++; + } else { // can't increase exponent. Return NaN + return hbnan; + } + } + if(unlikely(hb.val == 0)) { + hb.exp = 0; + return hb; + } + if(unlikely(!((hb.val >= 10 && hb.val < 100) || + (hb.val <= -10 && hb.val > -100)))) { + return hbnan; + } + } + return hb; +} + +static int +hist_internal_find(histogram_t *hist, hist_bucket_t hb, int *idx) { + /* This is a simple binary search returning the idx in which + * the specified bucket belongs... returning 1 if it is there + * or 0 if the value would need to be inserted here (moving the + * rest of the buckets forward one). 
+ */ + int rv = -1, l = 0, r = hist->used - 1; + *idx = 0; + ASSERT_GOOD_HIST(hist); + if(unlikely(hist->used == 0)) return 0; + if(hist->fast) { + struct histogram_fast *hfast = (struct histogram_fast *)hist; + struct hist_flevel *faster = (struct hist_flevel *)&hb; + if(hfast->faster[faster->l1]) { + *idx = hfast->faster[faster->l1][faster->l2]; + if(*idx) { + (*idx)--; + return 1; + } + } + } + while(l < r) { + int check = (r+l)/2; + rv = hist_bucket_cmp(hist->bvs[check].bucket, hb); + if(rv == 0) l = r = check; + else if(rv > 0) l = check + 1; + else r = check - 1; + } + /* if rv == 0 we found a match, no need to compare again */ + if(rv != 0) rv = hist_bucket_cmp(hist->bvs[l].bucket, hb); + *idx = l; + if(rv == 0) return 1; /* this is it */ + if(rv < 0) return 0; /* it goes here (before) */ + (*idx)++; /* it goes after here */ +#ifndef NDEBUG + assert(*idx >= 0 && *idx <= hist->used); +#endif + return 0; +} + +uint64_t +hist_insert_raw(histogram_t *hist, hist_bucket_t hb, uint64_t count) { + int found, idx; + ASSERT_GOOD_HIST(hist); + if(unlikely(hist->bvs == NULL)) { + hist->bvs = hist->allocator->malloc(DEFAULT_HIST_SIZE * sizeof(*hist->bvs)); + hist->allocd = DEFAULT_HIST_SIZE; + } + found = hist_internal_find(hist, hb, &idx); + if(unlikely(!found)) { + int i; + if(unlikely(hist->used == hist->allocd)) { + /* A resize is required */ + histogram_t dummy; + dummy.bvs = hist->allocator->malloc((hist->allocd + DEFAULT_HIST_SIZE) * + sizeof(*hist->bvs)); + if(idx > 0) + memcpy(dummy.bvs, hist->bvs, idx * sizeof(*hist->bvs)); + dummy.bvs[idx].bucket = hb; + dummy.bvs[idx].count = count; + if(idx < hist->used) + memcpy(dummy.bvs + idx + 1, hist->bvs + idx, + (hist->used - idx)*sizeof(*hist->bvs)); + hist->allocator->free(hist->bvs); + hist->bvs = dummy.bvs; + hist->allocd += DEFAULT_HIST_SIZE; + } + else { // used !== alloced + /* We need to shuffle out data to poke the new one in */ + memmove(hist->bvs + idx + 1, hist->bvs + idx, + (hist->used - 
idx)*sizeof(*hist->bvs)); + hist->bvs[idx].bucket = hb; + hist->bvs[idx].count = count; + } + hist->used++; + if(hist->fast) { + struct histogram_fast *hfast = (struct histogram_fast *)hist; + /* reindex if in fast mode */ + for(i=idx;iused;i++) { + struct hist_flevel *faster = (struct hist_flevel *)&hist->bvs[i].bucket; + if(hfast->faster[faster->l1] == NULL) + hfast->faster[faster->l1] = hist->allocator->calloc(256, sizeof(uint16_t)); + hfast->faster[faster->l1][faster->l2] = i+1; + } + } + } + else { // found + /* Just need to update the counters */ + uint64_t newval = hist->bvs[idx].count + count; + if(unlikely(newval < hist->bvs[idx].count)) /* we rolled */ + newval = ~(uint64_t)0; + count = newval - hist->bvs[idx].count; + hist->bvs[idx].count = newval; + } + ASSERT_GOOD_HIST(hist); + return count; +} + +uint64_t +hist_insert(histogram_t *hist, double val, uint64_t count) { + return hist_insert_raw(hist, double_to_hist_bucket(val), count); +} + +uint64_t +hist_insert_intscale(histogram_t *hist, int64_t val, int scale, uint64_t count) { + return hist_insert_raw(hist, int_scale_to_hist_bucket(val, scale), count); +} + +uint64_t +hist_remove(histogram_t *hist, double val, uint64_t count) { + hist_bucket_t hb; + int idx; + ASSERT_GOOD_HIST(hist); + hb = double_to_hist_bucket(val); + if(hist_internal_find(hist, hb, &idx)) { + uint64_t newval = hist->bvs[idx].count - count; + if(newval > hist->bvs[idx].count) newval = 0; /* we rolled */ + count = hist->bvs[idx].count - newval; + hist->bvs[idx].count = newval; + ASSERT_GOOD_HIST(hist); + return count; + } + return 0; +} + +uint64_t +hist_sample_count(const histogram_t *hist) { + int i; + uint64_t total = 0, last = 0; + if(!hist) return 0; + ASSERT_GOOD_HIST(hist); + for(i=0;iused;i++) { + last = total; + total += hist->bvs[i].count; + if(total < last) return ~((uint64_t)0); + } + return total; +} + +int +hist_bucket_count(const histogram_t *hist) { + ASSERT_GOOD_HIST(hist); + return hist ? 
hist->used : 0; +} + +int +hist_bucket_idx(const histogram_t *hist, int idx, + double *bucket, uint64_t *count) { + ASSERT_GOOD_HIST(hist); + if(idx < 0 || idx >= hist->used) return 0; + *bucket = hist_bucket_to_double(hist->bvs[idx].bucket); + *count = hist->bvs[idx].count; + return 1; +} + +int +hist_bucket_idx_bucket(const histogram_t *hist, int idx, + hist_bucket_t *bucket, uint64_t *count) { + ASSERT_GOOD_HIST(hist); + if(idx < 0 || idx >= hist->used) return 0; + *bucket = hist->bvs[idx].bucket; + *count = hist->bvs[idx].count; + return 1; +} + +static int +hist_needed_merge_size_fc(histogram_t **hist, int cnt, + void (*f)(histogram_t *tgt, int tgtidx, + histogram_t *src, int srcidx), + histogram_t *tgt) { + ASSERT_GOOD_HIST(hist[0]); + unsigned short idx_static[8192]; + unsigned short *idx = idx_static; + int i, count = 0; + if(cnt > 8192) { + idx = malloc(cnt * sizeof(*idx)); + if(!idx) return -1; + } + memset(idx, 0, cnt * sizeof(*idx)); + while(1) { + hist_bucket_t smallest = { .exp = 0, .val = 0 }; + for(i=0;iused) { + smallest = hist[i]->bvs[idx[i]].bucket; + break; + } + if(i == cnt) break; /* there is no min -- no items */ + for(;iused) + if(hist_bucket_cmp(smallest, hist[i]->bvs[idx[i]].bucket) < 0) + smallest = hist[i]->bvs[idx[i]].bucket; + } + /* Now zip back through and advanced all smallests */ + for(i=0;iused && + hist_bucket_cmp(smallest, hist[i]->bvs[idx[i]].bucket) == 0) { + if(f) f(tgt, count, hist[i], idx[i]); + idx[i]++; + } + } + count++; + } + assert(count <= MAX_HIST_BINS); + if(idx != idx_static) free(idx); + return count; +} + +static void +internal_bucket_accum(histogram_t *tgt, int tgtidx, + histogram_t *src, int srcidx) { + uint64_t newval; + ASSERT_GOOD_HIST(tgt); + assert(tgtidx < tgt->allocd); + if(tgt->used == tgtidx) { + tgt->bvs[tgtidx].bucket = src->bvs[srcidx].bucket; + tgt->used++; + } + assert(hist_bucket_cmp(tgt->bvs[tgtidx].bucket, + src->bvs[srcidx].bucket) == 0); + newval = tgt->bvs[tgtidx].count + 
src->bvs[srcidx].count; + if(newval < tgt->bvs[tgtidx].count) newval = ~(uint64_t)0; + tgt->bvs[tgtidx].count = newval; +} + +int +hist_subtract(histogram_t *tgt, const histogram_t * const *hist, int cnt) { + int i, tgt_idx, src_idx; + int rv = 0; + ASSERT_GOOD_HIST(tgt); + for(i=0;iused && src_idx < hist[i]->used) { + int cmp = hist_bucket_cmp(tgt->bvs[tgt_idx].bucket, hist[i]->bvs[src_idx].bucket); + /* if the match, attempt to subtract, and move tgt && src fwd. */ + if(cmp == 0) { + if(tgt->bvs[tgt_idx].count < hist[i]->bvs[src_idx].count) { + tgt->bvs[tgt_idx].count = 0; + rv = -1; + } else { + tgt->bvs[tgt_idx].count = tgt->bvs[tgt_idx].count - hist[i]->bvs[src_idx].count; + } + tgt_idx++; + src_idx++; + } + else if(cmp > 0) { + tgt_idx++; + } + else { + if(hist[i]->bvs[src_idx].count > 0) rv = -1; + src_idx++; + } + } + /* run for the rest of the source so see if we have stuff we can't subtract */ + while(src_idx < hist[i]->used) { + if(hist[i]->bvs[src_idx].count > 0) rv = -1; + src_idx++; + } + } + ASSERT_GOOD_HIST(tgt); + return rv; +} + +static int +hist_needed_merge_size(histogram_t **hist, int cnt) { + return hist_needed_merge_size_fc(hist, cnt, NULL, NULL); +} + +int +hist_accumulate(histogram_t *tgt, const histogram_t* const *src, int cnt) { + int tgtneeds; + ASSERT_GOOD_HIST(tgt); + void *oldtgtbuff = tgt->bvs; + histogram_t tgt_copy; + histogram_t *inclusive_src_static[1025]; + histogram_t **inclusive_src = inclusive_src_static; + if(cnt+1 > 1025) { + inclusive_src = malloc(sizeof(histogram_t *) * (cnt+1)); + if(!inclusive_src) return -1; + } + memcpy(&tgt_copy, tgt, sizeof(*tgt)); + memcpy(inclusive_src, src, sizeof(*src)*cnt); + inclusive_src[cnt] = &tgt_copy; + tgtneeds = hist_needed_merge_size(inclusive_src, cnt+1); + if(tgtneeds < 0) { + if(inclusive_src != inclusive_src_static) free(inclusive_src); + return -1; + } + assert(tgtneeds <= MAX_HIST_BINS); + tgt->allocd = tgtneeds; + tgt->used = 0; + if (! 
tgt->allocd) tgt->allocd = 1; + tgt->bvs = tgt->allocator->calloc(tgt->allocd, sizeof(*tgt->bvs)); + hist_needed_merge_size_fc(inclusive_src, cnt+1, internal_bucket_accum, tgt); + if(oldtgtbuff) tgt->allocator->free(oldtgtbuff); + if(inclusive_src != inclusive_src_static) free(inclusive_src); + ASSERT_GOOD_HIST(tgt); + return tgt->used; +} + +int +hist_num_buckets(const histogram_t *hist) { + return hist->used; +} + +void +hist_clear(histogram_t *hist) { + int i; + ASSERT_GOOD_HIST(hist); + for(i=0;iused;i++) + hist->bvs[i].count = 0; + if(hist->fast) { + struct histogram_fast *hfast = (struct histogram_fast *)hist; + for(i=0;i<256;i++) { + if(hfast->faster[i]) { + memset(hfast->faster[i], 0, 256 * sizeof(uint16_t)); + } + } + } +} + +histogram_t * +hist_alloc(void) { + return hist_alloc_nbins(0); +} + +histogram_t * +hist_alloc_with_allocator(hist_allocator_t *allocator) { + return hist_alloc_nbins_with_allocator(0, allocator); +} + +histogram_t * +hist_alloc_nbins(int nbins) { + return hist_alloc_nbins_with_allocator(nbins, &default_allocator); +} + +histogram_t * +hist_alloc_nbins_with_allocator(int nbins, hist_allocator_t *allocator) { + histogram_t *tgt; + if(nbins < 1) nbins = DEFAULT_HIST_SIZE; + if(nbins > MAX_HIST_BINS) nbins = MAX_HIST_BINS; + tgt = allocator->calloc(1, sizeof(histogram_t)); + tgt->allocd = nbins; + tgt->bvs = allocator->calloc(tgt->allocd, sizeof(*tgt->bvs)); + tgt->allocator = allocator; + return tgt; +} + +histogram_t * +hist_fast_alloc(void) { + return hist_fast_alloc_nbins(0); +} + +histogram_t * +hist_fast_alloc_with_allocator(hist_allocator_t *allocator) { + return hist_fast_alloc_nbins_with_allocator(0, allocator); +} + +histogram_t * +hist_fast_alloc_nbins(int nbins) { + return hist_fast_alloc_nbins_with_allocator(nbins, &default_allocator); +} + +histogram_t * +hist_fast_alloc_nbins_with_allocator(int nbins, hist_allocator_t *allocator) { + struct histogram_fast *tgt; + if(nbins < 1) nbins = DEFAULT_HIST_SIZE; + if(nbins > 
MAX_HIST_BINS) nbins = MAX_HIST_BINS; + tgt = allocator->calloc(1, sizeof(struct histogram_fast)); + tgt->internal.allocd = nbins; + tgt->internal.bvs = allocator->calloc(tgt->internal.allocd, sizeof(*tgt->internal.bvs)); + tgt->internal.fast = 1; + tgt->internal.allocator = allocator; + return &tgt->internal; +} + +histogram_t * +hist_clone(histogram_t *other) { + return hist_clone_with_allocator(other, &default_allocator); +} + +histogram_t * +hist_clone_with_allocator(histogram_t *other, hist_allocator_t *allocator) +{ + histogram_t *tgt = NULL; + int i = 0; + if (other->fast) { + tgt = hist_fast_alloc_nbins_with_allocator(other->allocd, allocator); + struct histogram_fast *f = (struct histogram_fast *)tgt; + struct histogram_fast *of = (struct histogram_fast *)other; + for(i=0;i<256;i++) { + if (of->faster[i]) { + f->faster[i] = allocator->calloc(256, sizeof(uint16_t)); + memcpy(f->faster[i], of->faster[i], 256 * sizeof(uint16_t)); + } + } + } + else { + tgt = hist_alloc_nbins_with_allocator(other->allocd, allocator); + } + memcpy(tgt->bvs, other->bvs, other->used * sizeof(struct hist_bv_pair)); + tgt->used = other->used; + return tgt; +} + +void +hist_free(histogram_t *hist) { + if(hist == NULL) return; + hist_allocator_t *a = hist->allocator; + if(hist->bvs != NULL) a->free(hist->bvs); + if(hist->fast) { + int i; + struct histogram_fast *hfast = (struct histogram_fast *)hist; + for(i=0;i<256;i++) a->free(hfast->faster[i]); + } + + a->free(hist); +} + +histogram_t * +hist_compress_mbe(histogram_t *hist, int8_t mbe) { + histogram_t *hist_compressed = hist_alloc(); + if(!hist) return hist_compressed; + int total = hist_bucket_count(hist); + for(int idx=0; idxbvs[idx]; + // we know that stored buckets are valid (abs(val)>=10) + // so it suffices to check the exponent + if (bv.bucket.exp < mbe) { + // merge into zero bucket + hist_insert_raw(hist_compressed, (hist_bucket_t) {.exp = 0, .val = 0}, bv.count); + } + else if (bv.bucket.exp == mbe) { + // re-bucket to 
val = 10, 20, ... 90 + hist_insert_raw(hist_compressed, (hist_bucket_t) { + .exp = bv.bucket.exp, + .val = (bv.bucket.val/10) * 10 + }, bv.count); + } + else { + // copy over + hist_insert_raw(hist_compressed, bv.bucket, bv.count); + } + } + return hist_compressed; +} + +extern int +hist_bucket_to_string(hist_bucket_t hb, char *buf) { + if(hist_bucket_isnan(hb)) { strcpy(buf, "NaN"); return 3; } + if(hb.val == 0) { strcpy(buf, "0"); return 1; } + else { + int aval = abs(hb.val); + int aexp = abs((int)hb.exp - 1); + buf[0] = hb.val >= 0 ? '+' : '-'; + buf[1] = '0' + aval / 10; + buf[2] = '0' + aval % 10; + buf[3] = 'e'; + buf[4] = hb.exp >= 1 ? '+' : '-'; + buf[5] = '0' + (aexp / 100); + buf[6] = '0' + (aexp % 100)/10; + buf[7] = '0' + (aexp % 10); + buf[8] = '\0'; + return 8; + } +} diff --git a/deps/libcircllhist/circllhist.h b/deps/libcircllhist/circllhist.h new file mode 100644 index 0000000..a43a84a --- /dev/null +++ b/deps/libcircllhist/circllhist.h @@ -0,0 +1,231 @@ +/** \file circllhist.h */ +/* + * Copyright (c) 2016, Circonus, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name Circonus, Inc. nor the names of its contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/*! \mainpage A C implementation of Circonus log-linear histograms +* \ref circllhist.h +*/ + +#ifndef CIRCLLHIST_H +#define CIRCLLHIST_H + +#if defined(WIN32) +#include +typedef SSIZE_T ssize_t; +#endif + +#ifdef __cplusplus +extern "C" { /* FFI_SKIP */ +#endif + +#define DEFAULT_HIST_SIZE 100 +//! Maximal size of hist bucket standard string format (inc. terminator) +#define HIST_BUCKET_MAX_STRING_SIZE 9 +#define API_EXPORT(type) extern type +#include +#include +#include + +typedef struct histogram histogram_t; +typedef struct hist_rollup_config hist_rollup_config_t; +//! A hist_bucket structure represents a histogram bucket with the following +//! dimensions: +//! - (val < -99 || 99 < val) => Invalid bucket +//! - (-10 < val && val < 10) => (-10^-127 .. +10^-127) zero bucket +//! - val > 0 => [ (val/10)*10^exp .. (val+1)/10*10^exp ) +//! - val < 0 => ( (val-1)/10*10^exp .. (val/10)*10^exp ] +typedef struct hist_bucket { + int8_t val; //!< value * 10 + int8_t exp; //!< exponent -128 .. 
127 +} hist_bucket_t; + +typedef struct hist_allocator { + void *(*malloc)(size_t); + void *(*calloc)(size_t, size_t); + void (*free)(void *); +} hist_allocator_t; + +//////////////////////////////////////////////////////////////////////////////// +// Histogram buckets + +//! Returns the edge of the histogram bucket closer to zero +API_EXPORT(double) hist_bucket_to_double(hist_bucket_t hb); +//! Calculate mid-point of the bucket +API_EXPORT(double) hist_bucket_midpoint(hist_bucket_t in); +//! Get the width of the hist_bucket +API_EXPORT(double) hist_bucket_to_double_bin_width(hist_bucket_t hb); +//! Create the bucket that a value belongs to +API_EXPORT(hist_bucket_t) double_to_hist_bucket(double d); +//! Create the bucket that value * 10^(scale) belongs to +API_EXPORT(hist_bucket_t) int_scale_to_hist_bucket(int64_t value, int scale); +//! Writes a standardized string representation to buf +//! Buf must be of size HIST_BUCKET_MAX_STRING_SIZE or larger. +//! \return of characters of bytes written into the buffer excluding terminator +//! +//! Format spec: "sxxetyyy", where +//! - s = '+' or '-' global sign +//! - xx -- two digits representing val as decimal integer (in 10 .. 99) +//! - e = 'e' literal character +//! - t = '+' or '-' exponent sign +//! - yyy -- three digits representing exp as decimal integer with leading 0s +//! +//! Exception: The zero bucket is represented as "0" +//! Exception: Invalid buckets are represented as "NaN" +//! +//! Examples: +//! 1 => '+10e-001'; 12 => '+12e+000'; +//! -0.23 => '-23e-003'; 23000 => '+23e+003'; +API_EXPORT(int) hist_bucket_to_string(hist_bucket_t hb, char *buf); + +//////////////////////////////////////////////////////////////////////////////// +// Creating and destroying histograms + +//! Create a new histogram, uses default allocator +API_EXPORT(histogram_t *) hist_alloc(void); +//! Create a new histogram with preallocated bins, uses default allocator +API_EXPORT(histogram_t *) hist_alloc_nbins(int nbins); +//! 
Create a fast-histogram +/*! Fast allocations consume 2kb + N * 512b more memory + * where N is the number of used exponents. It allows for O(1) increments for + * prexisting keys, uses default allocator */ +API_EXPORT(histogram_t *) hist_fast_alloc(void); +//! Create a fast-histogram with preallocated bins, uses default allocator +API_EXPORT(histogram_t *) hist_fast_alloc_nbins(int nbins); +//! Create an exact copy of other, uses default allocator +API_EXPORT(histogram_t *) hist_clone(histogram_t *other); + +//! Create a new histogram, uses custom allocator +API_EXPORT(histogram_t *) hist_alloc_with_allocator(hist_allocator_t *alloc); +//! Create a new histogram with preallocated bins, uses custom allocator +API_EXPORT(histogram_t *) hist_alloc_nbins_with_allocator(int nbins, hist_allocator_t *alloc); +//! Create a fast-histogram +/*! Fast allocations consume 2kb + N * 512b more memory + * where N is the number of used exponents. It allows for O(1) increments for + * prexisting keys, uses custom allocator */ +API_EXPORT(histogram_t *) hist_fast_alloc_with_allocator(hist_allocator_t *alloc); +//! Create a fast-histogram with preallocated bins, uses custom allocator +API_EXPORT(histogram_t *) hist_fast_alloc_nbins_with_allocator(int nbins, hist_allocator_t *alloc); +//! Create an exact copy of other, uses custom allocator +API_EXPORT(histogram_t *) hist_clone_with_allocator(histogram_t *other, hist_allocator_t *alloc); + +//! Free a (fast-) histogram, frees with allocator chosen during the alloc/clone +API_EXPORT(void) hist_free(histogram_t *hist); + +//////////////////////////////////////////////////////////////////////////////// +// Getting data in and out of histograms + +/*! Inserting double values converts from IEEE double to a small static integer + * base and can suffer from floating point math skew. Using the intscale + * variant is more precise and significantly faster if you already have + * integer measurements. */ +//! 
insert a value into a histogram count times +API_EXPORT(uint64_t) hist_insert(histogram_t *hist, double val, uint64_t count); +//! Remove data from a histogram count times +API_EXPORT(uint64_t) hist_remove(histogram_t *hist, double val, uint64_t count); +//! Insert a single bucket + count into a histogram +//! +//! Updates counts if the bucket exists +//! Handles re-allocation of new buckets if needed +API_EXPORT(uint64_t) hist_insert_raw(histogram_t *hist, hist_bucket_t hb, uint64_t count); +//! Get the number of used buckets in a histogram +API_EXPORT(int) hist_bucket_count(const histogram_t *hist); +//! Same as hist_bucket_count +API_EXPORT(int) hist_num_buckets(const histogram_t *hist); +//! Get the total number of values stored in the histogram +API_EXPORT(uint64_t) hist_sample_count(const histogram_t *hist); +//! Get value+count for bucket at position idx. Valid positions are 0 .. hist_bucket_count() +API_EXPORT(int) hist_bucket_idx(const histogram_t *hist, int idx, double *v, uint64_t *c); +//! Get bucket+count for bucket at position idx. Valid positions are 0 .. hist_bucket_count() +API_EXPORT(int) hist_bucket_idx_bucket(const histogram_t *hist, int idx, hist_bucket_t *b, uint64_t *c); +//! Accumulate bins from each of cnt histograms in src onto tgt +API_EXPORT(int) hist_accumulate(histogram_t *tgt, const histogram_t * const *src, int cnt); +//! Subtract bins from each of cnt histograms in src from tgt, return -1 on underrun error +API_EXPORT(int) hist_subtract(histogram_t *tgt, const histogram_t * const *src, int cnt); +//! Clear data fast. Keeps buckets allocated. +API_EXPORT(void) hist_clear(histogram_t *hist); +//! Insert a value into a histogram value = val * 10^(scale) +API_EXPORT(uint64_t) hist_insert_intscale(histogram_t *hist, int64_t val, int scale, uint64_t count); + +//////////////////////////////////////////////////////////////////////////////// +// Serialization + +//! 
Serialize histogram to binary data +API_EXPORT(ssize_t) hist_serialize(const histogram_t *h, void *buff, ssize_t len); +API_EXPORT(ssize_t) hist_deserialize(histogram_t *h, const void *buff, ssize_t len); +API_EXPORT(ssize_t) hist_serialize_estimate(const histogram_t *h); +//! Return histogram serialization as base64 encoded string +API_EXPORT(ssize_t) hist_serialize_b64(const histogram_t *h, char *b64_serialized_histo_buff, ssize_t buff_len); +API_EXPORT(ssize_t) hist_deserialize_b64(histogram_t *h, const void *b64_string, ssize_t b64_string_len); +API_EXPORT(ssize_t) hist_serialize_b64_estimate(const histogram_t *h); +//! Compress histogram by squshing together adjacent buckets +//! +//! This compression is lossy. mean/quantiles will be affected by compression. +//! Intended use cases is visualization. +//! \param hist +//! \param mbe the Minimum Bucket Exponent +//! \return the compressed histogram as new value +API_EXPORT(histogram_t *) hist_compress_mbe(histogram_t *h, int8_t mbe); + +//////////////////////////////////////////////////////////////////////////////// +// Analytics + +//! Approximate mean value of all values stored in the histogram +API_EXPORT(double) hist_approx_mean(const histogram_t *); +//! Approximate the sum of all values stored in the histogram +API_EXPORT(double) hist_approx_sum(const histogram_t *); +//! Approximate the standard deviation of all values stored in the histogram +API_EXPORT(double) hist_approx_stddev(const histogram_t *); +//! Approximate the k-th moment of all values stored in the histogram +//! \param hist +//! \param k +API_EXPORT(double) hist_approx_moment(const histogram_t *hist, double k); +//! Returns the number of values in buckets that are entirely lower than or equal to threshold +//! \param hist +//! \param threshold +API_EXPORT(uint64_t) hist_approx_count_below(const histogram_t *hist, double threshold); +//! Returns the number of values in buckets that are entirely larger than or equal to threshold +//! 
\param hist +//! \param threshold +API_EXPORT(uint64_t) hist_approx_count_above(const histogram_t *hist, double threshold); +//! Returns the number of samples in the histogram that are in the same bucket as the provided value +//! \param hist +//! \param value +API_EXPORT(uint64_t) hist_approx_count_nearby(const histogram_t *hist, double value); +//! Approiximate n quantiles of all values stored in the histogram +//! \param *q_in array of quantiles to comute +//! \param nq length of quantile array +//! \param *q_out pre-allocated array where results shall be written to +API_EXPORT(int) hist_approx_quantile(const histogram_t *, const double *q_in, int nq, double *q_out); + +#ifdef __cplusplus +} /* FFI_SKIP */ +#endif + +#endif diff --git a/src/Makefile b/src/Makefile index b652517..ae11154 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,7 +1,8 @@ CC = gcc CFLAGS = -g -Wall -CINCLUDES = -I../deps/libpcap/ -I../deps/lua/src -CLIBS = ../deps/libpcap/libpcap.a ../deps/lua/src/liblua.a +LDFLAGS = +CINCLUDES = -I../deps/libpcap/ -I../deps/lua/src -I../deps/libcircllhist +CLIBS = ../deps/libpcap/libpcap.a ../deps/lua/src/liblua.a ../deps/libcircllhist/circllhist.o LOAD_LIBS=-lm -ldl -lpthread INSTALL=/usr/bin/install @@ -21,7 +22,7 @@ all: deps tcpkit include dep.mk tcpkit: $(OBJS) - $(CC) $(CFLAGS) -o $(PROG) $(OBJS) $(CINCLUDES) $(CLIBS) $(LOAD_LIBS) + $(CC) $(CFLAGS) -o $(PROG) $(OBJS) $(CINCLUDES) $(CLIBS) $(LDFLAGS) $(LOAD_LIBS) %.o: %.c $(TCPKIT_CC) -c $< @@ -32,6 +33,7 @@ dep.mk: deps: nop @cd ../deps/libpcap/ && ./configure --enable-dbus=no && make @cd ../deps/lua/src && make a + @cd ../deps/libcircllhist && make circllhist.o clean: - rm -rf $(PROG) && rm -rf *.o diff --git a/src/server.c b/src/server.c index f9354fe..713848c 100644 --- a/src/server.c +++ b/src/server.c @@ -84,6 +84,7 @@ static void server_print_latency_stats(server *srv) { ); for (j = 0; j < N_BUCKET; j++) { + hist_clear(srv->st->latencies[i].lathist); if (srv->st->latencies[i].buckets[j] == 
0) continue; if (j >= 1) { rlog("%s~%s: %lld", latency_buckets_name[j-1], latency_buckets_name[j], @@ -131,15 +132,37 @@ char *server_stats_to_json(server *svr) { buf[n++] = '['; for (i = 0; i < st->n_latency; i++) { n += snprintf(buf+n, size-n, - "{\"%d\":{\"total_reqs\": %" PRId64 ",\"total_costs\":%" PRId64 ", \"slow_reqs\":%" PRId64 ",\"latencies\":[", + "{\"%d\":{\"total_reqs\": %" PRId64 ",\"total_costs\":%" PRId64 ", \"slow_reqs\":%" PRId64, *(int*)array_pos(svr->opts->ports, i), st->latencies[i].total_reqs, st->latencies[i].total_costs, st->latencies[i].slow_counts); + + /* Fixed latecy buckets */ + n += snprintf(buf+n, size-n, ",\"latencies\":["); for (j = 0; j < N_BUCKET; j++) { n += snprintf(buf+n, size-n, "%" PRId64 ",", st->latencies[i].buckets[j]); } buf[n-1] = ']'; + + /* Log-linear latency buckets */ + n += snprintf(buf+n, size-n, ",\"latency\":{\"_type\":\"h\",\"_value\":["); + for (j = 0; j < hist_bucket_count(st->latencies[i].lathist); j++) { + hist_bucket_t hb; + uint64_t count; + char bname[32]; + hist_bucket_to_string(hb, bname); + if(n+64 > size) { + size *= 2; + buf = realloc(buf, size); + } + hist_bucket_idx_bucket(st->latencies[i].lathist, j, &hb, &count); + n += snprintf(buf+n, size-n, "%s\"H[%s]=%" PRIu64 "\"", j ? 
"," : "", + bname, count); + } + buf[n++] = ']'; + buf[n++] = '}'; + buf[n++] = '}'; buf[n++] = '}'; buf[n++] = ','; diff --git a/src/stats.c b/src/stats.c index 0bd5fad..e6854b2 100644 --- a/src/stats.c +++ b/src/stats.c @@ -52,6 +52,10 @@ stats *stats_create(int n) { } void stats_destroy(stats *st) { + int i; + for (i = 0; i < st->n_latency; i++) { + if(st->latencies[i].lathist) hist_free(st->latencies[i].lathist); + } free(st->latencies); free(st); } @@ -74,6 +78,8 @@ void stats_update_latency(stats *st, int ind, int64_t latency_us) { int i, n; st->latencies[ind].total_reqs++; st->latencies[ind].total_costs += latency_us; + if(!st->latencies[ind].lathist) st->latencies[ind].lathist = hist_alloc(); + hist_insert_intscale(st->latencies[ind].lathist, latency_us, -6, 1); n = sizeof(latency_buckets)/ sizeof(latency_buckets[0]); for (i = 0; i < n-1; i++) { @@ -83,4 +89,4 @@ void stats_update_latency(stats *st, int ind, int64_t latency_us) { } } st->latencies[ind].buckets[n-1]++; -} \ No newline at end of file +} diff --git a/src/stats.h b/src/stats.h index 1369ba3..805331f 100644 --- a/src/stats.h +++ b/src/stats.h @@ -18,6 +18,7 @@ #define TCPKIT_STATS_H #include +#include #define N_BUCKET 18 @@ -26,6 +27,7 @@ typedef struct { int64_t total_costs; int64_t slow_counts; int64_t buckets[N_BUCKET]; + histogram_t *lathist; }latency_stat; typedef struct {