honeycombio · ChristopherBiscardi · Mar 2, 2018 · Feb 21, 2018 · Feb 22, 2018 · Feb 26, 2018
diff --git a/.babelrc b/.babelrc
@@ -0,0 +1,3 @@
+{
+  "presets": ["env"]
+}
diff --git a/README.md b/README.md
@@ -0,0 +1,93 @@
+# Dynamic Sampler
+
+This is a collection of samplers that compute per-key sample rates for
+events sent to services like [Honeycomb](https://honeycomb.io).
+
+# Usage
+
+### With defaults:
+
+```javascript
+import { PerKeyThroughput } from "dynamic-sampler";
+const sampler = new PerKeyThroughput();
+
+const rate = sampler.getSampleRate("my key");
+```
+
+### With options
+
+```javascript
+import { PerKeyThroughput } from "dynamic-sampler";
+const sampler = new PerKeyThroughput({
+  clearFrequencySec: 100,
+  perKeyThroughputSec: 2
+});
+```
+
+## Choosing a Sampler
+
+TODO
+
+# Implementing New Samplers
+
+The `Sampler` base class provides:
+
+* timer setup
+* construction of initial state (`Map`s)
+* `getSampleRate`, which counts the call for the given key and returns
+  that key's saved sample rate (or `1` if none has been computed yet)
+
+You can extend it to create new samplers. `updateMaps` is the only
+method that must be defined (the base implementation throws), but it is
+often useful to read additional configuration in the constructor:
+
+```javascript
+import { Sampler } from "dynamic-sampler";
+
+export class PerKey extends Sampler {
+  constructor(opts = {}) {
+    super(opts);
+    this.perKeyThroughputSec = opts.perKeyThroughputSec || 5;
+  }
+  updateMaps() {
+    if (this.currentCounts.size == 0) {
+      //no traffic in the last 30s. clear the result Map
+      this.savedSampleRates.clear();
+      return;
+    }
+    const actualPerKeyRate = this.perKeyThroughputSec * this.clearFrequencySec;
+
+    const newRates = new Map();
+    this.currentCounts.forEach((val, key) => {
+      newRates.set(key, Math.floor(Math.max(1, val / actualPerKeyRate)));
+    });
+    this.savedSampleRates = newRates;
+  }
+}
+```
+
+## Modifying getSampleRate
+
+Sometimes it makes sense to check additional state in `getSampleRate`
+and return a different result based on that. When overriding the
+method, call `super.getSampleRate` so that the key is still counted.
+
+```javascript
+class MySampler extends Sampler {
+  constructor(opts = {}) {
+    super(opts);
+    this.hasReceivedTraffic = false;
+  }
+  updateMaps() {
+    // other logic
+    this.hasReceivedTraffic = true;
+  }
+  getSampleRate(key) {
+    const superSampleRate = super.getSampleRate(key);
+    if (!this.hasReceivedTraffic) {
+      return this.goalSampleRate;
+    } else {
+      return superSampleRate;
+    }
+  }
+}
+```
diff --git a/__mocks__/nanotimer.js b/__mocks__/nanotimer.js
@@ -0,0 +1,15 @@
+// Mock nanotimer so we can manually tick()
+export default class NanoTimer {
+  setInterval(fn, args, timeInSeconds) {
+    this.fn = fn;
+    this.args = args;
+  }
+  // custom function that is only for testing
+  tick() {
+    if (this.args) {
+      this.fn(this.args);
+    } else {
+      this.fn();
+    }
+  }
+}
diff --git a/index.js b/index.js
@@ -0,0 +1,127 @@
+import NanoTimer from "nanotimer";
+const debug = require("debug")("dynamic-sampler");
+
+// A Sampler handles construction, timer initialization, and getting the sample
+// rate.
+export class Sampler {
+  constructor({ clearFrequencySec } = {}) {
+    // TODO: runtime validate inputs; make sure they're numbers
+    this.clearFrequencySec = clearFrequencySec || 30;
+    this.savedSampleRates = new Map();
+    this.currentCounts = new Map();
+
+    if (debug.enabled) {
+      // if debug is enabled, add a unique id to help debug
+      this.id = (Math.random() * 100000).toFixed();
+      this.getSampleRateCalledTimes = 0;
+      debug("created new perKey sampler with id", this.id);
+    } else {
+      debug("created new perKey sampler");
+    }
+
+    // Set up timer to run updateMaps() on an interval
+    this.timer = new NanoTimer();
+    this.timer.setInterval(
+      this.updateMaps.bind(this),
+      [this.id],
+      `${this.clearFrequencySec}s`
+    );
+  }
+  getSampleRate(key) {
+    // initialize or increment an existing counter
+    const { currentCounts, savedSampleRates } = this;
+    if (currentCounts.has(key)) {
+      const value = currentCounts.get(key);
+      currentCounts.set(key, value + 1);
+    } else {
+      currentCounts.set(key, 1);
+    }
+    if (savedSampleRates.has(key)) {
+      return savedSampleRates.get(key);
+    } else {
+      return 1;
+    }
+  }
+  updateMaps() {
+    throw new Error(
+      "Classes which extend `Sampler` must define `updateMaps()`"
+    );
+  }
+}
+
+export class PerKeyThroughput extends Sampler {
+  constructor(opts = {}) {
+    super(opts);
+    this.perKeyThroughputSec = opts.perKeyThroughputSec || 10;
+  }
+  updateMaps() {
+    debug("PerKey.updateMaps()", this.id && this.id);
+    if (this.currentCounts.size == 0) {
+      // no traffic in the last clearFrequencySecs. clear the result Map
+      this.savedSampleRates.clear();
+      return;
+    }
+    const actualPerKeyRate = this.perKeyThroughputSec * this.clearFrequencySec;
+
+    const newRates = new Map();
+    this.currentCounts.forEach((val, key) => {
+      newRates.set(key, Math.floor(Math.max(1, val / actualPerKeyRate)));
+    });
+    this.savedSampleRates = newRates;
+  }
+}
+
+export class AvgSampleRate extends Sampler {
+  constructor(opts = {}) {
+    super(opts);
+    this.goalSampleRate = opts.goalSampleRate || 10;
+    this.hasReceivedTraffic = false;
+  }
+  updateMaps() {
+    debug("Avg.updateMaps()", this.id && this.id);
+    if (this.currentCounts.size == 0) {
+      //no traffic in the last 30s. clear the result Map
+      this.savedSampleRates.clear();
+      return;
+    }
+
+    let sumEvents = 0;
+    let logSum = 0;
+    this.currentCounts.forEach((val, key) => {
+      sumEvents += val;
+      logSum += Math.log10(val);
+    });
+    const goalCount = sumEvents / this.goalSampleRate;
+    const goalRatio = goalCount / logSum;
+
+    const newRates = new Map();
+    let keysRemaining = this.currentCounts.size;
+    let extra = 0;
+    this.currentCounts.forEach((count, key) => {
+      let goalForKey = Math.max(1, Math.log10(count) * goalRatio);
+      const extraForKey = extra / keysRemaining;
+      goalForKey += extraForKey;
+      extra -= extraForKey;
+      keysRemaining--;
+      if (count <= goalForKey) {
+        newRates.set(key, 1);
+        extra += goalForKey - count;
+      } else {
+        newRates.set(key, Math.ceil(count / goalForKey));
+        extra += goalForKey - count / newRates.get(key);
+      }
+    });
+    this.savedSampleRates = newRates;
+    this.hasReceivedTraffic = true;
+  }
+  getSampleRate(key) {
+    debug("hasReceivedTraffic", this.hasReceivedTraffic);
+    if (!this.hasReceivedTraffic) {
+      return this.goalSampleRate;
+    } else {
+      // TODO: how well supported is this syntax
+      // (basically no IE native support. what about babel/etc?)
+      return super.getSampleRate(key);
+    }
+  }
+}
diff --git a/index.test.js b/index.test.js
@@ -0,0 +1,57 @@
+jest.mock("nanotimer");
+import { Sampler, PerKeyThroughput, AvgSampleRate } from ".";
+
+describe("Sampler", () => {
+  test("initializes with default values", () => {
+    const sampler = new Sampler();
+    expect(sampler.clearFrequencySec).toEqual(30);
+    expect(sampler.id).toBeUndefined();
+    expect(sampler.getSampleRateCalledTimes).toBeUndefined();
+  });
+});
+describe("PerKeyThroughput", () => {
+  test("initializes with default values", () => {
+    const sampler = new PerKeyThroughput();
+    expect(sampler.clearFrequencySec).toEqual(30);
+    expect(sampler.perKeyThroughputSec).toEqual(10);
+    expect(sampler.id).toBeUndefined();
+    expect(sampler.getSampleRateCalledTimes).toBeUndefined();
+  });
+
+  test("gets a sample rate", () => {
+    const sampler = new PerKeyThroughput();
+    // Fake a bunch of traffic for a specific key
+    new Array(1500).fill(1).forEach(() => sampler.getSampleRate("my-key"));
+    // Mocked tick() is equal to clearFrequencySec
+    // Moves time forward by enough to run `updateMaps()`
+    sampler.timer.tick();
+    // get the resulting sample rate after updating maps
+    const a = sampler.getSampleRate("my-key");
+    expect(a).toEqual(5);
+  });
+});
+
+describe("AvgSampleRate", () => {
+  test("initializes with default values", () => {
+    const sampler = new AvgSampleRate();
+    console.log(sampler);
+    expect(sampler.clearFrequencySec).toEqual(30);
+    expect(sampler.goalSampleRate).toEqual(10);
+    expect(sampler.id).toBeUndefined();
+    expect(sampler.getSampleRateCalledTimes).toBeUndefined();
+  });
+
+  test("gets a sample rate", () => {
+    const sampler = new AvgSampleRate();
+    expect(sampler.hasReceivedTraffic).toEqual(false);
+    new Array(1500).fill(1).forEach(() => sampler.getSampleRate("my-key"));
+    // manual tick is equal to ""
+    sampler.timer.tick();
+    expect(sampler.hasReceivedTraffic).toEqual(true);
+    const a = sampler.getSampleRate("my-key");
+    // No traffic means this is the goal sample rate
+    expect(a).toEqual(10);
+    sampler.timer.tick();
+    expect(sampler.getSampleRate("my-key")).toEqual(1);
+  });
+});
diff --git a/package.json b/package.json
@@ -0,0 +1,38 @@
+{
+  "name": "dynsampler",
+  "version": "1.0.0",
+  "description": "Dynamic sampling of events",
+  "main": "lib/dynamic-sampler.js",
+  "module": "lib/dynamic-sampler.m.js",
+  "repository": "https://github.com/honeycombio/dynamic-sampler.js",
+  "author":
+    "Christopher Biscardi <[email protected]> (@chrisbiscardi)",
+  "license": "Apache-2.0",
+  "scripts": {
+    "build": "microbundle --output lib --external all",
+    "start": "microbundle watch --output lib --external all",
+    "test": "jest",
+    "precommit": "lint-staged"
+  },
+  "lint-staged": {
+    "*.{js,jsx}": ["prettier --parser flow --write", "git add"],
+    "*.json": ["prettier --parser json --write", "git add"],
+    "*.{graphql,gql}": ["prettier --parser graphql --write", "git add"],
+    "*.{md,markdown}": ["prettier --parser markdown --write", "git add"],
+    "*.{css,scss}": ["prettier --parser css --write", "git add"]
+  },
+  "devDependencies": {
+    "babel-core": "^6.26.0",
+    "babel-jest": "^22.2.2",
+    "babel-preset-env": "^1.6.1",
+    "husky": "^0.14.3",
+    "jest": "^22.3.0",
+    "lint-staged": "^7.0.0",
+    "microbundle": "^0.4.3",
+    "prettier": "^1.10.2"
+  },
+  "dependencies": {
+    "debug": "^3.1.0",
+    "nanotimer": "^0.3.15"
+  }
+}