-
Notifications
You must be signed in to change notification settings - Fork 3
Initialize dynamic-sampler #1
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{ | ||
"presets": ["env"] | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
# Dynamic Sampler | ||
|
||
This is a collection of samplers that can be used to provide sample | ||
rates when sending data to services like [honeycomb](https://honeycomb.io) | ||
|
||
# Usage | ||
|
||
### With defaults: | ||
|
||
```javascript | ||
import { PerKeyThroughput } from "dynamic-sampler"; | ||
const sampler = new PerKeyThroughput(); | ||
|
||
const rate = sampler.getSampleRate("my key"); | ||
``` | ||
|
||
### With options | ||
|
||
```javascript | ||
import { PerKeyThroughput } from "dynamic-sampler"; | ||
const sampler = new PerKeyThroughput({ | ||
clearFrequencySec: 100, | ||
perKeyThroughputSec: 2 | ||
}); | ||
``` | ||
|
||
## Choosing a Sampler | ||
|
||
TODO | ||
|
||
# Implementing New Samplers | ||
|
||
The `Sampler` class includes: | ||
|
||
* timer setup | ||
* construction of initial state (`Map`s) | ||
* `getSampleRate` returns the rate for a given key | ||
|
||
You can extend it to create new samplers. `updateMaps` is the only | ||
function that needs to be defined, but it is often useful to collect | ||
additional configuration from the constructor: | ||
|
||
```javascript | ||
import { Sampler } from "dynamic-sampler"; | ||
|
||
export class PerKey extends Sampler { | ||
constructor(opts = {}) { | ||
super(opts); | ||
this.perKeyThroughputSec = opts.perKeyThroughputSec || 5; | ||
} | ||
updateMaps() { | ||
if (this.currentCounts.size == 0) { | ||
//no traffic in the last 30s. clear the result Map | ||
this.savedSampleRates.clear(); | ||
return; | ||
} | ||
const actualPerKeyRate = this.perKeyThroughputSec * this.clearFrequencySec; | ||
|
||
const newRates = new Map(); | ||
this.currentCounts.forEach((val, key) => { | ||
newRates.set(key, Math.floor(Math.max(1, val / actualPerKeyRate))); | ||
}); | ||
this.savedSampleRates = newRates; | ||
} | ||
} | ||
``` | ||
|
||
## Modifying getSampleRate | ||
|
||
Sometimes it makes sense to check additional state in `getSampleRate` | ||
and return a different result based on that. When overriding the | ||
function call `super.getSampleRate`. | ||
|
||
```javascript | ||
class MySampler extends Sampler { | ||
constructor(opts = {}) { | ||
super(opts); | ||
this.hasReceivedTraffic = false; | ||
} | ||
updateMaps() { | ||
// other logic | ||
this.hasReceivedTraffic = true; | ||
} | ||
getSampleRate(key) { | ||
const superSampleRate = super.getSampleRate(key); | ||
if (!this.hasReceivedTraffic) { | ||
return this.goalSampleRate; | ||
} else { | ||
return superSampleRate; | ||
} | ||
} | ||
} | ||
``` | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
// Mock nanotimer so we can manually tick() | ||
export default class NanoTimer { | ||
setInterval(fn, args, timeInSeconds) { | ||
this.fn = fn; | ||
this.args = args; | ||
} | ||
// custom function that is only for testing | ||
tick() { | ||
if (this.args) { | ||
this.fn(this.args); | ||
} else { | ||
this.fn(); | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
import NanoTimer from "nanotimer"; | ||
const debug = require("debug")("dynamic-sampler"); | ||
|
||
// A Sampler handles construction, timer initialization, and getting the sample | ||
// rate. | ||
export class Sampler { | ||
constructor({ clearFrequencySec } = {}) { | ||
// TODO: runtime validate inputs; make sure they're numbers | ||
this.clearFrequencySec = clearFrequencySec || 30; | ||
this.savedSampleRates = new Map(); | ||
this.currentCounts = new Map(); | ||
|
||
if (debug.enabled) { | ||
// if debug is enabled, add a unique id to help debug | ||
this.id = (Math.random() * 100000).toFixed(); | ||
this.getSampleRateCalledTimes = 0; | ||
debug("created new perKey sampler with id", this.id); | ||
} else { | ||
debug("created new perKey sampler"); | ||
} | ||
|
||
// Set up timer to run updateMaps() on an interval | ||
this.timer = new NanoTimer(); | ||
this.timer.setInterval( | ||
this.updateMaps.bind(this), | ||
[this.id], | ||
`${this.clearFrequencySec}s` | ||
); | ||
} | ||
getSampleRate(key) { | ||
// initialize or increment an existing counter | ||
const { currentCounts, savedSampleRates } = this; | ||
if (currentCounts.has(key)) { | ||
const value = currentCounts.get(key); | ||
currentCounts.set(key, value + 1); | ||
} else { | ||
currentCounts.set(key, 1); | ||
} | ||
if (savedSampleRates.has(key)) { | ||
return savedSampleRates.get(key); | ||
} else { | ||
return 1; | ||
} | ||
} | ||
updateMaps() { | ||
throw new Error( | ||
"Classes which extend `Sampler` must define `updateMaps()`" | ||
); | ||
} | ||
} | ||
|
||
export class PerKeyThroughput extends Sampler { | ||
constructor(opts = {}) { | ||
super(opts); | ||
this.perKeyThroughputSec = opts.perKeyThroughputSec || 10; | ||
} | ||
updateMaps() { | ||
debug("PerKey.updateMaps()", this.id && this.id); | ||
if (this.currentCounts.size == 0) { | ||
// no traffic in the last clearFrequencySecs. clear the result Map | ||
this.savedSampleRates.clear(); | ||
return; | ||
} | ||
const actualPerKeyRate = this.perKeyThroughputSec * this.clearFrequencySec; | ||
|
||
const newRates = new Map(); | ||
this.currentCounts.forEach((val, key) => { | ||
newRates.set(key, Math.floor(Math.max(1, val / actualPerKeyRate))); | ||
}); | ||
this.savedSampleRates = newRates; | ||
} | ||
} | ||
|
||
export class AvgSampleRate extends Sampler { | ||
constructor(opts = {}) { | ||
super(opts); | ||
this.goalSampleRate = opts.goalSampleRate || 10; | ||
this.hasReceivedTraffic = false; | ||
} | ||
updateMaps() { | ||
debug("Avg.updateMaps()", this.id && this.id); | ||
if (this.currentCounts.size == 0) { | ||
//no traffic in the last 30s. clear the result Map | ||
this.savedSampleRates.clear(); | ||
return; | ||
} | ||
|
||
let sumEvents = 0; | ||
let logSum = 0; | ||
this.currentCounts.forEach((val, key) => { | ||
sumEvents += val; | ||
logSum += Math.log10(val); | ||
}); | ||
const goalCount = sumEvents / this.goalSampleRate; | ||
const goalRatio = goalCount / logSum; | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. both the go/ruby implementations sort the keys here and iterate in sorted order. my head hurts looking the ruby implementation. I expect it's a problem to drop the sort, but ... /shrug? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since we're using
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah that's the question I had - if we give the dynsamplers the same input, do we expect the same results? both other implementations go beyond the "sort them in the same order for this implementation to fix rounding errors" and use a sort that should be the same across implementations. eh, we can deal with that when it's an issue for someone (e.g. "I'm using two dynsampler implementations and things are different between them"). And the answer even then might be WONTFIX. |
||
const newRates = new Map(); | ||
let keysRemaining = this.currentCounts.size; | ||
let extra = 0; | ||
this.currentCounts.forEach((count, key) => { | ||
let goalForKey = Math.max(1, Math.log10(count) * goalRatio); | ||
const extraForKey = extra / keysRemaining; | ||
goalForKey += extraForKey; | ||
extra -= extraForKey; | ||
keysRemaining--; | ||
if (count <= goalForKey) { | ||
newRates.set(key, 1); | ||
extra += goalForKey - count; | ||
} else { | ||
newRates.set(key, Math.ceil(count / goalForKey)); | ||
extra += goalForKey - count / newRates.get(key); | ||
} | ||
}); | ||
this.savedSampleRates = newRates; | ||
this.hasReceivedTraffic = true; | ||
} | ||
getSampleRate(key) { | ||
debug("hasReceivedTraffic", this.hasReceivedTraffic); | ||
if (!this.hasReceivedTraffic) { | ||
return this.goalSampleRate; | ||
} else { | ||
// TODO: how well supported is this syntax | ||
// (basically no IE native support. what about babel/etc?) | ||
return super.getSampleRate(key); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I hope babel supports it. I've been writing code for a diff that uses it 😬 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe it does. My concern was centered more at what buble does and whether we should care about that before microbundle gets it's babel support: developit/microbundle#39. |
||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
jest.mock("nanotimer"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this line isn't required but makes it nice for the next person who might be unfamiliar with mocks and why nanotimer now has a .tick() function but only in tests. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 this line in general. better to be explicit for future readers. |
||
import { Sampler, PerKeyThroughput, AvgSampleRate } from "."; | ||
|
||
describe("Sampler", () => { | ||
test("initializes with default values", () => { | ||
const sampler = new Sampler(); | ||
expect(sampler.clearFrequencySec).toEqual(30); | ||
expect(sampler.id).toBeUndefined(); | ||
expect(sampler.getSampleRateCalledTimes).toBeUndefined(); | ||
}); | ||
}); | ||
describe("PerKeyThroughput", () => { | ||
test("initializes with default values", () => { | ||
const sampler = new PerKeyThroughput(); | ||
expect(sampler.clearFrequencySec).toEqual(30); | ||
expect(sampler.perKeyThroughputSec).toEqual(10); | ||
expect(sampler.id).toBeUndefined(); | ||
expect(sampler.getSampleRateCalledTimes).toBeUndefined(); | ||
}); | ||
|
||
test("gets a sample rate", () => { | ||
const sampler = new PerKeyThroughput(); | ||
// Fake a bunch of traffic for a specific key | ||
new Array(1500).fill(1).forEach(() => sampler.getSampleRate("my-key")); | ||
// Mocked tick() is equal to clearFrequencySec | ||
// Moves time forward by enough to run `updateMaps()` | ||
sampler.timer.tick(); | ||
// get the resulting sample rate after updating maps | ||
const a = sampler.getSampleRate("my-key"); | ||
expect(a).toEqual(5); | ||
}); | ||
}); | ||
|
||
describe("AvgSampleRate", () => { | ||
test("initializes with default values", () => { | ||
const sampler = new AvgSampleRate(); | ||
console.log(sampler); | ||
expect(sampler.clearFrequencySec).toEqual(30); | ||
expect(sampler.goalSampleRate).toEqual(10); | ||
expect(sampler.id).toBeUndefined(); | ||
expect(sampler.getSampleRateCalledTimes).toBeUndefined(); | ||
}); | ||
|
||
test("gets a sample rate", () => { | ||
const sampler = new AvgSampleRate(); | ||
expect(sampler.hasReceivedTraffic).toEqual(false); | ||
new Array(1500).fill(1).forEach(() => sampler.getSampleRate("my-key")); | ||
// manual tick is equal to "" | ||
sampler.timer.tick(); | ||
expect(sampler.hasReceivedTraffic).toEqual(true); | ||
const a = sampler.getSampleRate("my-key"); | ||
// No traffic means this is the goal sample rate | ||
expect(a).toEqual(10); | ||
sampler.timer.tick(); | ||
expect(sampler.getSampleRate("my-key")).toEqual(1); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not a blocker, but it takes me a fair amount of mental context threading to see how the log math above leads to this set of expectations... maybe a few more comments or something a little more verbose would be helpful? (The dynsampler-ruby equivalent feels more parseable somehow.) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Cross that, the ruby test looks like we're reaching into the internals to set the rate and it's not actually triggering an Curious as to your opinion on the difference in the tests. This test feels like it approximates more real-world usage
Whereas the ruby test seems like:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Do you think adding the "stage" comments above is enough? or should I try to do something else. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah yeah, fair point, agree that it's a bit nicer to focus on testing the public API. 👍for adding the stage comments for now and I'll noodle more on what my dream tests would look like. |
||
}); | ||
}); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
{ | ||
"name": "dynsampler", | ||
"version": "1.0.0", | ||
"description": "Dynamic sampling of events", | ||
"main": "lib/dynamic-sampler.js", | ||
"module": "lib/dynamic-sampler.m.js", | ||
"repository": "https://github.com/honeycombio/dynamic-sampler.js", | ||
"author": | ||
"Christopher Biscardi <[email protected]> (@chrisbiscardi)", | ||
"license": "Apache 2.0", | ||
"scripts": { | ||
"build": "microbundle --output lib --external all", | ||
"start": "microbundle watch --output lib --external all", | ||
"test": "jest", | ||
"precommit": "lint-staged" | ||
}, | ||
"lint-staged": { | ||
"*.{js,jsx}": ["prettier --parser flow --write", "git add"], | ||
"*.json": ["prettier --parser json --write", "git add"], | ||
"*.{graphql,gql}": ["prettier --parser graphql --write", "git add"], | ||
"*.{md,markdown}": ["prettier --parser markdown --write", "git add"], | ||
"*.{css,scss}": ["prettier --parser css --write", "git add"] | ||
}, | ||
"devDependencies": { | ||
"babel-core": "^6.26.0", | ||
"babel-jest": "^22.2.2", | ||
"babel-preset-env": "^1.6.1", | ||
"husky": "^0.14.3", | ||
"jest": "^22.3.0", | ||
"lint-staged": "^7.0.0", | ||
"microbundle": "^0.4.3", | ||
"prettier": "^1.10.2" | ||
}, | ||
"dependencies": { | ||
"debug": "^3.1.0", | ||
"nanotimer": "^0.3.15" | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Love all the code examples in the README. 😍
Not a blocker for shipping this at all, but at some point pulling in more of the verbiage from the dynsampler-go readme — esp the "what is sampling" bits from the top — could be handy. Want me to open issues for readme nice-to-haves like that? (The other one I'm thinking of is the answer to "can I use this in a browser?" 😳)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes please! Open issues and I'll knock em down.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yay, will do.