908 lines
39 KiB
JavaScript
Executable File
908 lines
39 KiB
JavaScript
Executable File
(function (factory) {
|
|
if (typeof define === 'function' && define.amd) {
|
|
// AMD. Register as an anonymous module.
|
|
define([], factory);
|
|
} else if (typeof exports === 'object') {
|
|
// Node. Does not work with strict CommonJS, but
|
|
// only CommonJS-like enviroments that support module.exports,
|
|
// like Node.
|
|
module.exports = factory();
|
|
} else {
|
|
// Browser globals
|
|
this.tdigest = factory();
|
|
}
|
|
}(function () {
|
|
var global = this, define;
|
|
function _require(id) {
|
|
var module = _require.cache[id];
|
|
if (!module) {
|
|
var exports = {};
|
|
module = _require.cache[id] = {
|
|
id: id,
|
|
exports: exports
|
|
};
|
|
_require.modules[id].call(exports, module, exports);
|
|
}
|
|
return module.exports;
|
|
}
|
|
_require.cache = [];
|
|
_require.modules = [
|
|
function (module, exports) {
|
|
module.exports = {
|
|
RBTree: _require(2),
|
|
BinTree: _require(1)
|
|
};
|
|
},
|
|
function (module, exports) {
|
|
var TreeBase = _require(3);
|
|
function Node(data) {
|
|
this.data = data;
|
|
this.left = null;
|
|
this.right = null;
|
|
}
|
|
Node.prototype.get_child = function (dir) {
|
|
return dir ? this.right : this.left;
|
|
};
|
|
Node.prototype.set_child = function (dir, val) {
|
|
if (dir) {
|
|
this.right = val;
|
|
} else {
|
|
this.left = val;
|
|
}
|
|
};
|
|
function BinTree(comparator) {
|
|
this._root = null;
|
|
this._comparator = comparator;
|
|
this.size = 0;
|
|
}
|
|
BinTree.prototype = new TreeBase(); // returns true if inserted, false if duplicate
|
|
// returns true if inserted, false if duplicate
|
|
BinTree.prototype.insert = function (data) {
|
|
if (this._root === null) {
|
|
// empty tree
|
|
this._root = new Node(data);
|
|
this.size++;
|
|
return true;
|
|
}
|
|
var dir = 0; // setup
|
|
// setup
|
|
var p = null; // parent
|
|
// parent
|
|
var node = this._root; // search down
|
|
// search down
|
|
while (true) {
|
|
if (node === null) {
|
|
// insert new node at the bottom
|
|
node = new Node(data);
|
|
p.set_child(dir, node);
|
|
ret = true;
|
|
this.size++;
|
|
return true;
|
|
} // stop if found
|
|
// stop if found
|
|
if (this._comparator(node.data, data) === 0) {
|
|
return false;
|
|
}
|
|
dir = this._comparator(node.data, data) < 0; // update helpers
|
|
// update helpers
|
|
p = node;
|
|
node = node.get_child(dir);
|
|
}
|
|
}; // returns true if removed, false if not found
|
|
// returns true if removed, false if not found
|
|
BinTree.prototype.remove = function (data) {
|
|
if (this._root === null) {
|
|
return false;
|
|
}
|
|
var head = new Node(undefined); // fake tree root
|
|
// fake tree root
|
|
var node = head;
|
|
node.right = this._root;
|
|
var p = null; // parent
|
|
// parent
|
|
var found = null; // found item
|
|
// found item
|
|
var dir = 1;
|
|
while (node.get_child(dir) !== null) {
|
|
p = node;
|
|
node = node.get_child(dir);
|
|
var cmp = this._comparator(data, node.data);
|
|
dir = cmp > 0;
|
|
if (cmp === 0) {
|
|
found = node;
|
|
}
|
|
}
|
|
if (found !== null) {
|
|
found.data = node.data;
|
|
p.set_child(p.right === node, node.get_child(node.left === null));
|
|
this._root = head.right;
|
|
this.size--;
|
|
return true;
|
|
} else {
|
|
return false;
|
|
}
|
|
};
|
|
module.exports = BinTree;
|
|
},
|
|
function (module, exports) {
|
|
var TreeBase = _require(3);
|
|
function Node(data) {
|
|
this.data = data;
|
|
this.left = null;
|
|
this.right = null;
|
|
this.red = true;
|
|
}
|
|
Node.prototype.get_child = function (dir) {
|
|
return dir ? this.right : this.left;
|
|
};
|
|
Node.prototype.set_child = function (dir, val) {
|
|
if (dir) {
|
|
this.right = val;
|
|
} else {
|
|
this.left = val;
|
|
}
|
|
};
|
|
function RBTree(comparator) {
|
|
this._root = null;
|
|
this._comparator = comparator;
|
|
this.size = 0;
|
|
}
|
|
RBTree.prototype = new TreeBase(); // returns true if inserted, false if duplicate
|
|
// returns true if inserted, false if duplicate
|
|
RBTree.prototype.insert = function (data) {
|
|
var ret = false;
|
|
if (this._root === null) {
|
|
// empty tree
|
|
this._root = new Node(data);
|
|
ret = true;
|
|
this.size++;
|
|
} else {
|
|
var head = new Node(undefined); // fake tree root
|
|
// fake tree root
|
|
var dir = 0;
|
|
var last = 0; // setup
|
|
// setup
|
|
var gp = null; // grandparent
|
|
// grandparent
|
|
var ggp = head; // grand-grand-parent
|
|
// grand-grand-parent
|
|
var p = null; // parent
|
|
// parent
|
|
var node = this._root;
|
|
ggp.right = this._root; // search down
|
|
// search down
|
|
while (true) {
|
|
if (node === null) {
|
|
// insert new node at the bottom
|
|
node = new Node(data);
|
|
p.set_child(dir, node);
|
|
ret = true;
|
|
this.size++;
|
|
} else if (is_red(node.left) && is_red(node.right)) {
|
|
// color flip
|
|
node.red = true;
|
|
node.left.red = false;
|
|
node.right.red = false;
|
|
} // fix red violation
|
|
// fix red violation
|
|
if (is_red(node) && is_red(p)) {
|
|
var dir2 = ggp.right === gp;
|
|
if (node === p.get_child(last)) {
|
|
ggp.set_child(dir2, single_rotate(gp, !last));
|
|
} else {
|
|
ggp.set_child(dir2, double_rotate(gp, !last));
|
|
}
|
|
}
|
|
var cmp = this._comparator(node.data, data); // stop if found
|
|
// stop if found
|
|
if (cmp === 0) {
|
|
break;
|
|
}
|
|
last = dir;
|
|
dir = cmp < 0; // update helpers
|
|
// update helpers
|
|
if (gp !== null) {
|
|
ggp = gp;
|
|
}
|
|
gp = p;
|
|
p = node;
|
|
node = node.get_child(dir);
|
|
} // update root
|
|
// update root
|
|
this._root = head.right;
|
|
} // make root black
|
|
// make root black
|
|
this._root.red = false;
|
|
return ret;
|
|
}; // returns true if removed, false if not found
|
|
// returns true if removed, false if not found
|
|
RBTree.prototype.remove = function (data) {
|
|
if (this._root === null) {
|
|
return false;
|
|
}
|
|
var head = new Node(undefined); // fake tree root
|
|
// fake tree root
|
|
var node = head;
|
|
node.right = this._root;
|
|
var p = null; // parent
|
|
// parent
|
|
var gp = null; // grand parent
|
|
// grand parent
|
|
var found = null; // found item
|
|
// found item
|
|
var dir = 1;
|
|
while (node.get_child(dir) !== null) {
|
|
var last = dir; // update helpers
|
|
// update helpers
|
|
gp = p;
|
|
p = node;
|
|
node = node.get_child(dir);
|
|
var cmp = this._comparator(data, node.data);
|
|
dir = cmp > 0; // save found node
|
|
// save found node
|
|
if (cmp === 0) {
|
|
found = node;
|
|
} // push the red node down
|
|
// push the red node down
|
|
if (!is_red(node) && !is_red(node.get_child(dir))) {
|
|
if (is_red(node.get_child(!dir))) {
|
|
var sr = single_rotate(node, dir);
|
|
p.set_child(last, sr);
|
|
p = sr;
|
|
} else if (!is_red(node.get_child(!dir))) {
|
|
var sibling = p.get_child(!last);
|
|
if (sibling !== null) {
|
|
if (!is_red(sibling.get_child(!last)) && !is_red(sibling.get_child(last))) {
|
|
// color flip
|
|
p.red = false;
|
|
sibling.red = true;
|
|
node.red = true;
|
|
} else {
|
|
var dir2 = gp.right === p;
|
|
if (is_red(sibling.get_child(last))) {
|
|
gp.set_child(dir2, double_rotate(p, last));
|
|
} else if (is_red(sibling.get_child(!last))) {
|
|
gp.set_child(dir2, single_rotate(p, last));
|
|
} // ensure correct coloring
|
|
// ensure correct coloring
|
|
var gpc = gp.get_child(dir2);
|
|
gpc.red = true;
|
|
node.red = true;
|
|
gpc.left.red = false;
|
|
gpc.right.red = false;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} // replace and remove if found
|
|
// replace and remove if found
|
|
if (found !== null) {
|
|
found.data = node.data;
|
|
p.set_child(p.right === node, node.get_child(node.left === null));
|
|
this.size--;
|
|
} // update root and make it black
|
|
// update root and make it black
|
|
this._root = head.right;
|
|
if (this._root !== null) {
|
|
this._root.red = false;
|
|
}
|
|
return found !== null;
|
|
};
|
|
function is_red(node) {
|
|
return node !== null && node.red;
|
|
}
|
|
function single_rotate(root, dir) {
|
|
var save = root.get_child(!dir);
|
|
root.set_child(!dir, save.get_child(dir));
|
|
save.set_child(dir, root);
|
|
root.red = true;
|
|
save.red = false;
|
|
return save;
|
|
}
|
|
function double_rotate(root, dir) {
|
|
root.set_child(!dir, single_rotate(root.get_child(!dir), !dir));
|
|
return single_rotate(root, dir);
|
|
}
|
|
module.exports = RBTree;
|
|
},
|
|
function (module, exports) {
|
|
function TreeBase() {
|
|
} // removes all nodes from the tree
|
|
// removes all nodes from the tree
|
|
TreeBase.prototype.clear = function () {
|
|
this._root = null;
|
|
this.size = 0;
|
|
}; // returns node data if found, null otherwise
|
|
// returns node data if found, null otherwise
|
|
TreeBase.prototype.find = function (data) {
|
|
var res = this._root;
|
|
while (res !== null) {
|
|
var c = this._comparator(data, res.data);
|
|
if (c === 0) {
|
|
return res.data;
|
|
} else {
|
|
res = res.get_child(c > 0);
|
|
}
|
|
}
|
|
return null;
|
|
}; // returns iterator to node if found, null otherwise
|
|
// returns iterator to node if found, null otherwise
|
|
TreeBase.prototype.findIter = function (data) {
|
|
var res = this._root;
|
|
var iter = this.iterator();
|
|
while (res !== null) {
|
|
var c = this._comparator(data, res.data);
|
|
if (c === 0) {
|
|
iter._cursor = res;
|
|
return iter;
|
|
} else {
|
|
iter._ancestors.push(res);
|
|
res = res.get_child(c > 0);
|
|
}
|
|
}
|
|
return null;
|
|
}; // Returns an iterator to the tree node at or immediately after the item
|
|
// Returns an iterator to the tree node at or immediately after the item
|
|
TreeBase.prototype.lowerBound = function (item) {
|
|
var cur = this._root;
|
|
var iter = this.iterator();
|
|
var cmp = this._comparator;
|
|
while (cur !== null) {
|
|
var c = cmp(item, cur.data);
|
|
if (c === 0) {
|
|
iter._cursor = cur;
|
|
return iter;
|
|
}
|
|
iter._ancestors.push(cur);
|
|
cur = cur.get_child(c > 0);
|
|
}
|
|
for (var i = iter._ancestors.length - 1; i >= 0; --i) {
|
|
cur = iter._ancestors[i];
|
|
if (cmp(item, cur.data) < 0) {
|
|
iter._cursor = cur;
|
|
iter._ancestors.length = i;
|
|
return iter;
|
|
}
|
|
}
|
|
iter._ancestors.length = 0;
|
|
return iter;
|
|
}; // Returns an iterator to the tree node immediately after the item
|
|
// Returns an iterator to the tree node immediately after the item
|
|
TreeBase.prototype.upperBound = function (item) {
|
|
var iter = this.lowerBound(item);
|
|
var cmp = this._comparator;
|
|
while (iter.data() !== null && cmp(iter.data(), item) === 0) {
|
|
iter.next();
|
|
}
|
|
return iter;
|
|
}; // returns null if tree is empty
|
|
// returns null if tree is empty
|
|
TreeBase.prototype.min = function () {
|
|
var res = this._root;
|
|
if (res === null) {
|
|
return null;
|
|
}
|
|
while (res.left !== null) {
|
|
res = res.left;
|
|
}
|
|
return res.data;
|
|
}; // returns null if tree is empty
|
|
// returns null if tree is empty
|
|
TreeBase.prototype.max = function () {
|
|
var res = this._root;
|
|
if (res === null) {
|
|
return null;
|
|
}
|
|
while (res.right !== null) {
|
|
res = res.right;
|
|
}
|
|
return res.data;
|
|
}; // returns a null iterator
|
|
// call next() or prev() to point to an element
|
|
// returns a null iterator
|
|
// call next() or prev() to point to an element
|
|
TreeBase.prototype.iterator = function () {
|
|
return new Iterator(this);
|
|
}; // calls cb on each node's data, in order
|
|
// calls cb on each node's data, in order
|
|
TreeBase.prototype.each = function (cb) {
|
|
var it = this.iterator(), data;
|
|
while ((data = it.next()) !== null) {
|
|
cb(data);
|
|
}
|
|
}; // calls cb on each node's data, in reverse order
|
|
// calls cb on each node's data, in reverse order
|
|
TreeBase.prototype.reach = function (cb) {
|
|
var it = this.iterator(), data;
|
|
while ((data = it.prev()) !== null) {
|
|
cb(data);
|
|
}
|
|
};
|
|
function Iterator(tree) {
|
|
this._tree = tree;
|
|
this._ancestors = [];
|
|
this._cursor = null;
|
|
}
|
|
Iterator.prototype.data = function () {
|
|
return this._cursor !== null ? this._cursor.data : null;
|
|
}; // if null-iterator, returns first node
|
|
// otherwise, returns next node
|
|
// if null-iterator, returns first node
|
|
// otherwise, returns next node
|
|
Iterator.prototype.next = function () {
|
|
if (this._cursor === null) {
|
|
var root = this._tree._root;
|
|
if (root !== null) {
|
|
this._minNode(root);
|
|
}
|
|
} else {
|
|
if (this._cursor.right === null) {
|
|
// no greater node in subtree, go up to parent
|
|
// if coming from a right child, continue up the stack
|
|
var save;
|
|
do {
|
|
save = this._cursor;
|
|
if (this._ancestors.length) {
|
|
this._cursor = this._ancestors.pop();
|
|
} else {
|
|
this._cursor = null;
|
|
break;
|
|
}
|
|
} while (this._cursor.right === save);
|
|
} else {
|
|
// get the next node from the subtree
|
|
this._ancestors.push(this._cursor);
|
|
this._minNode(this._cursor.right);
|
|
}
|
|
}
|
|
return this._cursor !== null ? this._cursor.data : null;
|
|
}; // if null-iterator, returns last node
|
|
// otherwise, returns previous node
|
|
// if null-iterator, returns last node
|
|
// otherwise, returns previous node
|
|
Iterator.prototype.prev = function () {
|
|
if (this._cursor === null) {
|
|
var root = this._tree._root;
|
|
if (root !== null) {
|
|
this._maxNode(root);
|
|
}
|
|
} else {
|
|
if (this._cursor.left === null) {
|
|
var save;
|
|
do {
|
|
save = this._cursor;
|
|
if (this._ancestors.length) {
|
|
this._cursor = this._ancestors.pop();
|
|
} else {
|
|
this._cursor = null;
|
|
break;
|
|
}
|
|
} while (this._cursor.left === save);
|
|
} else {
|
|
this._ancestors.push(this._cursor);
|
|
this._maxNode(this._cursor.left);
|
|
}
|
|
}
|
|
return this._cursor !== null ? this._cursor.data : null;
|
|
};
|
|
Iterator.prototype._minNode = function (start) {
|
|
while (start.left !== null) {
|
|
this._ancestors.push(start);
|
|
start = start.left;
|
|
}
|
|
this._cursor = start;
|
|
};
|
|
Iterator.prototype._maxNode = function (start) {
|
|
while (start.right !== null) {
|
|
this._ancestors.push(start);
|
|
start = start.right;
|
|
}
|
|
this._cursor = start;
|
|
};
|
|
module.exports = TreeBase;
|
|
},
|
|
function (module, exports) {
|
|
//
|
|
// TDigest:
|
|
//
|
|
// approximate distribution percentiles from a stream of reals
|
|
//
|
|
var RBTree = _require(0).RBTree;
|
|
function TDigest(delta, K, CX) {
|
|
// allocate a TDigest structure.
|
|
//
|
|
// delta is the compression factor, the max fraction of mass that
|
|
// can be owned by one centroid (bigger, up to 1.0, means more
|
|
// compression). delta=false switches off TDigest behavior and treats
|
|
// the distribution as discrete, with no merging and exact values
|
|
// reported.
|
|
//
|
|
// K is a size threshold that triggers recompression as the TDigest
|
|
// grows during input. (Set it to 0 to disable automatic recompression)
|
|
//
|
|
// CX specifies how often to update cached cumulative totals used
|
|
// for quantile estimation during ingest (see cumulate()). Set to
|
|
// 0 to use exact quantiles for each new point.
|
|
//
|
|
this.discrete = delta === false;
|
|
this.delta = delta || 0.01;
|
|
this.K = K === undefined ? 25 : K;
|
|
this.CX = CX === undefined ? 1.1 : CX;
|
|
this.centroids = new RBTree(compare_centroid_means);
|
|
this.nreset = 0;
|
|
this.reset();
|
|
}
|
|
TDigest.prototype.reset = function () {
|
|
// prepare to digest new points.
|
|
//
|
|
this.centroids.clear();
|
|
this.n = 0;
|
|
this.nreset += 1;
|
|
this.last_cumulate = 0;
|
|
};
|
|
TDigest.prototype.size = function () {
|
|
return this.centroids.size;
|
|
};
|
|
TDigest.prototype.toArray = function (everything) {
|
|
// return {mean,n} of centroids as an array ordered by mean.
|
|
//
|
|
var result = [];
|
|
if (everything) {
|
|
this._cumulate(true); // be sure cumns are exact
|
|
// be sure cumns are exact
|
|
this.centroids.each(function (c) {
|
|
result.push(c);
|
|
});
|
|
} else {
|
|
this.centroids.each(function (c) {
|
|
result.push({
|
|
mean: c.mean,
|
|
n: c.n
|
|
});
|
|
});
|
|
}
|
|
return result;
|
|
};
|
|
TDigest.prototype.summary = function () {
|
|
var approx = this.discrete ? 'exact ' : 'approximating ';
|
|
var s = [
|
|
approx + this.n + ' samples using ' + this.size() + ' centroids',
|
|
'min = ' + this.percentile(0),
|
|
'Q1 = ' + this.percentile(0.25),
|
|
'Q2 = ' + this.percentile(0.5),
|
|
'Q3 = ' + this.percentile(0.75),
|
|
'max = ' + this.percentile(1)
|
|
];
|
|
return s.join('\n');
|
|
};
|
|
function compare_centroid_means(a, b) {
|
|
// order two centroids by mean.
|
|
//
|
|
return a.mean > b.mean ? 1 : a.mean < b.mean ? -1 : 0;
|
|
}
|
|
function compare_centroid_mean_cumns(a, b) {
|
|
// order two centroids by mean_cumn.
|
|
//
|
|
return a.mean_cumn - b.mean_cumn;
|
|
}
|
|
TDigest.prototype.push = function (x, n) {
|
|
// incorporate value or array of values x, having count n into the
|
|
// TDigest. n defaults to 1.
|
|
//
|
|
n = n || 1;
|
|
x = Array.isArray(x) ? x : [x];
|
|
for (var i = 0; i < x.length; i++) {
|
|
this._digest(x[i], n);
|
|
}
|
|
};
|
|
TDigest.prototype.push_centroid = function (c) {
|
|
// incorporate centroid or array of centroids c
|
|
//
|
|
c = Array.isArray(c) ? c : [c];
|
|
for (var i = 0; i < c.length; i++) {
|
|
this._digest(c[i].mean, c[i].n);
|
|
}
|
|
};
|
|
TDigest.prototype._cumulate = function (exact) {
|
|
// update cumulative counts for each centroid
|
|
//
|
|
// exact: falsey means only cumulate after sufficient
|
|
// growth. During ingest, these counts are used as quantile
|
|
// estimates, and they work well even when somewhat out of
|
|
// date. (this is a departure from the publication, you may set CX
|
|
// to 0 to disable).
|
|
//
|
|
if (this.n === this.last_cumulate || !exact && this.CX && this.CX > this.n / this.last_cumulate) {
|
|
return;
|
|
}
|
|
var cumn = 0;
|
|
var self = this;
|
|
this.centroids.each(function (c) {
|
|
c.mean_cumn = cumn + c.n / 2; // half of n at the mean
|
|
// half of n at the mean
|
|
cumn = c.cumn = cumn + c.n;
|
|
});
|
|
this.n = this.last_cumulate = cumn;
|
|
};
|
|
TDigest.prototype.find_nearest = function (x) {
|
|
// find the centroid closest to x. The assumption of
|
|
// unique means and a unique nearest centroid departs from the
|
|
// paper, see _digest() below
|
|
//
|
|
if (this.size() === 0) {
|
|
return null;
|
|
}
|
|
var iter = this.centroids.lowerBound({ mean: x }); // x <= iter || iter==null
|
|
// x <= iter || iter==null
|
|
var c = iter.data() === null ? iter.prev() : iter.data();
|
|
if (c.mean === x || this.discrete) {
|
|
return c; // c is either x or a neighbor (discrete: no distance func)
|
|
}
|
|
// c is either x or a neighbor (discrete: no distance func)
|
|
var prev = iter.prev();
|
|
if (prev && Math.abs(prev.mean - x) < Math.abs(c.mean - x)) {
|
|
return prev;
|
|
} else {
|
|
return c;
|
|
}
|
|
};
|
|
TDigest.prototype._new_centroid = function (x, n, cumn) {
|
|
// create and insert a new centroid into the digest (don't update
|
|
// cumulatives).
|
|
//
|
|
var c = {
|
|
mean: x,
|
|
n: n,
|
|
cumn: cumn
|
|
};
|
|
this.centroids.insert(c);
|
|
this.n += n;
|
|
return c;
|
|
};
|
|
TDigest.prototype._addweight = function (nearest, x, n) {
|
|
// add weight at location x to nearest centroid. adding x to
|
|
// nearest will not shift its relative position in the tree and
|
|
// require reinsertion.
|
|
//
|
|
if (x !== nearest.mean) {
|
|
nearest.mean += n * (x - nearest.mean) / (nearest.n + n);
|
|
}
|
|
nearest.cumn += n;
|
|
nearest.mean_cumn += n / 2;
|
|
nearest.n += n;
|
|
this.n += n;
|
|
};
|
|
TDigest.prototype._digest = function (x, n) {
|
|
// incorporate value x, having count n into the TDigest.
|
|
//
|
|
var min = this.centroids.min();
|
|
var max = this.centroids.max();
|
|
var nearest = this.find_nearest(x);
|
|
if (nearest && nearest.mean === x) {
|
|
// accumulate exact matches into the centroid without
|
|
// limit. this is a departure from the paper, made so
|
|
// centroids remain unique and code can be simple.
|
|
this._addweight(nearest, x, n);
|
|
} else if (nearest === min) {
|
|
this._new_centroid(x, n, 0); // new point around min boundary
|
|
} else // new point around min boundary
|
|
if (nearest === max) {
|
|
this._new_centroid(x, n, this.n); // new point around max boundary
|
|
} else // new point around max boundary
|
|
if (this.discrete) {
|
|
this._new_centroid(x, n, nearest.cumn); // never merge
|
|
} else {
|
|
// never merge
|
|
// conider a merge based on nearest centroid's capacity. if
|
|
// there's not room for all of n, don't bother merging any of
|
|
// it into nearest, as we'll have to make a new centroid
|
|
// anyway for the remainder (departure from the paper).
|
|
var p = nearest.mean_cumn / this.n;
|
|
var max_n = Math.floor(4 * this.n * this.delta * p * (1 - p));
|
|
if (max_n - nearest.n >= n) {
|
|
this._addweight(nearest, x, n);
|
|
} else {
|
|
this._new_centroid(x, n, nearest.cumn);
|
|
}
|
|
}
|
|
this._cumulate(false);
|
|
if (!this.discrete && this.K && this.size() > this.K / this.delta) {
|
|
// re-process the centroids and hope for some compression.
|
|
this.compress();
|
|
}
|
|
};
|
|
TDigest.prototype.bound_mean = function (x) {
|
|
// find centroids lower and upper such that lower.mean < x <
|
|
// upper.mean or lower.mean === x === upper.mean. Don't call
|
|
// this for x out of bounds.
|
|
//
|
|
var iter = this.centroids.upperBound({ mean: x }); // x < iter
|
|
// x < iter
|
|
var lower = iter.prev(); // lower <= x
|
|
// lower <= x
|
|
var upper = lower.mean === x ? lower : iter.next();
|
|
return [
|
|
lower,
|
|
upper
|
|
];
|
|
};
|
|
TDigest.prototype.p_rank = function (x_or_xlist) {
|
|
// return approximate percentile-ranks (0..1) for data value x.
|
|
// or list of x. calculated according to
|
|
// https://en.wikipedia.org/wiki/Percentile_rank
|
|
//
|
|
// (Note that in continuous mode, boundary sample values will
|
|
// report half their centroid weight inward from 0/1 as the
|
|
// percentile-rank. X values outside the observed range return
|
|
// 0/1)
|
|
//
|
|
// this triggers cumulate() if cumn's are out of date.
|
|
//
|
|
var xs = Array.isArray(x_or_xlist) ? x_or_xlist : [x_or_xlist];
|
|
var ps = xs.map(this._p_rank, this);
|
|
return Array.isArray(x_or_xlist) ? ps : ps[0];
|
|
};
|
|
TDigest.prototype._p_rank = function (x) {
|
|
if (this.size() === 0) {
|
|
return undefined;
|
|
} else if (x < this.centroids.min().mean) {
|
|
return 0;
|
|
} else if (x > this.centroids.max().mean) {
|
|
return 1;
|
|
} // find centroids that bracket x and interpolate x's cumn from
|
|
// their cumn's.
|
|
// find centroids that bracket x and interpolate x's cumn from
|
|
// their cumn's.
|
|
this._cumulate(true); // be sure cumns are exact
|
|
// be sure cumns are exact
|
|
var bound = this.bound_mean(x);
|
|
var lower = bound[0], upper = bound[1];
|
|
if (this.discrete) {
|
|
return lower.cumn / this.n;
|
|
} else {
|
|
var cumn = lower.mean_cumn;
|
|
if (lower !== upper) {
|
|
cumn += (x - lower.mean) * (upper.mean_cumn - lower.mean_cumn) / (upper.mean - lower.mean);
|
|
}
|
|
return cumn / this.n;
|
|
}
|
|
};
|
|
TDigest.prototype.bound_mean_cumn = function (cumn) {
|
|
// find centroids lower and upper such that lower.mean_cumn < x <
|
|
// upper.mean_cumn or lower.mean_cumn === x === upper.mean_cumn. Don't call
|
|
// this for cumn out of bounds.
|
|
//
|
|
// XXX because mean and mean_cumn give rise to the same sort order
|
|
// (up to identical means), use the mean rbtree for our search.
|
|
this.centroids._comparator = compare_centroid_mean_cumns;
|
|
var iter = this.centroids.upperBound({ mean_cumn: cumn }); // cumn < iter
|
|
// cumn < iter
|
|
this.centroids._comparator = compare_centroid_means;
|
|
var lower = iter.prev(); // lower <= cumn
|
|
// lower <= cumn
|
|
var upper = lower && lower.mean_cumn === cumn ? lower : iter.next();
|
|
return [
|
|
lower,
|
|
upper
|
|
];
|
|
};
|
|
TDigest.prototype.percentile = function (p_or_plist) {
|
|
// for percentage p (0..1), or for each p in a list of ps, return
|
|
// the smallest data value q at which at least p percent of the
|
|
// observations <= q.
|
|
//
|
|
// for discrete distributions, this selects q using the Nearest
|
|
// Rank Method
|
|
// (https://en.wikipedia.org/wiki/Percentile#The_Nearest_Rank_method)
|
|
// (in scipy, same as percentile(...., interpolation='higher')
|
|
//
|
|
// for continuous distributions, interpolates data values between
|
|
// count-weighted bracketing means.
|
|
//
|
|
// this triggers cumulate() if cumn's are out of date.
|
|
//
|
|
var ps = Array.isArray(p_or_plist) ? p_or_plist : [p_or_plist];
|
|
var qs = ps.map(this._percentile, this);
|
|
return Array.isArray(p_or_plist) ? qs : qs[0];
|
|
};
|
|
TDigest.prototype._percentile = function (p) {
|
|
if (this.size() === 0) {
|
|
return undefined;
|
|
}
|
|
this._cumulate(true); // be sure cumns are exact
|
|
// be sure cumns are exact
|
|
var min = this.centroids.min();
|
|
var max = this.centroids.max();
|
|
var h = this.n * p;
|
|
var bound = this.bound_mean_cumn(h);
|
|
var lower = bound[0], upper = bound[1];
|
|
if (upper === lower || lower === null || upper === null) {
|
|
return (lower || upper).mean;
|
|
} else if (!this.discrete) {
|
|
return lower.mean + (h - lower.mean_cumn) * (upper.mean - lower.mean) / (upper.mean_cumn - lower.mean_cumn);
|
|
} else if (h <= lower.cumn) {
|
|
return lower.mean;
|
|
} else {
|
|
return upper.mean;
|
|
}
|
|
};
|
|
function pop_random(choices) {
|
|
// remove and return an item randomly chosen from the array of choices
|
|
// (mutates choices)
|
|
//
|
|
var idx = Math.floor(Math.random() * choices.length);
|
|
return choices.splice(idx, 1)[0];
|
|
}
|
|
TDigest.prototype.compress = function () {
|
|
// TDigests experience worst case compression (none) when input
|
|
// increases monotonically. Improve on any bad luck by
|
|
// reconsuming digest centroids as if they were weighted points
|
|
// while shuffling their order (and hope for the best).
|
|
//
|
|
if (this.compressing) {
|
|
return;
|
|
}
|
|
var points = this.toArray();
|
|
this.reset();
|
|
this.compressing = true;
|
|
while (points.length > 0) {
|
|
this.push_centroid(pop_random(points));
|
|
}
|
|
this._cumulate(true);
|
|
this.compressing = false;
|
|
};
|
|
function Digest(config) {
|
|
// allocate a distribution digest structure. This is an extension
|
|
// of a TDigest structure that starts in exact histogram (discrete)
|
|
// mode, and automatically switches to TDigest mode for large
|
|
// samples that appear to be from a continuous distribution.
|
|
//
|
|
this.config = config || {};
|
|
this.mode = this.config.mode || 'auto'; // disc, cont, auto
|
|
// disc, cont, auto
|
|
TDigest.call(this, this.mode === 'cont' ? config.delta : false);
|
|
this.digest_ratio = this.config.ratio || 0.9;
|
|
this.digest_thresh = this.config.thresh || 1000;
|
|
this.n_unique = 0;
|
|
}
|
|
Digest.prototype = Object.create(TDigest.prototype);
|
|
Digest.prototype.constructor = Digest;
|
|
Digest.prototype.push = function (x_or_xlist) {
|
|
TDigest.prototype.push.call(this, x_or_xlist);
|
|
this.check_continuous();
|
|
};
|
|
Digest.prototype._new_centroid = function (x, n, cumn) {
|
|
this.n_unique += 1;
|
|
TDigest.prototype._new_centroid.call(this, x, n, cumn);
|
|
};
|
|
Digest.prototype._addweight = function (nearest, x, n) {
|
|
if (nearest.n === 1) {
|
|
this.n_unique -= 1;
|
|
}
|
|
TDigest.prototype._addweight.call(this, nearest, x, n);
|
|
};
|
|
Digest.prototype.check_continuous = function () {
|
|
// while in 'auto' mode, if there are many unique elements, assume
|
|
// they are from a continuous distribution and switch to 'cont'
|
|
// mode (tdigest behavior). Return true on transition from
|
|
// disctete to continuous.
|
|
if (this.mode !== 'auto' || this.size() < this.digest_thresh) {
|
|
return false;
|
|
}
|
|
if (this.n_unique / this.size() > this.digest_ratio) {
|
|
this.mode = 'cont';
|
|
this.discrete = false;
|
|
this.delta = this.config.delta || 0.01;
|
|
this.compress();
|
|
return true;
|
|
}
|
|
return false;
|
|
};
|
|
module.exports = {
|
|
'TDigest': TDigest,
|
|
'Digest': Digest
|
|
};
|
|
}
|
|
];
|
|
return _require(4);
|
|
})); |