在 javascript 中将数组分箱以获得直方图

Posted

技术标签:

【中文标题】在 javascript 中将数组分箱以获得直方图【英文标题】:Binning an array in javascript for a histogram 【发布时间】:2016-09-23 13:13:52 【问题描述】:

我在 javascript 中有以下数组,我需要将其分成 20 个桶。数据值介于 0 和 1 之间,因此 bin 大小为 0.05。我觉得应该有一个函数接受两个参数,一个数组和一个 bin 大小,但我找不到一个。我知道 D3.js 有一些特性可以帮助构建这样一个数组,但我不知道哪个函数可能有帮助。

var arr = [
  "0.362743", "0.357969", "0.356322", "0.355757", "0.358511",
  "0.357218", "0.356696", "0.354579", "0.828295", "0.391186",
  "0.378577", "0.39372", "0.396416", "0.395641", "0.37573",
  "0.379666", "0.377443", "0.391842", "0.402021", "0.377516",
  "0.38936", "0.38936", "0.400883", "0.393171", "0.374419",
  "0.400821", "0.380502", "0.396098", "0.388256", "0.398968",
  "0.392525", "0.401858", "0.387297", "0.376471", "0.378183",
  "0.379787", "0.382024", "0.387928", "0.395367", "0.391972",
  "0.381295", "0.391183", "0.383598", "0.386424", "0.384338",
  "0.401834", "0.406253", "0.392854", "0.399266", "0.400804",
  "0.391146", "0.395441", "0.396265", "0.397894", "0.384822",
  "0.385181", "0.395443", "0.400981", "0.401716", "0.406633",
  "0.406887", "0.40694", "0.391219", "0.387946", "0.398858",
  "0.402233", "0.388583", "0.389772", "0.397084", "0.711566",
  "0.954557", "0.524007", "0.672288", "0.668441", "0.421726",
  "0.549536", "0.932952", "0.397851", "0.395536", "0.354818",
  "0.374355", "0.375257", "0.362613", "0.391271", "0.379219",
  "0.363316", "0.866006", "0.862254", "0.864403", "0.861346",
  "0.845225", "0.784467", "0.801275", "0.638579", "0.847282",
  "0.847402", "0.847747", "0.790411", "0.835979", "0.838546"
]

【问题讨论】:

“数据值介于 0 和 1 之间,因此 bin 大小将是 0.05。”——如果我能弄清楚这句话的意思,我就能帮上大忙,但是…… 查看数组,所有的值都在 0 到 1 的范围内,也就是说没有低于 0 或高于 1 的值。 是的,我注意到了,但是 bin size 是什么,bucket 又是什么? bin 是数据的分组,也可以称为组距区间(class interval);bucket 是统计学中对每组数据的常用叫法。 【参考方案1】:

随着 D3.js v6 的发布,d3.layout.histogram 已被 d3.bin() 取代(superseded),后者现在属于 d3-array 模块。

要对您的数组进行分类,您需要创建一个直方图生成器:

// Bin generator for values in [0, 1].
var histGenerator = d3.bin()
  .domain([0,1])    // Set the domain to cover the entire interval [0,1]
  .thresholds(19);  // number of thresholds; this will create 19+1 bins

还有更多选项可用于配置阈值(thresholds)和分箱(bins),但此生成器已经能完全满足您的要求。您可以用您的值数组调用该生成器,以数组形式取回计算出的各个 bin:

// Call the generator with the array of values to retrieve the computed bins as an array.
var bins = histGenerator(arr);

看看这个工作示例:

var arr = [
  "0.362743", "0.357969", "0.356322", "0.355757", "0.358511",
  "0.357218", "0.356696", "0.354579", "0.828295", "0.391186",
  "0.378577", "0.39372", "0.396416", "0.395641", "0.37573",
  "0.379666", "0.377443", "0.391842", "0.402021", "0.377516",
  "0.38936", "0.38936", "0.400883", "0.393171", "0.374419",
  "0.400821", "0.380502", "0.396098", "0.388256", "0.398968",
  "0.392525", "0.401858", "0.387297", "0.376471", "0.378183",
  "0.379787", "0.382024", "0.387928", "0.395367", "0.391972",
  "0.381295", "0.391183", "0.383598", "0.386424", "0.384338",
  "0.401834", "0.406253", "0.392854", "0.399266", "0.400804",
  "0.391146", "0.395441", "0.396265", "0.397894", "0.384822",
  "0.385181", "0.395443", "0.400981", "0.401716", "0.406633",
  "0.406887", "0.40694", "0.391219", "0.387946", "0.398858",
  "0.402233", "0.388583", "0.389772", "0.397084", "0.711566",
  "0.954557", "0.524007", "0.672288", "0.668441", "0.421726",
  "0.549536", "0.932952", "0.397851", "0.395536", "0.354818",
  "0.374355", "0.375257", "0.362613", "0.391271", "0.379219",
  "0.363316", "0.866006", "0.862254", "0.864403", "0.861346",
  "0.845225", "0.784467", "0.801275", "0.638579", "0.847282",
  "0.847402", "0.847747", "0.790411", "0.835979", "0.838546"
];

var histGenerator = d3.bin()
  .domain([0,1])    // Set the domain to cover the entire intervall [0;]
  .thresholds(19);  // number of thresholds; this will create 19+1 bins

var bins = histGenerator(arr);
console.log(bins);
<script src="http://d3js.org/d3.v6.js"></script>

【讨论】:

【参考方案2】:

您可以用一些 JS 代码自己构建这些分箱(bin):

var arr = [
  "0.362743", "0.357969", "0.356322", "0.355757", "0.358511",
  "0.357218", "0.356696", "0.354579", "0.828295", "0.391186",
  "0.378577", "0.39372", "0.396416", "0.395641", "0.37573",
  "0.379666", "0.377443", "0.391842", "0.402021", "0.377516",
  "0.38936", "0.38936", "0.400883", "0.393171", "0.374419",
  "0.400821", "0.380502", "0.396098", "0.388256", "0.398968",
  "0.392525", "0.401858", "0.387297", "0.376471", "0.378183",
  "0.379787", "0.382024", "0.387928", "0.395367", "0.391972",
  "0.381295", "0.391183", "0.383598", "0.386424", "0.384338",
  "0.401834", "0.406253", "0.392854", "0.399266", "0.400804",
  "0.391146", "0.395441", "0.396265", "0.397894", "0.384822",
  "0.385181", "0.395443", "0.400981", "0.401716", "0.406633",
  "0.406887", "0.40694", "0.391219", "0.387946", "0.398858",
  "0.402233", "0.388583", "0.389772", "0.397084", "0.711566",
  "0.954557", "0.524007", "0.672288", "0.668441", "0.421726",
  "0.549536", "0.932952", "0.397851", "0.395536", "0.354818",
  "0.374355", "0.375257", "0.362613", "0.391271", "0.379219",
  "0.363316", "0.866006", "0.862254", "0.864403", "0.861346",
  "0.845225", "0.784467", "0.801275", "0.638579", "0.847282",
  "0.847402", "0.847747", "0.790411", "0.835979", "0.838546"
];

/**
 * Count how many values fall into each fixed-width bin between `min` and `max`.
 *
 * Bins are half-open, [minNum, maxNum), except the last one, which also
 * includes `max`; with the default range both 0 and 1 are therefore counted.
 * Each entry is converted with Number(). Entries that are null, undefined,
 * empty strings, not numeric, or outside [min, max] are ignored.
 *
 * @param {Array<number|string>} values - Numbers or numeric strings to bin.
 * @param {number} interval - Width of each bin; must be greater than 0.
 * @param {number} [min=0] - Lower bound of the first bin.
 * @param {number} [max=1] - Upper bound of the last bin; must be greater than min.
 * @returns {Array<{binNum: number, minNum: number, maxNum: number, count: number}>}
 *   One entry per bin, in ascending order.
 * @throws {RangeError} If interval is not positive or max is not above min.
 */
function binValues(values, interval, min, max) {
  var lo = min === undefined ? 0 : min;
  var hi = max === undefined ? 1 : max;
  if (!(interval > 0) || !(hi > lo)) {
    throw new RangeError('interval must be > 0 and max must be > min');
  }

  // Derive the bin count up front and compute every edge as lo + k * interval.
  // Repeatedly adding the interval (i += interval) accumulates rounding error,
  // which can yield one bin too many or too few. The small epsilon keeps a
  // quotient such as 20.000000000000004 from rounding up to 21 bins.
  var binTotal = Math.max(1, Math.ceil((hi - lo) / interval - 1e-9));
  var result = [];
  for (var k = 0; k < binTotal; k++) {
    result.push({
      binNum: k,
      minNum: lo + k * interval,
      // Clamp so the last bin never extends past the requested maximum.
      maxNum: Math.min(hi, lo + (k + 1) * interval),
      count: 0
    });
  }

  for (var i = 0; i < values.length; i++) {
    var raw = values[i];
    // Number(null) and Number('') are both 0; skip them so they are not
    // silently counted in the first bin.
    if (raw === null || raw === undefined || raw === '') {
      continue;
    }
    var value = Number(raw);
    // Written as a negated range test so that NaN is rejected as well.
    if (!(value >= lo && value <= hi)) {
      continue;
    }

    // Jump straight to the candidate bin; clamp so value === hi lands in the last bin.
    var idx = Math.min(binTotal - 1, Math.floor((value - lo) / interval));
    // The division can be off by one right at an edge; settle the choice
    // against the stored edges so counts always agree with minNum/maxNum.
    if (value < result[idx].minNum) {
      idx--;
    } else if (idx < binTotal - 1 && value >= result[idx].maxNum) {
      idx++;
    }
    result[idx].count++;
  }

  return result;
}

var interval = 0.05;
var bins = binValues(arr, interval);

https://jsbin.com/keropoyadu/edit?js,output

【讨论】:

【参考方案3】:

您想要的功能是直方图布局。你可以这样做:

// Histogram layout: group arr into 20 bins over a fixed [0, 1] range.
var data = d3.layout.histogram()
    .bins(20)
    .range([0, 1])  // pin the range so each bin is 0.05 wide rather than spanning only the data's min..max
    (arr);

这只是一个一般示例,您必须调整值。查看文档:https://github.com/d3/d3/wiki/Histogram-Layout

【讨论】:

【参考方案4】:

d3js 库有一个 d3.layout.histogram() 函数,该函数返回一个直方图布局对象,用于将数据分组到 bin 中。布局对象既是对象又是函数。您可以调用布局对象上的方法来设置所需的布局行为。然后,您可以调用布局对象将数据分组到一个 bin 数组中。每个 bin 是一个值数组。每个 bin 还带有 x、dx 和 y 这几个附加属性。

例如,下面的代码会将数据分组到 20 个 bin 中,覆盖从 0 到 1 的范围。

var arr = ["0.362743", "0.357969", "0.356322", "0.355757", "0.358511", "0.357218", "0.356696", "0.354579", "0.828295", "0.391186", "0.378577", "0.39372", "0.396416", "0.395641", "0.37573", "0.379666", "0.377443", "0.391842", "0.402021", "0.377516", "0.38936", "0.38936", "0.400883", "0.393171", "0.374419", "0.400821", "0.380502", "0.396098", "0.388256", "0.398968", "0.392525", "0.401858", "0.387297", "0.376471", "0.378183", "0.379787", "0.382024", "0.387928", "0.395367", "0.391972", "0.381295", "0.391183", "0.383598", "0.386424", "0.384338", "0.401834", "0.406253", "0.392854", "0.399266", "0.400804", "0.391146", "0.395441", "0.396265", "0.397894", "0.384822", "0.385181", "0.395443", "0.400981", "0.401716", "0.406633", "0.406887", "0.40694", "0.391219", "0.387946", "0.398858", "0.402233", "0.388583", "0.389772", "0.397084", "0.711566", "0.954557", "0.524007", "0.672288", "0.668441", "0.421726", "0.549536", "0.932952", "0.397851", "0.395536", "0.354818", "0.374355", "0.375257", "0.362613", "0.391271", "0.379219", "0.363316", "0.866006", "0.862254", "0.864403", "0.861346", "0.845225", "0.784467", "0.801275", "0.638579", "0.847282", "0.847402", "0.847747", "0.790411", "0.835979", "0.838546"];
// NOTE(review): d3.layout.histogram is the older API that d3.bin() supersedes as of D3 v6 — confirm the D3 version in use.
var bins = d3.layout.histogram()  // create layout object
    .bins(20)       // to use 20 bins
    .range([0, 1])  // to cover range from 0 to 1
    (arr);          // group the data into the bins

代码运行后...

bins[i] is an array of values in the ith bin
bins[i].x is the lower bounds of the ith bin
bins[i].dx is the width of the ith bin
bins[i].x + bins[i].dx is the upper bounds of the ith bin
bins[i].y is the number of values in the ith bin

直方图布局对象的文档位于...

https://github.com/d3/d3/wiki/Histogram-Layout

注意:默认情况下,布局对象会将字符串值转换为数字值。因此,布局函数可以直接处理您的字符串值。

【讨论】:

以上是关于在 javascript 中将数组分箱以获得直方图的主要内容,如果未能解决你的问题,请参考以下文章

使用分箱 X 值 Python 制作条形图

在 Spark 中创建分箱直方图

在直方图上叠加数据的一致方式(从 geom_histogram 中提取分箱数据?)

R语言plotly可视化:plotly可视化多个数据集归一化直方图(historgram)设置不同的直方图使用不同的分箱大小(bin size)在直方图的底部边缘添加边缘轴须图rug

R语言plotly可视化:可视化多个数据集归一化直方图(historgram)并在直方图中添加密度曲线kde设置不同的直方图使用不同的分箱大小(bin size)在直方图的底部边缘添加边缘轴须图

在 JavaScript 中将数组转换为对象