C#相关系数计算
Posted 何以解忧 `唯有暴富
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了C#相关系数计算相关的知识,希望对你有一定的参考价值。
相关系数是最早由统计学家卡尔·皮尔逊设计的统计指标,是研究变量之间线性相关程度的量,一般用字母 r 表示。由于研究对象的不同,相关系数有多种定义方式,较为常用的是皮尔逊相关系数。
相关表和相关图可反映两个变量之间的相互关系及其相关方向,但无法确切地表明两个变量之间相关的程度。相关系数是用以反映变量之间相关关系密切程度的统计指标。相关系数是按积差方法计算,同样以两变量与各自平均值的离差为基础,通过两个离差相乘来反映两变量之间相关程度;着重研究线性的单相关系数。
需要说明的是,皮尔逊相关系数并不是唯一的相关系数,但是最常见的相关系数,以下解释都是针对皮尔逊相关系数。
依据相关现象之间的不同特征,其统计指标的名称有所不同。如将反映两变量间线性相关关系的统计指标称为相关系数(相关系数的平方称为判定系数);将反映两变量间曲线相关关系的统计指标称为非线性相关系数、非线性判定系数;将反映多元线性相关关系的统计指标称为复相关系数、复判定系数等。
第一步
通过 NuGet 添加 MathNet.Numerics 包(统计相关功能位于 MathNet.Numerics.Statistics 命名空间)
第二步 代码
using MathNet.Numerics.LinearAlgebra;
using MathNet.Numerics.Statistics;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace General
{
public static class Correlation
{
/// <summary>Computes the Pearson product-moment correlation coefficient of two samples.</summary>
/// <param name="dataA">Sample data A.</param>
/// <param name="dataB">Sample data B.</param>
/// <returns>
/// The Pearson product-moment correlation coefficient. The result is NaN when the
/// sequences are empty, because the final division is then 0/0.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="dataA"/> or <paramref name="dataB"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">The two sequences yield a different number of elements.</exception>
public static double Pearson(IEnumerable<double> dataA, IEnumerable<double> dataB)
{
    if (dataA == null)
    {
        throw new ArgumentNullException("dataA");
    }
    if (dataB == null)
    {
        throw new ArgumentNullException("dataB");
    }

    // The message is inlined because no Resources class is declared in this file.
    const string lengthMismatch = "The array arguments must have the same length.";

    int n = 0;
    double coMoment = 0.0;
    double meanA = 0;
    double meanB = 0;
    double varA = 0;
    double varB = 0;

    using (IEnumerator<double> ieA = dataA.GetEnumerator())
    using (IEnumerator<double> ieB = dataB.GetEnumerator())
    {
        while (ieA.MoveNext())
        {
            if (!ieB.MoveNext())
            {
                // dataB ran out before dataA.
                throw new ArgumentOutOfRangeException("dataB", lengthMismatch);
            }

            n++;

            // Deltas are taken against the means of the previous n - 1 elements.
            double deltaA = ieA.Current - meanA;
            double deltaB = ieB.Current - meanB;
            double scaleDeltaA = deltaA / n;
            double scaleDeltaB = deltaB / n;

            meanA += scaleDeltaA;
            meanB += scaleDeltaB;

            // Single-pass accumulation of the squared-deviation sums and the co-moment.
            varA += scaleDeltaA * deltaA * (n - 1);
            varB += scaleDeltaB * deltaB * (n - 1);
            coMoment += (deltaA * deltaB * (n - 1)) / n;
        }

        if (ieB.MoveNext())
        {
            // dataB still has elements after dataA is exhausted.
            throw new ArgumentOutOfRangeException("dataA", lengthMismatch);
        }
    }

    return coMoment / Math.Sqrt(varA * varB);
}
/// <summary>Computes the weighted Pearson product-moment correlation coefficient.</summary>
/// <param name="dataA">Sample data A.</param>
/// <param name="dataB">Sample data B.</param>
/// <param name="weights">Weight of each sample pair.</param>
/// <returns>The weighted Pearson product-moment correlation coefficient.</returns>
/// <exception cref="ArgumentNullException">Any of the three sequences is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">The three sequences do not yield the same number of elements.</exception>
public static double WeightedPearson(IEnumerable<double> dataA, IEnumerable<double> dataB, IEnumerable<double> weights)
{
    if (dataA == null)
    {
        throw new ArgumentNullException("dataA");
    }
    if (dataB == null)
    {
        throw new ArgumentNullException("dataB");
    }
    if (weights == null)
    {
        throw new ArgumentNullException("weights");
    }

    // The message is inlined because no Resources class is declared in this file.
    const string lengthMismatch = "The array arguments must have the same length.";

    double meanA = 0;
    double meanB = 0;
    double varA = 0;
    double varB = 0;
    double sumWeight = 0;
    double covariance = 0;

    using (IEnumerator<double> ieA = dataA.GetEnumerator())
    using (IEnumerator<double> ieB = dataB.GetEnumerator())
    using (IEnumerator<double> ieW = weights.GetEnumerator())
    {
        while (ieA.MoveNext())
        {
            if (!ieB.MoveNext())
            {
                throw new ArgumentOutOfRangeException("dataB", lengthMismatch);
            }
            if (!ieW.MoveNext())
            {
                throw new ArgumentOutOfRangeException("weights", lengthMismatch);
            }

            double xi = ieA.Current;
            double yi = ieB.Current;
            double wi = ieW.Current;

            // A zero-weight sample contributes nothing to any accumulator. Skipping it
            // also avoids 0/0 when no weight has been accumulated yet.
            if (wi == 0)
            {
                continue;
            }

            // Total weight including the current sample.
            double temp = sumWeight + wi;

            double deltaX = xi - meanA;
            double rX = deltaX * wi / temp;
            meanA += rX;
            varA += sumWeight * deltaX * rX;

            double deltaY = yi - meanB;
            double rY = deltaY * wi / temp;
            meanB += rY;
            varB += sumWeight * deltaY * rY;

            // The co-moment uses the same weight-based scaling as the two variance
            // updates above (sumWeight * wi / temp). Scaling by the sample count
            // instead is only correct when all weights are equal.
            covariance += sumWeight * deltaX * rY;

            sumWeight = temp;
        }

        if (ieB.MoveNext())
        {
            throw new ArgumentOutOfRangeException("dataB", lengthMismatch);
        }
        if (ieW.MoveNext())
        {
            throw new ArgumentOutOfRangeException("weights", lengthMismatch);
        }
    }

    return covariance / Math.Sqrt(varA * varB);
}
/// <summary>Computes the Pearson product-moment correlation matrix.</summary>
/// <param name="vectors">Array of sample data vectors.</param>
/// <returns>
/// A square matrix with one row and column per vector. Entry (i, j) holds the
/// Pearson coefficient of vectors i and j.
/// </returns>
public static Matrix<double> PearsonMatrix(params double[][] vectors)
{
    int count = vectors.Length;

    // Start from the identity matrix; only the off-diagonal entries are written below.
    Matrix<double> result = Matrix<double>.Build.DenseIdentity(count);

    for (int row = 0; row < count; row++)
    {
        for (int col = row + 1; col < count; col++)
        {
            // Compute each pair once and mirror it across the diagonal.
            double coefficient = Pearson(vectors[row], vectors[col]);
            result.At(row, col, coefficient);
            result.At(col, row, coefficient);
        }
    }

    return result;
}
/// <summary>Computes the Pearson product-moment correlation matrix.</summary>
/// <param name="vectors">Enumerable of sample data vectors.</param>
/// <returns>The Pearson product-moment correlation matrix.</returns>
public static Matrix<double> PearsonMatrix(IEnumerable<double[]> vectors)
{
    // Reuse the input when it is already a jagged array; otherwise materialize it.
    double[][] jagged = vectors as double[][];
    if (jagged == null)
    {
        jagged = vectors.ToArray();
    }

    return PearsonMatrix(jagged);
}
/// <summary>
/// Computes the Spearman ranked correlation coefficient.
/// </summary>
/// <param name="dataA">Sample data series A.</param>
/// <param name="dataB">Sample data series B.</param>
/// <returns>The Spearman ranked correlation coefficient.</returns>
public static double Spearman(IEnumerable<double> dataA, IEnumerable<double> dataB)
{
    // Rank both series, then take the Pearson coefficient of the ranks.
    double[] ranksA = Rank(dataA);
    double[] ranksB = Rank(dataB);
    return Pearson(ranksA, ranksB);
}
/// <summary>
/// Computes the Spearman ranked correlation matrix.
/// </summary>
/// <param name="vectors">Array of sample data vectors.</param>
/// <returns>The Spearman ranked correlation matrix.</returns>
public static Matrix<double> SpearmanMatrix(params double[][] vectors)
{
    // Rank every vector, then build the Pearson matrix over the ranks.
    double[][] ranked = vectors.Select(vector => Rank(vector)).ToArray();
    return PearsonMatrix(ranked);
}
/// <summary>Computes the Spearman ranked correlation matrix.</summary>
/// <param name="vectors">Enumerable of sample data vectors.</param>
/// <returns>The Spearman ranked correlation matrix.</returns>
public static Matrix<double> SpearmanMatrix(IEnumerable<double[]> vectors)
{
    // Rank every vector, then build the Pearson matrix over the ranks.
    double[][] ranked = (from vector in vectors select Rank(vector)).ToArray();
    return PearsonMatrix(ranked);
}
// Returns the ranks of the series (RankDefinition.Average), or an empty array for null input.
static double[] Rank(IEnumerable<double> series)
{
    if (series != null)
    {
        // WARNING: always rank a private copy. The ranking call works in place
        // (it sorts its argument), so the caller's data must not be passed directly.
        double[] copy = series.ToArray();
        return ArrayStatistics.RanksInplace(copy, RankDefinition.Average);
    }

    return new double[0];
}
}
}
第三步 使用(示例中的 ChiSquared 需要额外引入 using MathNet.Numerics.Distributions;)
// Build the first data set: 1000 samples drawn from ChiSquared(5).
ChiSquared chiSquare = new ChiSquared(5);
Console.WriteLine(@"2. Generate 1000 samples of the ChiSquare(5) distribution");
double[] data = new double[1000];
for (int index = 0; index < data.Length; ++index)
{
    data[index] = chiSquare.Sample();
}

// Build the second data set: 1000 samples drawn from ChiSquared(2).
ChiSquared chiSquareB = new ChiSquared(2);
double[] dataB = new double[1000];
for (int index = 0; index < dataB.Length; ++index)
{
    dataB[index] = chiSquareB.Sample();
}

// Compute the correlation coefficients between data and dataB.
double r1 = Correlation.Pearson(data, dataB);
double r2 = Correlation.Spearman(data, dataB);
以上是关于C#相关系数计算的主要内容,如果未能解决你的问题,请参考以下文章