C#相关系数计算

Posted 何以解忧 `唯有暴富

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了C#相关系数计算相关的知识,希望对你有一定的参考价值。

相关系数是最早由统计学家卡尔·皮尔逊设计的统计指标,是研究变量之间线性相关程度的量,一般用字母 r 表示。由于研究对象的不同,相关系数有多种定义方式,较为常用的是皮尔逊相关系数

相关表和相关图可反映两个变量之间的相互关系及其相关方向,但无法确切地表明两个变量之间相关的程度。相关系数是用以反映变量之间相关关系密切程度的统计指标。相关系数是按积差方法计算,同样以两变量与各自平均值离差为基础,通过两个离差相乘来反映两变量之间相关程度;着重研究线性的单相关系数

需要说明的是,皮尔逊相关系数并不是唯一的相关系数,但是最常见的相关系数,以下解释都是针对皮尔逊相关系数。

依据相关现象之间的不同特征,其统计指标的名称有所不同。如将反映两变量间线性相关关系的统计指标称为相关系数(相关系数的平方称为判定系数);将反映两变量间曲线相关关系的统计指标称为非线性相关系数、非线性判定系数;将反映多元线性相关关系的统计指标称为复相关系数、复判定系数等。

第一步

通过 NuGet 添加 MathNet.Numerics 包(下文代码用到其中的 MathNet.Numerics.Statistics 和 MathNet.Numerics.LinearAlgebra 命名空间)

第二步 代码

using MathNet.Numerics.LinearAlgebra;
using MathNet.Numerics.Statistics;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace General
{
    /// <summary>
    /// Correlation measures between data samples: Pearson product-moment correlation
    /// (plain and weighted), Spearman rank correlation, and the corresponding pairwise
    /// correlation matrices.
    /// </summary>
    public static class Correlation
    {
        // Message used when the input sequences do not have the same number of elements.
        // A literal is used because no "Resources" type is available to this class.
        private const string ArraysSameLengthMessage = "The array arguments must have the same length.";

        /// <summary>Computes the Pearson product-moment correlation coefficient.</summary>
        /// <param name="dataA">Data sample A.</param>
        /// <param name="dataB">Data sample B.</param>
        /// <returns>
        /// The Pearson product-moment correlation coefficient. The result is
        /// <see cref="double.NaN"/> when either accumulated variance is zero
        /// (for example empty, single-element or constant input), because the
        /// final division is then 0/0.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when the two samples do not contain the same number of elements.
        /// </exception>
        public static double Pearson(IEnumerable<double> dataA, IEnumerable<double> dataB)
        {
            int n = 0;
            double r = 0.0;

            double meanA = 0;
            double meanB = 0;
            double varA = 0;
            double varB = 0;

            // Both sequences are walked in lock-step exactly once; running means and
            // the sums of squared deviations / cross deviations are updated per element.
            using (IEnumerator<double> ieA = dataA.GetEnumerator())
            using (IEnumerator<double> ieB = dataB.GetEnumerator())
            {
                while (ieA.MoveNext())
                {
                    if (!ieB.MoveNext())
                    {
                        // dataB ran out before dataA.
                        throw new ArgumentOutOfRangeException("dataB", ArraysSameLengthMessage);
                    }

                    ++n;

                    double currentA = ieA.Current;
                    double currentB = ieB.Current;

                    // Deviations are taken from the means of the previous n - 1 elements.
                    double deltaA = currentA - meanA;
                    double scaleDeltaA = deltaA / n;

                    double deltaB = currentB - meanB;
                    double scaleDeltaB = deltaB / n;

                    meanA += scaleDeltaA;
                    meanB += scaleDeltaB;

                    // Each term is delta * delta * (n - 1) / n.
                    varA += scaleDeltaA * deltaA * (n - 1);
                    varB += scaleDeltaB * deltaB * (n - 1);
                    r += (deltaA * deltaB * (n - 1)) / n;
                }

                if (ieB.MoveNext())
                {
                    // dataA ran out before dataB.
                    throw new ArgumentOutOfRangeException("dataA", ArraysSameLengthMessage);
                }
            }

            return r / Math.Sqrt(varA * varB);
        }

        /// <summary>Computes the weighted Pearson product-moment correlation coefficient.</summary>
        /// <param name="dataA">Data sample A.</param>
        /// <param name="dataB">Data sample B.</param>
        /// <param name="weights">Weight of each observation; one weight per pair of samples.</param>
        /// <returns>
        /// The weighted Pearson product-moment correlation coefficient, or
        /// <see cref="double.NaN"/> when either accumulated weighted variance is zero.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when the three sequences do not contain the same number of elements.
        /// </exception>
        public static double WeightedPearson(IEnumerable<double> dataA, IEnumerable<double> dataB, IEnumerable<double> weights)
        {
            double meanA = 0;
            double meanB = 0;
            double varA = 0;
            double varB = 0;
            double sumWeight = 0;

            double covariance = 0;

            using (IEnumerator<double> ieA = dataA.GetEnumerator())
            using (IEnumerator<double> ieB = dataB.GetEnumerator())
            using (IEnumerator<double> ieW = weights.GetEnumerator())
            {
                while (ieA.MoveNext())
                {
                    if (!ieB.MoveNext())
                    {
                        throw new ArgumentOutOfRangeException("dataB", ArraysSameLengthMessage);
                    }
                    if (!ieW.MoveNext())
                    {
                        throw new ArgumentOutOfRangeException("weights", ArraysSameLengthMessage);
                    }

                    double xi = ieA.Current;
                    double yi = ieB.Current;
                    double wi = ieW.Current;

                    // A zero-weight observation contributes nothing to any accumulator.
                    // Skipping it also avoids 0/0 (NaN) when no weight has been
                    // accumulated yet (sumWeight + wi == 0).
                    if (wi == 0.0)
                    {
                        continue;
                    }

                    double temp = sumWeight + wi;

                    double deltaX = xi - meanA;
                    double rX = deltaX * wi / temp;
                    meanA += rX;
                    varA += sumWeight * deltaX * rX;

                    double deltaY = yi - meanB;
                    double rY = deltaY * wi / temp;
                    meanB += rY;
                    varB += sumWeight * deltaY * rY;

                    // The cross term must use the same weighted factor as the variances,
                    // wi * sumWeight / (sumWeight + wi). Using the element count here
                    // ((n - 1) / n) is only correct when all weights are equal.
                    covariance += deltaX * deltaY * wi * (sumWeight / temp);

                    sumWeight = temp;
                }
                if (ieB.MoveNext())
                {
                    throw new ArgumentOutOfRangeException("dataB", ArraysSameLengthMessage);
                }
                if (ieW.MoveNext())
                {
                    throw new ArgumentOutOfRangeException("weights", ArraysSameLengthMessage);
                }
            }
            return covariance / Math.Sqrt(varA * varB);
        }

        /// <summary>Computes the Pearson product-moment correlation matrix.</summary>
        /// <param name="vectors">Data vectors; entry (i, j) of the result correlates vector i with vector j.</param>
        /// <returns>
        /// A symmetric matrix with ones on the diagonal and the pairwise Pearson
        /// correlation coefficients elsewhere.
        /// </returns>
        public static Matrix<double> PearsonMatrix(params double[][] vectors)
        {
            // Start from the identity: the diagonal entries are left at one.
            var m = Matrix<double>.Build.DenseIdentity(vectors.Length);
            for (int i = 0; i < vectors.Length; i++)
            {
                // Only the upper triangle is computed; each value is mirrored below the diagonal.
                for (int j = i + 1; j < vectors.Length; j++)
                {
                    var c = Pearson(vectors[i], vectors[j]);
                    m.At(i, j, c);
                    m.At(j, i, c);
                }
            }

            return m;
        }

        /// <summary>Computes the Pearson product-moment correlation matrix.</summary>
        /// <param name="vectors">Collection of data vectors.</param>
        /// <returns>The Pearson product-moment correlation matrix.</returns>
        public static Matrix<double> PearsonMatrix(IEnumerable<double[]> vectors)
        {
            // Reuse the input when it already is a jagged array; otherwise materialize it once.
            return PearsonMatrix(vectors as double[][] ?? vectors.ToArray());
        }

        /// <summary>
        /// Computes the Spearman rank correlation coefficient, i.e. the Pearson
        /// correlation of the ranks of both samples.
        /// </summary>
        /// <param name="dataA">Data sample A.</param>
        /// <param name="dataB">Data sample B.</param>
        /// <returns>The Spearman rank correlation coefficient.</returns>
        public static double Spearman(IEnumerable<double> dataA, IEnumerable<double> dataB)
        {
            return Pearson(Rank(dataA), Rank(dataB));
        }

        /// <summary>
        /// Computes the Spearman rank correlation matrix.
        /// </summary>
        /// <param name="vectors">Data vectors.</param>
        /// <returns>The Spearman rank correlation matrix.</returns>
        public static Matrix<double> SpearmanMatrix(params double[][] vectors)
        {
            return PearsonMatrix(vectors.Select(Rank).ToArray());
        }

        /// <summary>Computes the Spearman rank correlation matrix.</summary>
        /// <param name="vectors">Collection of data vectors.</param>
        /// <returns>The Spearman rank correlation matrix.</returns>
        public static Matrix<double> SpearmanMatrix(IEnumerable<double[]> vectors)
        {
            return PearsonMatrix(vectors.Select(Rank).ToArray());
        }

        /// <summary>
        /// Replaces every value of a series by its rank, using
        /// <see cref="RankDefinition.Average"/> as the rank definition.
        /// </summary>
        /// <param name="series">The series to rank; <c>null</c> yields an empty array.</param>
        /// <returns>An array with the ranks of the series.</returns>
        static double[] Rank(IEnumerable<double> series)
        {
            if (series == null)
            {
                return new double[0];
            }

            // WARNING: do not cast series to an array and use it directly.
            // The ranking below works in place, so it must operate on a private copy
            // to leave the caller's data untouched.
            var data = series.ToArray();
            return ArrayStatistics.RanksInplace(data, RankDefinition.Average);
        }
    }
}

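第三步 使用(示例中的 ChiSquared 类位于 MathNet.Numerics.Distributions 命名空间,需要额外添加 using MathNet.Numerics.Distributions;)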

// Generate the first data set: 1000 samples drawn from a ChiSquared(5) distribution.
var chiSquare = new ChiSquared(5);
Console.WriteLine(@"2. Generate 1000 samples of the ChiSquare(5) distribution");
var data = new double[1000];
for (var i = 0; i < data.Length; i++)
{
    data[i] = chiSquare.Sample();
}

// Generate the second data set from a ChiSquared(2) distribution.
// It is sized from the first one because the correlation methods require
// both samples to have the same number of elements.
var chiSquareB = new ChiSquared(2);
var dataB = new double[data.Length];
for (var i = 0; i < dataB.Length; i++)
{
    dataB[i] = chiSquareB.Sample();
}

// Compute the Pearson and Spearman correlation coefficients of data and dataB.
var r1 = Correlation.Pearson(data, dataB);
var r2 = Correlation.Spearman(data, dataB);

以上是关于C#相关系数计算的主要内容,如果未能解决你的问题,请参考以下文章

皮尔森相关性系数的计算python代码(热力图版)

皮尔森相关性系数的计算python代码

计算矩阵的 Matthew 相关系数需要很长时间

皮尔森相关性系数的计算python代码

如何计算两变量的相关系数r?

相关系数r的计算公式是啥?