Spark MLlib之使用Breeze操作矩阵向量

Posted by dy9776

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Spark MLlib之使用Breeze操作矩阵向量相关的知识,希望对你有一定的参考价值。

在下面的练习中,需要自行将 Spark 的 jar 包添加到工程的依赖中。

在使用Breeze 库时,需要导入相关包:

import breeze.linalg._

import breeze.numerics._

  

具体练习如下:

package leaning

import breeze.linalg._
import breeze.numerics._
import breeze.stats.distributions.Rand

/**
  * Created by dy9776 on 2017/12/5.
  */


/**
  * Runnable tour of the Breeze API: creating vectors and matrices, element
  * access, in-place updates, element-wise arithmetic, reductions, boolean
  * helpers, linear-algebra routines and rounding functions.
  *
  * Every step prints its result; the comment next to a statement shows the
  * output that is expected on the console.
  */
object Practise_breeze {

  /**
    * Entry point. Runs every example in order and prints the results.
    *
    * @param args command-line arguments (not used)
    */
  def main(args: Array[String]): Unit = {
    // ---------- Creation ----------

    // All-zero matrix (3 rows, 2 columns)
    val matrix: DenseMatrix[Double] = DenseMatrix.zeros[Double](3,2)
    println(matrix)
    /*
      0.0  0.0
      0.0  0.0
      0.0  0.0
    */

    // All-zero vector
    val testVector: DenseVector[Double] = DenseVector.zeros[Double](2)
    println(testVector) // DenseVector(0.0, 0.0)

    // All-one vector
    val allOneVector=DenseVector.ones[Double](2)
    println(allOneVector) // DenseVector(1.0, 1.0)


    // Vector filled with a constant value (3 elements, each 2.0)
    val haveNumberFill =DenseVector.fill[Double](3,2)
    println(haveNumberFill) // DenseVector(2.0, 2.0, 2.0)

    // Range vectors (start inclusive, end exclusive, step); these are NOT random
    val rangeNUm= DenseVector.range(1,  10 , 2)//DenseVector(1, 3, 5, 7, 9)
    val rangeNUmD= DenseVector.rangeD(1,  9 , 2)//DenseVector(1.0, 3.0, 5.0, 7.0)
    val rangeNUmF= DenseVector.rangeF(1,  7 , 2)//DenseVector(1.0, 3.0, 5.0)
    println(rangeNUm)
    println(rangeNUmD)
    println(rangeNUmF)

    // Identity matrix
    val unitMatrix=DenseMatrix.eye[Double](4)
//    println(unitMatrix)
     /*
     1.0  0.0  0.0  0.0
     0.0  1.0  0.0  0.0
     0.0  0.0  1.0  0.0
     0.0  0.0  0.0  1.0
      */

    // Diagonal matrix built from a vector
    val doubleVecoter=diag(DenseVector(3.0, 4.0 , 5.0))
//    println(doubleVecoter)
    /*
    3.0  0.0  0.0
    0.0  4.0  0.0
    0.0  0.0  5.0
     */

    // Matrix built row by row (each tuple is one row)
    val byRowCreateMatrix= DenseMatrix( (4.0, 5.0, 6.0 ) , (7.0 ,8.0 ,9.0))
//    println(byRowCreateMatrix)
    /*
    4.0  5.0  6.0
    7.0  8.0  9.0
     */

    // Vector built from its elements.
    // The values are passed directly: wrapping them in an extra pair of
    // parentheses would create a one-element vector holding a 6-tuple.
    val denseCreateVector = DenseVector(4.0, 5.0, 6.0, 7.0, 8.0, 9.0)
//    println(denseCreateVector) // DenseVector(4.0, 5.0, 6.0, 7.0, 8.0, 9.0)

    // Vector transpose
    val vectorTranspostion= DenseVector(4.0, 5.0, 6.0, 7.0, 8.0, 9.0).t
    println(vectorTranspostion)//Transpose(DenseVector(4.0, 5.0, 6.0, 7.0, 8.0, 9.0))

    // Vector built from a function of the index
    val funCreateVector=DenseVector.tabulate(5)(i=> i*i)
    println(funCreateVector)//DenseVector(0, 1, 4, 9, 16)
    val funCreateVector2=DenseVector.tabulate( 0 to 5)(i=> i*i)
    println(funCreateVector2)//DenseVector(0, 1, 4, 9, 16, 25)

    // Matrix built from a function of (row, column)
    val createFuncMatrix= DenseMatrix.tabulate(3, 4) {
      case (i ,j ) => i*i + j*j
    }
//    println(createFuncMatrix)
    /*
       0  1  4  9
       1  2  5  10
       4  5  8  13
     */

    // Matrix built from a flat array; the array is read column by column
    val createFunctionMatrix= new DenseMatrix[Double](3, 2, Array(1.0, 4.0, 7.0, 3.0, 6.0, 9.0))
//    println(createFunctionMatrix)
    /*
    1.0  3.0
    4.0  6.0
    7.0  9.0
     */

    // Random vector, uniform on [0, 1). Sample output; values differ on every run:
    // DenseVector(0.7978222133507369, 0.48978247271729325, 0.24943434133065834, 0.6619447026155139, 0.5324942068054981, 0.9051865626036415, 0.5989291014099107, 0.9221881029987078, 0.17371486701192662)
    val formZeroToOneRandomVector= DenseVector.rand( 9, Rand.uniform)
    println(formZeroToOneRandomVector)
    // A second draw prints a different set of values
    val formZeroToOneRandomVector2= DenseVector.rand( 9, Rand.uniform)
    println(formZeroToOneRandomVector2)


    // Random matrix, uniform on [0, 1). Sample output:
    val formZeroToOneRandomMatrix= DenseMatrix.rand(3, 2, Rand.uniform)
    println(formZeroToOneRandomMatrix)
    /*
    0.8036324612618653  0.538112087890035
    0.6864375371630702  0.3123993272549075
    0.9458628172312897  0.01137554621536796
     */
    // Random matrix drawn from Rand.gaussian: despite the variable name the
    // values are NOT confined to [0, 1] (see the negative entries below).
    val formZeroToOneRandomMatrix2=DenseMatrix.rand(3, 2, Rand.gaussian)
    println(formZeroToOneRandomMatrix2)
    /*
      0.9510499901472648   0.287812938654061
      -0.5266499883462216  0.9380426076781263
      -0.3959295333472151  -0.9057610233257112
     */

    // ---------- Element access ----------
    val a = new DenseVector[Int](Array(1 to 20 : _*))
    println(a)//DenseVector(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)

    // Element at a given position

    println(a(0)) //1

    // Sub-vector (slice)
    println( a(1 to 4) )//DenseVector(2, 3, 4, 5)
    println( a(1 until 4) )//DenseVector(2, 3, 4)

    // From a start position to the end (-1 denotes the last index)
    println( a(1 to -1) )//DenseVector(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)

    // Slice with an explicit step; a negative step yields reverse order
    println( a(5 to 0 by -1) )//DenseVector(6, 5, 4, 3, 2, 1)


    // Last element
    println( a(-1)) //20


    val m = DenseMatrix((1.0, 2.0, 3.0), (4.0, 5.0, 6.0))
    println(m)
    /*
    1.0  2.0  3.0
    4.0  5.0  6.0
     */

    // Element at (row, column)
    println( m(0 ,1) ) //2.0

    // A whole column of the matrix
    println( m(:: ,1) ) // DenseVector(2.0, 5.0)


    // ---------- Element manipulation ----------

    // Reshape the matrix (elements are taken in column order)
    val justAdjustMatrix =m.reshape(3, 2)
    println(justAdjustMatrix)
    /*
    1.0  5.0
    4.0  3.0
    2.0  6.0
     */

    // Flatten the matrix into a vector (column order)
    val toVector=m.toDenseVector
    println(toVector)//DenseVector(1.0, 4.0, 2.0, 5.0, 3.0, 6.0)
    println(toVector.toDenseMatrix)//1.0  4.0  2.0  5.0  3.0  6.0

    // Copy of the lower triangle.
    // m is 2x3 (not square); the printed result is the 2x2 triangle taken
    // from the first two columns, so the third column is dropped.
    println(lowerTriangular(m))//
    /*
    1.0  0.0
    4.0  5.0
     */

    // Copy of the upper triangle (again 2x2, see the note above)
    println(upperTriangular(m))
    /*
    1.0  2.0
    0.0  5.0
     */

    // Copy of the matrix; m itself is still unchanged at this point
    println(m.copy)
    /*
    1.0  2.0  3.0
    4.0  5.0  6.0
     */


    // Diagonal elements.
    // upperTriangular(m) is the 2x2 matrix shown above, so its diagonal is
    // (m(0,0), m(1,1)) = (1.0, 5.0); the element 6.0 lives in the dropped column.
    println(diag(upperTriangular(m)))
    //DenseVector(1.0, 5.0)


    // Assign a scalar to a slice (mutates a in place, prints the slice)
    println(a(1 to 4 ):=5)
    //DenseVector(5, 5, 5, 5)

    // Assign a vector to a slice
    println( a(1 to 4):=DenseVector(1,2,3,4) )
      //DenseVector(1, 2, 3, 4)

    println(m)
    // Assign a scalar to a sub-matrix: m(rowRange, columnRange).
    // m has only rows 0 and 1, so a row range of 1 to 2 is out of bounds:
//    println( m( 1 to 2, 1 to 2) := 0.0 )
//Exception in thread "main" java.lang.IndexOutOfBoundsException: Row slice of Range(1, 2) was bigger than matrix rows of 2
    println("-==========m1================-")
    println( m( 0 to 1, 1 to 2) := 0.0 )
    println("-==========m================-")
    println(m)
    println("-==========m end================-")
    /*
    Rows 0-1 of columns 1-2 are zeroed; column 0 keeps its values.
    -==========m1================-
    0.0  0.0
    0.0  0.0
    -==========m================-
    1.0  0.0  0.0
    4.0  0.0  0.0
    -==========m end================-
    */

    // Assign a scalar to a whole column
    val re=m(::, 2) := 5.0
    println(re.toDenseMatrix)
    //5.0  5.0


    val a1 = DenseMatrix((1.0, 2.0, 3.0), (4.0, 5.0, 6.0))
    val a2 = DenseMatrix((7.0, 8.0, 9.0), (10.0, 11.0, 12.0))


    // Stack matrices vertically
    val verticalLike=DenseMatrix.vertcat(a1, a2)
    println(verticalLike)
    println("-==========================-")
    /*
    1.0   2.0   3.0
    4.0   5.0   6.0
    7.0   8.0   9.0
    10.0  11.0  12.0
     */

    // Concatenate matrices horizontally
    val twoMatrixConn=DenseMatrix.horzcat( a1, a2)
    println(twoMatrixConn)
    println("-==========================-")
/*
1.0  2.0  3.0  7.0   8.0   9.0
4.0  5.0  6.0  10.0  11.0  12.0
 */

    // Concatenate vectors: vertcat gives one longer vector,
    // horzcat gives a matrix with one column per input vector
    val connnectVector1=DenseVector.vertcat(DenseVector(20, 21, 22), DenseVector(23, 24, 25))
    val connnectVector2=DenseVector.horzcat(DenseVector(20, 21, 22), DenseVector(23, 24, 25))

    println(connnectVector1)//DenseVector(20, 21, 22, 23, 24, 25)
    println(connnectVector2)
    /*
      20  23
      21  24
      22  25
     */


    // ---------- Numeric operations ----------
    // Element-wise addition
    println(a1 + a2)
    /*
      8.0   10.0  12.0
      14.0  16.0  18.0
     */

    // Element-wise multiplication
    println(a1 :* a2)
    /*
      7.0   16.0  27.0
      40.0  55.0  72.0
     */

    // Element-wise division
    println(a1 :/ a2)
    /*
      0.14285714285714285  0.25                 0.3333333333333333
      0.4                  0.45454545454545453  0.5
     */

    // Element-wise less-than comparison
    println(a1 :< a2)
    /*
      true  true  true
      true  true  true
     */

    // Element-wise equality
    println(a1 :== a2)
    /*
      false  false  false
      false  false  false
     */

    // In-place addition of a scalar (mutates a1)
    println(a1 :+=2.0)
    /*
      3.0  4.0  5.0
      6.0  7.0  8.0
     */

    // In-place multiplication by a scalar (mutates a1 again)
    println(a1 :*=2.0)
    /*
      6.0   8.0   10.0
      12.0  14.0  16.0
     */

    // Vector dot product
    val vectorDot=DenseVector(1, 2, 3, 4) dot DenseVector(1, 1, 1, 1)
    println(vectorDot)//10

    // Largest element (a1 was modified in place above)
    println(max(a1))//16.0

    // Smallest element
    println(min(a1))//6.0

    // Position of the largest element
    println(argmax(a1))// (1,2)

    // Position of the smallest element
    println(argmin(a1))// (0,0)

    // ---------- Sums ----------

    val m1 = DenseMatrix((1.0, 2.0, 3.0, 4.0), (5.0, 6.0, 7.0, 8.0), (9.0, 10.0, 11.0, 12.0))
    println(m1)
    /*
        1.0  2.0   3.0   4.0
        5.0  6.0   7.0   8.0
        9.0  10.0  11.0  12.0
     */

    println("-==========================-")
    // Sum of all elements
    println(sum(m1))//78.0

    // Sum of each column
    println(sum(m1, Axis._0))// 15.0  18.0  21.0  24.0

    // Sum of each row
    println(sum(m1, Axis._1))// DenseVector(10.0, 26.0, 42.0)

    // Sum of the diagonal elements: 1.0 + 6.0 + 11.0
     println(trace(lowerTriangular(m1)))// 18.0

    // Cumulative sum
    val a3 = new DenseVector[Int](Array(10 to 20: _*))
    println(accumulate(a3)) // DenseVector(10, 21, 33, 46, 60, 75, 91, 108, 126, 145, 165)


    // ---------- Boolean functions ----------

    val c = DenseVector(true, false, true)
    val d = DenseVector(false, true, true)
    // Element-wise AND
    println(c :& d) // DenseVector(false, false, true)

    // Element-wise OR
    println(c :| d) //DenseVector(true, true, true)

    // Element-wise NOT
    println(!c) //DenseVector(false, true, false)


    val e = DenseVector[Int](-3, 0, 2)


    // True if at least one element is non-zero
    println(any(e)) //true

    // True only if every element is non-zero
    println(all(e)) //false

    // ---------- Linear algebra ----------
    val f = DenseMatrix((1.0, 2.0, 3.0), (4.0, 5.0, 6.0), (7.0, 8.0, 9.0))
    val g = DenseMatrix((1.0, 1.0, 1.0), (1.0, 1.0, 1.0), (1.0, 1.0, 1.0))

    // Linear solve: find X such that f * X = g
    println(f \ g)
      /*
        -2.5  -2.5  -2.5
        4.0   4.0   4.0
        -1.5  -1.5  -1.5
       */

    // Transpose
    println(f.t)
    /*
      1.0  4.0  7.0
      2.0  5.0  8.0
      3.0  6.0  9.0
     */

    // Determinant (not the eigenvalues; those come from eig below).
    // The value is zero up to rounding error: f is singular, see rank(f) below.
    println(det(f)) // 6.661338147750939E-16

    // Inverse. Because f is (numerically) singular, the huge entries printed
    // here are rounding artefacts, not a usable inverse.
    println(inv(f))
    /*
      -4.503599627370499E15  9.007199254740992E15    -4.503599627370495E15
      9.007199254740998E15   -1.8014398509481984E16  9.007199254740991E15
      -4.503599627370498E15  9.007199254740992E15    -4.5035996273704955E15
     */

    // Pseudo-inverse
    println(pinv(f))
    /*
      -3.7720834019330525E14  7.544166803866101E14    -3.77208340193305E14
      7.544166803866094E14    -1.5088333607732208E15  7.544166803866108E14
      -3.772083401933041E14   7.544166803866104E14    -3.772083401933055E14
     */

    // Eigenvalues and eigenvectors
    println(eig(f))
    /*
    Eig(DenseVector(16.116843969807043, -1.1168439698070427, -1.3036777264747022E-15),DenseVector(0.0, 0.0, 0.0),-0.23197068724628617  -0.7858302387420671   0.40824829046386363
        -0.5253220933012336   -0.08675133925662833  -0.816496580927726
         -0.8186734993561815   0.61232756022881      0.4082482904638625
        )
     */


    // Singular value decomposition
    val svd.SVD(u,s,v) = svd(g)
    println(u)
    /*
      -0.5773502691896255  -0.5773502691896257  -0.5773502691896256
      -0.5773502691896256  -0.2113248654051871  0.7886751345948126
      -0.5773502691896256  0.7886751345948129   -0.21132486540518708
     */
    println("==============================")
    println(s) //DenseVector(3.0000000000000004, 0.0, 0.0)
    println("==============================")
    println(v)
    /*
      -0.5773502691896256  -0.5773502691896257  -0.5773502691896256
      0.0                  -0.7071067811865474  0.7071067811865477
      0.816496580927726    -0.4082482904638629  -0.4082482904638628
     */


    // Rank of the matrix
    println(rank(f))  //2

    // Number of elements
    println(f.size) //9

    // Number of rows
    println(f.rows) // 3

    // Number of columns (printed like the two values above)
    println(f.cols) // 3


    // ---------- Rounding functions ----------

    val h = DenseVector(-1.2, 0.7, 2.3) // DenseVector(-1.2, 0.7, 2.3)

    // Round to the nearest integer
    println( round(h)  ) // DenseVector[Long]: DenseVector(-1, 1, 2)

    // Smallest integer not less than each element
    println( ceil(h) ) // DenseVector(-1.0, 1.0, 3.0)

    // Largest integer not greater than each element
    println( floor(h) ) // DenseVector(-2.0, 0.0, 2.0)

    // Sign function
    println( signum(h) ) // DenseVector(-1.0, 1.0, 1.0)

    // Absolute value
    println( abs(h) )  // DenseVector(1.2, 0.7, 2.3)


  }

}

  

 

以上是关于Spark MLlib之使用Breeze操作矩阵向量的主要内容,如果未能解决你的问题,请参考以下文章

Spark MLlib数据类型

Spark MLlib数据类型

Spark MLlib数据类型

spark-mllib 密集向量和稀疏向量

机器学习 spark.mllib 数据类型学习

如何利用Spark MLlib进行个性推荐?