Spark-Core练习题(身高排序,平均年龄,所有姓氏,每月最大天数三人,相同生日的人)
Posted Mr.zhou_Zxy
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Spark-Core练习题(身高排序,平均年龄,所有姓氏,每月最大天数三人,相同生日的人)相关的知识,希望对你有一定的参考价值。
1.已知学生数据如下:请用spark core完成下列需求
班级 学号 性别 姓名 出生年月 血型 家庭地址 身高 手机号
RB171 RB17101 男 张** 1997-02-10 AB 河南省郑州市1号 172 11122223333
RB171 RB17102 女 冯** 1996-10-01 A 河南省洛阳市2号 175 18837110115
RB171 RB17103 男 卢** 1998-08-02 B 河南省开封市3号 165 19999228822
RB171 RB17104 男 杨** 1996-08-09 A 河南省安阳市4号 168 13322554455
RB172 RB17201 女 姜** 1997-01-03 A 河南省鹤壁市1号 170 13688552244
RB172 RB17202 男 高* 1996-08-27 B 河南省新乡市2号 171 13522114455
RB173 RB17203 女 何* 1997-12-20 B 河南省焦作市3号 168 13566998855
1.1按照身高排序
package com.zxy.spark.Streaming.day004
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
object demo4 {
  /**
   * Exercise 1.1: sort students by height, tallest first.
   *
   * Reads whitespace-delimited student records from date/student.txt
   * (class, id, sex, name, birthday, blood type, address, height, phone),
   * keys each record by its height and prints them in descending order.
   */
  def main(args: Array[String]): Unit = {
    val sparkContext = new SparkContext(new SparkConf().setAppName("demo4").setMaster("local[*]"))
    val linesRDD: RDD[String] = sparkContext.textFile("date/student.txt")
    // NOTE(review): assumes the file has no header row — a header would make
    // fields(7).toInt throw NumberFormatException; confirm against the data file.
    val studentRDD: RDD[(Int, (String, String, String, String, String, String, String, String))] =
      linesRDD.map { line =>
        // "\\s+" is the regex \s+ (one or more whitespace chars).
        // The original "\\\\s+" compiled to the regex \\s+, which matches a
        // literal backslash followed by 's' and never splits a record.
        val fields: Array[String] = line.split("\\s+")
        val className   = fields(0)
        val stuId       = fields(1)
        val sex         = fields(2)
        val name        = fields(3)
        val birthday    = fields(4)
        val bloodType   = fields(5)
        val address     = fields(6)
        val height: Int = fields(7).toInt
        val phoneNumber = fields(8)
        // Height is the sort key; everything else rides along as the value.
        (height, (className, stuId, sex, name, birthday, bloodType, address, phoneNumber))
      }
    // sortByKey(ascending = false) → tallest student first; 1 partition keeps output ordered.
    val resRDD: RDD[(Int, (String, String, String, String, String, String, String, String))] =
      studentRDD.sortByKey(false, 1)
    resRDD.foreach(println)
    sparkContext.stop()
  }
}
1.2求平均年龄
package com.zxy.spark.Streaming.day004
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
object demo4 {
  /**
   * Exercise 1.2: compute the students' average age.
   *
   * Derives each age from the birth year (column 5 of the record) relative to
   * a fixed reference year, then averages over all students.
   */
  def main(args: Array[String]): Unit = {
    // Reference year the original exercise was written against; ages are
    // computed as referenceYear - birthYear.
    val referenceYear = 2021

    val sparkContext = new SparkContext(new SparkConf().setAppName("demo4").setMaster("local[*]"))
    val linesRDD: RDD[String] = sparkContext.textFile("date/student.txt")
    val ageRDD: RDD[Int] = linesRDD.map { line =>
      // "\\s+" is the regex \s+; the original "\\\\s+" matched a literal
      // backslash + 's' and would never split the record.
      val fields: Array[String] = line.split("\\s+")
      val birthday: String = fields(4)
      // Renamed from the original's second `val fields` — redeclaring the same
      // name in one lambda scope is a Scala compile error.
      val dateParts: Array[String] = birthday.split("-")
      val year = dateParts(0).toInt
      referenceYear - year
    }
    val num: Long = ageRDD.count()
    val sum: Double = ageRDD.sum()
    // Truncating division matches the original's .toInt behavior.
    val avgAge = (sum / num).toInt
    println(s"$sum -> $num")
    println(s"$avgAge")
    sparkContext.stop()
  }
}
1.3求学生中出现的所有姓氏
package com.zxy.spark.Streaming.day004
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
object demo4 {
  /**
   * Exercise 1.3: list every distinct family name (surname) among the students.
   *
   * Takes the first character of each student's name (column 4) and
   * de-duplicates. NOTE(review): assumes all surnames are single-character,
   * which holds for this data set but not for compound Chinese surnames.
   */
  def main(args: Array[String]): Unit = {
    val sparkContext = new SparkContext(new SparkConf().setAppName("demo4").setMaster("local[*]"))
    val linesRDD: RDD[String] = sparkContext.textFile("date/student.txt")
    val firstNameRDD: RDD[String] = linesRDD.map { line =>
      // "\\s+" is the regex \s+; the original "\\\\s+" matched a literal
      // backslash + 's' and would never split the record.
      val fields: Array[String] = line.split("\\s+")
      val name: String = fields(3)
      name.substring(0, 1) // first character = the surname
    }
    val distinctSurnames: RDD[String] = firstNameRDD.distinct()
    distinctSurnames.foreach(println)
    sparkContext.stop()
  }
}
1.4返回出生在每月最大天数的3人
package com.zxy.spark.Streaming.day004
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
object demo4 {
  /**
   * Exercise 1.4: for each birth month, return the 3 students born on the
   * largest day-of-month.
   *
   * Each student is keyed by birth month with value (name, 30 - dayOfMonth),
   * so a SMALLER metric means a LATER day in the month.
   */
  def main(args: Array[String]): Unit = {
    val sparkContext = new SparkContext(new SparkConf().setAppName("demo4").setMaster("local[*]"))
    val linesRDD: RDD[String] = sparkContext.textFile("date/student.txt")
    val studentRDD: RDD[(String, String)] = linesRDD.map { line =>
      // "\\s+" is the regex \s+; the original "\\\\s+" matched a literal
      // backslash + 's' and would never split the record.
      val fields: Array[String] = line.split("\\s+")
      val name: String = fields(3)
      val birthday: String = fields(4)
      (name, birthday)
    }
    val monthRDD: RDD[(String, (String, Int))] = studentRDD.map { case (name, birthday) =>
      val dateParts: Array[String] = birthday.split("-")
      val month: String = dateParts(1)
      val day: Int = dateParts(2).toInt
      // Metric: 30 - day. Smaller value = born later in the month.
      (month, (name, 30 - day))
    }
    val gbkRDD: RDD[(String, Iterable[(String, Int)])] = monthRDD.groupByKey()
    gbkRDD.foreach(println) // debug: show raw groups per month
    val resRDD: RDD[(String, List[(String, Int)])] = gbkRDD.map { case (month, people) =>
      // Sort by the numeric metric ascending (latest day first) and keep 3.
      // The original used .sorted.reverse, which ordered (name, metric) tuples
      // lexicographically by NAME, not by day-of-month.
      val top3: List[(String, Int)] = people.toList.sortBy(_._2).take(3)
      (month, top3)
    }
    resRDD.foreach(println)
    sparkContext.stop()
  }
}
1.5索引出相同生日下同学的姓名链表
package com.zxy.spark.Streaming.day004
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
object demo4 {
  /**
   * Exercise 1.5: index students who share the same birthday (month, day),
   * yielding the list of names for each date.
   *
   * Groups by (month, day) — the birth YEAR is deliberately ignored, so two
   * students born on the same calendar date in different years group together.
   */
  def main(args: Array[String]): Unit = {
    val sparkContext = new SparkContext(new SparkConf().setAppName("demo4").setMaster("local[*]"))
    val linesRDD: RDD[String] = sparkContext.textFile("date/student.txt")
    val studentRDD: RDD[(String, String)] = linesRDD.map { line =>
      // "\\s+" is the regex \s+; the original "\\\\s+" matched a literal
      // backslash + 's' and would never split the record.
      val fields: Array[String] = line.split("\\s+")
      val name: String = fields(3)
      val birthday: String = fields(4)
      (birthday, name)
    }
    val dateRDD: RDD[((String, String), String)] = studentRDD.map { case (birthday, name) =>
      val dateParts: Array[String] = birthday.split("-")
      ((dateParts(1), dateParts(2)), name) // key: (month, day)
    }
    val resRDD: RDD[((String, String), Iterable[String])] = dateRDD.groupByKey()
    resRDD.foreach(println)
    sparkContext.stop()
  }
}
以上是关于Spark-Core练习题(身高排序,平均年龄,所有姓氏,每月最大天数三人,相同生日的人)的主要内容,如果未能解决你的问题,请参考以下文章