Spark and Flume integration
Posted by 牵牛花
Spark Streaming and Flume integration: push-based approach

In the push-based approach, the Spark Streaming application starts an Avro receiver on a given host and port, and the Flume agent pushes events to it.
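Both applications need the spark-streaming-flume artifact on their classpath. With sbt, and assuming Spark 1.6.3 to match the guide linked at the end, the dependency might look like:

// build.sbt (sketch) -- align the version with your Spark installation
libraryDependencies += "org.apache.spark" %% "spark-streaming-flume" % "1.6.3"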
package cn.my.sparkStream

import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming._
import org.apache.spark.streaming.flume._

object SparkFlumePush {
  def main(args: Array[String]) {
    if (args.length < 2) {
      System.err.println("Usage: FlumeEventCount <host> <port>")
      System.exit(1)
    }
    LogLevel.setStreamingLogLevels()
    val Array(host, port) = args
    val batchInterval = Milliseconds(2000)

    // Create the context and set the batch interval
    val sparkConf = new SparkConf().setAppName("FlumeEventCount").setMaster("local[2]")
    val ssc = new StreamingContext(sparkConf, batchInterval)

    // Create a push-based Flume stream: Spark runs an Avro receiver on <host>:<port>
    // that the Flume agent pushes events to
    val stream = FlumeUtils.createStream(ssc, host, port.toInt, StorageLevel.MEMORY_ONLY_SER_2)

    // Print the count of events received from this server in each batch
    stream.count().map(cnt => "Received " + cnt + " flume events.").print()

    // Each record is a SparkFlumeEvent; the event body holds the actual message payload
    stream.flatMap(t => new String(t.event.getBody.array()).split(" "))
      .map((_, 1))
      .reduceByKey(_ + _)
      .print()

    ssc.start()
    ssc.awaitTermination()
  }
}
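On the Flume side, the push approach needs an avro sink aimed at the host and port where SparkFlumePush is listening. A minimal sketch of the sink section, with assumed agent and channel names (a1, c1) and placeholder host/port:

# Hypothetical Flume agent config for the push approach:
# the avro sink pushes events to the Spark receiver
a1.sinks = k1
a1.sinks.k1.type = avro
a1.sinks.k1.channel = c1
a1.sinks.k1.hostname = <host where SparkFlumePush runs>
a1.sinks.k1.port = <port passed to SparkFlumePush>

Note that with push, the Spark Streaming application must already be running and listening before the Flume agent starts, or the avro sink cannot connect.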
Spark Streaming and Flume integration: pull-based approach (polling)

In the pull-based approach, Flume buffers events in a custom SparkSink, and Spark Streaming polls them from there.

package cn.my.sparkStream

import java.net.InetSocketAddress

import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming._
import org.apache.spark.streaming.flume._

object SparkFlumePull {
  def main(args: Array[String]) {
    if (args.length < 2) {
      System.err.println("Usage: FlumeEventCount <host> <port>")
      System.exit(1)
    }
    LogLevel.setStreamingLogLevels()
    val Array(host, port) = args
    val batchInterval = Milliseconds(2000)

    // Create the context and set the batch interval
    val sparkConf = new SparkConf().setAppName("FlumeEventCount").setMaster("local[2]")
    val ssc = new StreamingContext(sparkConf, batchInterval)

    // Pull-based stream: Spark polls events from one or more Flume SparkSinks.
    // When there are multiple sinks, list all of their addresses in this array.
    // (The host/port command-line arguments could be used here instead of hardcoding.)
    val flumeSinkList = Array[InetSocketAddress](new InetSocketAddress("mini1", 8888))
    val flumeStream = FlumeUtils.createPollingStream(ssc, flumeSinkList, StorageLevel.MEMORY_ONLY_2)

    // Print the count of events received in each batch
    flumeStream.count().map(cnt => "Received " + cnt + " flume events.").print()

    // Each record is a SparkFlumeEvent; the event body holds the actual message payload
    flumeStream.flatMap(t => new String(t.event.getBody.array()).split(" "))
      .map((_, 1))
      .reduceByKey(_ + _)
      .print()

    ssc.start()
    ssc.awaitTermination()
  }
}
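On the Flume side, the pull approach uses the custom SparkSink that ships in the spark-streaming-flume-sink artifact; that jar (plus the matching scala-library) has to be on the Flume agent's classpath. A sketch of the sink section, using the mini1:8888 address from the code above and the same assumed agent/channel names:

# Hypothetical Flume agent config for the pull approach:
# events are buffered in the SparkSink until Spark Streaming polls them
a1.sinks = spark
a1.sinks.spark.type = org.apache.spark.streaming.flume.sink.SparkSink
a1.sinks.spark.channel = c1
a1.sinks.spark.hostname = mini1
a1.sinks.spark.port = 8888

Because a Flume transaction only commits after Spark Streaming has received and replicated the data, the pull approach gives stronger reliability guarantees than push.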
Reference: http://spark.apache.org/docs/1.6.3/streaming-flume-integration.html
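Both examples call LogLevel.setStreamingLogLevels(), a helper in the cn.my.sparkStream package that is not shown in the post. It presumably mirrors the StreamingExamples utility from the official Spark examples; a minimal sketch under that assumption:

package cn.my.sparkStream

import org.apache.log4j.{Level, Logger}

// Assumed implementation, modeled on Spark's StreamingExamples helper
object LogLevel {
  def setStreamingLogLevels() {
    // Only override logging if the user has not configured log4j themselves
    val log4jInitialized = Logger.getRootLogger.getAllAppenders.hasMoreElements
    if (!log4jInitialized) {
      // Silence Spark's verbose INFO output so the streaming results stay readable
      Logger.getRootLogger.setLevel(Level.WARN)
    }
  }
}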