數據零丟失kafka + checkpoint


package kafkautils

/**
* Created on 12:48 AM.
*
* High level consumer api
*
* low level consumer api (simple consumer api)
*
*
*/
import kafka.serializer.StringDecoder
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Duration, Seconds, StreamingContext}


/**
 * Spark Streaming job that consumes Kafka topics through
 * `KafkaUtils.createDirectStream` with checkpointing enabled, so that the
 * `StreamingContext` is obtained via `StreamingContext.getOrCreate` from the
 * checkpoint directory when one is available.
 */
object StreamingWithCheckpoint {

  /**
   * Entry point: obtains a `StreamingContext` (restored from the checkpoint
   * directory or freshly created), starts it and blocks until it terminates.
   *
   * @param args command-line arguments; currently unused because the broker
   *             list and topic names are hard-coded below
   */
  def main(args: Array[String]): Unit = {
    // val Array(brokers, topics) = args
    val processingInterval = 2 // batch interval, in seconds
    val brokers = "spark123:9092"
    val topics = "mytest1"

    // Spark configuration used when a new context has to be created.
    val sparkConf = new SparkConf().setAppName("ConsumerWithCheckPoint").setMaster("local[2]")

    // Parameters for the direct Kafka stream: the set of topics and the Kafka settings.
    val topicsSet = topics.split(",").toSet
    val kafkaParams = Map[String, String](
      "metadata.broker.list" -> brokers,
      "auto.offset.reset" -> "smallest"
    )
    val checkpointPath = "hdfs://spark123:8020/spark_checkpoint10"

    /**
     * Builds a new `StreamingContext` with the Kafka direct stream, the
     * checkpoint settings and the output operation attached.
     *
     * @return the fully configured, not yet started, streaming context
     */
    def functionToCreateContext(): StreamingContext = {
      val ssc = new StreamingContext(sparkConf, Seconds(processingInterval))
      val messages =
        KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
          ssc, kafkaParams, topicsSet)

      ssc.checkpoint(checkpointPath)
      // Checkpoint the stream every 8 batch intervals (8 * 2 s = 16 s).
      messages.checkpoint(Seconds(8 * processingInterval))
      messages.foreachRDD { rdd =>
        // Only report batches that actually contain records.
        if (!rdd.isEmpty()) {
          println("################################" + rdd.count())
        }
      }
      ssc
    }

    // If a checkpoint exists, restore the StreamingContext from the information
    // recorded in it; otherwise build a new one with functionToCreateContext.
    val context = StreamingContext.getOrCreate(checkpointPath, functionToCreateContext _)
    context.start()
    context.awaitTermination()
  }
}


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM