Spark Streaming 高階消費kafka2.1.0---subscribe


package cn.brent

import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.kafka010.KafkaUtils
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent

/**
 * Spark Streaming word count over a Kafka topic, read through the kafka010
 * direct stream with the `Subscribe` consumer strategy.
 *
 * For every 10-second batch the record values are split on whitespace, the
 * words are counted within that batch, and the counts are printed. After the
 * batch output has been produced, the consumed offsets are committed back to
 * Kafka for the configured consumer group.
 */
object CKafkaToCluster {

  /**
   * Builds the streaming job, starts it and blocks until it terminates.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    // Local import: these traits are only needed for the explicit offset commit below.
    import org.apache.spark.streaming.kafka010.{CanCommitOffsets, HasOffsetRanges}

    val conf = new SparkConf().setAppName("ConsumerKafka1")
    val batch = 10 // batch interval, in seconds
    val ssc = new StreamingContext(conf, Seconds(batch))

    ssc.sparkContext.setLogLevel("warn")

    // Checkpoint directory. The path is relative; the original note says it is
    // meant to live on HDFS — NOTE(review): confirm which filesystem it resolves
    // to on the target cluster.
    ssc.checkpoint("checkpoint")

    // Kafka broker list used to bootstrap the consumer (these are brokers, not ZooKeeper).
    val bstrapServers = "10.253.129.232:9092,10.253.129.233:9092,10.253.129.234:9092,10.253.129.235:9092"

    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> bstrapServers, // kafka 2.1.0
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      // Consumer group of this stream; may be changed to any other name.
      "group.id" -> "use_a_separate_group_id_for_each_stream",
      // Only applies when the group has no committed offset for a partition.
      "auto.offset.reset" -> "latest",
      // Offsets are committed explicitly after each batch (see foreachRDD below).
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )

    val topics = Array("kafka2Test1")
    val stream = KafkaUtils.createDirectStream[String, String](
      ssc,
      PreferConsistent,
      Subscribe[String, String](topics, kafkaParams)
    )

    // Per-batch word count over the record values.
    val lines = stream.map(item => item.value())
    val words = lines.flatMap(_.split("\\s+"))
    val pairs = words.map(x => (x, 1))
    val wordCounts = pairs.reduceByKey(_ + _)
    wordCounts.print()

    // Auto-commit is disabled, so without this the group never records progress
    // and every restart falls back to "latest", skipping records produced while
    // the job was down. The casts must be applied to the RDD / stream returned by
    // createDirectStream itself: only those carry the offset information.
    // Registered after print() so the commit is issued once the batch output ran
    // (output operations run in registration order with the default scheduler).
    stream.foreachRDD { rdd =>
      val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
      stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
    }

    ssc.start()
    ssc.awaitTermination()
  }
}


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM