package cn.brent
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.kafka010.KafkaUtils
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
object CKafkaToCluster {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("ConsumerKafka1")
    // Micro-batch interval in seconds
    val batch = 10
    val ssc = new StreamingContext(conf, Seconds(batch))
    ssc.sparkContext.setLogLevel("WARN")
    // Set the checkpoint directory; in production this should live on HDFS
    ssc.checkpoint("checkpoint")
    // Kafka bootstrap (broker) addresses (these are brokers, not ZooKeeper)
    val bstrapServers = "10.253.129.232:9092,10.253.129.233:9092,10.253.129.234:9092,10.253.129.235:9092"
    // Consumer group for this stream; any other group name can be used
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> bstrapServers, // Kafka 2.1.0
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "use_a_separate_group_id_for_each_stream",
      // Start from the latest offsets when no committed offset exists
      "auto.offset.reset" -> "latest",
      // Let Spark manage offsets instead of Kafka auto-commit
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )
    val topics = Array("kafka2Test1")
    // Direct stream: each Kafka partition maps to one Spark partition;
    // PreferConsistent spreads partitions evenly across available executors
    val stream = KafkaUtils.createDirectStream[String, String](
      ssc,
      PreferConsistent,
      Subscribe[String, String](topics, kafkaParams)
    )
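
    // With enable.auto.commit=false, offsets are recovered only through the
    // checkpoint above. A minimal sketch (not used by this job) of committing
    // offsets back to Kafka after each batch with the kafka010 API:
    //
    //   import org.apache.spark.streaming.kafka010.{CanCommitOffsets, HasOffsetRanges}
    //   stream.foreachRDD { rdd =>
    //     val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
    //     // ... process the batch, then commit ...
    //     stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
    //   }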

    // Classic word count over each batch
    val lines = stream.map(item => item.value())
    val words = lines.flatMap(_.split("\\s+"))
    val pairs = words.map(x => (x, 1))
    val wordCounts = pairs.reduceByKey(_ + _)
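
    // Because a checkpoint directory is configured, a stateful variant is also
    // possible. A hypothetical sketch (not part of this job) of keeping a
    // running total per word with updateStateByKey:
    //
    //   val totals = pairs.updateStateByKey[Int] { (newCounts, state) =>
    //     Some(newCounts.sum + state.getOrElse(0))
    //   }
    //   totals.print()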

    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}
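
// Example launch (a sketch: the master URL, package version, and jar name are
// assumptions, not taken from this project):
//
//   spark-submit \
//     --class cn.brent.CKafkaToCluster \
//     --master yarn \
//     --packages org.apache.spark:spark-streaming-kafka-0-10_2.11:2.4.0 \
//     kafka-to-cluster.jar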