1. 程式人生 > 其它 >spark streaming 消費kafka資料

spark streaming 消費kafka資料

1.在虛擬機器上啟動 ZooKeeper 和 Kafka,並新建一個名為 test1 的 topic(本文範例消費的就是這個 topic)。

2.Scala程式

執行前請依自己的環境修改程式:第 3 步(kafkaPara)中的 Kafka 主機名稱與埠號(以及消費者群組 ID),和第 4 步(ConsumerStrategies.Subscribe)中要消費的 topic 名稱。

package scala.spark
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
 * Word-count demo that consumes a Kafka topic with Spark Streaming's direct
 * (kafka-0-10) stream and prints the per-batch counts.
 *
 * The broker address, consumer group id and topic name are hard-coded below;
 * adjust them to your environment before running.
 */
object Kafka {

  /**
   * Builds the streaming job, starts it, and blocks until it terminates.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // 1. Create the SparkConf (runs locally, using all available cores).
    val sparkConf: SparkConf =
      new SparkConf().setAppName("ReceiverWordCount").setMaster("local[*]")

    // 2. Create the StreamingContext with a 3-second batch interval.
    val ssc = new StreamingContext(sparkConf, Seconds(3))

    // 3. Define the Kafka consumer parameters.
    //    The broker address must be a single-line "host:port" string with no
    //    embedded whitespace or line break.
    val kafkaPara: Map[String, Object] = Map[String, Object](
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "hadoop02:9092",
      ConsumerConfig.GROUP_ID_CONFIG -> "test1",
      "key.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer",
      "value.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer"
    )

    // 4. Create a direct DStream that subscribes to the topic.
    val kafkaDStream: InputDStream[ConsumerRecord[String, String]] =
      KafkaUtils.createDirectStream[String, String](
        ssc,
        LocationStrategies.PreferConsistent,
        ConsumerStrategies.Subscribe[String, String](Set("test1"), kafkaPara)
      )

    // 5. Keep only the value of each record.
    val valueDStream: DStream[String] = kafkaDStream.map(record => record.value())

    // 6. Word count: split on spaces, pair each word with 1, sum per word, print.
    valueDStream
      .flatMap(_.split(" "))
      .map((_, 1))
      .reduceByKey(_ + _)
      .print()

    // 7. Start the job and block until it terminates.
    //    start() must be called exactly once per StreamingContext; calling it
    //    again after the context has stopped throws IllegalStateException.
    ssc.start()
    ssc.awaitTermination()
  }
}

3.啟動 Kafka 生產者,向 topic test1 傳送以空格分隔單詞的訊息

消費到了 好椰