Spark Streaming帶狀態更新
阿新 • • 發佈:2018-11-13
帶狀態的更新使用的是updateStateByKey方法,需要傳入一個自己編寫的狀態更新函式,並且注意必須先設定checkpoint目錄。
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Stateful word count over a Kafka stream.
 *
 * Demonstrates `updateStateByKey`, which keeps a running state per key across
 * micro-batches. Spark requires a checkpoint directory for stateful operations.
 */
class UpdataByKey {
}

object UpdataByKey {

  /**
   * State-update function passed to `updateStateByKey`.
   *
   * @param currValue the values (per-word counts of 1) that arrived for this key
   *                  in the current batch
   * @param point     the running total from previous batches; `None` the first
   *                  time a key is seen
   * @return the new running total, stored as this key's state
   */
  def addFunc(currValue: Seq[Int], point: Option[Int]): Option[Int] =
    Some(currValue.sum + point.getOrElse(0))

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("UpdataByKey").setMaster("local[*]")
    val sc = new SparkContext(conf)
    val ssc = new StreamingContext(sc, Seconds(10))

    // BUG FIX: updateStateByKey fails at runtime unless a checkpoint directory
    // is set ("The checkpoint directory has not been set"). The original code
    // documented this requirement but never called it.
    ssc.checkpoint("checkpoint")

    val topics = "xiaopeng"
    // One receiver thread per topic.
    val topicMap = topics.split(",").map((_, 2)).toMap
    // Receiver-based Kafka stream; elements are (kafkaKey, messageValue) pairs.
    val lines = KafkaUtils.createStream(ssc, "192.168.10.219:2181", "han", topicMap)
    val words = lines.flatMap(line => line._2.split(" ")).map(word => (word, 1))

    // BUG FIX: DStream transformations return a new DStream — the original
    // discarded the result of updateStateByKey and printed the stateless
    // per-batch `words` stream instead of the accumulated counts.
    val stateCounts = words.updateStateByKey[Int](addFunc _)
    stateCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}