Spark SQL 2.x demo I
阿新 • • 發佈:2018-12-13
package com.ws.sparksql

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types._
import org.apache.spark.sql._
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Spark SQL 2.x demo: builds a DataFrame from a raw text RDD using an
 * explicit schema, then filters and sorts it with the untyped Dataset API.
 */
object SparkSql2Demo {

  /**
   * Entry point. Reads "id,name,age,score" CSV lines (input path may be
   * overridden via args(0); defaults to the original HDFS location), keeps
   * rows with score > 100, prints them sorted by score descending then age
   * ascending, and stops the session.
   */
  def main(args: Array[String]): Unit = {
    // Spark 2.x unified entry point: SparkSession replaces SQLContext/HiveContext.
    val session = SparkSession.builder()
      .appName("SparkSql2Demo")
      .master("local[*]")
      .getOrCreate()

    // Generalized: input path can be passed as args(0); the default keeps the
    // original hard-coded location so existing invocations are unaffected.
    val inputPath = if (args.nonEmpty) args(0) else "hdfs://hadoop-01:9000/student"

    val dataRdd: RDD[String] = session.sparkContext.textFile(inputPath)

    // Parse each CSV line into a Row matching the schema below.
    // NOTE(review): toLong/toInt throw NumberFormatException on malformed
    // lines — this assumes clean input, same as the original.
    val studentRowRdd: RDD[Row] = dataRdd.map { line =>
      val fields: Array[String] = line.split(",")
      Row(fields(0).toLong, fields(1), fields(2).toInt, fields(3).toInt)
    }

    // Explicit schema for the student table; every column is nullable.
    val schema: StructType = StructType(List(
      StructField("id", LongType, true),
      StructField("name", StringType, true),
      StructField("age", IntegerType, true),
      StructField("score", IntegerType, true)
    ))

    val dataFrame: DataFrame = session.createDataFrame(studentRowRdd, schema)

    import session.implicits._
    // Fixed: use Column.desc / Column.asc instead of the deprecated postfix
    // operator form ($"score" desc), which requires scala.language.postfixOps
    // and warns (or errors under -Xfatal-warnings) on modern compilers.
    val result: Dataset[Row] = dataFrame
      .where($"score" > 100)
      .orderBy($"score".desc, $"age".asc)

    result.show()
    session.stop()
  }
}
結果:
+---+----+---+-----+
| id|name|age|score|
+---+----+---+-----+
| 1| 張三| 18| 150|
| 2| 李四| 19| 150|
+---+----+---+-----+