sparkSql 第一個demo
阿新 • 發佈:2018-12-13
package com.ws.sparksql

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}

/**
 * First Spark SQL demo, written against the Spark SQL 1.1.x API.
 *
 * Reads comma-separated student records from HDFS, maps each line onto the
 * [[Student]] case class, converts the RDD to a DataFrame, registers it as a
 * temporary table, and runs a plain SQL query over it.
 */
object SparkSqlDemo {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("SparkSqlDemo").setMaster("local[2]")
    val sc   = new SparkContext(conf)

    // Entry point for Spark SQL in the 1.x API.
    val sqlContext = new SQLContext(sc)

    // Each input line is expected to look like: id,name,age,score
    val lines: RDD[String] = sc.textFile("hdfs://hadoop-01:9000/student")

    val students: RDD[Student] = lines.map { line =>
      val fields = line.split(",")
      Student(fields(0).toLong, fields(1), fields(2).toInt, fields(3).toInt)
    }

    // Brings the RDD -> DataFrame conversion (toDF) into scope.
    import sqlContext.implicits._
    val studentDf: DataFrame = students.toDF

    // Register the DataFrame so it can be referenced by name in SQL.
    studentDf.registerTempTable("t_student")

    // Writing the SQL is still a Transformation — nothing executes yet.
    val ranked: DataFrame = sqlContext.sql("select * from t_student order by score desc , age asc")

    // show() is the Action that triggers execution and prints the result.
    ranked.show()

    sc.stop()
  }
}

case class Student(id: Long, name: String, age: Int, score: Int)
結果 :
+---+----+---+-----+
| id|name|age|score|
+---+----+---+-----+
| 1| 張三| 18| 150|
| 2| 李四| 19| 150|
| 3| 王五| 20| 98|
| 4| 趙六| 17| 88|
+---+----+---+-----+