// Spark integration with Kudu (Spark 整合 Kudu)

package spark.demo

import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.kudu.spark.kudu._

/**
  * Required Maven dependencies for this demo:
  *
  * <dependency>
  * <groupId>org.apache.kudu</groupId>
  * <artifactId>kudu-spark2_2.11</artifactId>
  * <version>1.8.0</version>
  * </dependency>
  * 
  * <dependency>
  * <groupId>org.apache.spark</groupId>
  * <artifactId>spark-sql_2.11</artifactId>
  * <version>2.1.1</version>
  * </dependency>
  */
object SparkKuduDemo {

  /**
    * Reads a Kudu table into a Spark DataFrame and prints its contents.
    *
    * Optional command-line overrides (defaults preserve the original
    * hard-coded values, so existing invocations are unaffected):
    *   args(0) — Kudu master address (default "centos00:7051")
    *   args(1) — Kudu table name    (default "my_kudu_table")
    */
  def main(args: Array[String]): Unit = {

    // Use `lift` to read optional positional args without risking
    // ArrayIndexOutOfBoundsException on a shorter args array.
    val kuduMaster: String = args.lift(0).getOrElse("centos00:7051")
    val kuduTable: String  = args.lift(1).getOrElse("my_kudu_table")

    val spark: SparkSession = SparkSession
      .builder()
      .appName(getClass.getSimpleName)
      .master("local[2]")
      .getOrCreate()

    try {
      val df: DataFrame = spark.read.options(Map(
        "kudu.master" -> kuduMaster,
        "kudu.table" -> kuduTable)).kudu

      /**
        * Show the entire table. `show(false)` disables column truncation
        * (left-aligned output), e.g.:
        *
        * +---+-------+
        * |id |name   |
        * +---+-------+
        * |2  |Mike   |
        * |10 |Phoniex|
        * |1  |David  |
        * |8  |Alex   |
        * |5  |Jorden |
        * +---+-------+
        */
      df.show(false)

      /**
        * Sort by id descending and show only the first 3 rows, e.g.:
        *
        * +---+-------+
        * |id |name   |
        * +---+-------+
        * |10 |Phoniex|
        * |8  |Alex   |
        * |5  |Jorden |
        * +---+-------+
        */
      df.sort(df("id").desc).show(3, false)
    } finally {
      // Always release the SparkContext; the original version leaked it
      // (spark was never stopped before the JVM exited).
      spark.stop()
    }

  }

}