1. 程式人生 > 實用技巧 > SparkSQL -- 內建函式 - max, min, filter, orderBy

SparkSQL -- 內建函式 - max, min, filter, orderBy

scala> val df = Seq(
     |              ("01", "Jack",  "2020-06-05"),
     |              ("02", "Tom",   "2020-01-01"),
     |              ("03", "Mike",  "2020-09-01"),
     |              ("04", "Tina",  "2020-09-01"),
     |              ("05", "Alex",  "2020-06-10"),
     |              ("06", "Bob",   "2020-01-01"),
     |              ("07", "David", "2020-09-01"),
     |              ("08", "Ben",   "2020-09-01"),
     |              ("09", "Allen", "2020-06-05"),
     |              ("10", "Caesar","2020-01-01")
     |           ).toDF("id", "name", "entrytime")
df: org.apache.spark.sql.DataFrame = [id: string, name: string ... 1 more field]

// 獲取最大入職時間(entrytime 為字串欄位,max 依字典序比較;日期採 yyyy-MM-dd 格式,故字典序與時間先後一致)
scala> df.select(max($"entrytime")).show
+--------------+
|max(entrytime)|
+--------------+
|    2020-09-01|
+--------------+

// 獲取最小入職時間
scala> df.select(min($"entrytime")).show
+--------------+
|min(entrytime)|
+--------------+
|    2020-01-01|
+--------------+

// 統計 name 欄位的記錄數(count 回傳的是列數,null 值也會計入)
scala> df.select("name").count
res2: Long = 10

// 統計 name 欄位中含有大寫 "A" 的記錄數(contains 區分大小寫,故 Jack、David、Caesar 中的小寫 "a" 不計入)
scala> df.select("name").filter($"name".contains("A")).count
res3: Long = 2

// 過濾出姓名中含有"A"的記錄
scala> df.select("id", "name", "entrytime").filter($"name".contains("A")).show()
+---+-----+----------+
| id| name| entrytime|
+---+-----+----------+
| 05| Alex|2020-06-10|
| 09|Allen|2020-06-05|
+---+-----+----------+

// 按入職時間正序排序(entrytime 相同的記錄,其先後順序不保證固定)
scala> df.select(col("*")).orderBy("entrytime").show
+---+------+----------+
| id|  name| entrytime|
+---+------+----------+
| 06|   Bob|2020-01-01|
| 10|Caesar|2020-01-01|
| 02|   Tom|2020-01-01|
| 01|  Jack|2020-06-05|
| 09| Allen|2020-06-05|
| 05|  Alex|2020-06-10|
| 07| David|2020-09-01|
| 08|   Ben|2020-09-01|
| 03|  Mike|2020-09-01|
| 04|  Tina|2020-09-01|
+---+------+----------+

// 按入職時間倒序排序
scala> df.select(col("*")).orderBy($"entrytime".desc).show
+---+------+----------+
| id|  name| entrytime|
+---+------+----------+
| 04|  Tina|2020-09-01|
| 03|  Mike|2020-09-01|
| 07| David|2020-09-01|
| 08|   Ben|2020-09-01|
| 05|  Alex|2020-06-10|
| 01|  Jack|2020-06-05|
| 09| Allen|2020-06-05|
| 02|   Tom|2020-01-01|
| 06|   Bob|2020-01-01|
| 10|Caesar|2020-01-01|
+---+------+----------+