Spark Parquet:從 HDFS 上讀和寫(Scala 版本)
阿新 • • 發佈:2019-02-09
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.SaveMode

/**
 * Example of the generic load/save API for Parquet data on HDFS.
 *
 * Reads a Parquet dataset from HDFS, writes it back to a different HDFS
 * location (overwriting anything already there), then reads the written
 * copy again and displays it to confirm the round trip.
 */
object GenericLoadSave {

  /** HDFS location of the Parquet dataset that is read first. */
  private val InputPath = "hdfs://hadoop1:9000/input/users.parquet"

  /** HDFS location the dataset is written to and then read back from. */
  private val OutputPath = "hdfs://hadoop1:9000/output/namesAndFavColors_scala"

  /**
   * Program entry point.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("GenericLoadSave")
      .setMaster("local")
    val sc = new SparkContext(conf)

    // Always stop the context, even if a read or write fails; otherwise the
    // resources held by the SparkContext are leaked.
    try {
      val sqlContext = new SQLContext(sc)

      // Read a Parquet file.
      val usersDF = sqlContext.read.format("parquet").load(InputPath)

      // Write the data back out as Parquet, replacing any existing output.
      usersDF.write.mode(SaveMode.Overwrite).format("parquet").save(OutputPath)

      // Read the freshly written copy back and display it.
      val tDF = sqlContext.read.format("parquet").load(OutputPath)
      tDF.show()
    } finally {
      sc.stop()
    }
  }
}