spark 讀取orc檔案
阿新 • 發佈:2018-12-10
<dependency>
<groupId>org.apache.orc</groupId>
<artifactId>orc-mapreduce</artifactId>
<version>1.1.0</version>
</dependency>
// Build the Spark configuration for the ORC-reading job.
SparkConf sparkConf = new SparkConf();
sparkConf.setAppName("spark-orc");
// Explicitly bind the hdfs:// scheme to DistributedFileSystem. This works around
// "No FileSystem for scheme: hdfs" errors that occur when the Hadoop filesystem
// ServiceLoader metadata gets clobbered in a shaded/fat jar.
sparkConf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
JavaSparkContext sc = new JavaSparkContext(sparkConf);
// Read the ORC file at args[0] with the old-mapred-API reader.
// Fixes vs. the original snippet:
//  - "rcInputFormat.class" was a typo; the class is org.apache.orc.mapred.OrcInputFormat
//    (sc.hadoopFile requires the old org.apache.hadoop.mapred API — the
//    org.apache.orc.mapreduce variant would need newAPIHadoopFile instead).
//  - OrcInputFormat extends FileInputFormat<NullWritable, V>, so the key class is
//    NullWritable, not LongWritable; the original call did not type-check.
// NOTE(review): requires imports for org.apache.orc.mapred.OrcInputFormat and
// org.apache.hadoop.io.NullWritable — confirm the file's import block.
JavaPairRDD<NullWritable, OrcStruct> rdd = sc.hadoopFile(
        args[0],
        OrcInputFormat.class,
        NullWritable.class,
        OrcStruct.class);