hadoop javaapi讀取資料夾下的資料
阿新 • • 發佈:2018-12-16
導包:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.net.URI;
程式碼:
只需要複製貼上,再把 output(本機輸出檔案的路徑,內容會以附加方式寫入)和 input(要讀取的 HDFS 資料夾路徑)改成自己的就能用了。
如果只要讀取單一檔案,直接呼叫 readHdfsFile() 並傳入該檔案的完整 HDFS 路徑即可。
public class HdfsApp { private static String output="F:/Tags/jiemianshouye"; private static String input="hdfs://192.168.163.120:8020/user/hive/external/jiemianhomepage/dt=2018-12-12"; private static FileSystem getFileSystem(String direPath)throws Exception{ Configuration configuration = new Configuration(); FileSystem fileSystem = FileSystem.get(URI.create(direPath),configuration); return fileSystem; } private static void readHdfsFile(String filePath){ FSDataInputStream fsDataInputStream=null; try { Path path = new Path(filePath); fsDataInputStream = getFileSystem(filePath).open(path); OutputStream out = new FileOutputStream(output,true); IOUtils.copyBytes(fsDataInputStream,out,4096,false); }catch (Exception e){ e.printStackTrace(); }finally { if(fsDataInputStream != null){ IOUtils.closeStream(fsDataInputStream); } } } public static void getDirectoryFromHdfs(String direPath) throws Exception { FileSystem fs = getFileSystem(direPath); FileStatus[] filelist = fs.listStatus(new Path(direPath)); for (int i = 0; i < filelist.length; i++) { System.out.println("_________________第" + i + "個檔案" + "____________________"); FileStatus fileStatus = filelist[i]; System.out.println("Name:" + fileStatus.getPath().getName()); System.out.println("Path:" + fileStatus.getPath()); readHdfsFile(fileStatus.getPath().toString()); System.out.println("size:" + fileStatus.getLen()); System.out.println("_________________第" + i + "個檔案" + "____________________"); } fs.close(); } public static void main(String[] args) throws Exception{ HdfsApp hdfsApp = new HdfsApp(); //hdfsApp.readHdfsFile("/user/data/JMRecommend/data-20181205-1/news/vectorize"); ///user/hive/external/jiemianhomepage/dt=2018-12-12 getDirectoryFromHdfs(input); // String inPath="D://workSpace/src/main/resources/hdfs-site.xml"; // String outPath="hdfs://ns/user/kfk/data/local.xml"; //hdfsApp.writeHdfsFile(inPath,outPath); } }
希望能幫到有需要的朋友