// 讀取hdfs檔案上的第二個塊的資料 (Reads the data of the second HDFS block of a file)
// Source: blog post by 阿新, published 2018-12-24
package com.ghgj.cn.zy;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.io.IOUtils;
public class InputSecondBlock {
/**
 * Reads the second HDFS block of {@code /tt/aa/ff.txt} and copies its bytes
 * to a new HDFS file {@code /test01}.
 *
 * <p>The block boundaries are obtained from the file's {@link BlockLocation}
 * metadata; the input stream is seeked to the second block's offset and
 * exactly {@code length} bytes are copied.
 *
 * @param args unused
 * @throws IOException if the file is missing, has fewer than two blocks,
 *         or any HDFS I/O fails
 * @throws InterruptedException if connecting to HDFS is interrupted
 * @throws URISyntaxException never in practice (the URI literal is valid)
 */
public static void main(String[] args) throws IOException, InterruptedException, URISyntaxException {
Configuration conf = new Configuration();
// Connect to the NameNode as user "hadoop".
FileSystem fs = FileSystem.get(new URI("hdfs://hadoop01:9000"), conf, "hadoop");
// Source file whose second block we want.
Path p = new Path("/tt/aa/ff.txt");
// listFiles exposes per-file block location metadata.
RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(p, false);
if (!listFiles.hasNext()) {
throw new IOException("File not found: " + p);
}
LocatedFileStatus next = listFiles.next();
BlockLocation[] bl = next.getBlockLocations();
// Guard: a file smaller than one HDFS block size has no second block,
// and bl[1] would throw a bare ArrayIndexOutOfBoundsException.
if (bl.length < 2) {
throw new IOException(
"File " + p + " has only " + bl.length + " block(s); no second block to read");
}
long offset = bl[1].getOffset(); // first byte of the second block
long length = bl[1].getLength(); // byte count of the second block
// try-with-resources guarantees both streams are closed even if seek()
// or the copy fails, so copyBytes is told NOT to close them itself.
try (FSDataInputStream in = fs.open(p);
FSDataOutputStream out = fs.create(new Path("/test01"))) {
// Position the input stream at the start of the second block.
in.seek(offset);
IOUtils.copyBytes(in, out, length, false);
}
}
}