1. 程式人生 > hadoop 檔案目錄操作

hadoop 檔案目錄操作

可以用 bin/hadoop fs -ls 來讀取 HDFS 上的檔案屬性資訊。

也可以採用HDFS的API來讀取。如下:

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileStatus;
public class FileInfo
{
 public static void main(String[] args) throws Exception
 {
  if(args.length != 1){
  System.out.println("Usage FileInfo <target>");
  System.exit(1);
  }
  Configuration conf = new Configuration();
  FileSystem hdfs = FileSystem.get(URI.create(args[0]),conf);
  FileStatus fs = hdfs.getFileStatus(new Path(args[0]));
  System.out.println("path: "+fs.getPath());
  System.out.println("length: "+fs.getLen());
  System.out.println("modify time: "+fs.getModificationTime());
  System.out.println("owner: "+fs.getOwner());
  System.out.println("replication: "+fs.getReplication());
  System.out.println("blockSize: "+fs.getBlockSize());
  System.out.println("group: "+fs.getGroup());
  System.out.println("permission: "+fs.getPermission().toString());
 }

}

/**
 * Lists the entries found under {@code path}.
 *
 * <p>The file system is obtained from {@code FileSystem.get} with a freshly
 * created {@code Configuration}. It is closed before this method returns,
 * including when the existence check or the listing throws.
 *
 * @param path the path whose entries are listed
 * @return the string form of each listed entry's path; an empty array when
 *         {@code path} does not exist
 * @throws IOException if checking, listing, or closing the file system fails
 */
public static String[] getFileList(String path) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    // NOTE(review): FileSystem.get may return a shared, cached instance; closing
    // it here could affect other users in the same JVM - confirm this is intended.
    try {
        List<String> files = new ArrayList<String>();
        Path dir = new Path(path);
        // A missing path is not an error: the result is simply empty.
        if (fs.exists(dir)) {
            for (FileStatus status : fs.listStatus(dir)) {
                files.add(status.getPath().toString());
            }
        }
        return files.toArray(new String[0]);
    } finally {
        // Release the file system even when the lookup or listing above fails.
        fs.close();
    }
}