hdfs遍歷檔案方法
阿新 • • 發佈:2018-11-11
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
/**
 * Demo: recursively walks an HDFS directory tree and prints each entry it finds.
 */
class HelloWord {
    /** URI of the HDFS NameNode this demo connects to. */
    private static final String HDFS_URI = "hdfs://10.8.6.126:8020";

    /** Fully qualified directory whose contents are listed. */
    private static final String ROOT_DIR = HDFS_URI + "/ada/lyy/App";

    public static void main(String[] args) {
        Hdfs();
    }

    /**
     * Approach 1: lists {@link #ROOT_DIR} recursively with
     * {@code FileSystem.listFiles(path, true)} and prints the full path and the
     * bare file name of every file found.
     *
     * <p>I/O failures are reported on stderr via {@code printStackTrace()}; nothing
     * is rethrown.
     */
    public static void Hdfs() {
        Configuration conf = new Configuration();
        // Setting fs.defaultFS selects the cluster. Passing the URI directly to
        // FileSystem.get(URI, Configuration) is an equivalent way to do it.
        conf.set("fs.defaultFS", HDFS_URI);

        // try-with-resources closes the FileSystem handle even when listing fails.
        try (FileSystem fs = FileSystem.get(conf)) {
            RemoteIterator<LocatedFileStatus> entries = fs.listFiles(new Path(ROOT_DIR), true);
            while (entries.hasNext()) {
                LocatedFileStatus entry = entries.next();
                Path path = entry.getPath();
                if (entry.isFile()) {
                    System.out.println("檔案:[" + path.toString() + "]");
                    // getName() is the last path component only, without any parent directories.
                    System.out.println("檔名:[" + path.getName() + "]");
                } else {
                    // NOTE(review): per the Hadoop docs listFiles reports files only, so this
                    // branch is presumably never taken; kept as a defensive fallback.
                    System.out.println("目錄:[" + path.toString() + "]");
                }
            }
        } catch (IOException e) {
            // Printing e.getStackTrace() would only show the array's identity string.
            e.printStackTrace();
        }
    }
}
//方法2:
public static void Hdfs(){
try{
Configuration conf = new Configuration();
conf.set("fs.defaultFS","hdfs://10.8.6.126:8020");
FileSystem fs = null;
//fs = FileSystem.get(new URI("hdfs://10.8.6.126:8020"),conf); //這兩種方式都可以配置hdfs ip
fs = FileSystem.get(conf);
Path path = new Path("/shell");
//通過fs的listStatus方法獲取一個指定path的所有檔案資訊(status),因此我們需要傳入一個hdfs的路徑,返回的是一個filStatus陣列
FileStatus[] fileStatuses = fs.listStatus(path);
for (FileStatus fileStatus : fileStatuses) {
//判斷當前迭代物件是否是目錄
boolean isDir = fileStatus.isDirectory();
//獲取當前檔案的絕對路徑
String fullPath = fileStatus.getPath().toString();
System.out.println("isDir:" + isDir + ",Path:" + fullPath);
}
}
同時讀取本地和hdfs目錄:
/**
 * Demo: looks up the single entry inside a model's {@code metadata} directory,
 * where the model path may point either at HDFS ({@code hdfs:}) or at the local
 * file system ({@code file:}).
 */
public class TestHdfs {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // To read the model from the cluster instead, use:
        // "hdfs://10.8.6.126:8020/ada/lyy/data/NaiveBayesModel.model"
        String path = "file:///home/liyanyan/cluster/NaiveBayesModel.model";
        String classfile = "";

        // try-with-resources closes the FileSystem handle even when listing fails.
        try (FileSystem fs = openFileSystem(path, conf)) {
            // The filter rejects names that start with '.' or '_' followed by a word character.
            FileStatus[] listFile =
                    fs.listStatus(new Path(path + "/metadata"), new RegxRejectPathFilter("^[._]+\\w+.*$"));
            if (listFile.length == 1) {
                classfile = listFile[0].getPath().toString();
            } else {
                // Anything other than exactly one match leaves classfile empty; say so
                // instead of ignoring it silently.
                System.err.println("Expected exactly one entry under " + path + "/metadata but found "
                        + listFile.length);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println("classfile = " + classfile.replaceFirst("file:", "file://"));
    }

    /**
     * Opens the file system that matches the scheme of {@code path}.
     *
     * @param path location starting with {@code hdfs:} or {@code file:}
     * @param conf configuration to use; for HDFS paths its {@code fs.defaultFS} is
     *     overwritten with the scheme-and-authority prefix of {@code path}
     * @return the HDFS or local file system, never {@code null}
     * @throws IOException if the file system cannot be obtained
     * @throws IllegalArgumentException if {@code path} uses any other scheme
     */
    private static FileSystem openFileSystem(String path, Configuration conf) throws IOException {
        if (path.startsWith("hdfs:")) {
            // Skip past "hdfs://" and cut at the first '/' after the authority.
            int authorityEnd = path.indexOf('/', path.indexOf(':') + 3);
            // With no path component the whole string is already the authority.
            String defaultFs = authorityEnd < 0 ? path : path.substring(0, authorityEnd);
            conf.set("fs.defaultFS", defaultFs);
            return FileSystem.get(conf);
        }
        if (path.startsWith("file:")) {
            return FileSystem.getLocal(conf);
        }
        throw new IllegalArgumentException(
                "Unsupported path scheme (expected hdfs: or file:): " + path);
    }

    /** Path filter that accepts every path whose name does NOT fully match a regex. */
    private static class RegxRejectPathFilter implements PathFilter {
        /** Compiled once so the regex is not re-parsed for every candidate path. */
        private final Pattern pattern;

        /**
         * @param regex expression matched against the whole final path component
         */
        public RegxRejectPathFilter(String regex) {
            this.pattern = Pattern.compile(regex);
        }

        /**
         * @return {@code true} when the path's name does not match the regex
         */
        @Override
        public boolean accept(Path path) {
            return !pattern.matcher(path.getName()).matches();
        }
    }
}