讀取hdfs上指定檔案中的內容
阿新 · 發佈:2018-11-12
package com.yc.hadoop.hdfs;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.Scanner;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import org.apache.log4j.Logger;
/**
 * Reads the contents of a user-selected file on HDFS: lists the files under
 * {@code /input}, prompts for a file name on stdin, then streams that file's
 * bytes to stdout.
 *
 * @company 源辰資訊
 * @author navy
 */
public class Hadoop_HdfsApi05 {
    private static final Logger log = Logger.getLogger(Hadoop_HdfsApi05.class); // class logger

    public static void main(String[] args) {
        FileSystem fs = null;
        // try-with-resources guarantees the Scanner is closed even when an
        // exception is thrown (the original only closed it on the happy path).
        try (Scanner input = new Scanner(System.in)) {
            Configuration conf = new Configuration();           // load Hadoop configuration
            URI uri = new URI("hdfs://192.168.30.130:9000/");   // HDFS namenode address
            fs = FileSystem.get(uri, conf, "navy");             // file-system handle, acting as user "navy"

            // List the files under /input so the user can pick one.
            FileStatus[] files = fs.listStatus(new Path("/input"));
            System.out.println("該目錄下的檔名有:");
            for (FileStatus f : files) {
                System.out.println("\t" + f.getPath().getName());
            }

            System.out.print("請輸入要檢視的檔名:");
            Path p = new Path("/input/" + input.next());        // file chosen by the user
            System.out.println("要檢視的檔案路徑為:" + fs.getFileStatus(p).getPath());

            // HDFS data must be read through the Hadoop FileSystem API;
            // java.io file streams cannot open an hdfs:// URI.
            // try-with-resources closes the stream even if read() throws.
            try (FSDataInputStream fsin = fs.open(fs.getFileStatus(p).getPath())) {
                byte[] bs = new byte[1024 * 1024];
                int len;
                while ((len = fsin.read(bs)) != -1) {
                    // Decode explicitly as UTF-8 instead of the platform default.
                    // NOTE(review): a multi-byte character split across buffer
                    // boundaries can still decode incorrectly at the seam.
                    System.out.print(new String(bs, 0, len, StandardCharsets.UTF_8));
                }
            }
            System.out.println();
        } catch (Exception e) {
            log.error("hdfs操作失敗!!!", e);
        } finally {
            // Release the HDFS connection; the original leaked it.
            if (fs != null) {
                try {
                    fs.close();
                } catch (Exception ignored) {
                    // best-effort close; nothing useful to do on failure
                }
            }
        }
    }
}
import java.net.URI;
import java.util.Scanner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;
/**
* 讀取hdfs上指定檔案中的內容
* @company 源辰資訊
* @author navy
*/
public class Hadoop_HdfsApi05 {
public static void main(String[] args) {
Scanner input = new Scanner(System.in);
FileSystem fs = null;
try {
Configuration conf = new Configuration(); // 載入配置檔案
URI uri = new URI("hdfs://192.168.30.130:9000/"); // 連線資源位置
fs = FileSystem.get(uri,conf,"navy"); // 建立檔案系統例項物件
//FileStatus[] files = fs.listStatus(new Path("/user/navy/")); // 列出檔案
FileStatus[] files = fs.listStatus(new Path("/input")); // 列出檔案
System.out.println("該目錄下的檔名有:");
for (FileStatus f : files) {
System.out.println("\t" + f.getPath().getName());
System.out.print("請輸入要檢視的檔名:");
Path p= new Path("/input/"+input.next()); // 預設是讀取/user/navy/下的指定檔案
System.out.println("要檢視的檔案路徑為:" + fs.getFileStatus(p).getPath());
//不可以用java檔案流讀取資料hdfs上的資料
//System.out.println(fs.getFileStatus(p).getPath().toUri());
//File remoteFile = new File(fs.getFileStatus(p).getPath().toUri());
//InputStream in = new FileInputStream(remoteFile);
//FSDataInputStream fsin = new FSDataInputStream(in);
FSDataInputStream fsin = fs.open(fs.getFileStatus(p).getPath());
byte[] bs = new byte[1024 * 1024];
int len = 0;
while((len = fsin.read(bs)) != -1){
System.out.print(new String(bs, 0, len));
}
System.out.println();
fsin.close();
input.close();
} catch (Exception e) {
log.error("hdfs操作失敗!!!", e);
}
}
}