hadoop 實現檔案壓縮
阿新 • • 發佈:2019-02-18
cd /home/xm/compress 進入compress目錄
touch HadoopImg.java 建立java檔案
vim HadoopImg.java 寫進程式碼 (附後)
對.java檔案進行編譯
javac -cp "$($HADOOP_HOME/bin/hadoop classpath)" HadoopImg.java (編譯時需把Hadoop的jar包加入classpath)
得到class檔案
打jar包
jar -cvfe hadoopimgc.jar HadoopImg HadoopImg.class (e 選項將 HadoopImg 寫入 manifest 的 Main-Class,這樣 hadoop jar 才會把後面的引數傳給 main,而不是把第一個引數當成主類名)
進行壓縮
[root@master compress]# $HADOOP_HOME/bin/hadoop jar ./hadoopimgc.jar compress timg BZip2Codec
hadoopimgc.jar 為要執行的jar包
compress jar包中要執行的函式
timg 要壓縮的檔案
BZip2Codec 採取的壓縮方法
解壓縮
[root@master compress]# $HADOOP_HOME/bin/hadoop jar ./hadoopimgc.jar decompress timg.bz2
decompress 執行的函式
timg.bz2 解壓縮的檔案
java原始碼:
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.util.ReflectionUtils;
/**
 * Command-line utility that compresses or decompresses a local file with a Hadoop
 * {@link CompressionCodec}.
 *
 * <p>Supported invocations (arguments passed to {@link #main(String[])}):
 * <pre>
 *   compress   &lt;filename&gt; &lt;codec simple name, e.g. BZip2Codec&gt;
 *   decompress &lt;filename&gt;
 * </pre>
 */
public class HadoopImg {

    /** Package prefix prepended to the codec name given on the command line. */
    private static final String CODEC_PACKAGE = "org.apache.hadoop.io.compress.";

    /** Buffer size passed to {@code IOUtils.copyBytes}: 5 MB. */
    private static final int BUFFER_SIZE = 5 * 1024 * 1024;

    /**
     * Entry point: dispatches to {@link #compress} or {@link #decompres} based on
     * {@code args[0]}, or prints a usage message when the arguments are missing or
     * the command is unknown.
     *
     * @param args command, file name and (for {@code compress}) the codec class name
     * @throws IOException            if reading or writing a file fails
     * @throws ClassNotFoundException if the requested codec class cannot be loaded
     */
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        // Check the argument count before indexing so a short command line prints
        // the usage text instead of failing with ArrayIndexOutOfBoundsException.
        if (args.length >= 3 && "compress".equals(args[0])) {
            compress(args[1], CODEC_PACKAGE + args[2]);
        } else if (args.length >= 2 && "decompress".equals(args[0])) {
            decompres(args[1]);
        } else {
            System.err.println("Error!\n usage: hadoop jar HadoopImg.jar [compress] [filename] [compress type]");
            System.err.println("\tor [decompress] [filename] ");
            return;
        }
        System.out.println("down");
    }

    /**
     * Compresses a file, writing the result next to it with the codec's default
     * extension appended to the original name.
     *
     * @param filername path of the original file to compress
     * @param method    fully qualified class name of the codec to use
     *                  (e.g. {@code org.apache.hadoop.io.compress.BZip2Codec})
     * @throws ClassNotFoundException if {@code method} cannot be loaded
     * @throws ClassCastException     if {@code method} is not a {@link CompressionCodec}
     * @throws IOException            if the input cannot be read or the output cannot be written
     */
    public static void compress(String filername, String method) throws ClassNotFoundException, IOException {
        System.out.println("[" + new Date() + "] : enter compress");

        // Resolve the codec by name before opening any stream, so a bad codec
        // name cannot leave an open file handle behind.
        Class<? extends CompressionCodec> codecClass =
                Class.forName(method).asSubclass(CompressionCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, new Configuration());

        // Output name = input name + the extension this codec reports as its default.
        // FileOutputStream truncates an existing file, so no explicit delete is needed.
        File fileOut = new File(filername + codec.getDefaultExtension());

        try (InputStream in = new FileInputStream(filername);
             OutputStream out = new FileOutputStream(fileOut);
             CompressionOutputStream cout = codec.createOutputStream(out)) {
            System.out.println("[" + new Date() + "]: start compressing ");
            // close=false: the try-with-resources block owns closing the streams.
            IOUtils.copyBytes(in, cout, BUFFER_SIZE, false);
        }
        // Reported only after every stream has been closed.
        System.out.println("[" + new Date() + "]: compressing finished ");
    }

    /**
     * Decompresses a file into {@code <filename>.decoded}. The codec is obtained from
     * {@link CompressionCodecFactory#getCodec(Path)} for the given file name; if no
     * codec is found a message is printed and nothing is written.
     *
     * @param filename path of the compressed file
     * @throws FileNotFoundException if the compressed file cannot be opened
     * @throws IOException           if reading or writing fails
     */
    public static void decompres(String filename) throws FileNotFoundException, IOException {
        System.out.println("[" + new Date() + "] : enter decompress");

        CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration());
        CompressionCodec codec = factory.getCodec(new Path(filename));
        if (codec == null) {
            System.out.println("Cannot find codec for file " + filename);
            return;
        }

        File fout = new File(filename + ".decoded");
        try (InputStream raw = new FileInputStream(filename);
             InputStream cin = codec.createInputStream(raw);
             OutputStream out = new FileOutputStream(fout)) {
            System.out.println("[" + new Date() + "]: start decompressing ");
            // close=false: the try-with-resources block owns closing the streams.
            IOUtils.copyBytes(cin, out, BUFFER_SIZE, false);
        }
        // Reported only after every stream has been closed.
        System.out.println("[" + new Date() + "]: decompressing finished ");
    }
}