1. 程式人生 > >hadoop 實現檔案壓縮

hadoop 實現檔案壓縮

cd /home/xm/compress   進入compress目錄
touch HadoopImg.java  建立java檔案
vim HadoopImg.java  寫進程式碼 (附後)

對.java檔案進行編譯
javac HadoopImg.java 得到class檔案

打jar包（用 e 選項把 HadoopImg 寫入 manifest 的 Main-Class；否則 hadoop jar 會把 jar 檔之後的第一個引數當成主類別名稱）
jar -cvfe hadoopimgc.jar HadoopImg HadoopImg.class

這裡寫圖片描述

進行壓縮

[root@master compress]# $HADOOP_HOME/bin/hadoop jar ./hadoopimgc.jar compress timg BZip2Codec

hadoopimgc.jar 為要執行的jar包
compress 為要執行的操作（對應程式中的 compress 函式）
timg 為要壓縮的檔案
BZip2Codec 為採用的壓縮方法（org.apache.hadoop.io.compress 套件下的編解碼器類別名稱）
這裡寫圖片描述

這裡寫圖片描述

解壓縮

[root@master compress]# $HADOOP_HOME/bin/hadoop jar ./hadoopimgc.jar decompress timg.bz2

decompress 為要執行的操作（對應程式中的 decompres 函式）
timg.bz2 為要解壓縮的檔案，解壓結果會寫到 timg.bz2.decoded

這裡寫圖片描述

這裡寫圖片描述

java原始碼:

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * Command-line tool that compresses or decompresses a local file using a
 * Hadoop {@link CompressionCodec}.
 *
 * <p>Usage:
 * <pre>
 *   compress   &lt;filename&gt; &lt;codec simple class name, e.g. BZip2Codec&gt;
 *   decompress &lt;filename&gt;
 * </pre>
 */
public class HadoopImg {

    /** Copy buffer size handed to {@link IOUtils#copyBytes}: 5 MB. */
    private static final int BUFFER_SIZE = 5 * 1024 * 1024;

    /** Package prefix prepended to the codec name given on the command line. */
    private static final String CODEC_PACKAGE = "org.apache.hadoop.io.compress.";

    /**
     * Entry point: dispatches to {@link #compress} or {@link #decompres}
     * according to {@code args[0]}.
     *
     * <p>Prints the usage text to standard error and returns when the action is
     * unknown or too few arguments were supplied.
     *
     * @param args {@code compress <filename> <codec>} or {@code decompress <filename>}
     * @throws IOException            if reading or writing a file fails
     * @throws InterruptedException   declared for interface compatibility; not thrown here
     * @throws ClassNotFoundException if the requested codec class does not exist
     */
    public static void main(String[] args)
            throws IOException, InterruptedException, ClassNotFoundException {
        // Check the argument count before indexing so that a short command line
        // produces the usage text instead of ArrayIndexOutOfBoundsException.
        if (args.length >= 3 && "compress".equals(args[0])) {
            compress(args[1], CODEC_PACKAGE + args[2]);
        } else if (args.length >= 2 && "decompress".equals(args[0])) {
            decompres(args[1]);
        } else {
            printUsage();
            return;
        }
        System.out.println("down");
    }

    /** Writes the command-line usage text to standard error. */
    private static void printUsage() {
        System.err.println(
                "Error!\n usage: hadoop jar HadoopImg.jar [compress] [filename] [compress type]");
        System.err.println("\tor [decompress] [filename] ");
    }

    /**
     * Compresses a file with the given codec. The result is written next to the
     * input, named {@code filername} plus the codec's default extension.
     *
     * @param filername path of the original file to compress
     * @param method    fully qualified class name of the codec to use
     *                  (for example {@code org.apache.hadoop.io.compress.BZip2Codec})
     * @throws ClassNotFoundException if {@code method} does not name a loadable class
     * @throws ClassCastException     if the class is not a {@link CompressionCodec}
     * @throws IOException            if the input cannot be read or the output cannot be written
     */
    public static void compress(String filername, String method)
            throws ClassNotFoundException, IOException {
        System.out.println("[" + new Date() + "] : enter compress");

        // Look up the encoder/decoder by class name.
        Class<? extends CompressionCodec> codecClass =
                Class.forName(method).asSubclass(CompressionCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, new Configuration());

        // Open the input before touching the output so that a missing input
        // file leaves any existing output untouched.
        try (InputStream in = new FileInputStream(new File(filername))) {
            // Output name = input name + the extension associated with this codec.
            File fileOut = new File(filername + codec.getDefaultExtension());
            // Best-effort removal of a stale output; the result is deliberately ignored.
            fileOut.delete();

            try (OutputStream out = new FileOutputStream(fileOut);
                 CompressionOutputStream cout = codec.createOutputStream(out)) {
                System.out.println("[" + new Date() + "]: start compressing ");
                // 'false': do not let copyBytes close the streams; try-with-resources does.
                IOUtils.copyBytes(in, cout, BUFFER_SIZE, false);
                System.out.println("[" + new Date() + "]: compressing finished ");
            }
        }
    }

    /**
     * Decompresses a file, choosing the codec via
     * {@link CompressionCodecFactory#getCodec(Path)}. The result is written to
     * {@code filename + ".decoded"}.
     *
     * <p>If no codec is found for the file, a message is printed and nothing is written.
     *
     * @param filename path of the compressed file
     * @throws FileNotFoundException if the input file cannot be opened
     * @throws IOException           if reading or writing fails
     */
    public static void decompres(String filename) throws FileNotFoundException, IOException {
        System.out.println("[" + new Date() + "] : enter decompress");

        CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration());
        CompressionCodec codec = factory.getCodec(new Path(filename));
        if (codec == null) {
            System.out.println("Cannot find codec for file " + filename);
            return;
        }

        File fout = new File(filename + ".decoded");
        try (InputStream rawIn = new FileInputStream(filename);
             InputStream cin = codec.createInputStream(rawIn);
             OutputStream out = new FileOutputStream(fout)) {
            System.out.println("[" + new Date() + "]: start decompressing ");
            // 'false': do not let copyBytes close the streams; try-with-resources does.
            IOUtils.copyBytes(cin, out, BUFFER_SIZE, false);
            System.out.println("[" + new Date() + "]: decompressing finished ");
        }
    }
}