1. 程式人生 > >Hadoop定義的SequenceFile和MapFile的程式設計實現

Hadoop定義的SequenceFile和MapFile的程式設計實現

Hadoop定義了SequenceFile和MapFile兩種型別的資料結構以適應MapReduce程式設計框架的需要,Map輸出的中間結果就是由它們表示的。其中MapFile是經過排序並帶有索引的SequenceFile。

SequenceFile記錄的是key/value對的列表,是序列化後的二進位制檔案,因此是不能直接檢視的,可以通過命令檢視內容:

hadoop fs -text myseq.seq

程式碼實現:

package com.jr.sun.ly;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.hadoop.io.Text;
import org.junit.Test;


/**
 * Demonstrates writing and reading a Hadoop SequenceFile whose records are
 * IntWritable keys paired with Text values.
 */
public class TestSequenceFile {
	/**
	 * Writes three records, all under key 100, to /user/hadoop/myseq.seq.
	 *
	 * @throws IOException if the file system cannot be obtained or a write fails
	 */
	@Test
	public void write() throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		Path name = new Path("/user/hadoop/myseq.seq");
		// Path name = new Path("g:/elyq/myseq.seq");

		// try-with-resources closes the writer even when an append throws,
		// so the underlying output stream is never leaked.
		try (Writer w = SequenceFile.createWriter(fs, conf, name, IntWritable.class, Text.class)) {
			w.append(new IntWritable(100), new Text("tom"));
			w.append(new IntWritable(100), new Text("toms"));
			w.append(new IntWritable(100), new Text("tomLee"));
		}
	}

	/**
	 * Reads /user/hadoop/myseq.seq and prints each record as "key:value".
	 *
	 * @throws IOException if the file cannot be opened or a record cannot be read
	 */
	@Test
	public void read() throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		Path name = new Path("/user/hadoop/myseq.seq");
		// The same key/value instances are refilled on every iteration.
		IntWritable key = new IntWritable();
		Text value = new Text();

		// The reader holds an open input stream; close it when iteration ends
		// or fails.
		try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, name, conf)) {
			// Iterate over every key/value pair: next(key) loads the key,
			// getCurrentValue(value) then loads the matching value.
			while (reader.next(key)) {
				reader.getCurrentValue(value);
				System.out.println(key.get() + ":" + value.toString());
			}
		}
	}
}

MapFile是已經排過序的SequenceFile,它的使用與SequenceFile類似。

package com.jr.sun.ly;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Text;
import org.junit.Test;

/**
 * Demonstrates writing a Hadoop MapFile and looking a key up in it.
 */
public class TestMapFile {
	/**
	 * Writes 1000 records (keys 1..1000, values "tom1".."tom1000") to the
	 * MapFile at /user/hadoop/mymap.
	 *
	 * @throws IOException if the file system cannot be obtained or a write fails
	 */
	@Test
	public void write() throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		String name = "/user/hadoop/mymap";

		// try-with-resources closes the writer even when an append throws.
		try (MapFile.Writer w = new MapFile.Writer(conf, fs, name, IntWritable.class, Text.class)) {
			w.setIndexInterval(256);
			// Keys are appended in ascending order.
			for (int i = 1; i <= 1000; i++) {
				w.append(new IntWritable(i), new Text("tom" + i));
			}
		}
	}

	/**
	 * Looks up the entry closest to key 0 in /user/hadoop/mymap and prints
	 * the key that was found.
	 *
	 * @throws IOException if the MapFile cannot be opened or read
	 */
	@Test
	public void getClosestByKey() throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		String name = "/user/hadoop/mymap";
		// Receives the value of the matched entry.
		Text txt = new Text();

		// The reader keeps the underlying files open; close it when done.
		try (MapFile.Reader reader = new MapFile.Reader(fs, name, conf)) {
			// Key 0 is not among the keys produced by write() (1..1000), so
			// this exercises the "closest match" lookup, not an exact hit.
			IntWritable key = (IntWritable) reader.getClosest(new IntWritable(0), txt);
			System.out.println(key);
		}
	}
}