1. 程式人生 > 學習筆記(六)---------查找相同字母組成的字謎

學習筆記(六)---------查找相同字母組成的字謎

apache ringbuf 作業 tex method add pub per anagram

package com.hadoop.base;

import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.Tool; import
org.apache.hadoop.util.ToolRunner; public class Anagrams extends Configured implements Tool { //寫Map過程 public static class Anagramsmapper extends Mapper<LongWritable, Text, Text, Text> { public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String text
= value.toString(); //將輸入的Text類型的字母表value轉為String類型 char[] textCharArray = text.toCharArray(); //把String類型的字母表轉成字符數組 Arrays.sort(textCharArray); //對字符數組進行排序 String sortedText = new String(textCharArray); //將排序後的字符數組,轉成String字符串 context.write(new Text(sortedText), value); //寫入context,輸出key(排序後的字母表)和輸出value(原始字母表) } } //寫Reduce過程 public static class Anagramsreducer extends Reducer<Text, Text, Text, Text> { public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { StringBuffer res = new StringBuffer(); //新建一個空的StringBuffer實例res int count = 0; //計數器初始值為0 //開始遍歷values裏的值 for (Text text : values) { //如果res數組裏面有值,添加新值的時候先加的一個“,”號作分割符 if(res.length() > 0) { res.append(","); } //往res裏添加values裏的值 res.append(text); //計數 count++; } //必須有兩個或者兩個以上相同字母組成的單詞,才顯示 if(count > 1) { context.write(key, new Text(res.toString())); } } } public static void main(String[] args) throws Exception { // TODO Auto-generated method stub String[] arg0 = { "hdfs://master:9000/middle/anagram/", "hdfs://master:9000/middle/anagram/out/"}; //執行mapreduce int ec = ToolRunner.run(new Configuration(), new Anagrams(), arg0); //設置退出 System.exit(ec); } //寫Run方法 @Override public int run(String[] arg0) throws Exception { // TODO Auto-generated method stub //加載配置 Configuration conf = new Configuration(); //輸出目錄,如果存在就刪除 Path mypath = new Path(arg0[1]); FileSystem fs =mypath.getFileSystem(conf); if(fs.isDirectory(mypath)) { fs.delete(mypath, true); } //創建Job對象 Job job = new Job(conf,"Anagrams"); job.setJarByClass(Anagrams.class); //指定輸入、輸出目錄 FileInputFormat.addInputPath(job, new Path(arg0[0])); FileOutputFormat.setOutputPath(job, new Path(arg0[1])); //指定Mapper和Reduce job.setMapperClass(Anagramsmapper.class); job.setReducerClass(Anagramsreducer.class); //指定Mapper、Reduce的輸出類型 job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); //提交作業 return job.waitForCompletion(true) ? 0: 1; } }
package com.hadoop.base;

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.junit.Before;
import org.junit.Test;

/**
 * MRUnit test for {@code Anagrams.Anagramsmapper}: checks that the mapper
 * emits the sorted letters as the key and the original text as the value.
 */
public class AnagramsMapperTest {
    private Mapper<LongWritable, Text, Text, Text> mapper;
    private MapDriver<LongWritable, Text, Text, Text> driver;

    /** Creates a fresh mapper and driver before each test. */
    @Before
    public void init() {
        mapper = new Anagrams.Anagramsmapper();
        driver = new MapDriver<LongWritable, Text, Text, Text>(mapper);
    }

    /** A reversed alphabet run must come out with a sorted key and an unchanged value. */
    @Test
    public void test() throws IOException {
        String line = "gfedcba";  // deliberately unsorted input
        driver.withInput(new LongWritable(), new Text(line))
              // Expected key is the sorted letters; expected value is the input itself.
              .withOutput(new Text("abcdefg"), new Text(line))
              .runTest();
    }

}
package com.hadoop.base;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.junit.Before;
import org.junit.Test;

/**
 * MRUnit tests for {@code Anagrams.Anagramsreducer}: checks the comma-joined
 * output format and that single-member groups are not written.
 */
public class AnagramsReduceTest {
    private Reducer<Text, Text, Text, Text> reducer;
    private ReduceDriver<Text, Text, Text, Text> driver;

    /** Creates a fresh reducer and driver before each test. */
    @Before
    public void init() {
        reducer = new Anagrams.Anagramsreducer();
        driver = new ReduceDriver<Text, Text, Text, Text>(reducer);
    }

    /** Four values under one key must be joined with commas in input order. */
    @Test
    public void test() throws IOException {
        Text key = new Text("abcdefg"); // the key is passed through unchanged
        List<Text> values = new ArrayList<Text>();
        values.add(new Text("gfedcba"));
        values.add(new Text("decgfba"));
        values.add(new Text("fedgcba"));
        values.add(new Text("gcbfeda"));

        driver.withInput(key, values)
              .withOutput(key, new Text("gfedcba,decgfba,fedgcba,gcbfeda"))
              .runTest();
    }

    /**
     * A key with a single value is below the reducer's two-member threshold,
     * so nothing should be written.
     */
    @Test
    public void singleValueGroupProducesNoOutput() throws IOException {
        List<Text> values = new ArrayList<Text>();
        values.add(new Text("gfedcba"));

        // No withOutput(): relies on runTest() rejecting unexpected outputs
        // when none are declared — confirm for the MRUnit version in use.
        driver.withInput(new Text("abcdefg"), values)
              .runTest();
    }

}

技術分享圖片

技術分享圖片

學習筆記(六)---------查找相同字母組成的字謎