package cn.itcast.hadoop;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper of the word-count job.
 *
 * <p>For every input line it writes one {@code (word, 1)} pair per
 * whitespace-separated token. Blank lines and repeated separators produce no
 * output, so empty strings are never counted as words.
 */
public class wordcountmapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    /**
     * Tokenizes one line and emits {@code (token, 1)} for each non-empty token.
     *
     * @param key     input key supplied by the framework; not used here
     * @param value   the line of text to tokenize
     * @param context sink that receives the {@code (word, 1)} pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value,
                       Mapper<LongWritable, Text, Text, IntWritable>.Context context)
            throws IOException, InterruptedException {
        // Split on runs of whitespace so that tabs and repeated spaces act as
        // a single separator.
        String[] words = value.toString().split("\\s+");
        Text text = new Text();
        IntWritable one = new IntWritable(1);
        for (String word : words) {
            // A blank line or leading whitespace still yields one empty
            // token; it must not be emitted as a word.
            if (word.isEmpty()) {
                continue;
            }
            text.set(word);
            context.write(text, one);
        }
    }

    /** Placeholder entry point; intentionally does nothing. */
    public static void main(String[] args) {
    }

}

package cn.itcast.hadoop;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Reducer of the word-count job: adds up all counts received for a word and
 * writes the word together with that total.
 */
public class wordcountreduce extends
        Reducer<Text, IntWritable, Text, IntWritable> {
    /**
     * Sums the values of one key and emits {@code (key, sum)}.
     *
     * @param key     the word being reduced
     * @param values  the counts collected for that word
     * @param context sink that receives the {@code (word, total)} pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values,
                          Reducer<Text, IntWritable, Text, IntWritable>.Context context)
            throws IOException, InterruptedException {
        int total = 0;
        java.util.Iterator<IntWritable> cursor = values.iterator();
        while (cursor.hasNext()) {
            total = total + cursor.next().get();
        }
        IntWritable result = new IntWritable(total);
        context.write(key, result);
    }

    /** Placeholder entry point; intentionally does nothing. */
    public static void main(String[] args) {
    }

}

package cn.itcast.hadoop;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver that configures and submits the word-count job.
 *
 * <p>Usage: {@code wordcountdrive <input path> <output path>}
 */
public class wordcountdrive {

    /**
     * Builds the job from {@link wordcountmapper} and {@link wordcountreduce},
     * runs it and exits with 0 on success or 1 on failure.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if the job cannot be created, submitted or completed
     */
    public static void main(String[] args) throws Exception {
        // Fail with a readable message instead of an
        // ArrayIndexOutOfBoundsException when a path is missing.
        if (args.length < 2) {
            System.err.println("Usage: wordcountdrive <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(wordcountdrive.class);

        job.setMapperClass(wordcountmapper.class);
        job.setReducerClass(wordcountreduce.class);

        // Key/value types written by the mapper.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Key/value types written by the reducer.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.setInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }

}

2.資料去重

"資料去重"主要是為了掌握和利用並行化思想來對資料進行有意義的篩選。統計大資料集上的資料種類個數、從網站日誌中計算訪問地等這些看似龐雜的任務都會涉及資料去重。下面就進入這個例項的MapReduce程式設計。

2.1 例項描述

1）

66 55 23 23 55

2）

12 23 12 66

2.2 設計思路

資料去重的最終目標是讓原始資料中出現次數超過一次的資料在輸出檔案中只出現一次。我們自然而然會想到將同一個資料的所有記錄都交給一臺reduce機器，無論這個資料出現多少次，只要在最終結果中輸出一次就可以了。具體就是reduce的輸入應該以資料作為key，而對value-list則沒有要求。當reduce接收到一個<key，value-list>時就直接將key複製到輸出的key中，並將value設定成空值。

　　在MapReduce流程中，map的輸出<key，value>經過shuffle過程聚集成<key，value-list>後會交給reduce。所以從設計好的reduce輸入可以反推出map的輸出key應為資料，value任意。繼續反推，map輸出資料的key為資料，而在這個例項中每個資料代表輸入檔案中的一行內容，所以map階段要完成的任務就是在採用Hadoop預設的作業輸入方式之後，將value設定為key，並直接輸出（輸出中的value任意）。map中的結果經過shuffle過程之後交給reduce。reduce階段不會管每個key有多少個value，它直接將輸入的key複製為輸出的key，並輸出就可以了（輸出中的value被設定成空了）。

2.3 程式程式碼

package cn.cast.hadoop;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Mapper of the de-duplication job: forwards every input line as the output
 * key, paired with an empty {@link Text} value.
 */
public class RemoveRepeatmapper extends Mapper<LongWritable, Text, Text, Text> {
    /**
     * Emits {@code (line, "")} for the given input line.
     *
     * @param key     input key supplied by the framework; not used here
     * @param value   the input line, written unchanged as the output key
     * @param context sink that receives the pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Only the key matters downstream; the value is an empty placeholder.
        Text placeholder = new Text("");
        context.write(value, placeholder);
    }

    /** Placeholder entry point; intentionally does nothing. */
    public static void main(String[] args) {
    }
}

package cn.cast.hadoop;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Reducer of the de-duplication job: writes each key it receives exactly once
 * per {@code reduce} call, paired with an empty {@link Text} value.
 */
public class RemoveRepeatreducer extends Reducer<Text, Text, Text, Text> {
    /**
     * Emits {@code (key, "")}; the incoming values are not read.
     *
     * @param key     the key to output
     * @param values  values grouped under the key; ignored
     * @param context sink that receives the pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        Text placeholder = new Text("");
        context.write(key, placeholder);
    }

    /** Placeholder entry point; intentionally does nothing. */
    public static void main(String[] args) {
    }
}

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;



public class RemoveRepeatdrive {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(RemoveRepeatdrive.class);
        job.setMapperClass(RemoveRepeatmapper.class);
        job.setReducerClass(RemoveRepeatreducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        boolean b=job.waitForCompletion(true);
        System.exit(b?0:1);
    }
}

3.班級學科平均分數

"平均成績"主要目的還是在重溫經典"WordCount"例子，可以說是在其基礎上的微變化版，該例項主要就是實現一個計算學生平均成績的例子。

3.1 例項描述

樣本輸入

1）

張三：

math 88

english 86

history 78

李四：

math 98

english 66

history 82

3.2 設計思路

將學科名字作為map輸出的key，學科分數作為map輸出的value，型別分別為Text和IntWritable；傳入reduce時，同一學科的分數會被放入迭代器，遍歷迭代器將所有值相加後除以學生人數（本例為2），得到平均分。

3.3 程式程式碼

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Mapper of the average-score job.
 *
 * <p>Each well-formed input line has the shape {@code <subject> <score>}; for
 * such a line the mapper emits {@code (subject, score)}. Lines that do not
 * carry both fields (blank lines, single-token header lines) or whose score is
 * not an integer are skipped and counted, instead of failing the task.
 */
public class Averagemapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    /**
     * Parses one line and emits {@code (subject, score)} when it is well formed.
     *
     * @param key     input key supplied by the framework; not used here
     * @param value   the input line
     * @param context sink that receives the pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().trim().split("\\s+");

        // Fewer than two tokens: there is no score to read on this line.
        if (fields.length < 2) {
            context.getCounter("Averagemapper", "SKIPPED_LINES").increment(1);
            return;
        }

        int score;
        try {
            score = Integer.parseInt(fields[1]);
        } catch (NumberFormatException ignored) {
            // Second token is not an integer score; skip the record but keep
            // it visible through the job counter.
            context.getCounter("Averagemapper", "SKIPPED_LINES").increment(1);
            return;
        }

        context.write(new Text(fields[0]), new IntWritable(score));
    }

    /** Placeholder entry point; intentionally does nothing. */
    public static void main(String[] args) {

    }
}

package cn.itcast.hadoop;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Reducer of the average-score job: emits, for each key, the integer average
 * of all scores received for it.
 */
public class Averagereducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    /**
     * Averages the values of one key and emits {@code (key, average)}.
     *
     * <p>The divisor is the number of values actually received, so the result
     * is correct for any number of scores per key. The division is integer
     * division, so the fractional part is discarded.
     *
     * @param key     the key whose scores are averaged
     * @param values  the scores collected for that key
     * @param context sink that receives the pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int sum = 0;
        int scores = 0;
        for (IntWritable score : values) {
            sum += score.get();
            scores++;
        }
        // Defensive: avoids a division by zero if no values were supplied.
        if (scores == 0) {
            return;
        }
        context.write(key, new IntWritable(sum / scores));
    }

    /** Placeholder entry point; intentionally does nothing. */
    public static void main(String[] args) {

    }
}

package cn.itcast.hadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Driver that configures and submits the average-score job.
 *
 * <p>Usage: {@code Averagedriver <input path> <output path>}
 */
public class Averagedriver {
    /**
     * Builds the job from {@link Averagemapper} and {@link Averagereducer},
     * runs it and exits with 0 on success or 1 on failure.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if the job cannot be created, submitted or completed
     */
    public static void main(String[] args) throws Exception{
        // Fail with a readable message instead of an
        // ArrayIndexOutOfBoundsException when a path is missing.
        if (args.length < 2) {
            System.err.println("Usage: Averagedriver <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(Averagedriver.class);

        job.setMapperClass(Averagemapper.class);
        job.setReducerClass(Averagereducer.class);

        // Key/value types written by the mapper.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Key/value types written by the reducer.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.setInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

參考：http://www.cnblogs.com/xia520pi/archive/2012/06/04/2534533.html

持續更新~

mapreduce初級案例

1.單詞統計

1.1 例項描述

1.2 設計思路

1.3 程式程式碼

2.資料去重

2.1 例項描述

2.2 設計思路

2.3 程式程式碼

3.班級學科平均分數

3.1 例項描述

3.2 設計思路

3.3 程式程式碼

mapreduce初級案例

MapReduce初級案例（3）：使用MapReduce實現平均成績

MapReduce初級經典案例實現

Hadoop-MapReduce計算案例1：WordCount

siki學院_Unity初級案例_憤怒的小鳥_學習筆記2/3

MapReduce程式設計案例系列篇（1-9）

第十七天 -- IDEA -- MAVEN -- AWK -- MapReduce簡單案例

路由事件初級案例

Unity3D初級案例-經典貪吃蛇一

java8之Lambda表示式 4：MapReduce開發案例

初學MapReduce-WordCount案例遇到的問題

MapReduce程式設計案例

大資料_Shuffle、MapReduce程式設計案例(資料去重、多表查詢、倒排索引、使用單元測試)

05 MapReduce應用案例03

Unity3D初級案例-經典貪吃蛇二

MapReduce程序之序列化原理與Writable案例

大數據采集、清洗、處理：使用MapReduce進行離線數據分析完整案例

mapreduce 統計PV案例

mapreduce 高級案例倒排索引

大數據之---Yarn偽分布式部署和MapReduce案例

mapreduce初級案例

1.單詞統計

1.1 例項描述

1.2 設計思路

1.3 程式程式碼

2.資料去重

2.1 例項描述

2.2 設計思路

2.3 程式程式碼

3.班級學科平均分數

3.1 例項描述

3.2 設計思路

3.3 程式程式碼

相關推薦