MapReduce功能實現六---最大值(Max)、求和(Sum)、平均值(Avg)

阿新 • • 發佈：2019-01-30

MapReduce功能實現系列：

一、最大值（Max）

情況1：

[[email protected] q1]$ vi ql.txt
aa 111
22 555
[[email protected] q1]$ hadoop fs -put ql.txt /input

java程式碼：

import java.io.IOException;
import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.conf.Configured; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.io.LongWritable; 
import org.apache.hadoop.io.IntWritable; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.mapreduce.Job; 
import org.apache.hadoop.mapreduce.Mapper; 
import org.apache.hadoop.mapreduce.Reducer; 
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 
import org.apache.hadoop.util.Tool; 
import org.apache.hadoop.util.ToolRunner; 

/**
 * MapReduce job that reports the largest integer found among the
 * whitespace-separated tokens of a text input.
 *
 * <p>Every map task tracks the largest integer it has seen and emits it once,
 * under the fixed key {@code "Max"}, when the task finishes. The reducer (also
 * registered as the combiner) keeps the largest value per key.
 */
public class MaxValue extends Configured implements Tool {

	/** Key under which every partial and final maximum is emitted. */
	private static final String MAX_KEY = "Max";

	/**
	 * Scans each input line for integer tokens and remembers the largest one
	 * seen by this task; the result is written once, in {@link #cleanup}.
	 */
	public static class MapClass extends Mapper<LongWritable, Text, Text, IntWritable> {
		/** Largest integer seen so far; only meaningful once {@code seen} is true. */
		private int maxNum = Integer.MIN_VALUE;
		/** Whether at least one integer token has been parsed by this task. */
		private boolean seen = false;

		/**
		 * Updates the running maximum with every integer token on the line.
		 *
		 * @param key     byte offset of the line (unused)
		 * @param value   one line of input text
		 * @param context task context (unused here; output happens in cleanup)
		 */
		@Override
		public void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			for (String token : value.toString().trim().split("\\s+")) {
				try {
					int candidate = Integer.parseInt(token);
					if (!seen || candidate > maxNum) {
						maxNum = candidate;
						seen = true;
					}
				} catch (NumberFormatException ignored) {
					// Non-numeric tokens are skipped on purpose. The try is per
					// token so that one bad token does not hide the numbers
					// that follow it on the same line.
				}
			}
		}

		/**
		 * Emits this task's maximum, or nothing if no integer was seen, so a
		 * task without numbers cannot inject a bogus value.
		 */
		@Override
		protected void cleanup(Context context) throws IOException,
		InterruptedException {
			if (seen) {
				context.write(new Text(MAX_KEY), new IntWritable(maxNum));
			}
		}
	}

	/**
	 * Emits, for each key, the largest of its values. It keeps no state
	 * between calls, so it can be used both as combiner and as reducer.
	 */
	public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {

		/**
		 * Writes {@code (key, max(values))}.
		 *
		 * @param key     grouping key (always {@code "Max"} for this job's mapper)
		 * @param values  partial maxima for the key
		 * @param context used to write the result
		 */
		@Override
		public void reduce(Text key, Iterable<IntWritable> values, Context context)
				throws IOException, InterruptedException {
			boolean seen = false;
			int maxNum = Integer.MIN_VALUE;
			for (IntWritable val : values) {
				int current = val.get();
				if (!seen || current > maxNum) {
					maxNum = current;
					seen = true;
				}
			}
			if (seen) {
				// Written while the key is current, so no reference to the
				// key object has to be kept beyond this call.
				context.write(key, new IntWritable(maxNum));
			}
		}
	}

	/**
	 * Configures and runs the job.
	 *
	 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
	 * @return 0 if the job succeeded, 1 if it failed, 2 on bad usage
	 */
	@Override
	public int run(String[] args) throws Exception {
		if (args.length < 2) {
			System.err.println("Usage: MaxValue <in> <out>");
			return 2;
		}
		Configuration conf = getConf();
		// NOTE(review): kept from the original; presumably a fallback jar name
		// for when setJarByClass cannot locate a jar - confirm it is still needed.
		conf.set("mapred.jar","mv.jar");
		Job job = Job.getInstance(conf, "MaxNum");
		job.setJarByClass(MaxValue.class);
		FileInputFormat.setInputPaths(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));
		job.setMapperClass(MapClass.class);
		job.setCombinerClass(Reduce.class);
		job.setReducerClass(Reduce.class);
		job.setInputFormatClass(TextInputFormat.class);
		job.setOutputFormatClass(TextOutputFormat.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);
		// Return the status instead of exiting here, so that the caller
		// (ToolRunner / main) still gets control after the job finishes.
		return job.waitForCompletion(true) ? 0 : 1;
	}

	/**
	 * Runs the job through {@link ToolRunner}, prints the elapsed wall-clock
	 * time in nanoseconds, and exits with the job's status code.
	 */
	public static void main(String[] args) throws Exception {
		long start = System.nanoTime();
		int res = ToolRunner.run(new Configuration(), new MaxValue(), args);
		System.out.println(System.nanoTime()-start);
		System.exit(res);
	}
}

[[email protected] q1]$ /usr/jdk1.7.0_25/bin/javac MaxValue.java
[[email protected] q1]$ /usr/jdk1.7.0_25/bin/jar cvf xx.jar MaxValue*class
[[email protected] q1]$ hadoop jar xx.jar MaxValue /input/ql.txt /output
[[email protected] q1]$ hadoop fs -cat /output/part-r-00000
Max     555

*************
setup()：此方法在每個Map任務開始時被MapReduce框架呼叫，且僅呼叫一次，用於在處理任何輸入記錄之前集中初始化相關變數或資源。若是將資源初始化工作放在方法map()中，Mapper任務在解析每一行輸入時都會重複進行一次初始化，程式執行效率不高！
cleanup()：此方法在每個Map任務處理完所有輸入記錄之後被MapReduce框架呼叫，且僅呼叫一次，用於釋放相關變數或資源。若是將釋放資源的工作放入方法map()中，Mapper任務每解析、處理完一行文字就會釋放一次資源，而且在解析下一行文字前還要重新初始化，如此反覆，程式執行效率不高！
*************

情況2：

[[email protected] q1]$ vi ceshi.txt
2
8
8
3
2
3
5
3
0
2
7
[[email protected] q1]$ hadoop fs -put ceshi.txt /input

java程式碼：

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class Max {
	
    public static class MaxMapper extends Mapper<LongWritable, Text, LongWritable, NullWritable> {
        public long max = Long.MIN_VALUE;
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            max = Math.max(Long.parseLong(value.toString()), max);
        }
        protected void cleanup(Mapper.Context context) throws IOException, InterruptedException {
            context.write(new LongWritable(max), NullWritable.get());
        }
    }

    public static class MaxReducer extends Reducer<LongWritable, NullWritable, LongWritable, NullWritable> {
        public long max = Long.MIN_VALUE;
        public void reduce(LongWritable key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            max = Math.max(max, key.get());
        }
        protected void cleanup(Reducer.Context context) throws IOException, InterruptedException {
            context.write(new LongWritable(max), NullWritable.get());
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.err.println("Usage: Max <in> [<in>...] <out>");
            System.exit(2);
        }

        Job job = Job.getInstance(conf, "Max");
        job.setJarByClass(Max.class);
        job.setMapperClass(MaxMapper.class);
        job.setCombinerClass(MaxReducer.class);
        job.setReducerClass(MaxReducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(NullWritable.class);

        for (int i = 0; i < otherArgs.length - 1; ++i) {
            FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
        }
        FileOutputFormat.setOutputPath(job,
                new Path(otherArgs[otherArgs.length - 1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

[[email protected] q1]$ /usr/jdk1.7.0_25/bin/javac Max.java
[[email protected] q1]$ /usr/jdk1.7.0_25/bin/jar cvf xx.jar Max*class
[[email protected] q1]$ hadoop jar xx.jar Max /input/ceshi.txt /output

[[email protected] q1]$ hadoop fs -cat /output/part-r-00000
8

二、求和（Sum）

[[email protected] q1]$ vi ceshi.txt
2
8
8
3
2
3
5
3
0
2
7
[[email protected] q1]$ hadoop fs -put ceshi.txt /input

java程式碼：

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class Sum {

    public static class SumMapper extends Mapper<LongWritable, Text, LongWritable, NullWritable> {
        public long sum = 0;
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            sum += Long.parseLong(value.toString());
        }
        protected void cleanup(Context context) throws IOException, InterruptedException {
            context.write(new LongWritable(sum), NullWritable.get());
        }
    }

    public static class SumReducer extends Reducer<LongWritable, NullWritable, LongWritable, NullWritable> {
        public long sum = 0;
        public void reduce(LongWritable key, Iterable<NullWritable> values, Context context)
                throws IOException, InterruptedException {
            sum += key.get();
        }
        protected void cleanup(Context context) throws IOException, InterruptedException {
            context.write(new LongWritable(sum), NullWritable.get());
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.err.println("Usage: Sum <in> [<in>...] <out>");
            System.exit(2);
        }

        Job job = Job.getInstance(conf, "Sum");
        job.setJarByClass(Sum.class);
        job.setMapperClass(SumMapper.class);
        job.setCombinerClass(SumReducer.class);
        job.setReducerClass(SumReducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(NullWritable.class);

        for (int i = 0; i < otherArgs.length - 1; ++i) {
            FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
        }
        FileOutputFormat.setOutputPath(job,
                new Path(otherArgs[otherArgs.length - 1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

[[email protected] q1]$ hadoop fs -cat /output/part-r-00000
43

三、平均值（Avg）
情況1：
[[email protected] q1]$ vi math.txt
zs 80
ls 90
ww 95
[[email protected] q1]$ vi china.txt
zs 60
ls 65
ww 90
[[email protected] q1]$ hadoop fs -put math.txt /input
[[email protected] q1]$ hadoop fs -put china.txt /input

java程式碼：

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
 
public class Score {
 
    public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
        // 實現map函式
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // 將輸入的純文字檔案的資料轉化成String
            String line = value.toString();
            // 將輸入的資料首先按行進行分割
            StringTokenizer tokenizerArticle = new StringTokenizer(line, "\n");
            // 分別對每一行進行處理
            while (tokenizerArticle.hasMoreElements()) {
                // 每行按空格劃分
                StringTokenizer tokenizerLine = new StringTokenizer(tokenizerArticle.nextToken());
                String strName = tokenizerLine.nextToken();// 學生姓名部分
                String strScore = tokenizerLine.nextToken();// 成績部分
                Text name = new Text(strName);
                int scoreInt = Integer.parseInt(strScore);
                // 輸出姓名和成績
                context.write(name, new IntWritable(scoreInt));
            }
        }
    }
 
    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        // 實現reduce函式
        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            int count = 0;
            Iterator<IntWritable> iterator = values.iterator();
            while (iterator.hasNext()) {
                sum += iterator.next().get();// 計算總分
                count++;// 統計總的科目數
            }
            int average = (int) sum / count;// 計算平均成績
            context.write(key, new IntWritable(average));
        }
    }
 
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("mapred.jar","Score.jar");
        
        Job job = new Job(conf, "Score Average");
        job.setJarByClass(Score.class);
 
        // 設定Map、Combine和Reduce處理類
        job.setMapperClass(Map.class);
        job.setCombinerClass(Reduce.class);
        job.setReducerClass(Reduce.class);
 
        // 設定輸出型別
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
 
        // 將輸入的資料集分割成小資料塊splites，提供一個RecordReder的實現
        job.setInputFormatClass(TextInputFormat.class);
        // 提供一個RecordWriter的實現，負責資料輸出
        job.setOutputFormatClass(TextOutputFormat.class);
 
        // 設定輸入和輸出目錄
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

[[email protected] q1]$ /usr/jdk1.7.0_25/bin/javac Score.java
[[email protected] q1]$ /usr/jdk1.7.0_25/bin/jar cvf xx.jar Score*class
[[email protected] q1]$ hadoop jar xx.jar Score /input/* /output

[[email protected] q1]$ hadoop fs -cat /output/part-r-00000
ls 77
ww 92
zs 70

補充：迭代器（Iterator）
　　迭代器是一種設計模式，它是一個物件，它可以遍歷並選擇序列中的物件，而開發人員不需要了解該序列的底層結構。迭代器通常被稱為“輕量級”物件，因為建立它的代價小。
　　Java中的Iterator功能比較簡單，並且只能單向移動：
　　(1) 使用方法iterator()要求容器返回一個Iterator。第一次呼叫Iterator的next()方法時，它返回序列的第一個元素。注意：iterator()方法定義在java.lang.Iterable介面中，而Collection介面繼承了Iterable，因此所有Collection都具有此方法。
　　(2) 使用next()獲得序列中的下一個元素。
　　(3) 使用hasNext()檢查序列中是否還有元素。
　　(4) 使用remove()將迭代器最近一次由next()返回的元素刪除。
　　Iterator是Java迭代器最簡單的實現，為List設計的ListIterator具有更多的功能，它可以從兩個方向遍歷List，也可以從List中插入和刪除元素。
1.建立集合：
Collection c = new ArrayList<String>();
2.新增元素：
c.add("hehehe");
c.add("huhuhu");
c.add("wawawa");
3.獲取集合的迭代器：
Iterator iterator = c.iterator();
4.進行遍歷：
while(iterator.hasNext())//如果仍有元素可以迭代，則返回 true
{
System.out.println(iterator.next());//返回迭代的下一個元素。
}

情況2：

[[email protected] q1]$ vi ceshi.txt
2
8
8
3
2
3
5
3
0
2
7
[[email protected] q1]$ hadoop fs -put ceshi.txt /input

java程式碼：

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class Average {
	
    public static class AvgMapper extends Mapper<LongWritable, Text, LongWritable, LongWritable> {
        public long sum = 0;
        public long count = 0;
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            sum += Long.parseLong(value.toString());
            count += 1;
        }
        protected void cleanup(Context context) throws IOException, InterruptedException {
            context.write(new LongWritable(sum), new LongWritable(count));
        }
    }

    public static class AvgCombiner extends Reducer<LongWritable, LongWritable, LongWritable, LongWritable> {
        public long sum = 0;
        public long count = 0;
        public void reduce(LongWritable key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
            sum += key.get();
            for (LongWritable v : values) {
                count += v.get();
            }
        }
        protected void cleanup(Context context) throws IOException, InterruptedException {
            context.write(new LongWritable(sum), new LongWritable(count));
        }
    }

    public static class AvgReducer extends Reducer<LongWritable, LongWritable, DoubleWritable, NullWritable> {
        public long sum = 0;
        public long count = 0;
        public void reduce(LongWritable key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
            sum += key.get();
            for (LongWritable v : values) {
                count += v.get();
            }
        }
        protected void cleanup(Context context) throws IOException, InterruptedException {
            context.write(new DoubleWritable(new Double(sum)/count), NullWritable.get());
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.err.println("Usage: Avg <in> [<in>...] <out>");
            System.exit(2);
        }

        Job job = Job.getInstance(conf, "Avg");
        job.setJarByClass(Average.class);
        job.setMapperClass(AvgMapper.class);
        job.setCombinerClass(AvgCombiner.class);
        job.setReducerClass(AvgReducer.class);

        //注意這裡:由於Mapper與Reducer的輸出Key,Value型別不同,所以要單獨為Mapper設定型別
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(LongWritable.class);

        job.setOutputKeyClass(DoubleWritable.class);
        job.setOutputValueClass(NullWritable.class);

        for (int i = 0; i < otherArgs.length - 1; ++i) {
            FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
        }
        FileOutputFormat.setOutputPath(job,
                new Path(otherArgs[otherArgs.length - 1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

[[email protected] q1]$ /usr/jdk1.7.0_25/bin/javac Average.java
[[email protected] q1]$ /usr/jdk1.7.0_25/bin/jar cvf xx.jar Average*class
[[email protected] q1]$ hadoop jar xx.jar Average /input/ceshi.txt /output

[[email protected] q1]$ hadoop fs -cat /output/part-r-00000
3.909090909090909

MapReduce功能實現六---最大值(Max)、求和(Sum)、平均值(Avg)

MapReduce功能實現六---最大值(Max)、求和(Sum)、平均值(Avg)

Java:定義五個函式，分別實現①計算陣列的最大值MAX②最小值MIN③陣列和SUM④拼接兩個陣列a和b⑤擷取a陣列的一部分

清北學堂模擬賽d2t4 最大值(max)

編寫一求兩個數的最大值的函式Max，要求用模板實現對任意資料型別資料都可應用該函式求取結果，

騰訊面試題：模板實現一個棧，要求Push（入棧），Pop（出棧），Max（返回最大值的操作）的時間複雜度為O(1)

預處理、const、static與sizeof-用#define實現宏並求最大值和最小值

【劍指offer】滑動窗口的最大值，C++實現

Python：lambda表達式實現求兩個變量的最大值

關於MYSQL group by 分組按時間取最大值的實現方法！

js獲取陣列最大值（Math.max.apply(null, arr)）

【Java】劍指offer(59-2) 佇列的最大值《劍指Offer》Java實現合集《劍指Offer》Java實現合集

JS中實現陣列取最大值

max和zip一起使用取出字典中最大值的key、

初學Java：計算陣列中最大值 ---計算陣列中最小值----計算陣列之和----實現兩個陣列----拼接陣列擷取

python實現查詢使用者輸入的數字中的最大值

用c語言實現求數值的最大值。

Haskell --- 利用遞迴實現選出List中最大值

滑動視窗最大值的golang實現

Python：lambda表示式實現求兩個變數的最大值

【佇列】滑動視窗的最大值序列,帶max函式的佇列

MapReduce功能實現六---最大值(Max)、求和(Sum)、平均值(Avg)

相關推薦