
MapReduce: WordCount

Add the dependencies

    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>2.6.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <version>2.6.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-mapreduce-client-core</artifactId>
      <version>2.6.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
      <version>2.6.0</version>
    </dependency>
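
All four artifacts share the same version, so the version can be hoisted into a Maven property to keep them in sync; a minimal sketch (the property name hadoop.version is illustrative):

    <properties>
      <hadoop.version>2.6.0</hadoop.version>
    </properties>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>${hadoop.version}</version>
    </dependency>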

1. Running with a jar

package Hadoop;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class WordCount {

    // Four type parameters: the first two are the input <key, value> pair, the last two the output <key, value> pair;
    // LongWritable, IntWritable and Text can be seen as Hadoop's serializable replacements for Java's long, int and String;
    public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable>{
        // A constant marking a single occurrence of a word; private final static defines a constant,
        // so you could also skip it and write the literal 1 directly;
        private final static IntWritable one = new IntWritable(1);
        // Reusable buffer that holds the current word;
        private Text word = new Text();
        // The map method: key is the byte offset of the line, value is the line itself, context is used to emit output;
        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            // Split the line into whitespace-separated tokens;
            StringTokenizer itr = new StringTokenizer(value.toString());
            // Handle each token in turn;
            while (itr.hasMoreTokens()) {
                // nextToken() returns the substring from the current position up to the next delimiter;
                word.set(itr.nextToken());
                // e.g. context.write("hello", 1);
                context.write(word, one);
            }
        }
    }

    // Four type parameters: the first two are the input <key, value> pair, the last two the output <key, value> pair;
    public static class IntSumReducer extends Reducer<Text,IntWritable,Text,IntWritable> {
        // Holds the aggregated count for the current key;
        private IntWritable result = new IntWritable();
        // The reduce method: key is a word such as "hello", values iterates over all counts emitted for that key;
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // Accumulator for this key's total count;
            int sum = 0;
            // Sum the counts;
            for (IntWritable val : values) {
                sum += val.get();
            }
            // Store the total in result;
            result.set(sum);
            // e.g. context.write("hello", 7);
            context.write(key, result);
        }
    }

    // Entry point;
    public static void main(String[] args) throws Exception {
        // Job configuration;
        Configuration conf = new Configuration();
        // Parse the generic options; what remains are the HDFS input and output paths
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.err.println("Usage: wordcount <in> [<in>...] <out>");
            System.exit(2);
        }
        // Set the job name and locate the jar by its main class;
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCount.class);
        // Set the Mapper class;
        job.setMapperClass(TokenizerMapper.class);
        // Set the Combiner class; it runs the same summing logic on each mapper's local output before the shuffle;
        job.setCombinerClass(IntSumReducer.class);
        // Set the Reducer class;
        job.setReducerClass(IntSumReducer.class);
        // Set the output key type;
        job.setOutputKeyClass(Text.class);
        // Set the output value type;
        job.setOutputValueClass(IntWritable.class);
        // Add the input paths (all arguments except the last);
        for (int i = 0; i < otherArgs.length - 1; ++i) {
            FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
        }
        // Set the output path (the last argument);
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}
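
Before running on a cluster, the counting logic itself can be sanity-checked as plain Java; a minimal standalone sketch (the class name LocalWordCount and the sample line are illustrative) that tokenizes input the way the Mapper does and sums counts the way the Reducer does:

import java.util.HashMap;
import java.util.Map;
import java.util.StringTokenizer;

public class LocalWordCount {
    public static void main(String[] args) {
        String line = "hello world hello";
        Map<String, Integer> counts = new HashMap<>();
        // "map" step: split the line into whitespace-separated tokens
        StringTokenizer itr = new StringTokenizer(line);
        while (itr.hasMoreTokens()) {
            // "reduce" step folded in: add 1 for each occurrence of the word
            counts.merge(itr.nextToken(), 1, Integer::sum);
        }
        System.out.println(counts); // prints {hello=2, world=1} (iteration order may vary)
    }
}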

Build the jar and copy it to the /usr/local/hadoop directory.
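
The job expects the input directory to already exist in HDFS; if it does not, it can be created and populated first (the sample file name words.txt is illustrative):

hdfs dfs -mkdir -p /input
hdfs dfs -put words.txt /input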

Then run:

hadoop jar /usr/local/hadoop/MavenMapReduceHelloWorld-1.0-SNAPSHOT.jar Hadoop.WordCount /input /output

Result

(screenshot of the job output)

2. Remote execution from IDEA

package Hadoop;
import java.io.IOException;
import java.net.URI;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount2 {

    // Four type parameters: the first two are the input <key, value> pair, the last two the output <key, value> pair;
    // LongWritable, IntWritable and Text can be seen as Hadoop's serializable replacements for Java's long, int and String;
    public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable>{
        // A constant marking a single occurrence of a word;
        private final static IntWritable one = new IntWritable(1);
        // Reusable buffer that holds the current word;
        private Text word = new Text();
        // The map method: key is the byte offset of the line, value is the line itself, context is used to emit output;
        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            // Debug output to observe each map input record;
            System.out.println("Map key:" + key + ",value:" + value);
            // Split the line into whitespace-separated tokens;
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                // e.g. context.write("hello", 1);
                context.write(word, one);
            }
        }
    }

    // Four type parameters: the first two are the input <key, value> pair, the last two the output <key, value> pair;
    public static class IntSumReducer extends Reducer<Text,IntWritable,Text,IntWritable> {
        // Holds the aggregated count for the current key;
        private IntWritable result = new IntWritable();
        // The reduce method: key is a word such as "hello", values iterates over all counts emitted for that key;
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // Debug output to observe the values grouped under each key;
            StringBuilder sb = new StringBuilder();
            sb.append("Reduce key:").append(key).append(",value:");
            // Accumulator for this key's total count;
            int sum = 0;
            for (IntWritable val : values) {
                sb.append(val.get()).append(" ");
                sum += val.get();
            }
            System.out.println(sb.toString());
            // Store the total in result;
            result.set(sum);
            // e.g. context.write("hello", 7);
            context.write(key, result);
        }
    }

    // Entry point;
    public static void main(String[] args) throws Exception {
        // Job configuration;
        Configuration conf = new Configuration();

        // Point the client at the remote HDFS NameNode;
        conf.set("fs.defaultFS", "hdfs://192.168.255.128:9000");
        // Act as the root user on the cluster;
        System.setProperty("HADOOP_USER_NAME", "root");
        // The hadoop2.6 folder is unpacked from hadoop2.6.rar;
        System.setProperty("hadoop.home.dir", "E:/hadoop2.6");
        final String OUTPUT_PATH = "hdfs://192.168.255.128:9000/output";
        Path outpath = new Path(OUTPUT_PATH);

        // Remove the output of any previous run; otherwise the job fails because the path already exists;
        FileSystem fs = FileSystem.get(new URI(OUTPUT_PATH), conf);
        if (fs.exists(outpath)) {
            fs.delete(outpath, true);
        }

        // Set the job name and locate the jar by its main class;
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCount2.class);
        // Set the Mapper class;
        job.setMapperClass(TokenizerMapper.class);
        // Set the Combiner class; it runs the same summing logic on each mapper's local output before the shuffle;
        job.setCombinerClass(IntSumReducer.class);
        // Set the Reducer class;
        job.setReducerClass(IntSumReducer.class);
        // Set the output key type;
        job.setOutputKeyClass(Text.class);
        // Set the output value type;
        job.setOutputValueClass(IntWritable.class);

        // Set the input path
        FileInputFormat.addInputPath(job, new Path("hdfs://192.168.255.128:9000/input"));
        // Set the output path;
        FileOutputFormat.setOutputPath(job, outpath);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}
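
After the job finishes, the result can also be read back from HDFS programmatically; a minimal sketch (part-r-00000 is Hadoop's default output file name for a single reducer):

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReadOutput {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create("hdfs://192.168.255.128:9000"), conf);
        // Open the reducer's output file and print each "word<TAB>count" line
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(fs.open(new Path("/output/part-r-00000"))))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}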

Copy hadoop.dll from the hadoop2.6/bin folder to C:\Windows\System32 so the Hadoop native library can be loaded when running from Windows.