windows本地執行hadoop的MapReduce程式
阿新 • • 發佈:2018-11-29
1.下載hadoo安裝到windows本地
地址 https://archive.apache.org/dist/hadoop/core/hadoop-2.6.0/hadoop-2.6.0.tar.gz
2. 解壓之後進行設定環境變數
新建 HADOOP_HOME D:\software\hadoop-2.6.0
Path中增加 %HADOOP_HOME%\bin 和 %HADOOP_HOME%\sbin
3.範例程式碼
專案結構
pom.xml
<?xml version="1.0" encoding="UTF-8"?> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <groupId>com.xiaobao</groupId> <artifactId>hadooptest</artifactId> <version>0.0.1-SNAPSHOT</version> <packaging>jar</packaging> <dependencies> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-client</artifactId> <version>2.6.0</version> </dependency> </dependencies> </project>
log4j.properties
log4j.rootLogger = debug,stdout log4j.appender.stdout = org.apache.log4j.ConsoleAppender log4j.appender.stdout.Target = System.out log4j.appender.stdout.layout = org.apache.log4j.PatternLayout log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss,SSS} method:%l%n%m%n
WordCountMapper.java
package com.xiaobao.wordcount;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String[] words = value.toString().split("\\s+");
for (String word : words) {
context.write(new Text(word), new IntWritable(1));
}
}
}
WordCountReducer.java
package com.xiaobao.wordcount;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
@Override
protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
int sum = 0;
for (IntWritable value : values) {
sum += value.get();
}
context.write(new Text(key), new IntWritable(sum));
}
}
RunJob.java
package com.xiaobao.wordcount;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class RunJob {
public static void main(String[] args) throws Exception {
Configuration configuration = new Configuration();
FileSystem fs = FileSystem.get(configuration);
Job job = Job.getInstance(configuration);
job.setJarByClass(RunJob.class);
job.setJobName("wordCount");
job.setMapperClass(WordCountMapper.class);
job.setReducerClass(WordCountReducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path("/wordcount/input"));
Path outPath = new Path("/wordcount/output");
if (fs.exists(outPath)) {
fs.delete(outPath, true);
}
FileOutputFormat.setOutputPath(job, outPath);
boolean completion = job.waitForCompletion(true);
if (completion) {
System.out.println("執行完成");
}
}
}
4. 然後在專案的根目錄下建立 D:\wordcount\input 和 D:\wordcount\output資料夾input 資料夾中放入 words.txt內容為
hello world
hello hadoop
hadoop yes
hadoop no
5. 然後啟動RunJob的main方法,會出現報錯
[ERROR] 2018-11-13 10:12:43,981 method:org.apache.hadoop.util.Shell.getWinUtilsPath(Shell.java:373)
Failed to locate the winutils binary in the hadoop binary path
java.io.IOException: Could not locate executable null\bin\winutils.exe in the Hadoop binaries.
at org.apache.hadoop.util.Shell.getQualifiedBinPath(Shell.java:355)
at org.apache.hadoop.util.Shell.getWinUtilsPath(Shell.java:370)
at org.apache.hadoop.util.Shell.<clinit>(Shell.java:363)
at org.apache.hadoop.util.StringUtils.<clinit>(StringUtils.java:79)
at org.apache.hadoop.security.Groups.parseStaticMapping(Groups.java:104)
at org.apache.hadoop.security.Groups.<init>(Groups.java:86)
at org.apache.hadoop.security.Groups.<init>(Groups.java:66)
at org.apache.hadoop.security.Groups.getUserToGroupsMappingService(Groups.java:280)
at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:271)
at org.apache.hadoop.security.UserGroupInformation.ensureInitialized(UserGroupInformation.java:248)
at org.apache.hadoop.security.UserGroupInformation.loginUserFromSubject(UserGroupInformation.java:763)
at org.apache.hadoop.security.UserGroupInformation.getLoginUser(UserGroupInformation.java:748)
at org.apache.hadoop.security.UserGroupInformation.getCurrentUser(UserGroupInformation.java:621)
at org.apache.hadoop.fs.FileSystem$Cache$Key.<init>(FileSystem.java:2753)
at org.apache.hadoop.fs.FileSystem$Cache$Key.<init>(FileSystem.java:2745)
at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2611)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:370)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:169)
at com.xiaobao.wordcount.RunJob.main(RunJob.java:24)
5.看日誌的錯誤資訊是沒有winutils.exe這個檔案,需要下載winutils.exe檔案,注意要和自己hadoop版本一致,否則還是報錯
下載地址: https://github.com/steveloughran/winutils 從github下載下來壓縮包,將對應版本的bin下的檔案拷貝並替換自己本 地的hadoop的bin目錄中檔案
6. 繼續執行main方法,執行成功
此時 D:\wordcount\output 資料夾中出現執行完成的檔案_SUCCESS 和 part-r-00000 等檔案, _SUCCESS 代表執行成功, part-r-00000中存單詞統計的結果
hadoop 3
hello 2
no 1
world 1
yes 1