
Developing a Hadoop MapReduce application with Maven

1. Create a Maven project.

2. On CentOS, extract the downloaded Maven repository archive. Archive download address:
tar -zxvf m2.tar.gz
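
If m2.tar.gz is a pre-populated local repository (the ~/.m2-style naming suggests so), Maven can be pointed at the unpacked directory through settings.xml. A minimal sketch, assuming the archive extracts to /root/m2; use whatever path tar actually produced:

<settings>
  <!-- assumption: /root/m2 is where m2.tar.gz was unpacked -->
  <localRepository>/root/m2</localRepository>
</settings>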

3. Configure pom.xml:

<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>cn.itcast.hadoop.mr</groupId>
  <artifactId>datacount</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  
  <dependencies>
  	<dependency>
  		<groupId>org.apache.hadoop</groupId>
  		<artifactId>hadoop-common</artifactId>
  		<version>2.2.0</version>
  	</dependency>
  	
  	<dependency>
  		<groupId>org.apache.hadoop</groupId>
  		<artifactId>hadoop-mapreduce-client-core</artifactId>
  		<version>2.2.0</version>
  	</dependency>
  </dependencies>
</project>
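
With the POM in place, build the job jar with the standard Maven invocation; by default it lands in target/ as datacount-0.0.1-SNAPSHOT.jar (artifactId plus version):

mvn clean package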

4. The MapReduce code. First, DataBean.java, a custom Writable that carries the traffic counters:

package cn.itcast.hadoop.mr.dc;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

/**
 * A Writable bean holding one phone number's upload, download,
 * and total traffic, passed between the map and reduce phases.
 */
public class DataBean implements Writable {
	
	private String telNo;
	private long upPayLoad;
	private long downPayLoad;
	private long totalPayLoad;

	// no-arg constructor: required by Hadoop to instantiate the bean via reflection
	public DataBean(){}
	
	public DataBean(String telNo, long upPayLoad, long downPayLoad) {
		this.telNo = telNo;
		this.upPayLoad = upPayLoad;
		this.downPayLoad = downPayLoad;
		this.totalPayLoad = upPayLoad + downPayLoad;
	}

	// TextOutputFormat calls toString() on the reducer's output value, so this
	// defines the final output columns: upload, download, total
	@Override
	public String toString(){
		return this.upPayLoad + "\t" + this.downPayLoad + "\t" + this.totalPayLoad;
	}
	
	// serialize: the field order written here defines the wire format
	@Override
	public void write(DataOutput out) throws IOException {
		out.writeUTF(telNo);
		out.writeLong(upPayLoad);
		out.writeLong(downPayLoad);
		out.writeLong(totalPayLoad);
	}

	// deserialize: must read fields in exactly the order write() emits them
	@Override
	public void readFields(DataInput in) throws IOException {
		this.telNo = in.readUTF();
		this.upPayLoad = in.readLong();
		this.downPayLoad = in.readLong();
		this.totalPayLoad = in.readLong();
	}
	
	public String getTelNo() {
		return telNo;
	}

	public void setTelNo(String telNo) {
		this.telNo = telNo;
	}

	public long getUpPayLoad() {
		return upPayLoad;
	}

	public void setUpPayLoad(long upPayLoad) {
		this.upPayLoad = upPayLoad;
	}

	public long getDownPayLoad() {
		return downPayLoad;
	}

	public void setDownPayLoad(long downPayLoad) {
		this.downPayLoad = downPayLoad;
	}

	public long getTotalPayLoad() {
		return totalPayLoad;
	}

	public void setTotalPayLoad(long totalPayLoad) {
		this.totalPayLoad = totalPayLoad;
	}
}
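
Because write() and readFields() must agree on field order, a quick local round trip is a cheap sanity check before running on a cluster. DataBeanRoundTrip is a hypothetical helper, not part of the original post; it only exercises the bean defined above:

package cn.itcast.hadoop.mr.dc;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

public class DataBeanRoundTrip {
	public static void main(String[] args) throws Exception {
		DataBean before = new DataBean("13800000000", 100, 200);

		// serialize into an in-memory buffer
		ByteArrayOutputStream buf = new ByteArrayOutputStream();
		before.write(new DataOutputStream(buf));

		// deserialize into a fresh bean and print it
		DataBean after = new DataBean();
		after.readFields(new DataInputStream(new ByteArrayInputStream(buf.toByteArray())));

		System.out.println(after);	// expect: 100	200	300
	}
}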


The driver class, DataCount.java, holds the mapper, the reducer, and the job setup:

package cn.itcast.hadoop.mr.dc;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class DataCount {

	public static class DCMapper extends Mapper<LongWritable, Text, Text, DataBean>{

		@Override
		protected void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			//accept 
			String line = value.toString();
			//split
			String[] fields = line.split("\t");
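			// Assumption about the input: a tab-separated traffic log where
			// column 1 is the phone number and columns 8/9 are the upload and
			// download byte counts. Adjust the indices to your own data.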
			String tel = fields[1];
			long up = Long.parseLong(fields[8]);
			long down = Long.parseLong(fields[9]);	
			DataBean bean = new DataBean(tel, up, down);
			//send
			context.write(new Text(tel), bean);
		}
		
	}
	
	public static class DCReducer extends Reducer<Text, DataBean, Text, DataBean>{

		@Override
		protected void reduce(Text key, Iterable<DataBean> values, Context context)
				throws IOException, InterruptedException {
			long up_sum = 0;
			long down_sum = 0;
			for(DataBean bean : values){
				up_sum += bean.getUpPayLoad();
				down_sum += bean.getDownPayLoad();
			}
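			// the phone number is already carried by the output key, so telNo is left empty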
			DataBean bean = new DataBean("", up_sum, down_sum);
			context.write(key, bean);
		}
	}

	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf);
		
		job.setJarByClass(DataCount.class);
		
		job.setMapperClass(DCMapper.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(DataBean.class);
		FileInputFormat.setInputPaths(job, new Path(args[0]));
		
		job.setReducerClass(DCReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(DataBean.class);
		FileOutputFormat.setOutputPath(job, new Path(args[1]));
		
		// exit nonzero on failure so callers can detect a failed job
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}
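
After packaging, submit the job to a running Hadoop cluster with the hadoop jar command. A sketch of the invocation; the input and output HDFS paths are placeholders, and the output directory must not already exist:

hadoop jar target/datacount-0.0.1-SNAPSHOT.jar cn.itcast.hadoop.mr.dc.DataCount /data/traffic.log /data/out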