Big Data (Hadoop: custom data types and file formats)
阿新 • Published 2019-06-11
Custom InputFormat
OutputFormat
Example code
package com.vip09;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

public class ScoreWritable implements WritableComparable<Object> {

    // In a custom data type, it is recommended to stick to Java primitive fields
    private float chinese;
    private float math;
    private float english;
    private float physics;
    private float chemistry;

    // A custom data type must have a no-argument constructor
    public ScoreWritable() {}

    public ScoreWritable(float chinese, float math, float english, float physics, float chemistry) {
        this.chinese = chinese;
        this.math = math;
        this.english = english;
        this.physics = physics;
        this.chemistry = chemistry;
    }

    public void set(float chinese, float math, float english, float physics, float chemistry) {
        this.chinese = chinese;
        this.math = math;
        this.english = english;
        this.physics = physics;
        this.chemistry = chemistry;
    }

    public float getChinese() { return chinese; }
    public float getMath() { return math; }
    public float getEnglish() { return english; }
    public float getPhysics() { return physics; }
    public float getChemistry() { return chemistry; }

    // Called when the data is written out: serialization
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeFloat(chinese);
        out.writeFloat(math);
        out.writeFloat(english);
        out.writeFloat(physics);
        out.writeFloat(chemistry);
    }

    // Called when the data is read back: deserialization, so the object can be rebuilt
    @Override
    public void readFields(DataInput in) throws IOException {
        chinese = in.readFloat();
        math = in.readFloat();
        english = in.readFloat();
        physics = in.readFloat();
        chemistry = in.readFloat();
    }

    // Stub: ScoreWritable is only used as a map output value in this job, so its ordering is never consulted
    @Override
    public int compareTo(Object o) {
        return 0;
    }
}
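To sanity-check write() and readFields() outside of a MapReduce job, a quick round trip through plain Java streams is enough. The following is a minimal sketch added here for illustration; the class and field order come from ScoreWritable above, everything else is just test scaffolding:

package com.vip09;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

public class ScoreWritableRoundTrip {
    public static void main(String[] args) throws Exception {
        ScoreWritable original = new ScoreWritable(90f, 85f, 70f, 66f, 88f);

        // Serialize with write()
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));

        // Deserialize with readFields() into a fresh instance;
        // the field order must match between write() and readFields()
        ScoreWritable copy = new ScoreWritable();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

        System.out.println(copy.getChinese() + " " + copy.getMath() + " " + copy.getEnglish()
                + " " + copy.getPhysics() + " " + copy.getChemistry());
    }
}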
package com.vip09;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class ScoreCount extends Configured implements Tool {

    // Mapper: the custom input format already delivers <Text, ScoreWritable> pairs,
    // so the map step simply passes them through
    public static class ScoreMapper extends Mapper<Text, ScoreWritable, Text, ScoreWritable> {
        @Override
        protected void map(Text key, ScoreWritable value, Context context)
                throws IOException, InterruptedException {
            context.write(key, value);
        }
    }

    // Reducer: compute the total and average score for each student
    public static class ScoreReducer extends Reducer<Text, ScoreWritable, Text, Text> {
        private Text text = new Text();

        @Override
        protected void reduce(Text key, Iterable<ScoreWritable> value, Context context)
                throws IOException, InterruptedException {
            float totalScore = 0.0f;
            float avgScore = 0.0f;
            for (ScoreWritable sw : value) {
                totalScore = sw.getChinese() + sw.getEnglish() + sw.getMath()
                        + sw.getPhysics() + sw.getChemistry();
                avgScore = totalScore / 5;
            }
            text.set(totalScore + "\t" + avgScore);
            context.write(key, text);
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Delete the output directory if it already exists
        Path mypath = new Path(args[1]);
        FileSystem hdfs = mypath.getFileSystem(conf);
        if (hdfs.isDirectory(mypath)) {
            hdfs.delete(mypath, true);
        }

        Job job = Job.getInstance(conf, "scorecount");
        job.setJarByClass(ScoreCount.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setMapperClass(ScoreMapper.class);
        job.setReducerClass(ScoreReducer.class);

        // Custom types must be declared explicitly for the map output
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(ScoreWritable.class);

        // Set the custom input format
        job.setInputFormatClass(ScoreInputFormat.class);

        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        String[] args0 = {"hdfs://192.168.153.111:9000/input5",
                "hdfs://192.168.153.111:9000/output15"};
        int res = ToolRunner.run(new ScoreCount(), args0);
        System.exit(res);
    }
}
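One note on the driver pattern: run() creates a fresh Configuration instead of using the one ToolRunner injects through Configured, and main() hard-codes the HDFS paths, so generic options such as -D settings from the command line are ignored. If you want those to take effect, a sketch along these lines should work (same class, just swapping in getConf() and the real args):

@Override
public int run(String[] args) throws Exception {
    // Use the configuration that ToolRunner set up, so -D options are honored
    Configuration conf = getConf();
    // ... remaining job setup unchanged ...
}

public static void main(String[] args) throws Exception {
    // Pass the command-line arguments through instead of hard-coding HDFS paths
    System.exit(ToolRunner.run(new Configuration(), new ScoreCount(), args));
}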
package com.vip09;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class ScoreInputFormat extends FileInputFormat<Text, ScoreWritable> {

    /*
     * Every input format needs a matching RecordReader.
     * Overriding createRecordReader() really just means returning our own reader:
     * here that is the custom ScoreRecordReader class, which extends RecordReader
     * and does the actual reading of the data.
     */
    @Override
    public RecordReader<Text, ScoreWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        return new ScoreRecordReader();
    }
}
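Note that the ScoreRecordReader below always opens the file and reads from the beginning, ignoring the split's start offset. With FileInputFormat's default splitting, a file larger than one split would therefore be read incorrectly. A common way around this, added here as a suggestion rather than part of the original post, is to declare the files non-splittable in ScoreInputFormat (this also needs imports for JobContext and Path):

@Override
protected boolean isSplitable(JobContext context, Path filename) {
    // Each file becomes exactly one split, so reading from offset 0 is safe
    return false;
}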
package com.vip09;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.util.LineReader;
public class ScoreRecordReader extends RecordReader<Text, ScoreWritable> {

    public LineReader in;           // line reader
    public Text lineKey;            // custom key
    public ScoreWritable linevalue; // custom value
    public Text line;               // current line of input

    // Initialization, executed only once per split
    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
        FileSplit fsplit = (FileSplit) split;
        Configuration conf = context.getConfiguration();
        Path file = fsplit.getPath();
        FileSystem fs = file.getFileSystem(conf);
        FSDataInputStream filein = fs.open(file);
        in = new LineReader(filein, conf);
        line = new Text();
        lineKey = new Text();
        linevalue = new ScoreWritable();
    }

    // Called once for every line of input.
    // This is the method to focus on; the others are fairly boilerplate and can be copied as-is.
    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        int linesize = in.readLine(line);
        if (linesize == 0) {
            return false;
        }
        String[] pieces = line.toString().split("\\s+");
        if (pieces.length != 7) {
            throw new IOException("Invalid record");
        }
        // Convert each of the student's scores to float
        float a = 0, b = 0, c = 0, d = 0, e = 0;
        try {
            a = Float.parseFloat(pieces[2].trim());
            b = Float.parseFloat(pieces[3].trim());
            c = Float.parseFloat(pieces[4].trim());
            d = Float.parseFloat(pieces[5].trim());
            e = Float.parseFloat(pieces[6].trim());
        } catch (NumberFormatException nfe) {
            nfe.printStackTrace();
        }
        lineKey.set(pieces[0] + "\t" + pieces[1]); // build the custom key
        linevalue.set(a, b, c, d, e);              // populate the custom value
        return true;
    }

    @Override
    public Text getCurrentKey() throws IOException, InterruptedException {
        return lineKey;
    }

    @Override
    public ScoreWritable getCurrentValue() throws IOException, InterruptedException {
        return linevalue;
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
        // Progress reporting is not implemented for this simple reader
        return 0;
    }

    @Override
    public void close() throws IOException {
        if (in != null) {
            in.close();
        }
    }
}
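For reference, each input line therefore has to contain exactly seven whitespace-separated fields: two key fields (taken here to be the student's surname and given name, which is my assumption, the original post does not say) followed by the five scores. A hypothetical record such as the one below would produce the key "Zhang\tSan" and a ScoreWritable holding the five floats:

Zhang San 90 85 70 66 88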
package com.vip09;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class MapReduceCaseEmail extends Configured implements Tool {

    public static class EmailMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Each input line is an email address; emit <address, 1>
            context.write(value, one);
        }
    }

    public static class EmailReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();
        // To write to multiple files or directories, use MultipleOutputs
        private MultipleOutputs<Text, IntWritable> mout;

        @Override
        protected void setup(Context context)
                throws IOException, InterruptedException {
            mout = new MultipleOutputs<Text, IntWritable>(context);
        }

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int begin = key.toString().indexOf("@");
            int end = key.toString().indexOf(".");
            if (begin >= end) {
                return;
            }
            // Extract the mail provider, e.g. qq, 163, ...
            String name = key.toString().substring(begin + 1, end);
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            result.set(sum);
            // Output files are named baseoutputpath-r-nnnnn
            mout.write(key, result, name);
        }

        @Override
        protected void cleanup(Context context)
                throws IOException, InterruptedException {
            mout.close();
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Delete the output directory if it already exists
        Path mypath = new Path(args[1]);
        FileSystem hdfs = mypath.getFileSystem(conf);
        if (hdfs.isDirectory(mypath)) {
            hdfs.delete(mypath, true);
        }

        Job job = Job.getInstance(conf, "emailcount");
        job.setJarByClass(MapReduceCaseEmail.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setMapperClass(EmailMapper.class);
        job.setReducerClass(EmailReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        String[] args0 = {"hdfs://192.168.153.111:9000/input6",
                "hdfs://192.168.153.111:9000/output16"};
        int res = ToolRunner.run(new MapReduceCaseEmail(), args0);
        System.exit(res);
    }
}
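Since every record is written through MultipleOutputs, the default output path will still receive empty part-r-nnnnn files. If that bothers you, a commonly used remedy (my addition, not in the original post) is to register the output format lazily, so the default files are only created when something is actually written to them:

import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

// In run(), instead of relying on the implicit default output format:
LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);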