MapReduce部分練習使用API程式設計示例之PageRank
阿新 • 發佈:2018-12-11
package com.sxt.hadoop.mr.pagerank;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Iterative PageRank driver. Each MapReduce iteration reads the previous
 * iteration's output (the raw graph on the first pass), recomputes every
 * page's rank, and accumulates the total scaled rank change into a counter.
 * The loop stops once the average per-page change falls below a threshold.
 */
public class RunJob {

    /** Accumulates |newPR - oldPR| * 1000 over all pages; used to test convergence. */
    public static enum Mycounter {
        my
    }

    /** Number of pages in the toy data set (appears in the damping formula and the convergence average). */
    private static final double PAGE_COUNT = 4.0;

    public static void main(String[] args) {
        Configuration conf = new Configuration(true);
        // Bug fix: the property key was misspelled ("corss-paltform"), so the
        // setting was silently ignored. Correct key per mapred-default.xml:
        conf.set("mapreduce.app-submission.cross-platform", "true");
        // Run as a local single-process simulation; no jar packaging needed.
        // For distributed execution switch to "yarn" and configure the job jar
        // (e.g. job.setJar(...)) when the client is launched outside the cluster.
        conf.set("mapreduce.framework.name", "local");

        double d = 0.00001; // convergence threshold on the average per-page PR delta
        int i = 0;
        while (true) {
            i++;
            try {
                // Tell the mapper which iteration this is (1 = raw graph input).
                conf.setInt("runCount", i);
                FileSystem fs = FileSystem.get(conf);
                Job job = Job.getInstance(conf);
                job.setJarByClass(RunJob.class);
                job.setJobName("pr" + i);
                job.setMapperClass(PageRankMapper.class);
                job.setReducerClass(PageRankReducer.class);
                job.setMapOutputKeyClass(Text.class);
                job.setMapOutputValueClass(Text.class);
                // job.setJar("/ooxx/jar");
                // Input lines are "page<TAB>rest": key = page name, value = the rest.
                job.setInputFormatClass(KeyValueTextInputFormat.class);

                // First iteration reads the raw graph; later iterations read the
                // previous iteration's output directory.
                Path inputPath = new Path("/data/pagerank/input/");
                if (i > 1) {
                    inputPath = new Path("/data/pagerank/output/pr" + (i - 1));
                }
                FileInputFormat.addInputPath(job, inputPath);

                Path outpath = new Path("/data/pagerank/output/pr" + i);
                if (fs.exists(outpath)) {
                    fs.delete(outpath, true);
                }
                FileOutputFormat.setOutputPath(job, outpath);

                boolean f = job.waitForCompletion(true);
                if (f) {
                    System.out.println("success.");
                    long sum = job.getCounters().findCounter(Mycounter.my).getValue();
                    System.out.println(sum);
                    // sum totals |delta| * 1000 over all pages, so dividing by
                    // PAGE_COUNT * 1000 (= the original 4000.0) yields the
                    // average per-page rank change.
                    double avgd = sum / (PAGE_COUNT * 1000.0);
                    if (avgd < d) {
                        break;
                    }
                } else {
                    // Bug fix: the original looped forever resubmitting after a
                    // failed job; abort instead.
                    System.err.println("iteration " + i + " failed; aborting");
                    break;
                }
            } catch (Exception e) {
                e.printStackTrace();
                // Bug fix: without this break, a persistent error (e.g. missing
                // input path) caused an infinite retry loop.
                break;
            }
        }
    }

    /**
     * Emits two record types per input page:
     *   1) "page -> pr<TAB>links..." carrying the old rank and the adjacency
     *      list forward to the reducer, and
     *   2) "outlink -> voteValue" for each outgoing link, where the vote is the
     *      page's rank divided evenly among its outlinks.
     */
    static class PageRankMapper extends Mapper<Text, Text, Text, Text> {
        protected void map(Text key, Text value, Context context)
                throws IOException, InterruptedException {
            // On the first iteration the input has no rank column, so every
            // page is seeded with rank 1.0.
            int runCount = context.getConfiguration().getInt("runCount", 1);
            String page = key.toString();
            Node node = null;
            if (runCount == 1) {
                node = Node.fromMR("1.0", value.toString());
            } else {
                node = Node.fromMR(value.toString());
            }
            // Forward the old rank plus the page relationships, e.g. "A -> 1.0 B D".
            context.write(new Text(page), new Text(node.toString()));
            if (node.containsAdjacentNodes()) {
                // Vote value: this page's rank split evenly across its outlinks.
                double outValue = node.getPageRank() / node.getAdjacentNodeNames().length;
                for (int i = 0; i < node.getAdjacentNodeNames().length; i++) {
                    String outPage = node.getAdjacentNodeNames()[i];
                    // e.g. B:0.5, D:0.5 — the linked page is the key, the vote the value.
                    context.write(new Text(outPage), new Text(outValue + ""));
                }
            }
        }
    }

    /**
     * For each page, separates the single structural record (the one with an
     * adjacency list, carrying the old rank) from the incoming vote values,
     * computes the damped new rank, and adds the scaled absolute rank change
     * to the convergence counter.
     */
    static class PageRankReducer extends Reducer<Text, Text, Text, Text> {
        protected void reduce(Text key, Iterable<Text> iterable, Context context)
                throws IOException, InterruptedException {
            // Values for one page mix two kinds of records, e.g. for B:
            //   "1.0<TAB>C"  — structural record: old rank + outlinks
            //   "0.5"        — a vote from some page linking to B
            double sum = 0.0;
            Node sourceNode = null;
            for (Text i : iterable) {
                Node node = Node.fromMR(i.toString());
                if (node.containsAdjacentNodes()) {
                    sourceNode = node;
                } else {
                    sum = sum + node.getPageRank();
                }
            }
            // Bug fix: a page that only appears as a link target (it has no
            // outlinks of its own, so no structural record arrives) made the
            // original code throw a NullPointerException below. Treat it as a
            // dangling page with the default rank and no adjacency.
            if (sourceNode == null) {
                sourceNode = new Node();
            }
            // Damped PageRank: damping factor 0.85, PAGE_COUNT (= 4) total pages.
            double newPR = (0.15 / PAGE_COUNT) + (0.85 * sum);
            System.out.println("*********** new pageRank value is " + newPR);
            // Compare the new rank against the pre-iteration rank and feed the
            // scaled absolute delta into the global convergence counter.
            double d = newPR - sourceNode.getPageRank();
            int j = (int) (d * 1000.0);
            j = Math.abs(j);
            System.out.println(j + "___________");
            context.getCounter(Mycounter.my).increment(j);
            sourceNode.setPageRank(newPR);
            // e.g. "A -> 0.8<TAB>B<TAB>D"
            context.write(key, new Text(sourceNode.toString()));
        }
    }
}
package com.sxt.hadoop.mr.pagerank;

import java.io.IOException;
import java.util.Arrays;

/**
 * A PageRank graph node: a page's current rank plus the names of the pages it
 * links to. The serialized form is "rank<TAB>link1<TAB>link2..." with the
 * adjacency part omitted when the page has no outlinks.
 *
 * Note: the former dependency on commons-lang StringUtils was replaced with
 * the equivalent java.lang String.join / String.split calls.
 */
public class Node {

    // Rank defaults to 1.0 so a freshly constructed node matches the
    // first-iteration seed value used by the driver.
    private double pageRank = 1.0;
    private String[] adjacentNodeNames;

    /** Field separator used in the serialized representation. */
    public static final char fieldSeparator = '\t';

    public double getPageRank() {
        return pageRank;
    }

    public Node setPageRank(double pageRank) {
        this.pageRank = pageRank;
        return this;
    }

    public String[] getAdjacentNodeNames() {
        return adjacentNodeNames;
    }

    public Node setAdjacentNodeNames(String[] adjacentNodeNames) {
        this.adjacentNodeNames = adjacentNodeNames;
        return this;
    }

    /** @return true if this node has at least one outgoing link. */
    public boolean containsAdjacentNodes() {
        return adjacentNodeNames != null && adjacentNodeNames.length > 0;
    }

    /** Serializes as "rank" or "rank<TAB>link1<TAB>link2...". */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append(pageRank);
        if (getAdjacentNodeNames() != null) {
            // String.join replaces commons-lang StringUtils.join.
            sb.append(fieldSeparator).append(
                    String.join(String.valueOf(fieldSeparator), getAdjacentNodeNames()));
        }
        return sb.toString();
    }

    /**
     * Parses a serialized record, e.g. "1.0<TAB>B<TAB>D", back into a Node.
     *
     * @param value serialized record: rank, then optional tab-separated links
     * @return the parsed node (no adjacency array when only the rank is present)
     * @throws IOException if the value is empty
     */
    public static Node fromMR(String value) throws IOException {
        // commons-lang splitPreserveAllTokens returned an empty array for "";
        // String.split would return {""} instead, so guard explicitly to keep
        // the original IOException behavior for empty input.
        if (value.isEmpty()) {
            throw new IOException("Expected 1 or more parts but received 0");
        }
        // limit -1 preserves empty trailing tokens, matching the commons-lang
        // splitPreserveAllTokens semantics for non-empty input.
        String[] parts = value.split(String.valueOf(fieldSeparator), -1);
        if (parts.length < 1) {
            throw new IOException("Expected 1 or more parts but received " + parts.length);
        }
        Node node = new Node().setPageRank(Double.valueOf(parts[0]));
        if (parts.length > 1) {
            node.setAdjacentNodeNames(Arrays.copyOfRange(parts, 1, parts.length));
        }
        return node;
    }

    /**
     * Convenience overload: joins a rank string and a link-list string into one
     * record (e.g. "1.0" + "B<TAB>D" -> "1.0<TAB>B<TAB>D") and parses it.
     */
    public static Node fromMR(String v1, String v2) throws IOException {
        return fromMR(v1 + fieldSeparator + v2);
    }
}