1. 程式人生 > >同步圖運算框架GraphLite例項之PageRank演算法

同步圖運算框架GraphLite例項之PageRank演算法

1.PageRank演算法介紹

PageRank,網頁排名,又稱網頁級別、Google左側排名或佩奇排名,是一種由搜尋引擎根據網頁之間相互的超連結計算的技術,而作為網頁排名的要素之一,以Google公司創辦人拉里·佩奇(Larry Page)之姓來命名。Google用它來體現網頁的相關性和重要性,在搜尋引擎優化操作中是經常被用來評估網頁優化的成效因素之一。Google的創始人拉里·佩奇和謝爾蓋·布林於1998年在斯坦福大學發明了這項技術。
PageRank通過網路浩瀚的超連結關係來確定一個頁面的等級。Google把從A頁面到B頁面的連結解釋為A頁面給B頁面投票,Google根據投票來源(甚至來源的來源,即連結到A頁面的頁面)和投票目標的等級來決定新的等級。簡單的說,一個高等級的頁面可以使其他低等級頁面的等級提升。

2.PageRank演算法原理

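(原文此處的三張公式圖片已遺失;以下依第 3 節程式碼的實作整理。)每個頂點 $u$ 在每一輪超步(superstep)的 PageRank 更新式為:
$$PR(u) = 0.15 + 0.85 \sum_{v \in B(u)} \frac{PR(v)}{L(v)}$$
其中 $B(u)$ 為所有連結到 $u$ 的頂點集合,$L(v)$ 為頂點 $v$ 的出度;所有頂點的初始值為 1,當所有頂點前後兩輪 PageRank 值之差的絕對值總和小於 $10^{-6}$ 時停止迭代。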

3.GraphLite圖運算系統的PageRank演算法實現

/**
 * @file PageRankVertex.cc
 * This file implements the PageRank algorithm using graphlite API.
 */
#include <stdio.h>
#include <string.h>
#include <math.h>

#include "GraphLite.h"

#define VERTEX_CLASS_NAME(name) PageRankVertex##name
#define EPS 1e-6 //class PageRankVertexInputFormatter: public InputFormatter class VERTEX_CLASS_NAME(InputFormatter): public InputFormatter { public: int64_t getVertexNum() { unsigned long long n; sscanf(m_ptotal_vertex_line, "%lld", &n);// read one long long number ,and let n=it
printf("at class PageRankVertexInputFormatter: m_total_vertex= %lld \n",n); m_total_vertex= n; return m_total_vertex; } int64_t getEdgeNum() { unsigned long long n; sscanf(m_ptotal_edge_line, "%lld", &n);// read one long long number ,and let n=it m_total_edge= n; printf("at class PageRankVertexInputFormatter: m_total_edge= %lld \n",n); return m_total_edge; } int getVertexValueSize() { m_n_value_size = sizeof(double); return m_n_value_size; } int getEdgeValueSize() { m_e_value_size = sizeof(double); return m_e_value_size; } int getMessageValueSize() { m_m_value_size = sizeof(double); return m_m_value_size; } void loadGraph() { unsigned long long last_vertex; unsigned long long from; unsigned long long to; double weight = 0; double value = 1;//initial PageRank int outdegree = 0;//outdegree of node const char *line= getEdgeLine(); // Get edge line, for user. Read from current file offset. // return a string of edge in local subgraph // Note: modify this if an edge weight is to be read // modify the 'weight' variable sscanf(line, "%lld %lld", &from, &to);//from=source node, to=dest node addEdge(from, to, &weight);//add one edge form->to weight=0 last_vertex = from; ++outdegree; printf("Excute loadGraph() , m_total_edge= %ld\n",m_total_edge); for (int64_t i = 1; i < m_total_edge; ++i) { line= getEdgeLine();// Get edge line, for user. Read from current file offset. // return a string of edge in local subgraph // Note: modify this if an edge weight is to be read // modify the 'weight' variable sscanf(line, "%lld %lld", &from, &to); if (last_vertex != from) { addVertex(last_vertex, &value, outdegree);//addVertex and it's PageRank value,outdegree last_vertex = from; outdegree = 1; } else { ++outdegree; } addEdge(from, to, &weight); } addVertex(last_vertex, &value, outdegree); } }; class VERTEX_CLASS_NAME(OutputFormatter): public OutputFormatter { public: void writeResult() { int64_t vid; double value; char s[1024]; for (ResultIterator r_iter; ! 
r_iter.done(); r_iter.next() ) { r_iter.getIdValue(vid, &value); int n = sprintf(s, "%lld: %f\n", (unsigned long long)vid, value); writeNextResLine(s, n); } } }; // An aggregator that records a double value tom compute sum // <double> set the type of m_global and m_local value is double class VERTEX_CLASS_NAME(Aggregator): public Aggregator<double> { public: void init() { m_global = 0; //aggregator global value of AggrValue m_local = 0; //aggregator local value of AggrValue } void* getGlobal() { return &m_global; } void setGlobal(const void* p) { m_global = * (double *)p; } void* getLocal() { return &m_local; } void merge(const void* p) { m_global += * (double *)p; printf("excute merge() on PageRankAggregator class, m_global= %lf\n",m_global); } void accumulate(const void* p) { m_local += * (double *)p; printf("excute accumulate() on PageRankAggregator class, m_local= %lf\n",m_local); } }; class VERTEX_CLASS_NAME(): public Vertex <double, double, double> { public: void compute(MessageIterator* pmsgs) { printf("Excute compute(), MessageIterrator *pmsgs, pmsgs.size= %d\n ",pmsgs->m_vector_size); double val;//PageRank value if (getSuperstep() == 0) { //Get current superstep number val= 1.0; //initial all vertex's PageRank=1 u maybe not initial val there,because we initial val at loadGraph() printf("getSuperstep()==0 val=%lf\n",getValue()); } else { if (getSuperstep() >= 2) { double global_val = * (double *)getAggrGlobal(0); //Get global value of aggregator index=0 if (global_val < EPS) { //judge convergence printf("at compute() on PageRankVertex class, global_val==%lf\n",global_val); voteToHalt(); return; } } double sum = 0; for ( ; ! pmsgs->done(); pmsgs->next() ) { sum += pmsgs->getValue();//getValue() on MessageIterator class return message value. } val = 0.15 + 0.85 * sum; double acc = fabs(getValue() - val);//getValude on Vertex class return vertex value accumulateAggr(0, &acc);// Accumulate local value of some aggregator. 
first param is Aggregator index * mutableValue() = val; } //set new PageRank value and then send Message // * mutableValue() = val; const int64_t n = getOutEdgeIterator().size();//Get an out-edge iterator.size() sendMessageToAllNeighbors(val / n);//R_v/L_v R_v=value L_v=n } }; class VERTEX_CLASS_NAME(Graph): public Graph { public: VERTEX_CLASS_NAME(Aggregator)* aggregator; public: // argv[0]: PageRankVertex.so // argv[1]: <input path> // argv[2]: <output path> void init(int argc, char* argv[]) { setNumHosts(5); //machine count=5, one master and 4 workers setHost(0, "localhost", 1411); setHost(1, "localhost", 1421); setHost(2, "localhost", 1431); setHost(3, "localhost", 1441); setHost(4, "localhost", 1451); if (argc < 3) { //the number of param printf ("Usage: %s <input path> <output path>\n", argv[0]); exit(1); } m_pin_path = argv[1];//input file path m_pout_path = argv[2];//output file path aggregator = new VERTEX_CLASS_NAME(Aggregator)[1]; //define class array PageRankAggregator[1] regNumAggr(1);//set m_aggregator_cnt=param, aggregator count regAggr(0, &aggregator[0]); // m_paggregator[0]= second param ,type: pointers of AggregatorBase } void term() { delete[] aggregator; } }; /* STOP: do not change the code below. */ extern "C" Graph* create_graph() { Graph* pgraph = new VERTEX_CLASS_NAME(Graph); pgraph->m_pin_formatter = new VERTEX_CLASS_NAME(InputFormatter); pgraph->m_pout_formatter = new VERTEX_CLASS_NAME(OutputFormatter); pgraph->m_pver_base = new VERTEX_CLASS_NAME(); return pgraph; } extern "C" void destroy_graph(Graph* pobject) { delete ( VERTEX_CLASS_NAME()* )(pobject->m_pver_base); delete ( VERTEX_CLASS_NAME(OutputFormatter)* )(pobject->m_pout_formatter); delete ( VERTEX_CLASS_NAME(InputFormatter)* )(pobject->m_pin_formatter); delete ( VERTEX_CLASS_NAME(Graph)* )pobject; }