用Apache Spark進行大資料處理之用Spark GraphX圖資料分析(6)
阿新 • 發佈:2019-01-29
import org.apache.spark._
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD
import java.util.Calendar
// Load the graph from an edge-list file: each line of the input file is a
// pair of vertex ids describing one directed edge.
val graph = GraphLoader.edgeListFile(sc, "data/page-rank-yt-data.txt")
// Basic graph statistics: counts of vertices and edges.
val vertexCount = graph.numVertices
val vertices = graph.vertices
vertices.count()
val edgeCount = graph.numEdges
val edges = graph.edges
edges.count()
//
// Explore some Spark GraphX APIs: triplets, inDegrees and outDegrees.
//
// A triplet combines an edge with its source and destination vertex data.
val triplets = graph.triplets
triplets.count()
triplets.take(5)
// NOTE(review): collect() pulls the entire result to the driver; fine for
// this small demo dataset, but it would not scale to a large graph.
val inDegrees = graph.inDegrees
inDegrees.collect()
val outDegrees = graph.outDegrees
outDegrees.collect()
// degrees = inDegrees + outDegrees for every vertex.
val degrees = graph.degrees
degrees.collect()
// Static PageRank: runs a fixed number of iterations (here 10) regardless
// of convergence.
val staticPageRank = graph.staticPageRank(10)
staticPageRank.vertices.collect()
// Dynamic PageRank: iterates until the per-vertex change drops below the
// given tolerance (0.001). Capture timestamps around the call so the
// elapsed time is actually computed instead of relying on the REPL
// echoing two bare Calendar values.
val rankStart = Calendar.getInstance().getTime()
val pageRank = graph.pageRank(0.001).vertices
val rankEnd = Calendar.getInstance().getTime()
println(s"pageRank took ${rankEnd.getTime - rankStart.getTime} ms")
// Print the 5 highest-ranked vertices. The default tuple Ordering would
// sort (VertexId, Double) pairs by vertex id first — i.e. top(5) would
// return the largest vertex ids, not the best ranks — so order explicitly
// by the PageRank score (the second tuple element).
println(pageRank.top(5)(Ordering.by[(VertexId, Double), Double](_._2)).mkString("\n"))