阿新 • • 發佈:2018-11-07
社群劃分的好壞用模組度(modularity)來衡量。模組度定義如下:模組度是評估一個社群網路劃分好壞的度量方法,它的物理含義是社群內節點的連邊數與隨機情況下的邊數之差,它的取值範圍是 [−1/2, 1)。可以簡單地理解為社群內部所有邊的權重和減去與社群相連的邊的權重和。
# coding=utf-8
"""Louvain community detection on an undirected, weighted graph.

The graph is a dict of dicts: ``G[u][v]`` is the weight of the edge between
``u`` and ``v`` and is stored in both directions.
"""
import collections
import random
import sys


def load_graph(path):
    """Read a whitespace-separated edge list into a symmetric adjacency map.

    Each non-blank line is ``<vertex_i> <vertex_j> [<weight>]``; vertex ids
    are parsed as ``int`` and the weight as ``float``.  A missing weight
    column is taken to be ``1.0`` and blank lines are skipped.

    Returns:
        ``collections.defaultdict(dict)`` mapping each vertex to a
        ``{neighbor: weight}`` dict, with every edge stored in both
        directions.
    """
    G = collections.defaultdict(dict)
    with open(path) as text:
        for line in text:
            fields = line.split()
            if not fields:
                continue  # tolerate blank / trailing lines
            v_i = int(fields[0])
            v_j = int(fields[1])
            w = float(fields[2]) if len(fields) > 2 else 1.0
            G[v_i][v_j] = w
            G[v_j][v_i] = w
    return G


class Vertex(object):
    """A (super-)vertex of the current aggregation level."""

    def __init__(self, vid, cid, nodes, k_in=0):
        self._vid = vid      # id of this vertex at the current level
        self._cid = cid      # id of the community it currently belongs to
        self._nodes = nodes  # set of original node ids merged into it
        self._kin = k_in     # total weight of the edges inside this vertex


class Louvain(object):
    """Two-phase Louvain algorithm (local moving, then aggregation)."""

    def __init__(self, G, seed=None):
        """Start with every vertex of ``G`` in its own community.

        Args:
            G: symmetric adjacency map ``{u: {v: weight}}``.
            seed: optional seed for the visiting order.  When ``None`` the
                module-level ``random`` generator is used.
        """
        self._G = G
        # community id -> set of vertex ids currently assigned to it
        self._cid_vertices = {}
        # vertex id -> Vertex instance
        self._vid_vertex = {}
        self._rng = random if seed is None else random.Random(seed)
        directed_weight = 0.0
        for vid, neighbors in self._G.items():
            self._cid_vertices[vid] = {vid}
            self._vid_vertex[vid] = Vertex(vid, vid, {vid})
            directed_weight += sum(neighbors.values())
        # Every edge is stored in both directions, so half of the summed
        # adjacency weights is the total edge weight m.  It stays constant
        # across aggregation levels.
        self._m = directed_weight / 2.0

    def first_stage(self):
        """Greedily move vertices between neighbouring communities.

        Vertices are visited in a random order, repeatedly, until a full
        pass makes no move.

        Returns:
            ``True`` if at least one vertex changed community.
        """
        if not self._m:
            return False  # no edge weight at all: nothing can be gained
        two_m = 2.0 * self._m

        # Weighted degree of every vertex: weights stored in the current
        # graph plus twice the weight collapsed inside the vertex (an
        # internal edge touches the vertex with both of its ends).
        degree = {}
        for vid, vertex in self._vid_vertex.items():
            degree[vid] = (sum(self._G.get(vid, {}).values())
                           + 2.0 * vertex._kin)
        # Sum of member degrees per community, kept up to date on each move.
        community_total = {}
        for cid, members in self._cid_vertices.items():
            community_total[cid] = sum(degree[vid] for vid in members)

        visit_sequence = list(self._G.keys())
        self._rng.shuffle(visit_sequence)  # shuffle the list actually used

        mod_inc = False  # did this stage change the partition at all?
        while True:
            can_stop = True  # becomes False as soon as one vertex moves
            for v_vid in visit_sequence:
                vertex = self._vid_vertex[v_vid]
                v_cid = vertex._cid
                k_v = degree[v_vid]

                # Weight between v and each community it has a neighbour in.
                weight_to = {}
                for w_vid, weight in self._G[v_vid].items():
                    if w_vid == v_vid:
                        continue  # a self-loop never links v to another vertex
                    w_cid = self._vid_vertex[w_vid]._cid
                    weight_to[w_cid] = weight_to.get(w_cid, 0.0) + weight

                # Gain of inserting the isolated v into a community, up to
                # the constant factor 1/m:  k_v_in - k_v * tot / (2m).
                gains = {}
                for w_cid, k_v_in in weight_to.items():
                    tot = community_total[w_cid]
                    if w_cid == v_cid:
                        tot -= k_v  # v itself must not count towards tot
                    gains[w_cid] = k_v_in - k_v * tot / two_m

                # Move only for a positive gain that also strictly beats
                # staying put; moving on a tie would leave modularity
                # unchanged and could make the outer loop oscillate forever.
                best_cid = v_cid
                best_gain = max(gains.get(v_cid, 0.0), 0.0)
                for w_cid, gain in gains.items():
                    if w_cid != v_cid and gain > best_gain:
                        best_cid, best_gain = w_cid, gain

                if best_cid != v_cid:
                    vertex._cid = best_cid
                    self._cid_vertices[v_cid].remove(v_vid)
                    self._cid_vertices[best_cid].add(v_vid)
                    community_total[v_cid] -= k_v
                    community_total[best_cid] += k_v
                    can_stop = False
                    mod_inc = True
            if can_stop:
                break
        return mod_inc

    def second_stage(self):
        """Collapse every non-empty community into a single super-vertex.

        Rebuilds ``self._G`` so that its vertices are the communities and
        its edge weights are the summed weights between them; the weight
        inside a community is accumulated in the new vertex's ``_kin``.
        """
        cid_vertices = {}
        vid_vertex = {}
        # (cid_a, cid_b) -> summed weight; one entry per unordered pair.
        pair_weight = {}
        for cid, members in self._cid_vertices.items():
            if not members:
                continue
            merged = Vertex(cid, cid, set())
            for vid in members:
                child = self._vid_vertex[vid]
                merged._nodes.update(child._nodes)
                merged._kin += child._kin
                for nbr, weight in self._G.get(vid, {}).items():
                    if nbr in members:
                        # An internal edge is met once from each end.
                        merged._kin += weight / 2.0
                        continue
                    nbr_cid = self._vid_vertex[nbr]._cid
                    if (nbr_cid, cid) in pair_weight:
                        # Already fully summed while processing nbr_cid.
                        continue
                    key = (cid, nbr_cid)
                    pair_weight[key] = pair_weight.get(key, 0.0) + weight
            cid_vertices[cid] = {cid}
            vid_vertex[cid] = merged

        G = collections.defaultdict(dict)
        for (cid1, cid2), weight in pair_weight.items():
            if weight != 0:
                G[cid1][cid2] = weight
                G[cid2][cid1] = weight

        self._cid_vertices = cid_vertices
        self._vid_vertex = vid_vertex
        self._G = G

    def get_communities(self):
        """Return the current partition as a list of sets of original node ids."""
        communities = []
        for members in self._cid_vertices.values():
            if members:
                nodes = set()
                for vid in members:
                    nodes.update(self._vid_vertex[vid]._nodes)
                communities.append(nodes)
        return communities

    def execute(self):
        """Alternate both stages until a moving stage changes nothing.

        Returns:
            The final communities, as returned by ``get_communities``.
        """
        while self.first_stage():
            self.second_stage()
        return self.get_communities()


if __name__ == '__main__':
    # The edge-list path may be given as the first command-line argument.
    default_path = r'C:\Users\程勇\PycharmProjects\graduation\data\graph.txt'
    G = load_graph(sys.argv[1] if len(sys.argv) > 1 else default_path)
    algorithm = Louvain(G)
    communities = algorithm.execute()
    print(communities)