1. 程式人生 > 其它 >Identification of Key Nodes Based on Integrating of Global and Local Information論文

Identification of Key Nodes Based on Integrating of Global and Local Information論文

  之前使用PageRank提取關鍵結點的方法是計算每個結點的PageRank的值,然後提取top10%的結點作為關鍵結點。但是PageRank是從全域性視角給網頁排序,從而得到的每個結點的PageRank的值。

  這篇文章結合複雜網路的區域性特徵和全域性特徵,通過標準化每個節點的度和介中心性,利用節點之間的連線強度將它們整合在一起。最後根據計算出的SDB值來表示結點的重要性:SDB值越大,結點在維護網路連線方面的作用越重要。

文章方法的主要思路就是計算SDB的值

SDB的值的計算公式如下:

$$SDB_i = \sum_{j \in N(i)} \left[ \left(\frac{k_i}{k_{avg}} + \frac{b_i}{b_{avg}}\right) + (1 - s_{ij})\left(\frac{k_i}{k_{avg}} + \frac{b_i}{b_{avg}}\right) \right]^2$$

 ki表示第i個結點的度,kavg表示結點度的平均數;bi表示第i個結點的介中心性,bavg表示結點的平均介中心性;Sij表示結點i和j之間的連線強度

其中ki(di)的計算如下:

 bi的計算:

Sij的計算如下:

$$s_{ij} = \frac{|N(i) \cap N(j)|}{\sqrt{k_i k_j}}$$

 N(i)和N(j)分別表示結點i和j的鄰居集合,ki和kj分別表示結點i和j的度.Sij=0表示結點i和j沒有公共鄰居,Sij=1表示結點i和j的鄰居完全相同。

實驗部分的話就是主要選取幾個指標進行對比的過程。下面給出復現程式碼:

import math

import pandas as pd

import utils.generate_network as gennet
import networkx as nx
import numpy as np
import os


# 1. Build the graph from a graph file
def gen_network(file_name):
    """
    Build a graph from the given graph-node file.

    :param file_name: path to the graph file
    :return: networkx graph
    """
    G = gennet.gen_network(file_name)
    return G


# 2. Per-node degree ki and the average degree kavg
def compute_degree(G):
    """
    Compute each node's degree and the average degree of the graph.

    :param G: networkx graph
    :return: ({node: degree}, average degree)
    """
    # G.degree() already yields (node, degree) pairs; build the dict once
    # instead of converting the view twice as the original did.
    degree_dict = dict(G.degree())
    avg_degree = np.average(list(degree_dict.values()))
    return degree_dict, avg_degree


# 3. Betweenness centrality bi and its average bavg
def compute_betweenness(G):
    """
    Compute each node's betweenness centrality and the average value.

    :param G: networkx graph
    :return: ({node: betweenness}, average betweenness)
    """
    # Unnormalized betweenness, matching the paper's definition
    betweenness = nx.betweenness_centrality(G, normalized=False)
    avg_betweenness = np.average(list(betweenness.values()))
    return betweenness, avg_betweenness


# 4. Connection strength sij between node pairs
def compute_connectivity(G, degree_dict):
    """
    Compute the connection strength between every pair of nodes:

        sij = |N(i) ∩ N(j)| / sqrt(ki * kj)

    sij = 0 means i and j have no common neighbor;
    sij = 1 means their neighborhoods are identical.

    :param G: graph object exposing .nodes and .neighbors(n)
    :param degree_dict: {node: degree}
    :return: {node_i: {node_j: sij}}
    """
    nodes_list = list(G.nodes)
    # Build each neighbor set once: set intersection is O(k) per pair,
    # versus the original O(k^2) list-comprehension scan.
    neighbor_sets = {n: set(G.neighbors(n)) for n in nodes_list}
    connectivity = {}
    for node_i in nodes_list:
        ki = degree_dict.get(node_i)
        conn_i = {}
        for node_j in nodes_list:
            kj = degree_dict.get(node_j)
            # Fix: an isolated node (degree 0) has no neighbors, so its
            # connection strength with any node is 0 by definition; the
            # original code raised ZeroDivisionError in this case.
            if ki == 0 or kj == 0:
                conn_i[node_j] = 0.0
                continue
            common = neighbor_sets[node_i] & neighbor_sets[node_j]
            conn_i[node_j] = len(common) / math.sqrt(ki * kj)
        connectivity[node_i] = conn_i
    return connectivity


# 5. SDB_i = sum over j in Neighbor(i) of
#    ((ki/kavg + bi/bavg) + (1 - sij) * (ki/kavg + bi/bavg))^2
def compute_sdb(G, out_path):
    """
    Compute the SDB value of every node; larger SDB means the node is
    more important for maintaining network connectivity.

    NOTE(review): divides by avg_betweenness — this is 0 for graphs where
    every betweenness is 0 (e.g. a complete graph); confirm inputs are
    non-trivial networks as assumed by the paper.

    :param G: networkx graph
    :param out_path: output file path (the to_csv call is kept commented
                     out, as in the original)
    :return: pandas DataFrame ["Node", "SDB"], sorted by SDB descending
    """
    degree_dict, avg_degree = compute_degree(G)
    betweenness_dict, avg_betweenness = compute_betweenness(G)
    connectivity_dict = compute_connectivity(G, degree_dict)
    node_list = list(G.nodes)
    SDB = []
    for node in node_list:
        ki = degree_dict.get(node)
        bi = betweenness_dict.get(node)
        connectivity = connectivity_dict.get(node)
        SDBi = 0
        for neighbor in G.neighbors(node):
            tmp = ki / avg_degree + bi / avg_betweenness
            sij = connectivity.get(neighbor)
            SDBi += math.pow(tmp + (1 - sij) * tmp, 2)
        SDB.append(SDBi)
    sdb_value = pd.DataFrame(columns=["Node", "SDB"])
    sdb_value["Node"] = node_list
    sdb_value["SDB"] = SDB
    # Sort descending so the most important nodes come first
    sdb_value.sort_values(by="SDB", inplace=True, ascending=False)
    # sdb_value.to_csv(out_path, index=False, encoding="utf8")
    return sdb_value


def get_top_10_percent(sdb_file, out_file):
    """
    Write the top 10% of nodes by SDB value to out_file.

    Assumes the input CSV is already sorted by SDB descending, as
    produced by compute_sdb.

    :param sdb_file: CSV file containing the SDB results
    :param out_file: output CSV path
    :return: None
    """
    sdb_data = pd.read_csv(sdb_file)
    data_num = sdb_data.shape[0]
    top_10_percent_num = int(data_num * 0.1)
    top_10_percent_data = sdb_data.head(top_10_percent_num)
    # top_10_percent_node = top_10_percent_data["node"].values.tolist()
    top_10_percent_data.to_csv(out_file, index=False, encoding="utf8")