社交網路影響力最大化——貪心演算法實現(Python 實現)

#!/usr/bin/env python
# coding=UTF-8
from nose.tools import *
from networkx import *
from linear_threshold_clime import *
from linear_threshold  import *
import math

#計算圖中邊的權重
# Compute the influence weight of an edge in the graph
def Buv_calculate(G, u, v):
    """Return the influence weight B(u, v) of the edge u -> v.

    The weight is u's out-degree divided by the summed out-degrees of all
    of v's in-neighbours.  When the edge u -> v exists, u is one of those
    in-neighbours with out-degree >= 1, so the denominator is non-zero.

    Uses the public ``out_degree(node)`` / ``predecessors(node)`` API
    instead of reaching into the edge view's private ``_adjdict``.
    """
    out_deg_u = G.out_degree(u)  # out-degree of the source node u
    # All nodes with an edge into v (the original read in_edges()._adjdict[v]).
    in_neighbors = list(G.predecessors(v))
    # Sum of the out-degrees of v's in-neighbours.
    out_deg_sum = sum(G.out_degree(w) for w in in_neighbors)
    return out_deg_u / out_deg_sum

#計算每個節點AP的值
# Compute the AP value of a single node
def AP_calculate(node):
    """Return the AP (activation-potential) heuristic for ``node``.

    AP = (#nodes activated within two LT steps, excluding the seed layer)
         + 1 + e^(-sum of B(node, w) over node's out-neighbours)

    NOTE(review): relies on the module-level globals ``G`` (the graph) and
    ``linear_threshold`` being defined before it is called.
    """
    # Nodes reached within two diffusion steps of the linear-threshold model.
    layer_two_nodes = linear_threshold(G, [node], 2)
    del layer_two_nodes[-1]  # linear_threshold appends a final extra layer; drop it
    total = sum(len(layer) for layer in layer_two_nodes)
    # Exclude the seed layer itself from the activation count.
    lengths = total - len(layer_two_nodes[0])

    # Sum the B(node, w) weights over all of node's out-edges, via the
    # public successors() API instead of the private _adjdict attribute.
    Buv_sum = sum(Buv_calculate(G, node, w) for w in G.successors(node))
    cha_sum = 1 + math.e ** (-Buv_sum)
    return lengths + cha_sum

def select_layers(G, node_list_sorted, k1):
    """Select the first k1 seed nodes (the "first k/2" phase).

    Repeatedly takes the highest-AP candidate, runs the LT diffusion from
    it, and removes every node it activates from the candidate list so an
    already-covered node cannot be chosen again.

    Parameters: ``node_list_sorted`` is a list of (node, AP) pairs sorted
    by AP descending.  Returns (seed_nodes, remaining candidate list).
    """
    seed_nodes = []  # chosen seed nodes
    for _ in range(int(k1)):
        top = node_list_sorted[0][0]  # best remaining candidate by AP
        seed_nodes.append(top)
        layers = linear_threshold(G, [top])  # run the LT model from this seed
        del layers[-1]  # drop the trailing extra layer linear_threshold appends
        # Collect the seed plus everything it activated.
        activated = set()
        for layer in layers:
            activated.update(layer)
        # Rebuild the candidate list instead of calling remove() while
        # iterating it — the original skipped elements after each removal.
        node_list_sorted = [m for m in node_list_sorted if m[0] not in activated]
    return seed_nodes, node_list_sorted

def _select_others(seed_nodes, other_nodes, k2):
    """Greedily pick the remaining k2 seed nodes.

    Each round evaluates every remaining candidate by running the LT model
    on ``seed_nodes + [candidate]`` and keeps the candidate that activates
    the most nodes beyond the seed layer.

    Returns (seed_nodes, activation list of the final winning candidate).
    NOTE(review): relies on the module-level ``G`` and ``linear_threshold``.
    """
    layers_max = []  # defined up-front: the original raised NameError when k2 == 0
    candidates = list(other_nodes)
    for _ in range(int(k2)):
        if not candidates:  # nothing left to choose from
            break
        best_node = None
        best_extra = -1
        best_layer = []
        for cand in candidates:
            layers = linear_threshold(G, seed_nodes + [cand])
            del layers[-1]  # drop the trailing extra layer
            activated = []
            for layer in layers:
                activated.extend(layer)
            # Count (and keep) only nodes beyond the initial seed layer.
            seed_layer_len = len(layers[0])
            extra = len(activated) - seed_layer_len
            if extra > best_extra:  # strict '>' keeps the first max, like index(max(...))
                best_extra = extra
                best_node = cand
                best_layer = activated[seed_layer_len:]
        seed_nodes.append(best_node)
        layers_max = best_layer
        # Rebuild instead of deleting while iterating (original skipped items).
        candidates = [c for c in candidates if c not in seed_nodes]
    return seed_nodes, layers_max


if __name__ == '__main__':
    # Build the directed graph from the tab-separated edge list.
    datasets = []
    with open("Wiki-Vote.txt") as f:  # with-block closes the file (original leaked it)
        for row in f.read().split('\n'):
            # Skip blank lines (the trailing '' from split crashed int(''))
            # and '#' comment lines present in the raw SNAP dataset.
            if not row.strip() or row.startswith('#'):
                continue
            u, v = row.split('\t')
            datasets.append((int(u), int(v)))
    # `from networkx import *` binds DiGraph but NOT the name `networkx`,
    # so the original `networkx.DiGraph()` was a NameError.
    G = DiGraph()
    G.add_edges_from(datasets)

    all_nodes = list(G.nodes())      # every node in the graph
    out_edges_all = G.out_edges()    # kept as a global: AP_calculate reads it

    # Compute the AP value of every node, then sort by AP descending.
    node_dict = {}
    for node in all_nodes:
        node_dict[node] = AP_calculate(node)
    node_list_sorted = sorted(node_dict.items(), key=lambda d: d[1], reverse=True)

    # input() returns a str in Python 3; convert and use floor division.
    k = int(input('Please input inter of k='))

    # First k//2 seeds: layered AP-based selection.
    seed_nodes, node_list_sorted = select_layers(G, node_list_sorted, k // 2)

    # Remaining candidates = all nodes minus the seeds chosen so far.
    other_nodes = [n for n in all_nodes if n not in seed_nodes]

    # Remaining k//2 seeds: greedy marginal-gain selection.
    seed_nodes, layers_max = _select_others(seed_nodes, other_nodes, k // 2)
    layer = linear_threshold(G, seed_nodes)
    print(seed_nodes)
    print(layers_max)
    print(len(layers_max))
    print(layer)