SAN - Practice 1 Python網路分析例項基本操作
一個都是賣Tech書的網站:https://www.packtpub.com/all-products/all-books
一個很好用的表格轉換器:https://tableconvert.com/
一個pulp求解物流配送問題:https://zhuanlan.zhihu.com/p/92421927 (有建立Digraph的程式碼)
主要參考
Complex Network Analysis in Python: Recognize - Construct - Visualize - Analyze
參考code地址:https://github.com/PacktPublishing/Network-Science-with-Python-and-NetworkX-Quick-Start-Guide
以及其他很多的code:https://github.com/PacktPublishing
1 What is a network?
-
這一章大概介紹了歷史 基本概念
-
常見的網路:social network, flow network, similarity network, spatial network
常見的Tools:
NetworkX, iGraph, graph-tool, NetworKit
| | graph-tool | iGraph | NetworkX | NetworKit |
|---|---|---|---|---|
| 語言 | C/C++ | C/C++ | Python | C/C++ |
| 語言繫結 | Python | C, Python, R | Python | C++, Python |
| 是否內設community detection | Y | Y | N | Y |
| 是否內設advanced layouts | Y | Y | N | N |
2 From Data to networks
基本的操作
import networkx as nx
G = nx.Graph()
DG = nx.DiGraph()
MG = nx.MultiGraph()
G.add_node()
G.add_nodes_from()
G.graph
G.nodes  # G.nodes(data=True) 返回節點的ID和屬性
G.edges(self, nbunch=None, data=False, default=None)  # data=True 返回 (u, v, ddict),False 返回 (u, v),預設 False
union(G1, G2)
to_undirected(G)  # return an undirected representation of G
DiGraph.to_undirected(reciprocal=False, as_view=False)  # reciprocal 如果是 True 代表 keep edges that appear in both directions in the original digraph
import matplotlib.pyplot as plt
# pos (dictionary, optional) – A dictionary with nodes as keys and positions as values. If not specified a spring layout positioning will be computed.
nx.draw_networkx(G[, pos, arrows, with_labels])
nx.draw_networkx_nodes(G, pos[, nodelist, …])
nx.draw_networkx_edges(G, pos[, edgelist, …])
nx.draw_networkx_labels(G, pos[, labels, …])
nx.spring_layout(G[, k, pos, fixed, …])
讀edge list
# Load a graph from an edge-list file ...
G = nx.read_edgelist('path')
# ... or from a GEXF file, passing node_type=int.
# NOTE(review): presumably these two reads are shown as alternatives; as
# written, the second assignment replaces the first.
G = nx.read_gexf('path', node_type=int)
# Visualize as usual: compute a spring layout, then draw with it.
pos = nx.spring_layout(G)
nx.draw_networkx(G, pos=pos)
# Add margins around the plotted data on the current axes.
axes = plt.gca()
axes.margins(0.2, 0.2)
或者讀Digraph就是
# Read the same kind of edge list, but build the graph as an nx.DiGraph.
G = nx.read_edgelist('path', create_using = nx.DiGraph)
讀weighted list
# Read a weighted edge list; each edge's weight is read back below from its
# 'weight' attribute.
G = nx.read_weighted_edgelist('path')
# Extract the weight of every edge, in G.edges order. The third element
# yielded by G.edges(data=True) is the attribute dict, so it is the name that
# must be indexed with 'weight'.
weights = [d['weight'] for u, v, d in G.edges(data=True)]
# Compute a layout for THIS graph rather than reusing one computed for a
# previously loaded graph object.
pos = nx.spring_layout(G)
# Colour each edge by its weight using the Greys colormap.
nx.draw_networkx(G, pos, width=4, edge_color=weights, edge_cmap=plt.cm.Greys)
plt.gca().margins(0.2, 0.2)
讀帶了顏色的edge list
# Edge list whose extra columns are parsed as a float 'weight' and a str 'color'.
edge_schema = [('weight', float), ('color', str)]
G = nx.read_edgelist('path', data=edge_schema)
# Colour of every edge, in G.edges order.
colors = [attrs['color'] for _, _, attrs in G.edges(data=True)]
3 Working with networks in NetworkX 一個例子
The graph class - undirected networks
import matplotlib.pyplot as plt
%matplotlib inline
# Figure defaults: 7.5 x 7.5 figures with all four axes spines switched off.
spine_rc = {
    f'axes.spines.{side}': False
    for side in ('right', 'left', 'top', 'bottom')
}
plt.rcParams.update({'figure.figsize': (7.5, 7.5), **spine_rc})
# Seed the random number generators so results are repeatable between runs.
import random
import zlib

from numpy import random as nprand

# The built-in hash() of a str is salted per interpreter process
# (PYTHONHASHSEED), so it would yield a different seed on every run.
# CRC-32 of the same text is stable and is already an unsigned 32-bit value,
# matching the range the original `% 2**32` produced.
seed = zlib.crc32("Network Science in Python".encode("utf-8"))
nprand.seed(seed)
random.seed(seed)
import networkx as nx
# Use the Zachary's Karate Club graph from the nx library as the example.
# Each node in the returned graph has a node attribute 'club' that indicates
# the name of the club to which the member represented by that node belongs,
# either 'Mr. Hi' or 'Officer'.
G = nx.karate_club_graph()
# Signature, for reference:
# spring_layout(G, dim=2, k=None, pos=None, fixed=None, iterations=50, weight='weight', scale=1.0)
# k: float (default = None); it can be increased further.
karate_pos = nx.spring_layout(G, k=0.3)
nx.draw_networkx(G, pos=karate_pos)
Output:
# Inspect basic properties of the graph.
# NOTE(review): these are bare expressions, so their values are only shown in
# an interactive session -- presumably this came from a notebook (see the
# %matplotlib magic earlier); confirm.
G.number_of_nodes()
G.number_of_edges()
list(G.adj.items())
# Check whether the graph contains a given node id
mr_hi = 0
G.has_node(mr_hi)
# List that node's neighbors
list(G.neighbors(mr_hi))
# output: [1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 17, 19, 21, 31]
# Look up the 'club' attribute of the nodes, keyed by node id
nx.get_node_attributes(G, 'club')
# output looks like: {0: 'Mr. Hi',
# 1: 'Mr. Hi',}
# Add attributes to nodes and edges.
# Club membership per node, indexed by node id; the members are split into
# two clubs, encoded as 0 and 1.
member_club = [
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
    0, 0, 0, 0, 1, 1, 0, 0, 1, 0,
    1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1,
]
# Assign the club on top of the existing graph: each node's 'club' attribute
# is overwritten with its entry in member_club.
for node_id in G.nodes:
    G.nodes[node_id]['club'] = member_club[node_id]
# Colour the two groups: red for club 0, blue otherwise.
node_colors = [
    'r' if G.nodes[v]['club'] == 0 else 'b'
    for v in G
]
# `with_labels=True` is the keyword that draws the node ids; `label=` only
# sets a legend label, so with it nx.draw would leave the nodes unlabeled.
nx.draw(G, karate_pos, with_labels=True, node_color=node_colors)
Output:
如何用不同的線段型別分開nodes之間的連線呢
# This part separates internal connections (both endpoints in the same club)
# from external ones. Every edge is visited and the 'club' attributes of its
# two endpoints are compared. An edge's attributes live in G.edges[v, w],
# where v and w are the node ids of the edge endpoints.
for u, v, attrs in G.edges(data=True):
    attrs['internal'] = G.nodes[u]['club'] == G.nodes[v]['club']
# Split the edges, each a 2-tuple of node ids, by the flag just stored.
internal, external = [], []
for u, v, is_internal in G.edges(data='internal'):
    (internal if is_internal else external).append((u, v))
# networkx draws one line style per call, so nodes, labels and the two edge
# groups are drawn separately.
nx.draw_networkx_nodes(G, karate_pos, node_color=node_colors)
nx.draw_networkx_labels(G, karate_pos)
nx.draw_networkx_edges(G, karate_pos, edgelist=internal)
nx.draw_networkx_edges(G, karate_pos, style='dashed', edgelist=external)
由於這個club network沒有包含edge的strength 所以要def一個計算strength的函式
strength 又是與兩個端點的共同鄰居 (common neighbors) 數量相關:共同鄰居越多,strength 越大
Graph.neighbors(n) returns an iterator over all neighbors of node n (即 iter(G[n]))
set_node_attributes(G, values, name=None)--Sets node attributes from a given value or dictionary of values.
def tie_strength(G, v, w):
    """Return the tie strength between nodes ``v`` and ``w`` in ``G``.

    The strength is one plus the number of neighbors the two nodes have in
    common, as reported by ``G.neighbors``.
    """
    shared = set(G.neighbors(v)).intersection(G.neighbors(w))
    return len(shared) + 1
# Weight every edge by the tie strength of its two endpoints.
for u, v, attrs in G.edges(data=True):
    attrs['weight'] = tie_strength(G, u, v)
# Store the weights in a list, in G.edges order.
edge_weight = [weight for _, _, weight in G.edges(data='weight')]
# Recompute the spring layout starting from karate_pos, naming 'weight' as
# the edge attribute for spring_layout to use.
weighted_pos = nx.spring_layout(G, weight='weight', k=0.3, pos=karate_pos)
# Draw the network with edge colours determined by the edge weights.
# Notes on the drawing parameters:
#   pos: dictionary holding the layout information; nodelist, edgelist (default=G.edges())
#   node_size, node_color, node_shape; cmap: matplotlib colormap
#   edge_color: a colour string, or already-specified numeric values
#   edge_vmin, edge_vmax: floats, minimum and maximum for edge colormap scaling
weighted_style = dict(
    width=8,
    node_color=node_colors,
    edge_color=edge_weight,
    edge_cmap=plt.cm.Blues,
    edge_vmin=0,
    edge_vmax=6,
)
nx.draw_networkx(G, weighted_pos, **weighted_style)
# Draw gray edges over the same layout: internal edges with no style given,
# external edges dashed.
nx.draw_networkx_edges(G, weighted_pos, edge_color='gray', edgelist=internal)
nx.draw_networkx_edges(
    G, weighted_pos, edge_color='gray', edgelist=external, style='dashed'
)