1.一些基礎方法和屬性
import matplotlib.pyplot as plt
import networkx as nx

# Basic NetworkX walkthrough: build an undirected graph, attach node/edge
# attributes, query it, and draw it.
#
# nx.DiGraph() would create an empty directed graph instead, and
# DiGraph.to_undirected() converts a directed graph to an undirected one.
G = nx.Graph()

# --- Nodes -----------------------------------------------------------------
G.add_node(1)  # add one node at a time
G.add_nodes_from([3, 5, 8, 6])  # add every node from an iterable
# (node, attribute_dict) pairs attach attributes (weights, labels, ...) while
# adding. Node 3 already exists, so only its attributes are updated.
G.add_nodes_from([(3, {"color": "red"}), (2, {"color": "green"})])
print(list(G.nodes))  # all nodes in the graph
G.nodes[1]["label"] = "a"  # set an attribute on an existing node
print(G.nodes.data())  # nodes together with their attributes
# [(1, {'label': 'a'}), (3, {'color': 'red'}), (5, {}), (8, {}), (6, {}), (2, {'color': 'green'})]

# --- Edges -----------------------------------------------------------------
G.add_edge(1, 3)
# 3-tuples carry an edge attribute dict. (1, 3) is already present and is not
# duplicated; nodes 0 and 9 do not exist yet and are created by this call.
G.add_edges_from(
    [(1, 3), (2, 8), (1, 5), (2, 6, {"weight": 3.1415}), (1, 2), (0, 9)]
)
# Alternative for (u, v, weight) triples:
#     G.add_weighted_edges_from([(1, 3, 0.2)])

# --- Queries ---------------------------------------------------------------
print(G[1])  # neighbours of node 1 as a dict-like view
print(G.has_edge(1, 3))  # whether the two nodes are connected by an edge
print(list(G.edges))  # all edges as a list
print(list(G.adj[1]))  # neighbours of node 1
print(list(nx.neighbors(G, 1)))  # same neighbours via the function API
print(G.degree[1])  # degree of node 1 -> 3
G[1][2]["weight"] = 3.1  # set a weight on an existing edge
print(G.edges[(1, 2)])  # attributes of edge (1, 2) -> {'weight': 3.1}
print(G.number_of_nodes())  # 8
print(G.number_of_edges())  # 6
print(nx.shortest_path(G, 3, 8))  # [3, 1, 2, 8]
print(nx.shortest_path_length(G, 3, 8))  # 3
# Shortest paths between every pair of mutually reachable nodes:
# sp[u][v] is the list of nodes on one shortest path from u to v.
sp = dict(nx.all_pairs_shortest_path(G))
print(nx.number_connected_components(G))  # number of connected components -> 2

nx.draw(G, with_labels=True)
plt.show()
G=nx.from_pandas_edgelist(df, source='source', target='target', edge_attr=None, create_using=None)
df 至少包含源節點和目的節點兩列,可以包含edge的屬性,每一行代表一條鏈接
source\target: 源節點的列名稱 和 目的節點的列名稱;
edge_attr: edge屬性的列名稱,可以是 str or int, iterable, True, or None;
create_using: 創建的圖的類型,nx.Graph() 、nx.DiGraph() 、nx.MultiGraph()、nx.MultiDiGraph();後兩種(MultiGraph、MultiDiGraph)允許同一對節點之間存在多條邊
畫圖函數
有的函數還可以單獨畫出節點,邊,節點的label,邊的label,可以點擊鏈接查看
nx.draw_networkx(G, pos=None,with_labels=True,labels=None, ) 這個是nx.draw()的加強版,可以標出節點的label和一些其他的特征
pos:圖的布局(節點位置算法)
nx.random_layout(G, center=None, dim=2, seed=None) 在[0,1)范圍內隨機生成節點的位置坐標
nx.spring_layout(G, k=None, pos=None, ) 等等
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd

# One row per edge: two endpoint columns plus two edge-attribute columns.
df = pd.DataFrame(
    {
        "node_1": [1, 2, 3, 4, 5],
        "node_2": [3, 1, 2, 5, 1],
        "weight": [0.3, 2, 1.2, 3, 0.8],
        "cost": ["a", "b", "c", "d", "e"],
    }
)

# edge_attr=True copies every remaining column onto the edges as attributes.
G = nx.from_pandas_edgelist(
    df,
    source="node_1",
    target="node_2",
    edge_attr=True,
    create_using=nx.Graph(),
)
print(G[1][3]["weight"])  # 0.3
print(G[1][3]["cost"])  # a

# The layout is a dict mapping each node to its position.
pos = nx.random_layout(G, seed=23)
nx.draw(G, pos=pos, with_labels=True)

# Label every edge with its weight. The same `pos` must be reused here,
# otherwise the labels will not line up with the drawn edges.
labels = nx.get_edge_attributes(G, "weight")
nx.draw_networkx_edge_labels(G, pos=pos, edge_labels=labels)
plt.show()
3.創建圖的鄰接矩陣
A = nx.adjacency_matrix(G, nodelist=None, weight='weight')
nodelist:節點在行和列的位置;如果是None,則按G.nodes排序
L = nx.laplacian_matrix(G, nodelist=None, weight='weight')
拉普拉斯矩陣(L=D-A) D是節點的度的對角矩陣,A是節點的鄰接矩陣
print(A) 是按節點對的形式顯示;print(A.todense()) 按矩陣形式顯示
import networkx as nx
import pandas as pd

# Build a small weighted, undirected graph from an edge table.
node_1 = [1, 2, 3, 4, 5]
node_2 = [3, 1, 2, 5, 1]
weight = [0.3, 2, 1.2, 1, 0.8]
df = pd.DataFrame({"node_1": node_1, "node_2": node_2, "weight": weight})
G = nx.from_pandas_edgelist(
    df, "node_1", "node_2", edge_attr=True, create_using=nx.Graph()
)

nodes = list(G.nodes)
print(nodes)  # [1, 3, 2, 4, 5]

# Rows/columns follow the order of `nodes` (insertion order of the graph),
# not the numerically sorted node ids.
# nx.to_numpy_matrix was removed in NetworkX 3.0; to_numpy_array is its
# replacement and returns a plain numpy.ndarray.
adj_G = nx.to_numpy_array(G, nodelist=nodes)
print(adj_G)
# [[0.  0.3 2.  0.  0.8]
#  [0.3 0.  1.2 0.  0. ]
#  [2.  1.2 0.  0.  0. ]
#  [0.  0.  0.  0.  1. ]
#  [0.8 0.  0.  1.  0. ]]
4.從鄰接矩陣創建圖
G=nx.from_numpy_array(A, parallel_edges=False, create_using=None)(舊版的 nx.from_numpy_matrix 已在 NetworkX 3.0 中移除,請改用 nx.from_numpy_array)
與dictionary,list,numpy,pandas有關的操作
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np

# Adjacency matrix of a weighted directed graph: A[i][j] is the weight of the
# edge i -> j, and 0 means "no edge".
A = np.array([[0, 0, 3], [2, 0, 0], [0, 1, 0]])

# Nodes are labelled 0, 1, 2 after the matrix indices.
# nx.from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array is its
# replacement and stores each non-zero entry as the edge's 'weight'.
G = nx.from_numpy_array(A, create_using=nx.DiGraph())

pos = nx.random_layout(G, seed=23)
nx.draw(G, pos=pos, with_labels=True)

# Draw the weights on the edges, reusing the same layout.
labels = nx.get_edge_attributes(G, "weight")
nx.draw_networkx_edge_labels(G, pos=pos, edge_labels=labels)
plt.show()
G=nx.read_edgelist(path, comments='#', delimiter=None,create_using=None,
nodetype=None, data=True, edgetype=None, encoding='utf-8')
path:文件名或已打開的文件對象;如果是打開的文件,則要以 'rb' 的形式打開。文件名以 .gz 或 .bz2 結尾時會自動解壓。
comments:注釋行的起始字符(默認為 '#');如果文件開頭有表頭等不是節點對的行,則要在該行開頭加上 '#' ,否則會出錯,例如:#source,destination
delimiter:節點之間的分隔符,默認為空格;如果是其他字符作為分隔符,這里一定要設置,否則讀不出來
nodetype:將節點數據從字符串轉化為 int, float 等數據類型;edgetype:將邊的數據從字符串轉化為 int, float 等數據類型
data:邊的權重等數據,是一個bool或者元組; 例如權重數據 data=(('weight', float),),第一個位置表示key-name,第二個位置表示data-type
nx.write_edgelist(G, path, comments='#', delimiter=' ', data=True, encoding='utf-8')
把一個圖按節點對的形式寫入文件
data:True表示寫入邊的屬性;False表示不寫入邊的屬性
G=nx.read_weighted_edgelist(path, comments='#', delimiter=None, create_using=None, nodetype=None, encoding='utf-8')
讀帶權重的文件
G=nx.parse_edgelist(lines, comments='#', delimiter=None, create_using=None, nodetype=None, data=True)
從列表里讀節點對數據,lines是包含節點對的列表 例如['1 2', '1 3', '2 3']
6.生成一個Barabási–Albert (BA network) 和一個Watts–Strogatz small-world graph
import matplotlib.pyplot as plt
import networkx as nx

# Barabási–Albert graph: 50 nodes, each new node attaches with 5 edges.
G1 = nx.barabasi_albert_graph(50, 5)
# Optionally persist it: nx.write_edgelist(G1, 'dataset/BAnetwork', data=False)

# Watts–Strogatz small-world graph: 50 nodes, ring of 2 nearest neighbours,
# rewiring probability 0.1.
G2 = nx.watts_strogatz_graph(50, 2, 0.1)
# Optionally persist it: nx.write_edgelist(G2, 'dataset/WSnetwork', data=False)

# Show the two graphs one after the other, each in its own figure.
for generated in (G1, G2):
    nx.draw(generated, with_labels=True)
    plt.show()
7.網絡分析
from collections import Counter

import matplotlib.pyplot as plt
import networkx as nx


def _degree_histogram(degree_view):
    """Return the distinct degrees (sorted) and how many nodes have each.

    Args:
        degree_view: iterable of ``(node, degree)`` pairs, e.g. the result of
            ``G.in_degree()`` or ``G.out_degree()``.

    Returns:
        A ``(values, counts)`` pair of equally long lists, where ``counts[i]``
        is the number of nodes whose degree equals ``values[i]``.
    """
    # One pass with Counter instead of calling list.count() per distinct degree.
    tally = Counter(degree for _, degree in degree_view)
    values = sorted(tally)
    return values, [tally[value] for value in values]


G = nx.read_edgelist("dataset/twitter-edges.edges", create_using=nx.DiGraph())

# Number of nodes and number of edges.
N, K = G.order(), G.size()
# Edges per node; in a directed graph this equals both the mean in-degree and
# the mean out-degree.
avg_deg = K / N
print(N, K, avg_deg)

# --- Degree distribution (power-law plot) -----------------------------------
in_values, in_hist = _degree_histogram(G.in_degree())
out_values, out_hist = _degree_histogram(G.out_degree())

plt.figure()
plt.grid(True)
plt.loglog(in_values, in_hist, "ro-")  # log-log axes
plt.loglog(out_values, out_hist, "bv-")
plt.legend(["In-degree", "Out-degree"])
plt.xlabel("Degree")
plt.ylabel("Number of nodes")
plt.show()

# --- Clustering coefficients -------------------------------------------------
# The clustering analysis is run on the undirected version of the graph.
G_ud = G.to_undirected()
# Node ids are the strings read from the file (no nodetype was given), hence '0'.
print("clust of 0:", nx.clustering(G_ud, "0"))
clust_coefficients = nx.clustering(G_ud)
avg_clust = sum(clust_coefficients.values()) / len(clust_coefficients)
print("avg_clust:", avg_clust)  # average computed by hand
print("avg_clust:", nx.average_clustering(G_ud))  # same value via the library helper