DGL學習(五): DGL構建異質圖

本文轉載自查看原文 2020-07-23 15:29 1882 圖神經網絡

使用dgl.heterograph()構建異質圖，其參數是一個字典，key是一個三元組(srctype , edgetype, dsttype), 這個三元組被稱為規范邊類型( canonical edge types)。value 是一堆源數組和目標數組。節點是從零開始的整數ID，不同類型的節點ID具有單獨的計數。

import numpy as np
import dgl
import scipy.sparse as sp
import networkx as nx

ratings = dgl.heterograph(
    {('user', '+1', 'movie') : (np.array([0, 0, 1]), np.array([0, 1, 0])),
     ('user', '-1', 'movie') : (np.array([2]), np.array([1]))})

也可以從稀疏矩陣/networkX構造異質圖。

## 從稀疏矩陣構造圖
plus1 = sp.coo_matrix(([1, 1, 1], ([0, 0, 1], [0, 1, 0])), shape=(3, 2))
minus1 = sp.coo_matrix(([1], ([2], [1])), shape=(3, 2))
ratings = dgl.heterograph(
    {('user', '+1', 'movie') : plus1,
     ('user', '-1', 'movie') : minus1})

## 從networkX構造圖
plus1 = nx.DiGraph()
plus1.add_nodes_from(['u0', 'u1', 'u2'], bipartite=0)
plus1.add_nodes_from(['m0', 'm1'], bipartite=1)
plus1.add_edges_from([('u0', 'm0'), ('u0', 'm1'), ('u1', 'm0')])
ratings = dgl.heterograph(
    {('user', '+1', 'movie') : plus1,
     ('user', '-1', 'movie') : minus1})

使用ACM異質圖數據集：

import scipy.io
import urllib.request

data_url = 'https://data.dgl.ai/dataset/ACM.mat'
data_file_path = '/tmp/ACM.mat'

urllib.request.urlretrieve(data_url, data_file_path)
data = scipy.io.loadmat(data_file_path)
print(list(data.keys()))

['__header__', '__version__', '__globals__', 'TvsP', 'PvsA', 'PvsV', 'AvsF', 'VvsC', 'PvsL', 'PvsC', 'A', 'C', 'F', 'L', 'P', 'T', 'V', 'PvsT', 'CNormPvsA', 'RNormPvsA', 'CNormPvsC', 'RNormPvsC', 'CNormPvsT', 'RNormPvsT', 'CNormPvsV', 'RNormPvsV', 'CNormVvsC', 'RNormVvsC', 'CNormAvsF', 'RNormAvsF', 'CNormPvsL', 'RNormPvsL', 'stopwords', 'nPvsT', 'nT', 'CNormnPvsT', 'RNormnPvsT', 'nnPvsT', 'nnT', 'CNormnnPvsT', 'RNormnnPvsT', 'PvsP', 'CNormPvsP', 'RNormPvsP']

A代表作者， P代表論文， C代表會議，L是主題代碼; 邊存儲為鍵XvsY下的SciPy稀疏矩陣，其中X和Y可以是任何節點類型代碼。
輸出PvsA (paper - author) 的一些統計量。

print(type(data['PvsA']))
print('#Papers:', data['PvsA'].shape[0])
print('#Authors:', data['PvsA'].shape[1])
print('#Links:', data['PvsA'].nnz)

轉化Scipy Matrix 為 dgl.heterograph()。

pa_g = dgl.heterograph({('paper', 'written-by', 'author') : data['PvsA']})
# equivalent (shorter) API for creating heterograph with two node types:
pa_g = dgl.bipartite(data['PvsA'], 'paper', 'written-by', 'author')

打印出類型名稱和其他結構信息。

print('Node types:', pa_g.ntypes)
print('Edge types:', pa_g.etypes)
print('Canonical edge types:', pa_g.canonical_etypes)

# 節點和邊都是從零開始的整數ID，每種類型都有其自己的計數。要區分不同類型的節點和邊緣，需要指定類型名稱作為參數。
print(pa_g.number_of_nodes('paper'))

# 如果規范邊類型名稱是唯一可區分的，則可以將其簡化為邊類型名稱。
print(pa_g.number_of_edges(('paper', 'written-by', 'author')))
print(pa_g.number_of_edges('written-by'))
## 獲得論文#1 的作者
print(pa_g.successors(1, etype='written-by'))

Node types: ['paper', 'author']
Edge types: ['written-by']
Canonical edge types: [('paper', 'written-by', 'author')]
12499
37055
37055
tensor([3532, 6421, 8516, 8560])

Metagraph

## Metagraph（或網絡模式）是異質圖結構的一個概覽。 被用作異質圖的模板，它描述了網絡中存在多少種對象以及可能存在的鏈接。
print(G.metagraph)

Graph(num_nodes={'author': 17431, 'paper': 12499, 'subject': 73},
      num_edges={('paper', 'written-by', 'author'): 37055, ('author', 'writing', 'paper'): 37055, ('paper', 'citing', 'paper'): 30789, ('paper', 'cited', 'paper'): 30789, ('paper', 'is-about', 'subject'): 12499, ('subject', 'has', 'paper'): 12499},
      metagraph=[('author', 'paper'), ('paper', 'author'), ('paper', 'paper'), ('paper', 'paper'), ('paper', 'subject'), ('subject', 'paper')])

半監督節點分類(Relational-GCN)

搭建異質圖神經網絡:

import dgl.function as fn

class HeteroRGCNLayer(nn.Module):
    def __init__(self, in_size, out_size, etypes):
        super(HeteroRGCNLayer, self).__init__()
        # W_r for each relation
        self.weight = nn.ModuleDict({
                name : nn.Linear(in_size, out_size) for name in etypes
            })

    def forward(self, G, feat_dict):
        # The input is a dictionary of node features for each type
        funcs = {}
        for srctype, etype, dsttype in G.canonical_etypes:
            # 計算每一類etype的 W_r * h
            Wh = self.weight[etype](feat_dict[srctype])
            # Save it in graph for message passing
            G.nodes[srctype].data['Wh_%s' % etype] = Wh
            # 消息函數 copy_u: 將源節點的特征聚合到'm'中; reduce函數: 將'm'求均值賦值給 'h'
            funcs[etype] = (fn.copy_u('Wh_%s' % etype, 'm'), fn.mean('m', 'h'))
        # Trigger message passing of multiple types.
        # The first argument is the message passing functions for each relation.
        # The second one is the type wise reducer, could be "sum", "max",
        # "min", "mean", "stack"
        G.multi_update_all(funcs, 'sum')
        # return the updated node feature dictionary
        return {ntype : G.nodes[ntype].data['h'] for ntype in G.ntypes}

class HeteroRGCN(nn.Module):
    def __init__(self, G, in_size, hidden_size, out_size):
        super(HeteroRGCN, self).__init__()
        # Use trainable node embeddings as featureless inputs.
        embed_dict = {ntype : nn.Parameter(torch.Tensor(G.number_of_nodes(ntype), in_size))
                      for ntype in G.ntypes}
        for key, embed in embed_dict.items():
            nn.init.xavier_uniform_(embed)
        self.embed = nn.ParameterDict(embed_dict)
        # create layers
        self.layer1 = HeteroRGCNLayer(in_size, hidden_size, G.etypes)
        self.layer2 = HeteroRGCNLayer(hidden_size, out_size, G.etypes)

    def forward(self, G):
        h_dict = self.layer1(G, self.embed)
        h_dict = {k : F.leaky_relu(h) for k, h in h_dict.items()}
        h_dict = self.layer2(G, h_dict)
        # get paper logits
        return h_dict['paper']

Train and evaluate

model = HeteroRGCN(G, 10, 10, 3)

opt = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

best_val_acc = 0
best_test_acc = 0

for epoch in range(100):
    logits = model(G)
    # The loss is computed only for labeled nodes.
    loss = F.cross_entropy(logits[train_idx], labels[train_idx])

    pred = logits.argmax(1)
    train_acc = (pred[train_idx] == labels[train_idx]).float().mean()
    val_acc = (pred[val_idx] == labels[val_idx]).float().mean()
    test_acc = (pred[test_idx] == labels[test_idx]).float().mean()

    if best_val_acc < val_acc:
        best_val_acc = val_acc
        best_test_acc = test_acc

    opt.zero_grad()
    loss.backward()
    opt.step()

    if epoch % 5 == 0:
        print('Loss %.4f, Train Acc %.4f, Val Acc %.4f (Best %.4f), Test Acc %.4f (Best %.4f)' % (
            loss.item(),
            train_acc.item(),
            val_acc.item(),
            best_val_acc.item(),
            test_acc.item(),
            best_test_acc.item(),
        ))

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 DGL學習(四): 圖分類教程異質性鄰接表構建圖 DGL說明文檔 php中構建樹狀圖順序功能圖的學習 PyTorch如何構建深度學習模型？學習Python包的構建與分發 ECharts學習（2）--餅狀圖之南丁格爾圖用HTML5構建一個流程圖繪制工具