Learning DGL: Implementing the Official HAN Tutorial Code
Posted by liyinggang
The main function: loads the data and runs training.
import torch
from sklearn.metrics import f1_score

from utils import load_data, EarlyStopping

def score(logits, labels):
    # accuracy, micro-F1 and macro-F1
    _, indices = torch.max(logits, dim=1)
    prediction = indices.long().cpu().numpy()
    labels = labels.cpu().numpy()
    accuracy = (prediction == labels).sum() / len(prediction)
    micro_f1 = f1_score(labels, prediction, average='micro')
    macro_f1 = f1_score(labels, prediction, average='macro')
    return accuracy, micro_f1, macro_f1

# evaluation on a given mask (validation or test)
def evaluate(model, g, features, labels, mask, loss_func):
    model.eval()
    with torch.no_grad():
        logits = model(g, features)
    loss = loss_func(logits[mask], labels[mask])
    accuracy, micro_f1, macro_f1 = score(logits[mask], labels[mask])
    return loss, accuracy, micro_f1, macro_f1

def main(args):
    # If args['hetero'] is True, g would be a heterogeneous graph.
    # Otherwise, it will be a list of homogeneous graphs.
    g, features, labels, num_classes, train_idx, val_idx, test_idx, \
        train_mask, val_mask, test_mask = load_data(args['dataset'])

    if hasattr(torch, 'BoolTensor'):
        train_mask = train_mask.bool()
        val_mask = val_mask.bool()
        test_mask = test_mask.bool()

    features = features.to(args['device'])
    labels = labels.to(args['device'])
    train_mask = train_mask.to(args['device'])
    val_mask = val_mask.to(args['device'])
    test_mask = test_mask.to(args['device'])

    if args['hetero']:
        from model_hetero import HAN
        model = HAN(meta_paths=[['pa', 'ap'], ['pf', 'fp']],
                    in_size=features.shape[1],
                    hidden_size=args['hidden_units'],
                    out_size=num_classes,
                    num_heads=args['num_heads'],
                    dropout=args['dropout']).to(args['device'])
        g = g.to(args['device'])
    else:
        from model import HAN
        model = HAN(num_meta_paths=len(g),
                    in_size=features.shape[1],
                    hidden_size=args['hidden_units'],
                    out_size=num_classes,
                    num_heads=args['num_heads'],
                    dropout=args['dropout']).to(args['device'])
        g = [graph.to(args['device']) for graph in g]  # list of homogeneous graphs, one per meta-path

    stopper = EarlyStopping(patience=args['patience'])
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    for epoch in range(args['num_epochs']):
        model.train()
        logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(logits[train_mask], labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1 = evaluate(model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print('Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
              'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.format(
            epoch + 1, loss.item(), train_micro_f1, train_macro_f1, val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    test_loss, test_acc, test_micro_f1, test_macro_f1 = evaluate(model, g, features, labels, test_mask, loss_fcn)
    print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.format(
        test_loss.item(), test_micro_f1, test_macro_f1))

if __name__ == '__main__':
    import argparse
    from utils import setup

    parser = argparse.ArgumentParser('HAN')
    parser.add_argument('-s', '--seed', type=int, default=1,
                        help='Random seed')
    parser.add_argument('-ld', '--log-dir', type=str, default='results',
                        help='Dir for saving training results')
    parser.add_argument('--hetero', type=bool, default=True,
                        help="Use metapath coalescing with DGL's own dataset")
    args = parser.parse_args().__dict__

    args = setup(args)

    main(args)
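With the modules below saved alongside this script (the imports assume utils.py, model.py and model_hetero.py; the entry script itself is main.py in the DGL example), training starts with a plain `python main.py`. One caveat worth knowing: because the `--hetero` flag uses `type=bool`, passing `--hetero False` on the command line does not switch to the homogeneous-graph path, since `bool()` of any non-empty string is True. A small sketch of that behaviour:

import argparse

# Sketch only: why `type=bool` is unreliable for command-line flags.
parser = argparse.ArgumentParser('HAN')
parser.add_argument('--hetero', type=bool, default=True)
print(parser.parse_args([]).hetero)                     # True (the default)
print(parser.parse_args(['--hetero', 'False']).hetero)  # still True: bool('False') is True
# A common fix is action='store_true' or a custom string-to-bool converter.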
utils handles data loading and the early-stopping strategy.
import datetime
import dgl
import errno
import numpy as np
import os
import pickle
import random
import torch

from dgl.data.utils import download, get_download_dir, _get_dgl_url
from pprint import pprint
from scipy import sparse
from scipy import io as sio

def set_random_seed(seed=0):
    """Set random seed.

    Parameters
    ----------
    seed : int
        Random seed to use
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)

def mkdir_p(path, log=True):
    """Create a directory for the specified path.

    Parameters
    ----------
    path : str
        Path name
    log : bool
        Whether to print result for directory creation
    """
    try:
        os.makedirs(path)
        if log:
            print('Created directory {}'.format(path))
    except OSError as exc:
        if exc.errno == errno.EEXIST and os.path.isdir(path) and log:
            print('Directory {} already exists.'.format(path))
        else:
            raise

def get_date_postfix():
    """Get a date based postfix for directory name.

    Returns
    -------
    post_fix : str
    """
    dt = datetime.datetime.now()
    post_fix = '{}_{:02d}-{:02d}-{:02d}'.format(
        dt.date(), dt.hour, dt.minute, dt.second)
    return post_fix

def setup_log_dir(args, sampling=False):
    """Name and create directory for logging.

    Parameters
    ----------
    args : dict
        Configuration

    Returns
    -------
    log_dir : str
        Path for logging directory
    sampling : bool
        Whether we are using sampling based training
    """
    date_postfix = get_date_postfix()
    log_dir = os.path.join(
        args['log_dir'],
        '{}_{}'.format(args['dataset'], date_postfix))

    if sampling:
        log_dir = log_dir + '_sampling'

    mkdir_p(log_dir)
    return log_dir

# The configuration below is from the paper.
default_configure = {
    'lr': 0.005,          # Learning rate
    'num_heads': [8],     # Number of attention heads for node-level attention
    'hidden_units': 8,
    'dropout': 0.6,
    'weight_decay': 0.001,
    'num_epochs': 200,
    'patience': 100
}

sampling_configure = {
    'batch_size': 20
}

def setup(args):
    args.update(default_configure)
    set_random_seed(args['seed'])
    args['dataset'] = 'ACMRaw' if args['hetero'] else 'ACM'
    args['device'] = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    args['log_dir'] = setup_log_dir(args)
    return args

def setup_for_sampling(args):
    args.update(default_configure)
    args.update(sampling_configure)
    set_random_seed()
    args['device'] = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    args['log_dir'] = setup_log_dir(args, sampling=True)
    return args

def get_binary_mask(total_size, indices):
    mask = torch.zeros(total_size)
    mask[indices] = 1
    return mask.byte()

def load_acm(remove_self_loop):
    url = 'dataset/ACM3025.pkl'
    data_path = get_download_dir() + '/ACM3025.pkl'
    download(_get_dgl_url(url), path=data_path)

    with open(data_path, 'rb') as f:
        data = pickle.load(f)

    labels, features = torch.from_numpy(data['label'].todense()).long(), \
                       torch.from_numpy(data['feature'].todense()).float()
    num_classes = labels.shape[1]
    labels = labels.nonzero()[:, 1]

    if remove_self_loop:
        num_nodes = data['label'].shape[0]
        data['PAP'] = sparse.csr_matrix(data['PAP'] - np.eye(num_nodes))
        data['PLP'] = sparse.csr_matrix(data['PLP'] - np.eye(num_nodes))

    # Adjacency matrices for meta path based neighbors
    # (Mufei): I verified both of them are binary adjacency matrices with self loops
    author_g = dgl.graph(data['PAP'], ntype='paper', etype='author')
    subject_g = dgl.graph(data['PLP'], ntype='paper', etype='subject')
    gs = [author_g, subject_g]

    train_idx = torch.from_numpy(data['train_idx']).long().squeeze(0)
    val_idx = torch.from_numpy(data['val_idx']).long().squeeze(0)
    test_idx = torch.from_numpy(data['test_idx']).long().squeeze(0)

    num_nodes = author_g.number_of_nodes()
    train_mask = get_binary_mask(num_nodes, train_idx)
    val_mask = get_binary_mask(num_nodes, val_idx)
    test_mask = get_binary_mask(num_nodes, test_idx)

    print('dataset loaded')
    pprint({
        'dataset': 'ACM',
        'train': train_mask.sum().item() / num_nodes,
        'val': val_mask.sum().item() / num_nodes,
        'test': test_mask.sum().item() / num_nodes
    })

    return gs, features, labels, num_classes, train_idx, val_idx, test_idx, \
           train_mask, val_mask, test_mask

def load_acm_raw(remove_self_loop):
    assert not remove_self_loop
    url = 'dataset/ACM.mat'
    data_path = get_download_dir() + '/ACM.mat'
    download(_get_dgl_url(url), path=data_path)

    data = sio.loadmat(data_path)
    p_vs_l = data['PvsL']   # paper-field?
    p_vs_a = data['PvsA']   # paper-author
    p_vs_t = data['PvsT']   # paper-term, bag of words
    p_vs_c = data['PvsC']   # paper-conference, labels come from that

    # We assign
    # (1) KDD papers as class 0 (data mining),
    # (2) SIGMOD and VLDB papers as class 1 (database),
    # (3) SIGCOMM and MOBICOMM papers as class 2 (communication)
    conf_ids = [0, 1, 9, 10, 13]
    label_ids = [0, 1, 2, 2, 1]

    p_vs_c_filter = p_vs_c[:, conf_ids]  # keep only the columns of the five conferences above
    # Sum across columns (axis=1): each paper gets a count, nonzero iff it was published in one of
    # the five conferences. `.A1` flattens the (num_papers, 1) matrix into a 1-D array, and
    # `.nonzero()[0]` returns the indices of the nonzero entries, i.e. it selects exactly the
    # papers published in those five conferences.
    p_selected = (p_vs_c_filter.sum(1) != 0).A1.nonzero()[0]
    p_vs_l = p_vs_l[p_selected]
    p_vs_a = p_vs_a[p_selected]
    p_vs_t = p_vs_t[p_selected]
    p_vs_c = p_vs_c[p_selected]

    # Build one bipartite graph per relation
    pa = dgl.bipartite(p_vs_a, 'paper', 'pa', 'author')
    ap = dgl.bipartite(p_vs_a.transpose(), 'author', 'ap', 'paper')
    pl = dgl.bipartite(p_vs_l, 'paper', 'pf', 'field')
    lp = dgl.bipartite(p_vs_l.transpose(), 'field', 'fp', 'paper')
    # Assemble the heterogeneous graph from the relations
    hg = dgl.hetero_from_relations([pa, ap, pl, lp])

    features = torch.FloatTensor(p_vs_t.toarray())

    pc_p, pc_c = p_vs_c.nonzero()  # (row, col) indices of nonzero entries: rows are papers, columns are conferences
    labels = np.zeros(len(p_selected), dtype=np.int64)  # one label per selected paper
    for conf_id, label_id in zip(conf_ids, label_ids):
        labels[pc_p[pc_c == conf_id]] = label_id  # assign each paper the label of its conference
    labels = torch.LongTensor(labels)  # convert to tensor

    num_classes = 3

    float_mask = np.zeros(len(pc_p))
    for conf_id in conf_ids:
        pc_c_mask = (pc_c == conf_id)
        float_mask[pc_c_mask] = np.random.permutation(np.linspace(0, 1, pc_c_mask.sum()))
    train_idx = np.where(float_mask <= 0.2)[0]
    val_idx = np.where((float_mask > 0.2) & (float_mask <= 0.3))[0]
    test_idx = np.where(float_mask > 0.3)[0]

    num_nodes = hg.number_of_nodes('paper')  # number of paper nodes in the graph
    print(num_nodes)
    train_mask = get_binary_mask(num_nodes, train_idx)
    val_mask = get_binary_mask(num_nodes, val_idx)
    test_mask = get_binary_mask(num_nodes, test_idx)

    return hg, features, labels, num_classes, train_idx, val_idx, test_idx, \
           train_mask, val_mask, test_mask

def load_data(dataset, remove_self_loop=False):
    if dataset == 'ACM':
        return load_acm(remove_self_loop)
    elif dataset == 'ACMRaw':
        return load_acm_raw(remove_self_loop)
    else:
        raise NotImplementedError('Unsupported dataset {}'.format(dataset))

class EarlyStopping(object):
    def __init__(self, patience=10):
        dt = datetime.datetime.now()
        self.filename = 'early_stop_{}_{:02d}-{:02d}-{:02d}.pth'.format(
            dt.date(), dt.hour, dt.minute, dt.second)
        self.patience = patience
        self.counter = 0
        self.best_acc = None
        self.best_loss = None
        self.early_stop = False

    def step(self, loss, acc, model):
        if self.best_loss is None:
            self.best_acc = acc
            self.best_loss = loss
            self.save_checkpoint(model)
        elif (loss > self.best_loss) and (acc < self.best_acc):
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            if (loss <= self.best_loss) and (acc >= self.best_acc):
                self.save_checkpoint(model)
            self.best_loss = np.min((loss, self.best_loss))
            self.best_acc = np.max((acc, self.best_acc))
            self.counter = 0
        return self.early_stop

    def save_checkpoint(self, model):
        """Saves model when validation loss decreases."""
        torch.save(model.state_dict(), self.filename)

    def load_checkpoint(self, model):
        """Load the latest checkpoint."""
        model.load_state_dict(torch.load(self.filename))
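To make the split logic in load_acm_raw concrete: within each conference, papers receive evenly spaced values in [0, 1] in a random order, and the 0.2 / 0.3 thresholds then carve out roughly 20% train, 10% validation and 70% test per conference. A small self-contained sketch with a made-up conference size:

import numpy as np

# Toy illustration of the split in load_acm_raw; the conference size (50) is made up.
num_papers_in_conf = 50
float_mask = np.random.permutation(np.linspace(0, 1, num_papers_in_conf))

train_idx = np.where(float_mask <= 0.2)[0]
val_idx = np.where((float_mask > 0.2) & (float_mask <= 0.3))[0]
test_idx = np.where(float_mask > 0.3)[0]

# The permutation shuffles which papers land where, but the counts are fixed by the thresholds:
print(len(train_idx), len(val_idx), len(test_idx))  # 10 5 35, i.e. roughly 20% / 10% / 70%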
Model (for the preprocessed ACM dataset)
import torch
import torch.nn as nn
import torch.nn.functional as F

from dgl.nn.pytorch import GATConv

class SemanticAttention(nn.Module):
    def __init__(self, in_size, hidden_size=128):
        super(SemanticAttention, self).__init__()

        self.project = nn.Sequential(
            nn.Linear(in_size, hidden_size),
            nn.Tanh(),
            nn.Linear(hidden_size, 1, bias=False)
        )

    def forward(self, z):
        w = self.project(z).mean(0)                     # (M, 1)
        beta = torch.softmax(w, dim=0)                  # (M, 1)
        beta = beta.expand((z.shape[0],) + beta.shape)  # (N, M, 1)

        return (beta * z).sum(1)                        # (N, D * K)

class HANLayer(nn.Module):
    """
    HAN layer.

    Arguments
    ---------
    num_meta_paths : number of homogeneous graphs generated from the metapaths.
    in_size : input feature dimension
    out_size : output feature dimension
    layer_num_heads : number of attention heads
    dropout : Dropout probability

    Inputs
    ------
    g : list[DGLGraph]
        List of graphs
    h : tensor
        Input features

    Outputs
    -------
    tensor
        The output feature
    """
    def __init__(self, num_meta_paths, in_size, out_size, layer_num_heads, dropout):
        super(HANLayer, self).__init__()

        # One GAT layer for each meta path based adjacency matrix
        self.gat_layers = nn.ModuleList()
        for i in range(num_meta_paths):
            self.gat_layers.append(GATConv(in_size, out_size, layer_num_heads,
                                           dropout, dropout, activation=F.elu))
        self.semantic_attention = SemanticAttention(in_size=out_size * layer_num_heads)
        self.num_meta_paths = num_meta_paths

    def forward(self, gs, h):
        semantic_embeddings = []

        for i, g in enumerate(gs):
            semantic_embeddings.append(self.gat_layers[i](g, h).flatten(1))
        semantic_embeddings = torch.stack(semantic_embeddings, dim=1)  # (N, M, D * K)

        return self.semantic_attention(semantic_embeddings)           # (N, D * K)

class HAN(nn.Module):
    def __init__(self, num_meta_paths, in_size, hidden_size, out_size, num_heads, dropout):
        super(HAN, self).__init__()

        self.layers = nn.ModuleList()
        self.layers.append(HANLayer(num_meta_paths, in_size, hidden_size, num_heads[0], dropout))
        for l in range(1, len(num_heads)):
            self.layers.append(HANLayer(num_meta_paths, hidden_size * num_heads[l-1],
                                        hidden_size, num_heads[l], dropout))
        self.predict = nn.Linear(hidden_size * num_heads[-1], out_size)

    def forward(self, g, h):
        for gnn in self.layers:
            h = gnn(g, h)

        return self.predict(h)
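As a quick sanity check of the shapes flowing through the semantic attention above, here is a standalone sketch that mirrors its forward pass with plain torch ops (all sizes are made up: N = 4 nodes, M = 2 meta-paths, D * K = 16):

import torch
import torch.nn as nn

# Toy shape walk-through of semantic attention (sizes made up).
N, M, DK, hidden = 4, 2, 16, 128
z = torch.randn(N, M, DK)                        # stacked per-meta-path node embeddings

project = nn.Sequential(nn.Linear(DK, hidden), nn.Tanh(), nn.Linear(hidden, 1, bias=False))
w = project(z).mean(0)                           # (M, 1): one importance score per meta-path
beta = torch.softmax(w, dim=0)                   # (M, 1): normalized meta-path weights
beta = beta.expand((N,) + beta.shape)            # (N, M, 1): same weights broadcast to every node
fused = (beta * z).sum(1)                        # (N, D * K): weighted sum over meta-paths
print(fused.shape)                               # torch.Size([4, 16])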
Model (for the raw, unpreprocessed ACM dataset)
"""This model shows an example of using dgl.metapath_reachable_graph on the original heterogeneous graph. Because the original HAN implementation only gives the preprocessed homogeneous graph, this model could not reproduce the result in HAN as they did not provide the preprocessing code, and we constructed another dataset from ACM with a different set of papers, connections, features and labels. """ import torch import torch.nn as nn import torch.nn.functional as F import dgl from dgl.nn.pytorch import GATConv class SemanticAttention(nn.Module): def __init__(self, in_size, hidden_size=128): super(SemanticAttention, self).__init__() self.project = nn.Sequential( nn.Linear(in_size, hidden_size), nn.Tanh(), nn.Linear(hidden_size, 1, bias=False) ) def forward(self, z): w = self.project(z).mean(0) # (M, 1) 输入维度为(N, M, D * K) 经过project层变为 (N, M, 1) , 对节点求mean之后变为 (M,1) beta = torch.softmax(w, dim=0) # (M, 1) 使用softmax归一化 beta = beta.expand((z.shape[0],) + beta.shape) # (N, M, 1) 扩展 N 份 return (beta * z).sum(1) # (N, D * K) (N, M, 1)* (N, M, D*K) = (N , M , D*K) sum(1) -> (N, D*K) class HANLayer(nn.Module): """ HAN layer. Arguments --------- meta_paths : list of metapaths, each as a list of edge types in_size : input feature dimension out_size : output feature dimension layer_num_heads : number of attention heads dropout : Dropout probability Inputs ------ g : DGLHeteroGraph The heterogeneous graph h : tensor Input features Outputs ------- tensor The output feature """ def __init__(self, meta_paths, in_size, out_size, layer_num_heads, dropout): super(HANLayer, self).__init__() # One GAT layer for each meta path based adjacency matrix self.gat_layers = nn.ModuleList() for i in range(len(meta_paths)): self.gat_layers.append(GATConv(in_size, out_size, layer_num_heads, dropout, dropout, activation=F.elu)) self.semantic_attention = SemanticAttention(in_size=out_size * layer_num_heads) self.meta_paths = list(tuple(meta_path) for meta_path in meta_paths) self._cached_graph = None self._cached_coalesced_graph = {} def forward(self, g, h): semantic_embeddings = [] if self._cached_graph is None or self._cached_graph is not g: self._cached_graph = g self._cached_coalesced_graph.clear() for meta_path in self.meta_paths: self._cached_coalesced_graph[meta_path] = dgl.metapath_reachable_graph(g, meta_path) for i, meta_path in enumerate(self.meta_paths): new_g = self._cached_coalesced_graph[meta_path] # 通过metapath得到的同质图 semantic_embeddings.append(self.gat_layers[i](new_g, h).flatten(1)) # (N, D*K) N是节点数, D是输出的维度out_size, K是注意力头数 semantic_embeddings = torch.stack(semantic_embeddings, dim=1) # (N, M, D * K) 堆叠M个元路径的结果 return self.semantic_attention(semantic_embeddings) # (N, D * K) class HAN(nn.Module): def __init__(self, meta_paths, in_size, hidden_size, out_size, num_heads, dropout): super(HAN, self).__init__() self.layers = nn.ModuleList() self.layers.append(HANLayer(meta_paths, in_size, hidden_size, num_heads[0], dropout)) for l in range(1, len(num_heads)): self.layers.append(HANLayer(meta_paths, hidden_size * num_heads[l-1], hidden_size, num_heads[l], dropout)) self.predict = nn.Linear(hidden_size * num_heads[-1], out_size) def forward(self, g, h): for gnn in self.layers: h = gnn(g, h) # return self.predict(h)