""" --- title: Train a Graph Attention Network (GAT) on Cora dataset summary: > This trains is a Graph Attention Network (GAT) on Cora dataset --- # Train a Graph Attention Network (GAT) on Cora dataset """ from typing import Dict import numpy as np import torch from torch import nn from labml import lab, monit, tracker, experiment from labml.configs import BaseConfigs, option, calculate from labml.utils import download from labml_nn.helpers.device import DeviceConfigs from labml_nn.graphs.gat import GraphAttentionLayer from labml_nn.optimizers.configs import OptimizerConfigs class CoraDataset: """ ## [Cora Dataset](https://linqs.soe.ucsc.edu/data) Cora dataset is a dataset of research papers. For each paper we are given a binary feature vector that indicates the presence of words. Each paper is classified into one of 7 classes. The dataset also has the citation network. The papers are the nodes of the graph and the edges are the citations. The task is to classify the nodes to the 7 classes with feature vectors and citation network as input. """ # Labels for each node labels: torch.Tensor # Set of class names and an unique integer index classes: Dict[str, int] # Feature vectors for all nodes features: torch.Tensor # Adjacency matrix with the edge information. # `adj_mat[i][j]` is `True` if there is an edge from `i` to `j`. adj_mat: torch.Tensor @staticmethod def _download(): """ Download the dataset """ if not (lab.get_data_path() / 'cora').exists(): download.download_file('https://linqs-data.soe.ucsc.edu/public/lbc/cora.tgz', lab.get_data_path() / 'cora.tgz') download.extract_tar(lab.get_data_path() / 'cora.tgz', lab.get_data_path()) def __init__(self, include_edges: bool = True): """ Load the dataset """ # Whether to include edges. # This is test how much accuracy is lost if we ignore the citation network. self.include_edges = include_edges # Download dataset self._download() # Read the paper ids, feature vectors, and labels with monit.section('Read content file'): content = np.genfromtxt(str(lab.get_data_path() / 'cora/cora.content'), dtype=np.dtype(str)) # Load the citations, it's a list of pairs of integers. with monit.section('Read citations file'): citations = np.genfromtxt(str(lab.get_data_path() / 'cora/cora.cites'), dtype=np.int32) # Get the feature vectors features = torch.tensor(np.array(content[:, 1:-1], dtype=np.float32)) # Normalize the feature vectors self.features = features / features.sum(dim=1, keepdim=True) # Get the class names and assign an unique integer to each of them self.classes = {s: i for i, s in enumerate(set(content[:, -1]))} # Get the labels as those integers self.labels = torch.tensor([self.classes[i] for i in content[:, -1]], dtype=torch.long) # Get the paper ids paper_ids = np.array(content[:, 0], dtype=np.int32) # Map of paper id to index ids_to_idx = {id_: i for i, id_ in enumerate(paper_ids)} # Empty adjacency matrix - an identity matrix self.adj_mat = torch.eye(len(self.labels), dtype=torch.bool) # Mark the citations in the adjacency matrix if self.include_edges: for e in citations: # The pair of paper indexes e1, e2 = ids_to_idx[e[0]], ids_to_idx[e[1]] # We build a symmetrical graph, where if paper $i$ referenced # paper $j$ we place an adge from $i$ to $j$ as well as an edge # from $j$ to $i$. self.adj_mat[e1][e2] = True self.adj_mat[e2][e1] = True class GAT(nn.Module): """ ## Graph Attention Network (GAT) This graph attention network has two [graph attention layers](index.html). """ def __init__(self, in_features: int, n_hidden: int, n_classes: int, n_heads: int, dropout: float): """ * `in_features` is the number of features per node * `n_hidden` is the number of features in the first graph attention layer * `n_classes` is the number of classes * `n_heads` is the number of heads in the graph attention layers * `dropout` is the dropout probability """ super().__init__() # First graph attention layer where we concatenate the heads self.layer1 = GraphAttentionLayer(in_features, n_hidden, n_heads, is_concat=True, dropout=dropout) # Activation function after first graph attention layer self.activation = nn.ELU() # Final graph attention layer where we average the heads self.output = GraphAttentionLayer(n_hidden, n_classes, 1, is_concat=False, dropout=dropout) # Dropout self.dropout = nn.Dropout(dropout) def forward(self, x: torch.Tensor, adj_mat: torch.Tensor): """ * `x` is the features vectors of shape `[n_nodes, in_features]` * `adj_mat` is the adjacency matrix of the form `[n_nodes, n_nodes, n_heads]` or `[n_nodes, n_nodes, 1]` """ # Apply dropout to the input x = self.dropout(x) # First graph attention layer x = self.layer1(x, adj_mat) # Activation function x = self.activation(x) # Dropout x = self.dropout(x) # Output layer (without activation) for logits return self.output(x, adj_mat) def accuracy(output: torch.Tensor, labels: torch.Tensor): """ A simple function to calculate the accuracy """ return output.argmax(dim=-1).eq(labels).sum().item() / len(labels) class Configs(BaseConfigs): """ ## Configurations """ # Model model: GAT # Number of nodes to train on training_samples: int = 500 # Number of features per node in the input in_features: int # Number of features in the first graph attention layer n_hidden: int = 64 # Number of heads n_heads: int = 8 # Number of classes for classification n_classes: int # Dropout probability dropout: float = 0.6 # Whether to include the citation network include_edges: bool = True # Dataset dataset: CoraDataset # Number of training iterations epochs: int = 1_000 # Loss function loss_func = nn.CrossEntropyLoss() # Device to train on # # This creates configs for device, so that # we can change the device by passing a config value device: torch.device = DeviceConfigs() # Optimizer optimizer: torch.optim.Adam def run(self): """ ### Training loop We do full batch training since the dataset is small. If we were to sample and train we will have to sample a set of nodes for each training step along with the edges that span across those selected nodes. """ # Move the feature vectors to the device features = self.dataset.features.to(self.device) # Move the labels to the device labels = self.dataset.labels.to(self.device) # Move the adjacency matrix to the device edges_adj = self.dataset.adj_mat.to(self.device) # Add an empty third dimension for the heads edges_adj = edges_adj.unsqueeze(-1) # Random indexes idx_rand = torch.randperm(len(labels)) # Nodes for training idx_train = idx_rand[:self.training_samples] # Nodes for validation idx_valid = idx_rand[self.training_samples:] # Training loop for epoch in monit.loop(self.epochs): # Set the model to training mode self.model.train() # Make all the gradients zero self.optimizer.zero_grad() # Evaluate the model output = self.model(features, edges_adj) # Get the loss for training nodes loss = self.loss_func(output[idx_train], labels[idx_train]) # Calculate gradients loss.backward() # Take optimization step self.optimizer.step() # Log the loss tracker.add('loss.train', loss) # Log the accuracy tracker.add('accuracy.train', accuracy(output[idx_train], labels[idx_train])) # Set mode to evaluation mode for validation self.model.eval() # No need to compute gradients with torch.no_grad(): # Evaluate the model again output = self.model(features, edges_adj) # Calculate the loss for validation nodes loss = self.loss_func(output[idx_valid], labels[idx_valid]) # Log the loss tracker.add('loss.valid', loss) # Log the accuracy tracker.add('accuracy.valid', accuracy(output[idx_valid], labels[idx_valid])) # Save logs tracker.save() @option(Configs.dataset) def cora_dataset(c: Configs): """ Create Cora dataset """ return CoraDataset(c.include_edges) # Get the number of classes calculate(Configs.n_classes, lambda c: len(c.dataset.classes)) # Number of features in the input calculate(Configs.in_features, lambda c: c.dataset.features.shape[1]) @option(Configs.model) def gat_model(c: Configs): """ Create GAT model """ return GAT(c.in_features, c.n_hidden, c.n_classes, c.n_heads, c.dropout).to(c.device) @option(Configs.optimizer) def _optimizer(c: Configs): """ Create configurable optimizer """ opt_conf = OptimizerConfigs() opt_conf.parameters = c.model.parameters() return opt_conf def main(): # Create configurations conf = Configs() # Create an experiment experiment.create(name='gat') # Calculate configurations. experiment.configs(conf, { # Adam optimizer 'optimizer.optimizer': 'Adam', 'optimizer.learning_rate': 5e-3, 'optimizer.weight_decay': 5e-4, }) # Start and watch the experiment with experiment.start(): # Run the training conf.run() # if __name__ == '__main__': main()