feat(RGIN): Add relational generalisation of GIN
This commit is contained in:
Parent: 9dc36b8cfe
Commit: 9764626607
@@ -3,4 +3,5 @@ from .gnn_edge_mlp import sparse_gnn_edge_mlp_layer
 from .gnn_film import sparse_gnn_film_layer
 from .rgat import sparse_rgat_layer
 from .rgcn import sparse_rgcn_layer
 from .rgdcn import sparse_rgdcn_layer
+from .rgin import sparse_rgin_layer
@@ -0,0 +1,136 @@
+from typing import List, Optional, Callable
+
+import tensorflow as tf
+
+from utils import get_activation
+
+
+class MLP(object):
+    def __init__(self, out_size: int, num_hidden_layers: int = 1, activation_fun: Optional[Callable] = None):
+        if activation_fun is None:
+            self.__activation_fun = tf.nn.relu
+        else:
+            self.__activation_fun = activation_fun
+
+        self.__layers = []  # type: List[tf.layers.Dense]
+        for _ in range(num_hidden_layers + 1):  # all hidden layers + one linear output:
+            self.__layers.append(tf.layers.Dense(units=out_size,
+                                                 use_bias=False,
+                                                 activation=None))
+
+    def __call__(self, inputs: tf.Tensor) -> tf.Tensor:
+        activations = inputs
+        for linear_layer in self.__layers[:-1]:
+            activations = linear_layer(activations)
+            activations = self.__activation_fun(activations)
+        return self.__layers[-1](activations)
+
+
+def sparse_rgin_layer(
+        node_embeddings: tf.Tensor,
+        adjacency_lists: List[tf.Tensor],
+        state_dim: Optional[int],
+        num_timesteps: int = 1,
+        activation_function: Optional[str] = "ReLU",
+        num_MLP_hidden_layers: int = 1,
+        learn_epsilon: bool = True,
+        ) -> tf.Tensor:
"""
|
||||||
|
Compute new graph states by neural message passing using MLPs for state updates
|
||||||
|
and message computation.
|
||||||
|
For this, we assume existing node states h^t_v and a list of per-edge-type adjacency
|
||||||
|
matrices A_\ell.
|
||||||
|
|
||||||
|
We compute new states as follows:
|
||||||
|
h^{t+1}_v := MLP_{out}((1 + \epsilon) * MLP_{self}(h^t_v)
|
||||||
|
+ \sum_\ell \sum_{(u, v) \in A_\ell} MLP_\ell(h^t_u))
|
||||||
|
The learnable parameters of this are the MLPs and (if enabled) epsilon.
|
||||||
|
This is derived from Cor. 6 of arXiv:1810.00826, instantiating the functions f, \phi
|
||||||
|
with _separate_ MLPs. This is more powerful than the GIN formulation in Eq. (4.1) of
|
||||||
|
arXiv:1810.00826, as we want to be able to distinguish graphs of the form
|
||||||
|
G_1 = (V={1, 2, 3}, E_1={(1, 2)}, E_2={(3, 2)})
|
||||||
|
and
|
||||||
|
G_2 = (V={1, 2, 3}, E_1={(3, 2)}, E_2={(1, 2)})
|
||||||
|
from each other. If we would treat all edges the same,
|
||||||
|
G_1.E_1 \cup G_1.E_2 == G_2.E_1 \cup G_2.E_2 would imply that the two graphs
|
||||||
|
become indistuingishable.
|
||||||
|
Hence, we introduce per-edge-type MLPs, which also means that we have to drop
|
||||||
|
the optimisation of modelling f \circ \phi by a single MLP used in the original
|
||||||
|
GIN formulation.
|
||||||
|
|
||||||
|
We use the following abbreviations in shape descriptions:
|
||||||
|
* V: number of nodes
|
||||||
|
* D: state dimension
|
||||||
|
* L: number of different edge types
|
||||||
|
* E: number of edges of a given edge type
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
node_embeddings: float32 tensor of shape [V, D], the original representation of
|
||||||
|
each node in the graph.
|
||||||
|
adjacency_lists: List of L adjacency lists, represented as int32 tensors of shape
|
||||||
|
[E, 2]. Concretely, adjacency_lists[l][k,:] == [v, u] means that the k-th edge
|
||||||
|
of type l connects node v to node u.
|
||||||
|
state_dim: Optional size of output dimension of the GNN layer. If not set, defaults
|
||||||
|
to D, the dimensionality of the input. If different from the input dimension,
|
||||||
|
parameter num_timesteps has to be 1.
|
||||||
|
num_timesteps: Number of repeated applications of this message passing layer.
|
||||||
|
activation_function: Type of activation function used.
|
||||||
|
num_MLP_hidden_layers: Number of hidden layers of the MLPs.
|
||||||
|
learn_epsilon: Flag indicating if the value of epsilon should be learned. If
|
||||||
|
False, epsilon defaults to 0.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
float32 tensor of shape [V, state_dim]
|
||||||
|
"""
|
||||||
|
+    num_nodes = tf.shape(node_embeddings, out_type=tf.int32)[0]
+    if state_dim is None:
+        # Use the static input dimension: tf.layers.Dense requires a Python int for units.
+        state_dim = int(node_embeddings.shape[-1])
+
+    # === Prepare things we need across all timesteps:
+    activation_fn = get_activation(activation_function)
+    aggregation_MLP = MLP(out_size=state_dim,
+                          num_hidden_layers=num_MLP_hidden_layers,
+                          activation_fun=activation_fn)
+    edge_type_to_edge_mlp = []  # MLPs to compute the edge messages
+    edge_type_to_message_targets = []  # List of tensors of message targets
+    for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists):
+        with tf.variable_scope("Edge_%i_MLP" % edge_type_idx):
+            edge_type_to_edge_mlp.append(
+                MLP(out_size=state_dim,
+                    num_hidden_layers=num_MLP_hidden_layers,
+                    activation_fun=activation_fn))
+        edge_type_to_message_targets.append(adjacency_list_for_edge_type[:, 1])
+    # Initialize epsilon. Note that we merge the 1 + epsilon here:
+    if learn_epsilon:
+        epsilon = tf.get_variable("epsilon", shape=(), dtype=tf.float32,
+                                  initializer=tf.ones_initializer, trainable=True)
+    else:
+        epsilon = 1  # i.e., epsilon == 0 before merging in the 1 + epsilon term.
+    self_loop_MLP = MLP(out_size=state_dim,
+                        num_hidden_layers=num_MLP_hidden_layers,
+                        activation_fun=activation_fn)
+
+    # Let M be the number of messages (sum of all E):
+    message_targets = tf.concat(edge_type_to_message_targets, axis=0)  # Shape [M]
+
+    cur_node_states = node_embeddings
+    for _ in range(num_timesteps):
+        messages_per_type = []  # list of tensors of messages of shape [E, D]
+        # Collect incoming messages per edge type
+        for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists):
+            edge_sources = adjacency_list_for_edge_type[:, 0]
+            edge_source_states = \
+                tf.nn.embedding_lookup(params=cur_node_states,
+                                       ids=edge_sources)  # Shape [E, D]
+            messages = edge_type_to_edge_mlp[edge_type_idx](edge_source_states)  # Shape [E, D]
+            messages_per_type.append(messages)
+
+        all_messages = tf.concat(messages_per_type, axis=0)  # Shape [M, D]
+        aggregated_messages = \
+            tf.unsorted_segment_sum(data=all_messages,
+                                    segment_ids=message_targets,
+                                    num_segments=num_nodes)  # Shape [V, D]
+
+        cur_node_states = aggregation_MLP(epsilon * self_loop_MLP(cur_node_states) + aggregated_messages)
+        # The final MLP layer has no activation, so apply it here explicitly:
+        cur_node_states = activation_fn(cur_node_states)
+
+    return cur_node_states
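
To make the shape conventions and the per-edge-type MLPs concrete, here is a minimal usage sketch of sparse_rgin_layer on the toy graph G_1 from the docstring above (illustrative only; it assumes the repository's TF 1.x graph-mode setup and its utils.get_activation helper, and must run inside a graph/session as usual):

    import tensorflow as tf
    from gnns import sparse_rgin_layer

    # G_1 from the docstring, zero-indexed: V = {0, 1, 2}, E_1 = {(0, 1)}, E_2 = {(2, 1)}.
    node_embeddings = tf.random_normal(shape=(3, 8))    # [V=3, D=8]
    adjacency_lists = [
        tf.constant([[0, 1]], dtype=tf.int32),          # edge type 1: node 0 -> node 1
        tf.constant([[2, 1]], dtype=tf.int32),          # edge type 2: node 2 -> node 1
    ]
    new_states = sparse_rgin_layer(node_embeddings=node_embeddings,
                                   adjacency_lists=adjacency_lists,
                                   state_dim=8,          # equals D, so num_timesteps > 1 would also be legal
                                   num_timesteps=1)      # [V=3, state_dim=8]

Node 1 receives one message per edge type, each computed by that edge type's own MLP; swapping the two adjacency lists (i.e., G_2) therefore yields a different update, which is exactly the relational distinction the docstring argues for.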
@@ -5,3 +5,4 @@ from .gnn_film_model import GNN_FiLM_Model
 from .rgat_model import RGAT_Model
 from .rgcn_model import RGCN_Model
 from .rgdcn_model import RGDCN_Model
+from .rgin_model import RGIN_Model
@@ -0,0 +1,45 @@
+from typing import Dict, Any, List
+
+import tensorflow as tf
+
+from .sparse_graph_model import Sparse_Graph_Model
+from tasks import Sparse_Graph_Task
+from gnns import sparse_rgin_layer
+
+
+class RGIN_Model(Sparse_Graph_Model):
+    @classmethod
+    def default_params(cls):
+        params = super().default_params()
+        params.update({
+            'hidden_size': 128,
+            'graph_activation_function': "ReLU",
+            'graph_layer_input_dropout_keep_prob': 1.0,
+            'graph_dense_between_every_num_gnn_layers': 10000,
+            'graph_residual_connection_every_num_layers': 10000,
+            'graph_num_MLP_hidden_layers': 1,
+            'graph_learn_epsilon': False,
+        })
+        return params
+
+    @staticmethod
+    def name(params: Dict[str, Any]) -> str:
+        return "RGIN"
+
+    def __init__(self, params: Dict[str, Any], task: Sparse_Graph_Task, run_id: str, result_dir: str) -> None:
+        super().__init__(params, task, run_id, result_dir)
+
+    def _apply_gnn_layer(self,
+                         node_representations: tf.Tensor,
+                         adjacency_lists: List[tf.Tensor],
+                         type_to_num_incoming_edges: tf.Tensor,
+                         num_timesteps: int) -> tf.Tensor:
+        return sparse_rgin_layer(
+            node_embeddings=node_representations,
+            adjacency_lists=adjacency_lists,
+            state_dim=self.params['hidden_size'],
+            num_timesteps=num_timesteps,
+            activation_function=self.params['graph_activation_function'],
+            num_MLP_hidden_layers=self.params['graph_num_MLP_hidden_layers'],
+            learn_epsilon=self.params['graph_learn_epsilon'],
+        )
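
As a quick illustration of how these hyperparameters are consumed (a sketch, assuming the package layout added by this commit):

    from models import RGIN_Model

    params = RGIN_Model.default_params()
    params['graph_learn_epsilon'] = True   # switch on the learnable (1 + epsilon) factor
    assert RGIN_Model.name(params) == "RGIN"
    # 'hidden_size' and the 'graph_*' values above are forwarded to
    # sparse_rgin_layer by _apply_gnn_layer on every GNN layer application.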
@@ -5,7 +5,7 @@ from typing import Tuple, Type, Dict, Any
 import pickle
 
 from models import (Sparse_Graph_Model, GGNN_Model, GNN_FiLM_Model, GNN_Edge_MLP_Model,
-                    RGAT_Model, RGCN_Model, RGDCN_Model)
+                    RGAT_Model, RGCN_Model, RGDCN_Model, RGIN_Model)
 from tasks import Sparse_Graph_Task, QM9_Task, Citation_Network_Task, PPI_Task, VarMisuse_Task
 
@@ -49,6 +49,8 @@ def name_to_model_class(name: str) -> Tuple[Type[Sparse_Graph_Model], Dict[str,
         return RGCN_Model, {}
     if name in ["rgdcn", "rgdcn_model"]:
         return RGDCN_Model, {}
+    if name in ["rgin", "rgin_model"]:
+        return RGIN_Model, {}
 
     raise ValueError("Unknown model type '%s'" % name)
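
With this dispatch in place, the new model is selectable by name; for instance (a sketch mirroring how the repository resolves model names from CLI arguments):

    model_cls, extra_params = name_to_model_class("rgin")
    assert model_cls is RGIN_Model and extra_params == {}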