feat(RGIN): Add relational generalisation of GIN
Parent: 9dc36b8cfe
Commit: 9764626607

@@ -3,4 +3,5 @@ from .gnn_edge_mlp import sparse_gnn_edge_mlp_layer
from .gnn_film import sparse_gnn_film_layer
from .rgat import sparse_rgat_layer
from .rgcn import sparse_rgcn_layer
from .rgdcn import sparse_rgdcn_layer
from .rgin import sparse_rgin_layer

@@ -0,0 +1,136 @@
from typing import List, Optional, Callable

import tensorflow as tf

from utils import get_activation


class MLP(object):
    def __init__(self, out_size: int, num_hidden_layers: int = 1, activation_fun: Optional[Callable] = None):
        if activation_fun is None:
            self.__activation_fun = tf.nn.relu
        else:
            self.__activation_fun = activation_fun

        self.__layers = []  # type: List[tf.layers.Dense]
        for _ in range(num_hidden_layers + 1):  # all hidden layers + one linear output
            self.__layers.append(tf.layers.Dense(units=out_size,
                                                 use_bias=False,
                                                 activation=None))

    def __call__(self, input: tf.Tensor) -> tf.Tensor:
        activations = input
        for linear_layer in self.__layers[:-1]:
            activations = linear_layer(activations)
            activations = self.__activation_fun(activations)
        return self.__layers[-1](activations)


def sparse_rgin_layer(
        node_embeddings: tf.Tensor,
        adjacency_lists: List[tf.Tensor],
        state_dim: Optional[int],
        num_timesteps: int = 1,
        activation_function: Optional[str] = "ReLU",
        num_MLP_hidden_layers: int = 1,
        learn_epsilon: bool = True,
        ) -> tf.Tensor:
    """
    Compute new graph states by neural message passing, using MLPs both for message
    computation and for the state update.
    For this, we assume existing node states h^t_v and a list of per-edge-type adjacency
    matrices A_\ell.

    We compute new states as follows:
        h^{t+1}_v := MLP_{out}((1 + \epsilon) * MLP_{self}(h^t_v)
                               + \sum_\ell \sum_{(u, v) \in A_\ell} MLP_\ell(h^t_u))
    The learnable parameters of this are the MLPs and (if enabled) epsilon.

    This is derived from Cor. 6 of arXiv:1810.00826, instantiating the functions f, \phi
    with _separate_ MLPs. This is more powerful than the GIN formulation in Eq. (4.1) of
    arXiv:1810.00826, as we want to be able to distinguish graphs of the form
        G_1 = (V={1, 2, 3}, E_1={(1, 2)}, E_2={(3, 2)})
    and
        G_2 = (V={1, 2, 3}, E_1={(3, 2)}, E_2={(1, 2)})
    from each other. If we treated all edges the same,
    G_1.E_1 \cup G_1.E_2 == G_2.E_1 \cup G_2.E_2 would imply that the two graphs
    become indistinguishable.
    Hence, we introduce per-edge-type MLPs, which also means that we have to drop
    the optimisation of modelling f \circ \phi by a single MLP used in the original
    GIN formulation.

    We use the following abbreviations in shape descriptions:
    * V: number of nodes
    * D: state dimension
    * L: number of different edge types
    * E: number of edges of a given edge type

    Arguments:
        node_embeddings: float32 tensor of shape [V, D], the original representation of
            each node in the graph.
        adjacency_lists: List of L adjacency lists, represented as int32 tensors of shape
            [E, 2]. Concretely, adjacency_lists[l][k,:] == [v, u] means that the k-th edge
            of type l connects node v to node u.
        state_dim: Optional size of the output dimension of the GNN layer. If not set,
            defaults to D, the dimensionality of the input. If different from the input
            dimension, parameter num_timesteps has to be 1.
        num_timesteps: Number of repeated applications of this message passing layer.
        activation_function: Type of activation function used.
        num_MLP_hidden_layers: Number of hidden layers of the MLPs.
        learn_epsilon: Flag indicating if the value of epsilon should be learned. If
            False, epsilon defaults to 0.

    Returns:
        float32 tensor of shape [V, state_dim]
    """
    num_nodes = tf.shape(node_embeddings, out_type=tf.int32)[0]
    if state_dim is None:
        state_dim = tf.shape(node_embeddings, out_type=tf.int32)[1]

    # === Prepare things we need across all timesteps:
    activation_fn = get_activation(activation_function)
    aggregation_MLP = MLP(out_size=state_dim,
                          num_hidden_layers=num_MLP_hidden_layers,
                          activation_fun=activation_fn)
    edge_type_to_edge_mlp = []  # MLPs to compute the edge messages
    edge_type_to_message_targets = []  # List of tensors of message targets
    for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists):
        with tf.variable_scope("Edge_%i_MLP" % edge_type_idx):
            edge_type_to_edge_mlp.append(
                MLP(out_size=state_dim,
                    num_hidden_layers=num_MLP_hidden_layers,
                    activation_fun=activation_fn))
        edge_type_to_message_targets.append(adjacency_list_for_edge_type[:, 1])
    # Initialize epsilon: Note that we merge the 1+epsilon here:
    if learn_epsilon:
        epsilon = tf.get_variable("epsilon", shape=(), dtype=tf.float32, initializer=tf.ones_initializer, trainable=True)
    else:
        epsilon = 1
    self_loop_MLP = MLP(out_size=state_dim,
                        num_hidden_layers=num_MLP_hidden_layers,
                        activation_fun=activation_fn)

    # Let M be the number of messages (sum of all E):
    message_targets = tf.concat(edge_type_to_message_targets, axis=0)  # Shape [M]

    cur_node_states = node_embeddings
    for _ in range(num_timesteps):
        messages_per_type = []  # list of tensors of messages of shape [E, D]
        # Collect incoming messages per edge type
        for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists):
            edge_sources = adjacency_list_for_edge_type[:, 0]
            edge_source_states = \
                tf.nn.embedding_lookup(params=cur_node_states,
                                       ids=edge_sources)  # Shape [E, D]
            messages = edge_type_to_edge_mlp[edge_type_idx](edge_source_states)  # Shape [E, D]
            messages_per_type.append(messages)

        all_messages = tf.concat(messages_per_type, axis=0)  # Shape [M, D]
        aggregated_messages = \
            tf.unsorted_segment_sum(data=all_messages,
                                    segment_ids=message_targets,
                                    num_segments=num_nodes)  # Shape [V, D]

        cur_node_states = aggregation_MLP(epsilon * self_loop_MLP(cur_node_states) + aggregated_messages)
        cur_node_states = activation_fn(cur_node_states)  # Note that the final MLP layer has no activation, so we do that here explicitly

    return cur_node_states

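For reference, a minimal sketch of calling the new layer on the two-edge-type toy graph G_1 from the docstring above. This is a sketch only, assuming TensorFlow 1.x and that the repository's gnns and utils packages are on the import path:

import numpy as np
import tensorflow as tf

from gnns import sparse_rgin_layer

# Toy graph G_1 from the docstring: 3 nodes, E_1 = {(1, 2)}, E_2 = {(3, 2)} (0-based indices below).
node_features = tf.constant(np.random.randn(3, 8).astype(np.float32))  # [V, D] = [3, 8]
adjacency_lists = [
    tf.constant([[0, 1]], dtype=tf.int32),  # edge type 1: node 0 -> node 1
    tf.constant([[2, 1]], dtype=tf.int32),  # edge type 2: node 2 -> node 1
]

new_states = sparse_rgin_layer(
    node_embeddings=node_features,
    adjacency_lists=adjacency_lists,
    state_dim=8,
    num_timesteps=1,
    activation_function="ReLU",
    num_MLP_hidden_layers=1,
    learn_epsilon=False,  # with False, the self-loop term is weighted by 1 (epsilon = 0)
)  # float32 tensor of shape [V, state_dim] = [3, 8]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(new_states).shape)  # (3, 8)
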
@@ -5,3 +5,4 @@ from .gnn_film_model import GNN_FiLM_Model
from .rgat_model import RGAT_Model
from .rgcn_model import RGCN_Model
from .rgdcn_model import RGDCN_Model
from .rgin_model import RGIN_Model

@@ -0,0 +1,45 @@
from typing import Dict, Any, List

import tensorflow as tf

from .sparse_graph_model import Sparse_Graph_Model
from tasks import Sparse_Graph_Task
from gnns import sparse_rgin_layer


class RGIN_Model(Sparse_Graph_Model):
    @classmethod
    def default_params(cls):
        params = super().default_params()
        params.update({
            'hidden_size': 128,
            'graph_activation_function': "ReLU",
            'graph_layer_input_dropout_keep_prob': 1.0,
            'graph_dense_between_every_num_gnn_layers': 10000,
            'graph_residual_connection_every_num_layers': 10000,
            'graph_num_MLP_hidden_layers': 1,
            'graph_learn_epsilon': False,
        })
        return params

    @staticmethod
    def name(params: Dict[str, Any]) -> str:
        return "RGIN"

    def __init__(self, params: Dict[str, Any], task: Sparse_Graph_Task, run_id: str, result_dir: str) -> None:
        super().__init__(params, task, run_id, result_dir)

    def _apply_gnn_layer(self,
                         node_representations: tf.Tensor,
                         adjacency_lists: List[tf.Tensor],
                         type_to_num_incoming_edges: tf.Tensor,
                         num_timesteps: int) -> tf.Tensor:
        return sparse_rgin_layer(
            node_embeddings=node_representations,
            adjacency_lists=adjacency_lists,
            state_dim=self.params['hidden_size'],
            num_timesteps=num_timesteps,
            activation_function=self.params['graph_activation_function'],
            num_MLP_hidden_layers=self.params['graph_num_MLP_hidden_layers'],
            learn_epsilon=self.params['graph_learn_epsilon'],
        )

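As a quick usage note for the model wrapper above: one way to adjust its hyperparameters is to start from default_params and override entries before passing the dict to the constructor. A small sketch, touching only keys defined in default_params above (task, run_id and result_dir come from the usual training driver and are not constructed here):

params = RGIN_Model.default_params()
params.update({
    'hidden_size': 64,                  # smaller node state dimension
    'graph_num_MLP_hidden_layers': 2,   # deeper per-edge-type MLPs
    'graph_learn_epsilon': True,        # learn the (1 + epsilon) self-loop weighting
})
# model = RGIN_Model(params, task, run_id, result_dir)
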
@@ -5,7 +5,7 @@ from typing import Tuple, Type, Dict, Any
import pickle

from models import (Sparse_Graph_Model, GGNN_Model, GNN_FiLM_Model, GNN_Edge_MLP_Model,
                    RGAT_Model, RGCN_Model, RGDCN_Model)
                    RGAT_Model, RGCN_Model, RGDCN_Model, RGIN_Model)
from tasks import Sparse_Graph_Task, QM9_Task, Citation_Network_Task, PPI_Task, VarMisuse_Task

@@ -49,6 +49,8 @@ def name_to_model_class(name: str) -> Tuple[Type[Sparse_Graph_Model], Dict[str,
        return RGCN_Model, {}
    if name in ["rgdcn", "rgdcn_model"]:
        return RGDCN_Model, {}
    if name in ["rgin", "rgin_model"]:
        return RGIN_Model, {}

    raise ValueError("Unknown model type '%s'" % name)

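Finally, a sketch of how the new registration in name_to_model_class resolves the model by name. The module that defines name_to_model_class is not named in this diff, so the import path below is a placeholder assumption:

from model_utils import name_to_model_class  # placeholder path; adjust to wherever name_to_model_class is defined
from models import RGIN_Model

model_cls, extra_params = name_to_model_class("rgin")  # "rgin_model" is accepted as well
assert model_cls is RGIN_Model

params = model_cls.default_params()
params.update(extra_params)  # extra_params is {} for RGIN, per the hunk above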