"""Invariant Graph Embedding model class."""
from typing import List
import numpy as np
import networkx as nx
import scipy.sparse as sps
from karateclub.estimator import Estimator
[docs]class IGE(Estimator):
r"""An implementation of `"Invariant Graph Embedding" <https://graphreason.github.io/papers/16.pdf>`_
from the ICML 2019 Workshop on Learning and Reasoning with Graph-Structured
Data paper "Invariant Embedding for Graph Classification". The procedure
computes a mixture of spectral and node embedding based features. Specifically,
it uses scattering, eigenvalues and pooled node feature embeddings to create
graph descriptors.
Args:
feature_embedding_dimensions (list): Feature embedding dimensions. Default is [3, 5]
spectral_embedding_dimensions (list): Spectral embedding dimensions. Default is [10, 20].
histogram_bins (list): Number of histogram bins. Default is [10, 20].
seed (int): Random seed value. Default is 42.
"""
def __init__(
self,
feature_embedding_dimensions: List[int] = [3, 5],
spectral_embedding_dimensions: List[int] = [10, 20],
histogram_bins: List[int] = [10, 20],
seed: int = 42,
):
self.feature_embedding_dimensions = feature_embedding_dimensions
self.spectral_embedding_dimensions = spectral_embedding_dimensions
self.histogram_bins = histogram_bins
self.seed = seed
def _create_D_inverse(self, graph):
"""
Creating a sparse inverse degree matrix.
Arg types:
* **graph** *(NetworkX graph)* - The graph to be embedded.
Return types:
* **D_inverse** *(Scipy array)* - Diagonal inverse degree matrix.
"""
index = np.arange(graph.number_of_nodes())
values = np.array(
[1.0 / graph.degree[node] for node in range(graph.number_of_nodes())]
)
shape = (graph.number_of_nodes(), graph.number_of_nodes())
D_inverse = sps.coo_matrix((values, (index, index)), shape=shape)
return D_inverse
def _get_normalized_adjacency(self, graph):
"""
Calculating the normalized adjacency matrix.
Arg types:
* **graph** *(NetworkX graph)* - The graph of interest.
Return types:
* **A_hat** *(SciPy array)* - The adjacency matrix of the graph.
"""
A = nx.adjacency_matrix(graph, nodelist=range(graph.number_of_nodes()))
D_inverse = self._create_D_inverse(graph)
A_hat = D_inverse.dot(A)
return A_hat
def _get_embedding_features(self, graph, features):
"""
Calculating the embedding features.
Arg types:
* **graph** *(NetworkX graph)* - The graph of interest.
* **features** *(list)* - The list of graph feature NumPy arrays.
Return types:
* **features** *(list)* - The list of graph feature NumPy arrays.
"""
number_of_nodes = graph.number_of_nodes()
mat_eye = np.eye(self.max_deg + 1)
degrees = [graph.degree[node] for node in graph.nodes()]
sub_features = mat_eye[degrees]
feature_dim = sub_features.shape[1]
for emb_dim in self.feature_embedding_dimensions:
emb_space_full = emb_dim * feature_dim
embed_space = np.zeros((emb_space_full, number_of_nodes))
P = self._get_normalized_adjacency(graph)
Q = self._get_normalized_adjacency(graph)
for i in range(emb_dim):
P = P.dot(Q)
embed_space[i * feature_dim : (i + 1) * feature_dim, :] = P.dot(
sub_features
).T
features.append(np.mean(embed_space, axis=1).T)
return features
def _get_spectral_features(self, graph, features):
"""
Calculating the spectral features.
Arg types:
* **graph** *(NetworkX graph)* - The graph of interest.
* **features** *(list)* - The list of graph feature NumPy arrays.
Return types:
* **features** *(list)* - The list of graph feature NumPy arrays.
"""
L = nx.laplacian_matrix(graph).asfptype()
for emb_dim in self.spectral_embedding_dimensions:
emb_eig = np.zeros(emb_dim)
min_dim = min(graph.number_of_nodes() - 1, emb_dim)
eigenvalues = sps.linalg.eigsh(
L, min_dim, which="SM", ncv=25 * min_dim, return_eigenvectors=False
)
emb_eig[-min_dim:] = eigenvalues[:min_dim]
features.append(emb_eig)
return features
def _get_histogram_features(self, graph, features):
"""
Calculating the spectral histogram features.
Arg types:
* **graph** *(NetworkX graph)* - The graph of interest.
* **features** *(list)* - The list of graph feature NumPy arrays.
Return types:
* **features** *(list)* - The list of graph feature NumPy arrays.
"""
L = nx.laplacian_matrix(graph).asfptype()
eigenvalues, eigenvectors = sps.linalg.eigsh(L)
eigenvectors_norm = np.dot(
np.diag(np.sqrt(1 / eigenvalues[1:])), eigenvectors.T[1:, :]
)
sim = np.dot(eigenvectors_norm.T, eigenvectors_norm)
sim = np.reshape(sim, (1, -1))
for bins in self.histogram_bins:
hist = np.histogram(sim, range=(-1, 1), bins=bins)[0]
features.append(hist)
return features
def _calculate_invariant_embedding(self, graph):
"""
Calculating features from generic embedding, spectral embeddings and histogram features.
Arg types:
* **graph** *(NetworkX graph)* - The graph of interest.
Return types:
* **features** *(NumPy array)* - The features of the graph.
"""
features = []
features = self._get_embedding_features(graph, features)
features = self._get_spectral_features(graph, features)
features = self._get_histogram_features(graph, features)
features = np.concatenate(features).reshape(1, -1)
return features
[docs] def fit(self, graphs: List[nx.classes.graph.Graph]):
"""
Fitting an Invariant Graph Embedding model.
Arg types:
* **graphs** *(List of NetworkX graphs)* - The graphs to be embedded.
"""
self._set_seed()
graphs = self._check_graphs(graphs)
self.max_deg = max(
[max([graph.degree[n] for n in graph.nodes()]) for graph in graphs]
)
self._embedding = [
self._calculate_invariant_embedding(graph) for graph in graphs
]
self._embedding = np.concatenate(self._embedding)
[docs] def get_embedding(self) -> np.array:
r"""Getting the embedding of graphs.
Return types:
* **embedding** *(Numpy array)* - The embedding of graphs.
"""
return self._embedding
[docs] def infer(self, graphs: List[nx.classes.graph.Graph]) -> np.array:
r"""Infer the embedding of graphs.
Arg types:
* **graphs** *(List of NetworkX graphs)* - The graphs to be embedded.
Return types:
* **embedding** *(Numpy array)* - The embedding of graphs.
"""
self._set_seed()
graphs = self._check_graphs(graphs)
embedding = np.array(
[self._calculate_invariant_embedding(graph) for graph in graphs]
)
return embedding