Source code for karateclub.node_embedding.attributed.bane

import numpy as np
import networkx as nx
from typing import Union
from numpy.linalg import inv
from scipy.sparse import coo_matrix
from sklearn.decomposition import TruncatedSVD
from karateclub.estimator import Estimator


[docs]class BANE(Estimator):
    r"""An implementation of `"BANE" <https://shiruipan.github.io/publication/yang-binarized-2018/yang-binarized-2018.pdf>`_
    from the ICDM '18 paper "Binarized Attributed Network Embedding". The procedure
    first calculates the truncated SVD of the product of the normalized adjacency
    matrix and the feature matrix. This matrix is further decomposed by a binary
    CCD based technique.

    Args:
        dimensions (int): Number of embedding dimensions. Default is 32.
        svd_iterations (int): SVD iteration count. Default is 20.
        seed (int): Random seed. Default is 42.
        alpha (float): Kernel matrix inversion parameter. Default is 0.3.
        iterations (int): Matrix decomposition iterations. Default is 100.
        binarization_iterations (int): Binarization iterations. Default is 20.
    """

    def __init__(
        self,
        dimensions: int = 32,
        svd_iterations: int = 20,
        seed: int = 42,
        alpha: float = 0.3,
        iterations: int = 100,
        binarization_iterations: int = 20,
    ):
        self.dimensions = dimensions
        self.svd_iterations = svd_iterations
        self.seed = seed
        self.alpha = alpha
        self.iterations = iterations
        self.binarization_iterations = binarization_iterations

    def _create_target_matrix(self, graph):
        """
        Creating a normalized sparse adjacency matrix target.

        Arg types:
            * **graph** *(NetworkX graph)* - The graph to be embedded.

        Return types:
            * **P** *(Scipy COO matrix)* - The target matrix.
        """
        weighted_graph = nx.Graph()
        for (u, v) in graph.edges():
            weighted_graph.add_edge(u, v, weight=1.0 / graph.degree(u))
            weighted_graph.add_edge(v, u, weight=1.0 / graph.degree(v))
        P = nx.adjacency_matrix(
            weighted_graph, nodelist=range(graph.number_of_nodes())
        )
        return P
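The inverse-degree edge weights above approximate a row-normalized (random-walk) adjacency matrix. A minimal standalone sketch of that kind of normalization, kept outside the class and using an arbitrary example graph, might look like this:

# Standalone illustration (not the class's internal code path): build a
# degree-normalized adjacency D^{-1} A, roughly the target BANE works from.
import numpy as np
import networkx as nx
from scipy.sparse import coo_matrix, diags

toy_graph = nx.newman_watts_strogatz_graph(50, 5, 0.3, seed=42)
A = nx.adjacency_matrix(toy_graph, nodelist=range(toy_graph.number_of_nodes()))
inverse_degrees = 1.0 / np.asarray(A.sum(axis=1)).flatten()
P_illustrative = coo_matrix(diags(inverse_degrees) @ A)  # each row sums to 1.0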
[docs]    def fit(self, graph: nx.classes.graph.Graph, X: Union[np.array, coo_matrix]):
        """
        Fitting a BANE model.

        Arg types:
            * **graph** *(NetworkX graph)* - The graph to be embedded.
            * **X** *(Scipy COO or Numpy array)* - The matrix of node features.
        """
        self._set_seed()
        graph = self._check_graph(graph)
        self._P = self._create_target_matrix(graph)
        self._X = X
        self._fit_base_SVD_model()
        self._binary_optimize()
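Per the signature above, node features can be supplied either as a dense NumPy array or as a SciPy COO matrix. A hedged sketch of building a sparse one-hot feature matrix (the attribute layout here is an invented example, not prescribed by the library):

# Illustrative only: a sparse one-hot feature matrix in COO format.
import numpy as np
from scipy.sparse import coo_matrix

n_nodes, n_features = 100, 64
rows = np.arange(n_nodes)
cols = np.random.randint(0, n_features, size=n_nodes)  # one attribute per node
X_sparse = coo_matrix(
    (np.ones(n_nodes), (rows, cols)), shape=(n_nodes, n_features)
)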
    def _fit_base_SVD_model(self):
        """
        Reducing the dimensionality with SVD in the 1st step.
        """
        self._P = self._P.dot(self._X)
        self.model = TruncatedSVD(
            n_components=self.dimensions,
            n_iter=self.svd_iterations,
            random_state=self.seed,
        )
        self._P = self.model.fit_transform(self._P)

    def _update_G(self):
        """
        Updating the kernel matrix.
        """
        self._G = np.dot(self._B.transpose(), self._B)
        self._G = self._G + self.alpha * np.eye(self.dimensions)
        self._G = inv(self._G)
        self._G = self._G.dot(self._B.transpose()).dot(self._P)

    def _update_Q(self):
        """
        Updating the rescaled target matrix.
        """
        self._Q = self._G.dot(self._P.transpose()).transpose()

    def _update_B(self):
        """
        Updating the embedding matrix.
        """
        for _ in range(self.iterations):
            for d in range(self.dimensions):
                sel = [x for x in range(self.dimensions) if x != d]
                self._B[:, d] = (
                    self._Q[:, d]
                    - self._B[:, sel]
                    .dot(self._G[sel, :])
                    .dot(self._G[:, d])
                    .transpose()
                )
                self._B[:, d] = np.sign(self._B[:, d])

    def _binary_optimize(self):
        """
        Starting 2nd optimization phase with power iterations and CCD.
        """
        self._B = np.sign(np.random.normal(size=(self._P.shape[0], self.dimensions)))
        for _ in range(self.binarization_iterations):
            self._update_G()
            self._update_Q()
            self._update_B()
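As a sanity check on the second-phase algebra, the kernel and rescaled-target updates above reduce to short closed forms. The following standalone shape sketch uses arbitrary sizes and random data; it is not part of the estimator:

# Shape check for the binary optimization phase, with n nodes and d dimensions:
#   G = (B^T B + alpha * I)^{-1} B^T P   -> (d, d)
#   Q = (G P^T)^T = P G^T                -> (n, d)
# B is then re-binarized column by column via CCD.
import numpy as np
from numpy.linalg import inv

n, d, alpha = 100, 32, 0.3
P = np.random.normal(size=(n, d))           # SVD-reduced target from phase one
B = np.sign(np.random.normal(size=(n, d)))  # binary embedding initialization
G = inv(B.T @ B + alpha * np.eye(d)) @ B.T @ P
Q = (G @ P.T).T
assert G.shape == (d, d) and Q.shape == (n, d)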
[docs]    def get_embedding(self) -> np.array:
        r"""Getting the node embedding.

        Return types:
            * **embedding** *(Numpy array)* - The embedding of nodes.
        """
        embedding = self._B
        return embedding
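A minimal end-to-end usage sketch, assuming the package-level import from karateclub and an arbitrary synthetic graph with random dense features:

# Usage sketch: fit BANE on a small synthetic attributed graph.
import numpy as np
import networkx as nx
from karateclub import BANE

graph = nx.newman_watts_strogatz_graph(100, 10, 0.2, seed=42)
X = np.random.uniform(0, 1, (graph.number_of_nodes(), 64))  # example features

model = BANE(dimensions=32)
model.fit(graph, X)
embedding = model.get_embedding()  # (100, 32) array of binary (+/-1) codes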