Source code for karateclub.community_detection.overlapping.symmnmf

import numpy as np
import networkx as nx
from typing import Dict
from scipy import sparse
from karateclub.estimator import Estimator


[docs]class SymmNMF(Estimator): r"""An implementation of `"Symm-NMF" <https://www.cc.gatech.edu/~hpark/papers/DaDingParkSDM12.pdf>`_ from the SDM'12 paper "Symmetric Nonnegative Matrix Factorization for Graph Clustering". The procedure decomposed the second power od the normalized adjacency matrix with an ADMM based non-negative matrix factorization based technique. This results in a node embedding and each node is associated with an embedding factor in the created latent space. Args: dimensions (int): Number of dimensions. Default is 32. iterations (int): Number of power iterations. Default is 200. rho (float): Regularization tuning parameter. Default is 100.0. seed (int): Random seed value. Default is 42. """ def __init__( self, dimensions: int = 32, iterations: int = 200, rho: float = 100.0, seed: int = 42, ): self.dimensions = dimensions self.iterations = iterations self.rho = rho self.seed = seed def _create_D_inverse(self, graph): """ Creating a sparse inverse degree matrix. Arg types: * **graph** *(NetworkX graph)* - The graph to be embedded. Return types: * **D_inverse** *(Scipy array)* - Diagonal inverse degree matrix. """ index = np.arange(graph.number_of_nodes()) values = np.array( [1.0 / graph.degree[node] for node in range(graph.number_of_nodes())] ) shape = (graph.number_of_nodes(), graph.number_of_nodes()) D_inverse = sparse.coo_matrix((values, (index, index)), shape=shape) return D_inverse def _create_base_matrix(self, graph): """ Creating a tuple with the normalized adjacency matrix. Return types: * **A_hat** *Scipy array* - Normalized adjacency. """ A = nx.adjacency_matrix(graph, nodelist=range(graph.number_of_nodes())) D_inverse = self._create_D_inverse(graph) A_hat = D_inverse.dot(A) return A_hat def _setup_embeddings(self, graph): """ Setup the node embedding matrices. Arg types: * **graph** *(NetworkX graph)* - The graph to be clustered. """ number_of_nodes = graph.shape[0] non_zero = graph.nonzero()[0].shape[0] self._H = np.random.uniform( 0, non_zero / (number_of_nodes ** 2), size=(number_of_nodes, self.dimensions), ) self._H_gamma = np.zeros((number_of_nodes, self.dimensions)) self._I = np.identity(self.dimensions)
[docs] def get_memberships(self) -> Dict[int, int]: r"""Getting the cluster membership of nodes. Return types: * **memberships** *(dict)* - Node cluster memberships. """ index = np.argmax(self._W, axis=1) memberships = {int(i): int(index[i]) for i in range(len(index))} return memberships
[docs] def get_embedding(self) -> np.array: r"""Getting the node embedding. Return types: * **embedding** *(Numpy array)* - The embedding of nodes. """ embedding = self._H return embedding
def _do_admm_update(self, A_hat): """ Doing a single ADMM update with the adjacency matrix. """ H_covar = np.linalg.inv(self._H.T.dot(self._H) + self.rho * self._I) self._W = ( A_hat.dot(A_hat.T.dot(self._H)) + self.rho * self._H - self._H_gamma ).dot(H_covar) self._W = np.maximum(self._W, 0) W_covar = np.linalg.inv(self._W.T.dot(self._W) + self.rho * self._I) self._H = ( A_hat.dot(A_hat.T.dot(self._W)) + self.rho * self._W + self._H_gamma ).dot(W_covar) self._H = np.maximum(self._H, 0) self._H_gamma = self._H_gamma + self.rho * (self._W - self._H)
[docs] def fit(self, graph: nx.classes.graph.Graph): """ Fitting a Symm-NMF clustering model. Arg types: * **graph** *(NetworkX graph)* - The graph to be clustered. """ self._set_seed() graph = self._check_graph(graph) graph.remove_edges_from(nx.selfloop_edges(graph)) A_hat = self._create_base_matrix(graph) self._setup_embeddings(A_hat) for step in range(self.iterations): self._do_admm_update(A_hat)