Source code for karateclub.community_detection.overlapping.symmnmf

import numpy as np
import networkx as nx
from typing import Dict
from scipy import sparse
from karateclub.estimator import Estimator


[docs]class SymmNMF(Estimator):

    r"""An implementation of `"Symm-NMF" <https://www.cc.gatech.edu/~hpark/papers/DaDingParkSDM12.pdf>`_
    from the SDM'12 paper "Symmetric Nonnegative Matrix Factorization for Graph Clustering". The procedure
    decomposed the second power od the normalized adjacency matrix with an ADMM based non-negative matrix
    factorization based technique. This results in a node embedding and each node is associated with an
    embedding factor in the created latent space.

    Args:
        dimensions (int): Number of dimensions. Default is 32.
        iterations (int): Number of power iterations. Default is 200.
        rho (float): Regularization tuning parameter. Default is 100.0.
        seed (int): Random seed value. Default is 42.
    """

    def __init__(
        self,
        dimensions: int = 32,
        iterations: int = 200,
        rho: float = 100.0,
        seed: int = 42,
    ):

        self.dimensions = dimensions
        self.iterations = iterations
        self.rho = rho
        self.seed = seed

    def _create_D_inverse(self, graph):
        """
        Creating a sparse inverse degree matrix.

        Arg types:
            * **graph** *(NetworkX graph)* - The graph to be embedded.

        Return types:
            * **D_inverse** *(Scipy array)* - Diagonal inverse degree matrix.
        """
        index = np.arange(graph.number_of_nodes())
        values = np.array(
            [1.0 / graph.degree[node] for node in range(graph.number_of_nodes())]
        )
        shape = (graph.number_of_nodes(), graph.number_of_nodes())
        D_inverse = sparse.coo_matrix((values, (index, index)), shape=shape)
        return D_inverse

    def _create_base_matrix(self, graph):
        """
        Creating a tuple with the normalized adjacency matrix.

        Return types:
            * **A_hat** *Scipy array* - Normalized adjacency.
        """
        A = nx.adjacency_matrix(graph, nodelist=range(graph.number_of_nodes()))
        D_inverse = self._create_D_inverse(graph)
        A_hat = D_inverse.dot(A)
        return A_hat

    def _setup_embeddings(self, graph):
        """
        Setup the node embedding matrices.

        Arg types:
            * **graph** *(NetworkX graph)* - The graph to be clustered.
        """
        number_of_nodes = graph.shape[0]
        non_zero = graph.nonzero()[0].shape[0]
        self._H = np.random.uniform(
            0,
            non_zero / (number_of_nodes**2),
            size=(number_of_nodes, self.dimensions),
        )
        self._H_gamma = np.zeros((number_of_nodes, self.dimensions))
        self._I = np.identity(self.dimensions)

[docs]    def get_memberships(self) -> Dict[int, int]:
        r"""Getting the cluster membership of nodes.

        Return types:
            * **memberships** *(dict)* - Node cluster memberships.
        """
        index = np.argmax(self._W, axis=1)
        memberships = {int(i): int(index[i]) for i in range(len(index))}
        return memberships

[docs]    def get_embedding(self) -> np.array:
        r"""Getting the node embedding.

        Return types:
            * **embedding** *(Numpy array)* - The embedding of nodes.
        """
        embedding = self._H
        return embedding

    def _do_admm_update(self, A_hat):
        """
        Doing a single ADMM update with the adjacency matrix.

        """
        H_covar = np.linalg.inv(self._H.T.dot(self._H) + self.rho * self._I)
        self._W = (
            A_hat.dot(A_hat.T.dot(self._H)) + self.rho * self._H - self._H_gamma
        ).dot(H_covar)
        self._W = np.maximum(self._W, 0)
        W_covar = np.linalg.inv(self._W.T.dot(self._W) + self.rho * self._I)
        self._H = (
            A_hat.dot(A_hat.T.dot(self._W)) + self.rho * self._W + self._H_gamma
        ).dot(W_covar)
        self._H = np.maximum(self._H, 0)
        self._H_gamma = self._H_gamma + self.rho * (self._W - self._H)

[docs]    def fit(self, graph: nx.classes.graph.Graph):
        """
        Fitting a Symm-NMF clustering model.

        Arg types:
            * **graph** *(NetworkX graph)* - The graph to be clustered.
        """
        self._set_seed()
        graph = self._check_graph(graph)
        graph.remove_edges_from(nx.selfloop_edges(graph))
        A_hat = self._create_base_matrix(graph)
        self._setup_embeddings(A_hat)
        for step in range(self.iterations):
            self._do_admm_update(A_hat)