Source code for karateclub.community_detection.overlapping.mnmf

import numpy as np
import networkx as nx
from typing import Dict
from scipy.sparse import coo_matrix
from karateclub.estimator import Estimator


[docs]class MNMF(Estimator): r"""An implementation of `"M-NMF" <https://aaai.org/ocs/index.php/AAAI/AAAI17/paper/view/14589/13763>`_ from the AAAI '17 paper "Community Preserving Network Embedding". The procedure uses joint non-negative matrix factorization with modularity based regularization in order to learn a cluster membership distribution over nodes. The method can be used in an overlapping and non-overlapping way. Args: dimensions (int): Number of dimensions. Default is 128. clusters (int): Number of clusters. Default is 10. lambd (float): KKT penalty. Default is 0.2 alpha (float): Clustering penalty. Default is 0.05. beta (float): Modularity regularization penalty. Default is 0.05. iterations (int): Number of power iterations. Default is 200. lower_control (float): Floating point overflow control. Default is 10**-15. eta (float): Similarity mixing parameter. Default is 5.0. seed (int): Random seed value. Default is 42. """ def __init__( self, dimensions: int = 128, clusters: int = 10, lambd: float = 0.2, alpha: float = 0.05, beta: float = 0.05, iterations: int = 200, lower_control: float = 10**-15, eta: float = 5.0, seed: int = 42, ): self.dimensions = dimensions self.clusters = clusters self.lambd = lambd self.alpha = alpha self.beta = beta self.iterations = iterations self.lower_control = lower_control self.eta = eta self.seed = seed def _modularity_generator(self): """Calculating the sparse modularity matrix.""" degs = nx.degree(self._graph) e_count = self._graph.number_of_edges() n_count = self._graph.number_of_nodes() modularity_mat_shape = (n_count, n_count) indices_1 = np.array( [edge[0] for edge in self._graph.edges()] + [edge[1] for edge in self._graph.edges()] ) indices_2 = np.array( [edge[1] for edge in self._graph.edges()] + [edge[0] for edge in self._graph.edges()] ) scores = [ float(degs[e[0]] * degs[e[1]]) / (2 * e_count) for e in self._graph.edges() ] scores = scores + [ float(degs[e[1]] * degs[e[0]]) / (2 * e_count) for e in self._graph.edges() ] mod_matrix = coo_matrix( (scores, (indices_1, indices_2)), shape=modularity_mat_shape ) return mod_matrix def _setup_matrices(self): """Creating parameter matrices and target matrices.""" self._number_of_nodes = nx.number_of_nodes(self._graph) self._M = np.random.uniform(0, 1, (self._number_of_nodes, self.dimensions)) self._U = np.random.uniform(0, 1, (self._number_of_nodes, self.dimensions)) self._H = np.random.uniform(0, 1, (self._number_of_nodes, self.clusters)) self._C = np.random.uniform(0, 1, (self.clusters, self.dimensions)) self._B1 = nx.adjacency_matrix( self._graph, nodelist=range(self._graph.number_of_nodes()) ) self._B2 = self._modularity_generator() self._X = np.transpose(self._U) overlaps = self._B1.dot(self._B1) self._S = self._B1 + self.eta * self._B1 * (overlaps) def _update_M(self): """Update matrix M.""" enum = self._S.dot(self._U) denom = np.dot(self._M, np.dot(np.transpose(self._U), self._U)) denom[denom < self.lower_control] = self.lower_control self._M = np.multiply(self._M, enum / denom) row_sums = self._M.sum(axis=1) self._M = self._M / row_sums[:, np.newaxis] def _update_U(self): """Update matrix U.""" enum = self._S.dot(self._M) + self.alpha * np.dot(self._H, self._C) denom = np.dot( self._U, np.dot(np.transpose(self._M), self._M) + self.alpha * np.dot(np.transpose(self._C), self._C), ) denom[denom < self.lower_control] = self.lower_control self._U = np.multiply(self._U, enum / denom) row_sums = self._U.sum(axis=1) self._U = self._U / row_sums[:, np.newaxis] def _update_C(self): """Update matrix C.""" enum = np.dot(np.transpose(self._H), self._U) denom = np.dot(self._C, np.dot(np.transpose(self._U), self._U)) denom[denom < self.lower_control] = self.lower_control frac = enum / denom self._C = np.multiply(self._C, frac) row_sums = self._C.sum(axis=1) self._C = self._C / row_sums[:, np.newaxis] def _update_H(self): """Update matrix H.""" B1H = self._B1.dot(self._H) B2H = self._B2.dot(self._H) HHH = np.dot(self._H, (np.dot(np.transpose(self._H), self._H))) UC = np.dot(self._U, np.transpose(self._C)) rooted = np.square(2 * self.beta * B2H) + np.multiply( 16 * self.lambd * HHH, ( 2 * self.beta * B1H + 2 * self.alpha * UC + (4 * self.lambd - 2 * self.alpha) * self._H ), ) rooted[rooted < 0] = 0 sqroot_1 = np.sqrt(rooted) enum = -2 * self.beta * B2H + sqroot_1 denom = 8 * self.lambd * HHH denom[denom < self.lower_control] = self.lower_control rooted = enum / denom rooted[rooted < 0] = 0 sqroot_2 = np.sqrt(rooted) self._H = np.multiply(self._H, sqroot_2) row_sums = self._H.sum(axis=1) self._H = self._H / row_sums[:, np.newaxis]
[docs] def get_memberships(self) -> Dict[int, int]: r"""Getting the cluster membership of nodes. Return types: * **memberships** *(dict)* - Node cluster memberships. """ indices = np.argmax(self._H, axis=1) memberships = {i: membership for i, membership in enumerate(indices)} return memberships
[docs] def get_embedding(self) -> np.array: r"""Getting the node embedding. Return types: * **embedding** *(Numpy array)* - The embedding of nodes. """ embedding = self._U return embedding
[docs] def get_cluster_centers(self) -> np.array: r"""Getting the node embedding. Return types: * **centers** *(Numpy array)* - The cluster centers. """ centers = self._C return centers
[docs] def fit(self, graph: nx.classes.graph.Graph): """ Fitting an M-NMF clustering model. Arg types: * **graph** *(NetworkX graph)* - The graph to be clustered. """ self._set_seed() graph = self._check_graph(graph) self._graph = graph self._setup_matrices() for _ in range(self.iterations): self._update_M() self._update_U() self._update_C() self._update_H()