import random
import community
import numpy as np
import networkx as nx
from typing import Dict
from karateclub.estimator import Estimator
class BigClam(Estimator):
    r"""An implementation of `"BigClam" <http://infolab.stanford.edu/~crucis/pubs/paper-nmfagm.pdf>`_
    from the WSDM '13 paper "Overlapping Community Detection at Scale: A Non-negative Matrix
    Factorization Approach". The procedure uses gradient ascent to create an embedding which is
    used for deciding the node-cluster affiliations.

    Args:
        dimensions (int): Number of embedding dimensions. Default 8.
        iterations (int): Number of training iterations. Default 50.
        learning_rate (float): Gradient ascent learning rate. Default is 0.005.
        seed (int): Random seed value. Default is 42.
    """

    def __init__(
        self,
        dimensions: int = 8,
        iterations: int = 50,
        learning_rate: float = 0.005,
        seed: int = 42,
    ):
        self.dimensions = dimensions
        self.iterations = iterations
        self.learning_rate = learning_rate
        self.seed = seed

    def _initialize_features(self, number_of_nodes: int):
        """
        Creating the community embedding and gradient sum.

        Arg types:
            * **number_of_nodes** *(int)* - The number of nodes in the graph.
        """
        # F in the paper: one non-negative row of community affiliations per node.
        self._embedding = np.random.uniform(0, 1, (number_of_nodes, self.dimensions))
        # Column-wise sum over all rows; kept up to date incrementally in _do_updates
        # so the non-neighbour term of the gradient is O(dimensions) per node.
        self._global_features = np.sum(self._embedding, axis=0)

    def _calculate_gradient(self, node_feature, neb_features):
        """
        Calculating the feature gradient.

        Arg types:
            * **node_feature** *(Numpy array)* - The node representation.
            * **neb_features** *(Numpy array)* - The representation of node neighbours.

        Return types:
            * **grad** *(Numpy array)* - The gradient for the node representation.
        """
        raw_scores = node_feature.dot(neb_features.T)
        # Clip before exponentiating to keep exp() numerically stable.
        raw_scores = np.clip(raw_scores, -15, 15)
        scores = np.exp(-raw_scores) / (1 - np.exp(-raw_scores))
        scores = scores.reshape(-1, 1)
        # Attractive term from the node's neighbours.
        neb_grad = np.sum(scores * neb_features, axis=0)
        # Repulsive term: sum of features of all non-neighbours, recovered from
        # the cached global sum instead of iterating over the whole graph.
        without_grad = (
            self._global_features - node_feature - np.sum(neb_features, axis=0)
        )
        grad = neb_grad - without_grad
        return grad

    def _do_updates(self, node, gradient, node_feature):
        """
        Updating the embedding and the feature sum.

        Arg types:
            * **node** *(int)* - The node identifier.
            * **gradient** *(Numpy array)* - The gradient of the node representation.
            * **node_feature** *(Numpy array)* - The node representation before the update.
        """
        self._embedding[node] = self._embedding[node] + self.learning_rate * gradient
        # Project back onto the bounded non-negative orthant required by NMF.
        self._embedding[node] = np.clip(self._embedding[node], 0.00001, 10)
        # Refresh the cached column sum with the node's old/new representations.
        self._global_features = (
            self._global_features - node_feature + self._embedding[node]
        )

    def get_memberships(self) -> Dict[int, int]:
        r"""Getting the cluster membership of nodes.

        Return types:
            * **memberships** *(dict)* - Node cluster memberships.
        """
        # Hard assignment: each node joins its highest-affiliation community.
        indices = np.argmax(self._embedding, axis=1)
        memberships = {i: membership for i, membership in enumerate(indices)}
        return memberships

    def get_embedding(self) -> np.array:
        r"""Getting the node embedding.

        Return types:
            * **embedding** *(Numpy array)* - The embedding of nodes.
        """
        embedding = self._embedding
        return embedding

    def fit(self, graph: nx.classes.graph.Graph):
        """
        Fitting a BigClam clustering model.

        Arg types:
            * **graph** *(NetworkX graph)* - The graph to be clustered.
        """
        self._set_seed()
        graph = self._check_graph(graph)
        number_of_nodes = graph.number_of_nodes()
        self._initialize_features(number_of_nodes)
        nodes = [node for node in graph.nodes()]
        for i in range(self.iterations):
            # Visit nodes in a fresh random order each sweep of gradient ascent.
            random.shuffle(nodes)
            for node in nodes:
                nebs = [neb for neb in graph.neighbors(node)]
                neb_features = self._embedding[nebs, :]
                node_feature = self._embedding[node, :]
                gradient = self._calculate_gradient(node_feature, neb_features)
                self._do_updates(node, gradient, node_feature)