Source code for karateclub.node_embedding.attributed.asne

import random
import numpy as np
import networkx as nx
from scipy.sparse import coo_matrix
from karateclub.estimator import Estimator
from gensim.models.doc2vec import Doc2Vec, TaggedDocument


class ASNE(Estimator):
    r"""An implementation of `"ASNE" <https://arxiv.org/abs/1705.04969>`_
    from the TKDE '18 paper "Attributed Social Network Embedding". The
    procedure implicitly factorizes a concatenated adjacency matrix and
    feature matrix.

    Args:
        dimensions (int): Dimensionality of embedding. Default is 128.
        workers (int): Number of cores. Default is 4.
        epochs (int): Number of epochs. Default is 100.
        down_sampling (float): Down sampling frequency. Default is 0.0001.
        learning_rate (float): HogWild! learning rate. Default is 0.05.
        min_count (int): Minimal count of node occurrences. Default is 1.
        seed (int): Random seed value. Default is 42.
    """

    def __init__(
        self,
        dimensions: int = 128,
        workers: int = 4,
        epochs: int = 100,
        down_sampling: float = 0.0001,
        learning_rate: float = 0.05,
        min_count: int = 1,
        seed: int = 42,
    ):
        self.dimensions = dimensions
        self.workers = workers
        self.epochs = epochs
        self.down_sampling = down_sampling
        self.learning_rate = learning_rate
        self.min_count = min_count
        self.seed = seed

    def _feature_transform(self, graph, X):
        features = {
            node: ["neb_" + str(neb) for neb in graph.neighbors(node)]
            for node in graph.nodes()
        }
        nodes = X.row
        for i, node in enumerate(nodes):
            features[node].append("feature_" + str(X.col[i]))
        return features
    def fit(self, graph: nx.classes.graph.Graph, X: coo_matrix):
        """
        Fitting an ASNE model.

        Arg types:
            * **graph** *(NetworkX graph)* - The graph to be embedded.
            * **X** *(Scipy COO array)* - The matrix of node features.
        """
        self._set_seed()
        graph = self._check_graph(graph)
        features = self._feature_transform(graph, X)
        documents = [
            TaggedDocument(words=features[node], tags=[str(node)])
            for node in range(len(features))
        ]

        model = Doc2Vec(
            documents,
            vector_size=self.dimensions,
            window=0,
            min_count=self.min_count,
            dm=0,
            sample=self.down_sampling,
            workers=self.workers,
            epochs=self.epochs,
            alpha=self.learning_rate,
            seed=self.seed,
        )

        self._embedding = np.array(
            [model.docvecs[str(i)] for i, _ in enumerate(documents)]
        )
    def get_embedding(self) -> np.array:
        r"""Getting the node embedding.

        Return types:
            * **embedding** *(Numpy array)* - The embedding of nodes.
        """
        embedding = self._embedding
        return embedding
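
A minimal sketch, not part of the karateclub source, of what ``_feature_transform`` builds: each node is turned into a "document" whose tokens are its neighbor identifiers plus its nonzero feature indices, which ``fit`` then passes to Doc2Vec. The three-node path graph and the feature layout below are arbitrary choices for illustration, and calling the private helper directly is done here only to show its output.

import networkx as nx
from scipy.sparse import coo_matrix
from karateclub import ASNE

# A three-node path graph: 0 - 1 - 2 (illustrative toy input).
graph = nx.path_graph(3)

# Node 0 has feature 1; node 2 has features 0 and 1 (illustrative values).
X = coo_matrix(([1, 1, 1], ([0, 2, 2], [1, 0, 1])), shape=(3, 2))

tokens = ASNE()._feature_transform(graph, X)
# Given the listing above, this should contain:
# {0: ['neb_1', 'feature_1'],
#  1: ['neb_0', 'neb_2'],
#  2: ['neb_1', 'feature_0', 'feature_1']}
print(tokens)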
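
An end-to-end usage sketch under the estimator's stated assumptions: nodes are labeled 0..n-1 and the node features arrive as a SciPy COO matrix. The ``newman_watts_strogatz_graph`` generator, the random binary feature matrix, and the reduced ``dimensions``/``epochs`` values are illustrative choices, not part of the original source.

import numpy as np
import networkx as nx
from scipy.sparse import coo_matrix
from karateclub import ASNE

# Toy graph with nodes indexed 0..49, as the estimator expects.
graph = nx.newman_watts_strogatz_graph(50, 5, 0.3, seed=42)

# Random binary node-feature matrix in COO format (illustrative only).
rng = np.random.default_rng(42)
dense = (rng.random((50, 16)) < 0.2).astype(int)
X = coo_matrix(dense)

model = ASNE(dimensions=32, epochs=10)
model.fit(graph, X)
embedding = model.get_embedding()
print(embedding.shape)  # (number of nodes, dimensions), i.e. (50, 32)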