from typing import List
import networkx as nx
import numpy as np
from gensim.models.word2vec import Word2Vec
from karateclub.estimator import Estimator
from karateclub.utils.walker import BiasedRandomWalker
class Node2Vec(Estimator):
    r"""An implementation of `"Node2Vec" <https://cs.stanford.edu/~jure/pubs/node2vec-kdd16.pdf>`_
    from the KDD '16 paper "node2vec: Scalable Feature Learning for Networks".
    The procedure uses biased second order random walks to approximate the
    pointwise mutual information matrix obtained by pooling normalized
    adjacency matrix powers. This matrix is decomposed by an approximate
    factorization technique.

    Args:
        walk_number (int): Number of random walks. Default is 10.
        walk_length (int): Length of random walks. Default is 80.
        p (float): Return parameter (1/p transition probability) controlling
            moves back towards the previous node. Default is 1.0.
        q (float): In-out parameter (1/q transition probability) controlling
            moves away from the previous node. Default is 1.0.
        dimensions (int): Dimensionality of embedding. Default is 128.
        workers (int): Number of cores. Default is 4.
        window_size (int): Matrix power order. Default is 5.
        epochs (int): Number of epochs. Default is 1.
        learning_rate (float): HogWild! learning rate. Default is 0.05.
        min_count (int): Minimal count of node occurrences. Default is 1.
        seed (int): Random seed value. Default is 42.
    """

    # Per-node embedding vectors; populated by ``fit``.
    _embedding: List[np.ndarray]

    def __init__(
        self,
        walk_number: int = 10,
        walk_length: int = 80,
        p: float = 1.0,
        q: float = 1.0,
        dimensions: int = 128,
        workers: int = 4,
        window_size: int = 5,
        epochs: int = 1,
        learning_rate: float = 0.05,
        min_count: int = 1,
        seed: int = 42,
    ):
        super().__init__()

        # Random-walk sampling hyperparameters.
        self.walk_number = walk_number
        self.walk_length = walk_length
        self.p = p
        self.q = q

        # Skip-gram (Word2Vec) training hyperparameters.
        self.dimensions = dimensions
        self.workers = workers
        self.window_size = window_size
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.min_count = min_count
        self.seed = seed

    def fit(self, graph: nx.classes.graph.Graph) -> None:
        """
        Fitting a Node2Vec model.

        Biased random walks are sampled from the graph and used as the
        training corpus of a Word2Vec model; the learned vector of every
        node is then stored on the estimator.

        Arg types:
            * **graph** *(NetworkX graph)* - The graph to be embedded.
        """
        self._set_seed()
        graph = self._check_graph(graph)

        walker = BiasedRandomWalker(self.walk_length, self.walk_number, self.p, self.q)
        walker.do_walks(graph)

        # hs=1 is passed through to gensim unchanged from the original code.
        model = Word2Vec(
            walker.walks,
            hs=1,
            alpha=self.learning_rate,
            epochs=self.epochs,
            vector_size=self.dimensions,
            window=self.window_size,
            min_count=self.min_count,
            workers=self.workers,
            seed=self.seed,
        )

        n_nodes = graph.number_of_nodes()
        # NOTE(review): the lookup by str(index) assumes nodes are labelled
        # 0..n_nodes-1 and that the walker emits node ids as strings --
        # presumably guaranteed by _check_graph / BiasedRandomWalker; confirm.
        self._embedding = [model.wv[str(n)] for n in range(n_nodes)]

    def get_embedding(self) -> np.ndarray:
        r"""Getting the node embedding.

        Return types:
            * **embedding** *(Numpy array)* - The embedding of nodes, one row
              per node in node-index order.
        """
        return np.array(self._embedding)