From 876035203e8c339e0c4b8adc684f2a09ac81c141 Mon Sep 17 00:00:00 2001
From: pedugnat
Date: Fri, 12 Aug 2022 11:50:45 +0200
Subject: [PATCH 1/4] add check edge weights positive

---
 dynnode2vec/dynnode2vec.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/dynnode2vec/dynnode2vec.py b/dynnode2vec/dynnode2vec.py
index f11c54e..23e5915 100644
--- a/dynnode2vec/dynnode2vec.py
+++ b/dynnode2vec/dynnode2vec.py
@@ -39,6 +39,7 @@ def __init__(
         n_walks_per_node: int = 10,
         embedding_size: int = 128,
         window: int = 10,
+        weighted: bool = False,
         seed: int | None = 0,
         parallel_processes: int = 4,
         plain_node2vec: bool = False,
@@ -70,6 +71,7 @@ def __init__(
         assert (
             isinstance(window, int) and embedding_size > 0
         ), "window should be a strictly positive integer"
+        assert isinstance(weighted, bool), "weighted should be a boolean"
         assert (
             seed is None or isinstance(seed, int)
         ) and embedding_size > 0, "seed should be either None or int"
@@ -84,6 +86,7 @@ def __init__(
         self.n_walks_per_node = n_walks_per_node
         self.embedding_size = embedding_size
         self.window = window
+        self.weighted = weighted
         self.seed = seed
         self.parallel_processes = parallel_processes
         self.plain_node2vec = plain_node2vec
@@ -91,6 +94,21 @@ def __init__(
         # see https://stackoverflow.com/questions/53417258/what-is-workers-parameter-in-word2vec-in-nlp # pylint: disable=line-too-long
         self.gensim_workers = max(self.parallel_processes - 1, 12)
 
+    def check_edge_weights(self, graphs: list[nx.Graph]) -> None:
+        """
+        Check that all edge weights are strictly positive, otherwise we can not run random walks.
+        """
+        if not self.weighted:
+            return
+
+        for i, graph in enumerate(graphs):
+            weights = nx.get_edge_attributes(graph, name="weight")
+
+            assert all(weight > 0 for weight in weights.values()), (
+                "All edge weights should be strictly positive to run Dynnode2Vec "
+                f"found negative weight in graph {i}"
+            )
+
     def _initialize_embeddings(
         self, graphs: list[nx.Graph]
     ) -> tuple[Word2Vec, list[Embedding]]:

From 053fefff693e523942865ca4d6fd4790ff452480 Mon Sep 17 00:00:00 2001
From: pedugnat
Date: Fri, 12 Aug 2022 16:27:04 +0200
Subject: [PATCH 2/4] add tests for check edge weights

---
 tests/test_dynnode2vec.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/tests/test_dynnode2vec.py b/tests/test_dynnode2vec.py
index 70dc250..904e531 100644
--- a/tests/test_dynnode2vec.py
+++ b/tests/test_dynnode2vec.py
@@ -2,6 +2,7 @@
 Test the DynNode2Vec class
 """
 # pylint: disable=missing-function-docstring
+import random
 
 import gensim
 import networkx as nx
@@ -24,6 +25,13 @@ def dynnode2vec_fixture():
     )
 
 
+@pytest.fixture(name="weighted_dynnode2vec_object")
+def weighted_dynnode2vec_fixture():
+    return dynnode2vec.DynNode2Vec(
+        n_walks_per_node=5, walk_length=5, weighted=True, parallel_processes=1
+    )
+
+
 @pytest.fixture(name="parallel_dynnode2vec_object")
 def dynnode2vec_parallel_fixture():
     return dynnode2vec.DynNode2Vec(
@@ -93,6 +101,21 @@ def test_compute_embeddings(graphs, dynnode2vec_object):
     assert all(isinstance(emb, dynnode2vec.Embedding) for emb in embeddings)
 
 
+def test_compute_weighted_embeddings(graphs, weighted_dynnode2vec_object):
+    embeddings = weighted_dynnode2vec_object.compute_embeddings(graphs)
+
+    assert isinstance(embeddings, list)
+    assert all(isinstance(emb, dynnode2vec.Embedding) for emb in embeddings)
+
+    # add random negative weights to the graph and check that it raises
+    for graph in graphs:
+        for _, _, data in graph.edges(data=True):
+            data["weight"] = -random.random()
+
+    with pytest.raises(AssertionError):
+        weighted_dynnode2vec_object.compute_embeddings(graphs)
+
+
 def test_parallel_compute_embeddings(graphs, parallel_dynnode2vec_object):
     embeddings = parallel_dynnode2vec_object.compute_embeddings(graphs)
 

From c5c8235a7fb75a80865037952018c8f8f2486eaf Mon Sep 17 00:00:00 2001
From: pedugnat
Date: Fri, 12 Aug 2022 16:30:53 +0200
Subject: [PATCH 3/4] added seed for tests reproducibility

---
 tests/test_dynnode2vec.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/test_dynnode2vec.py b/tests/test_dynnode2vec.py
index 904e531..fcface7 100644
--- a/tests/test_dynnode2vec.py
+++ b/tests/test_dynnode2vec.py
@@ -108,9 +108,10 @@ def test_compute_weighted_embeddings(graphs, weighted_dynnode2vec_object):
     assert all(isinstance(emb, dynnode2vec.Embedding) for emb in embeddings)
 
     # add random negative weights to the graph and check that it raises
+    rng = random.Random(0)
     for graph in graphs:
         for _, _, data in graph.edges(data=True):
-            data["weight"] = -random.random()
+            data["weight"] = -rng.random()
 
     with pytest.raises(AssertionError):
         weighted_dynnode2vec_object.compute_embeddings(graphs)

From c5859d1ea9d7c15c25169bb0e68f2593c82c5688 Mon Sep 17 00:00:00 2001
From: pedugnat
Date: Fri, 12 Aug 2022 16:41:00 +0200
Subject: [PATCH 4/4] fix

---
 dynnode2vec/dynnode2vec.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dynnode2vec/dynnode2vec.py b/dynnode2vec/dynnode2vec.py
index 23e5915..2358c64 100644
--- a/dynnode2vec/dynnode2vec.py
+++ b/dynnode2vec/dynnode2vec.py
@@ -94,7 +94,7 @@ def __init__(
         # see https://stackoverflow.com/questions/53417258/what-is-workers-parameter-in-word2vec-in-nlp # pylint: disable=line-too-long
         self.gensim_workers = max(self.parallel_processes - 1, 12)
 
-    def check_edge_weights(self, graphs: list[nx.Graph]) -> None:
+    def _check_edge_weights(self, graphs: list[nx.Graph]) -> None:
         """
         Check that all edge weights are strictly positive, otherwise we can not run random walks.
         """
@@ -250,7 +250,7 @@ def compute_embeddings(self, graphs: list[nx.Graph]) -> list[Embedding]:
         """
         Compute dynamic embeddings on a list of graphs.
         """
-        # TO DO : check graph weights valid
+        self._check_edge_weights(graphs)
         model, embeddings = self._initialize_embeddings(graphs)
         time_walks = self._simulate_walks(graphs)
         self._update_embeddings(embeddings, time_walks, model)