From 73c86cd6cf32bab286ee3b3cd5a90847fc1b51ea Mon Sep 17 00:00:00 2001 From: Amos Dinh Date: Fri, 30 Aug 2024 11:40:26 +0200 Subject: [PATCH 1/4] renamed httpheader vars to hyphen, fixed setuptools, fixed test_ paths --- .../dws/jrdf2vec/training/Gensim.java | 46 +++++----- src/main/resources/python_server.py | 92 ++++++++++--------- src/main/resources/test/test_python_server.py | 26 +++--- 3 files changed, 83 insertions(+), 81 deletions(-) diff --git a/src/main/java/de/uni_mannheim/informatik/dws/jrdf2vec/training/Gensim.java b/src/main/java/de/uni_mannheim/informatik/dws/jrdf2vec/training/Gensim.java index e5c251194..9310f054c 100644 --- a/src/main/java/de/uni_mannheim/informatik/dws/jrdf2vec/training/Gensim.java +++ b/src/main/java/de/uni_mannheim/informatik/dws/jrdf2vec/training/Gensim.java @@ -114,8 +114,8 @@ private Gensim() { */ public void trainVectorSpaceModel(String modelPath, String trainingFilePath) { HttpGet request = new HttpGet(serverUrl + "/train-vector-space-model"); - request.addHeader("input_file_path", getCanonicalPath(trainingFilePath)); - request.addHeader("model_path", modelPath); + request.addHeader("input-file-path", getCanonicalPath(trainingFilePath)); + request.addHeader("model-path", modelPath); try (CloseableHttpResponse response = httpClient.execute(request)) { HttpEntity entity = response.getEntity(); @@ -135,9 +135,9 @@ public void trainVectorSpaceModel(String modelPath, String trainingFilePath) { */ public double queryVectorSpaceModel(String modelPath, String documentIdOne, String documentIdTwo) throws Exception { HttpGet request = new HttpGet(serverUrl + "/query-vector-space-model"); - request.addHeader("model_path", modelPath); - request.addHeader("document_id_one", documentIdOne); - request.addHeader("document_id_two", documentIdTwo); + request.addHeader("model-path", modelPath); + request.addHeader("document-id-one", documentIdOne); + request.addHeader("document-id-two", documentIdTwo); try (CloseableHttpResponse response = httpClient.execute(request)) { HttpEntity entity = response.getEntity(); @@ -168,24 +168,24 @@ public double queryVectorSpaceModel(String modelPath, String documentIdOne, Stri public boolean trainWord2VecModel(String modelOrVectorPath, String trainingFilePath, Word2VecConfiguration configuration) { HttpGet request = new HttpGet(serverUrl + "/train-word2vec"); if (modelOrVectorPath.endsWith(".kv")) { - request.addHeader("vector_path", modelOrVectorPath); - request.addHeader("model_path", modelOrVectorPath.substring(0, modelOrVectorPath.length() - 3)); + request.addHeader("vector-path", modelOrVectorPath); + request.addHeader("model-path", modelOrVectorPath.substring(0, modelOrVectorPath.length() - 3)); } else { - request.addHeader("model_path", modelOrVectorPath); - request.addHeader("vector_path", modelOrVectorPath + ".kv"); + request.addHeader("model-path", modelOrVectorPath); + request.addHeader("vector-path", modelOrVectorPath + ".kv"); } request.addHeader("file_path", getCanonicalPath(trainingFilePath)); request.addHeader("vector_dimension", "" + configuration.getVectorDimension()); - request.addHeader("number_of_threads", "" + configuration.getNumberOfThreads()); - request.addHeader("window_size", "" + configuration.getWindowSize()); + request.addHeader("number-of-threads", "" + configuration.getNumberOfThreads()); + request.addHeader("window-size", "" + configuration.getWindowSize()); request.addHeader("iterations", "" + configuration.getIterations()); request.addHeader("negatives", "" + configuration.getNegatives()); - request.addHeader("cbow_or_sg", configuration.getType().toString()); + request.addHeader("cbow-or-sg", configuration.getType().toString()); request.addHeader("min_count", "" + configuration.getMinCount()); request.addHeader("sample", "" + configuration.getSample()); request.addHeader("epochs", "" + configuration.getEpochs()); - request.addHeader("hierarchical_softmax", "" + configuration.isUseHierarchicalSoftmax()); + request.addHeader("hierarchical-softmax", "" + configuration.isUseHierarchicalSoftmax()); try (CloseableHttpResponse response = httpClient.execute(request)) { HttpEntity entity = response.getEntity(); @@ -224,8 +224,8 @@ public double getSimilarity(String concept1, String concept2, String modelOrVect } } else { HttpGet request = new HttpGet(serverUrl + "/get-similarity"); - request.addHeader("concept_1", concept1); - request.addHeader("concept_2", concept2); + request.addHeader("concept-1", concept1); + request.addHeader("concept-2", concept2); addModelToRequest(request, modelOrVectorPath); try (CloseableHttpResponse response = httpClient.execute(request)) { HttpEntity entity = response.getEntity(); @@ -481,8 +481,8 @@ public int getVocabularySize(String modelOrVectorPath) { */ private void addModelToRequest(HttpGet request, String modelOrVectorPath) { if (modelOrVectorPath.endsWith(".kv")) { - request.addHeader("vector_path", getCanonicalPath(modelOrVectorPath)); - } else request.addHeader("model_path", getCanonicalPath(modelOrVectorPath)); + request.addHeader("vector-path", getCanonicalPath(modelOrVectorPath)); + } else request.addHeader("model-path", getCanonicalPath(modelOrVectorPath)); } /** @@ -573,7 +573,7 @@ public boolean checkRequirements() { LOGGER.error("Could not find requirements file."); return false; } - request.addHeader("requirements_file", requirementsFile.getAbsolutePath()); + request.addHeader("requirements-file", requirementsFile.getAbsolutePath()); try (CloseableHttpResponse response = httpClient.execute(request)) { HttpEntity entity = response.getEntity(); String resultMessage = EntityUtils.toString(entity); @@ -822,8 +822,8 @@ public void writeModelAsTextFile(String modelOrVectorPath, String fileToWrite) { */ public void convertW2vToKv(String w2vPath, String fileToWrite){ HttpGet request = new HttpGet(serverUrl + "/w2v-to-kv"); - request.addHeader("w2v_path", w2vPath); - request.addHeader("new_file", fileToWrite); + request.addHeader("w2v-path", w2vPath); + request.addHeader("new-file", fileToWrite); try (CloseableHttpResponse response = httpClient.execute(request)) { HttpEntity entity = response.getEntity(); if (entity == null) { @@ -852,9 +852,9 @@ public void writeModelAsTextFile(String modelOrVectorPath, String fileToWrite, S HttpGet request = new HttpGet(serverUrl + "/write-model-as-text-file"); addModelToRequest(request, modelOrVectorPath); if (entityFile != null) { - request.addHeader("entity_file", entityFile); + request.addHeader("entity-file", entityFile); } - request.addHeader("file_to_write", fileToWrite); + request.addHeader("file-to-write", fileToWrite); try (CloseableHttpResponse response = httpClient.execute(request)) { HttpEntity entity = response.getEntity(); if (entity == null) { @@ -959,4 +959,4 @@ public static void setPort(int port) { public static String getServerUrl() { return serverUrl; } -} \ No newline at end of file +} diff --git a/src/main/resources/python_server.py b/src/main/resources/python_server.py index 029df7e69..209b9554e 100644 --- a/src/main/resources/python_server.py +++ b/src/main/resources/python_server.py @@ -6,8 +6,8 @@ import os import sys import gzip -import pkg_resources -from pkg_resources import DistributionNotFound +from packaging.requirements import Requirement +from importlib.metadata import version, PackageNotFoundError import pathlib @@ -49,32 +49,38 @@ def check_requirements() -> str: str A message listing installed and potentially missing requirements. """ - requirements_file = request.headers.get("requirements_file") + requirements_file = request.headers.get("requirements-file") logging.info(f"received requirements file path: {requirements_file}") + with pathlib.Path(requirements_file).open() as requirements_txt: - requirements = pkg_resources.parse_requirements(requirements_txt) + requirements = requirements_txt.read().splitlines() ok_requirements = [] missing_requirements = [] + for requirement in requirements: - requirement = str(requirement) - print(f"Checking {requirement}") + req = Requirement(requirement) + print(f"Checking {req}") try: - pkg_resources.require(requirement) - ok_requirements.append(requirement) - except Exception as error: - missing = str(error) - missing_requirements.append(requirement) + installed_version = version(req.name) + if req.specifier.contains(installed_version): + ok_requirements.append(str(req)) + else: + missing_requirements.append(f"{req} (installed: {installed_version})") + except PackageNotFoundError: + missing_requirements.append(str(req)) + message = "Dependency Check" - if len(ok_requirements) > 0: + if ok_requirements: message += "\nInstalled Requirements:" for r in ok_requirements: message += "\n\t" + r - if len(missing_requirements) > 0: - message += "\nMissing Requirements:" + if missing_requirements: + message += "\nMissing or Incompatible Requirements:" for r in missing_requirements: message += "\n\t" + r else: message += "\n=> Everything is installed. You are good to go!" + print(message) logging.info(message) return message @@ -153,8 +159,8 @@ def w2v_to_kv() -> str: from gensim.models import KeyedVectors try: - w2v_path = request.headers.get("w2v_path") - new_file = request.headers.get("new_file") + w2v_path = request.headers.get("w2v-path") + new_file = request.headers.get("new-file") result = KeyedVectors.load_word2vec_format(w2v_path, unicode_errors="ignore") result.save(new_file) active_models[os.path.realpath(new_file)] = result @@ -176,21 +182,21 @@ def train_word_2_vec() -> str: 'True' as string if operation was successful, else 'False' (as string). """ try: - model_path = request.headers.get("model_path") # where the model will be stored + model_path = request.headers.get("model-path") # where the model will be stored vector_path = request.headers.get( - "vector_path" + "vector-path" ) # where the vector file will be stored file_path = request.headers.get("file_path") vector_dimension = request.headers.get("vector_dimension") - number_of_threads = request.headers.get("number_of_threads") - window_size = request.headers.get("window_size") + number_of_threads = request.headers.get("number-of-threads") + window_size = request.headers.get("window-size") iterations = request.headers.get("iterations") negatives = request.headers.get("negatives") - cbow_or_sg = request.headers.get("cbow_or_sg") + cbow_or_sg = request.headers.get("cbow-or-sg") min_count = request.headers.get("min_count") sample = request.headers.get("sample") epochs = request.headers.get("epochs") - hs_string: str = request.headers.get("hierarchical_softmax") + hs_string: str = request.headers.get("hierarchical-softmax") hs = 1 if hs_string == "true" else 0 @@ -259,16 +265,16 @@ def is_in_vocabulary(): True if concept in model vocabulary, else False. """ concept = request.headers.get("concept") - model_path = request.headers.get("model_path") - vector_path = request.headers.get("vector_path") + model_path = request.headers.get("model-path") + vector_path = request.headers.get("vector-path") vectors = get_vectors(model_path, vector_path) return str(concept in vectors.key_to_index) @app.route("/get-vocabulary-size", methods=["GET"]) def get_vocab_size(): - model_path = request.headers.get("model_path") - vector_path = request.headers.get("vector_path") + model_path = request.headers.get("model-path") + vector_path = request.headers.get("vector-path") vectors = get_vectors(model_path, vector_path) return str(len(vectors.key_to_index)) @@ -303,10 +309,10 @@ def get_vectors(model_path, vector_path): @app.route("/get-similarity", methods=["GET"]) def get_similarity_given_model(): - concept_1 = request.headers.get("concept_1") - concept_2 = request.headers.get("concept_2") - model_path = request.headers.get("model_path") - vector_path = request.headers.get("vector_path") + concept_1 = request.headers.get("concept-1") + concept_2 = request.headers.get("concept-2") + model_path = request.headers.get("model-path") + vector_path = request.headers.get("vector-path") vectors = get_vectors(model_path=model_path, vector_path=vector_path) if vectors is None: @@ -338,8 +344,8 @@ def get_similarity_given_model(): @app.route("/get-vocabulary-terms", methods=["GET"]) def get_vocabulary_terms(): - model_path = request.headers.get("model_path") - vector_path = request.headers.get("vector_path") + model_path = request.headers.get("model-path") + vector_path = request.headers.get("vector-path") vectors = get_vectors(model_path, vector_path) result = "" for word in vectors.key_to_index: @@ -350,8 +356,8 @@ def get_vocabulary_terms(): @app.route("/get-vector", methods=["GET"]) def get_vector_given_model(): concept = request.headers.get("concept") - model_path = request.headers.get("model_path") - vector_path = request.headers.get("vector_path") + model_path = request.headers.get("model-path") + vector_path = request.headers.get("vector-path") vectors = get_vectors(model_path=model_path, vector_path=vector_path) if vectors is None: @@ -381,8 +387,8 @@ def get_vector_given_model(): @app.route("/train-vector-space-model", methods=["GET"]) def train_vector_space_model(): - input_file_path = request.headers.get("input_file_path") - model_path = request.headers.get("model_path") + input_file_path = request.headers.get("input-file-path") + model_path = request.headers.get("model-path") dictionary = __createDictionary(input_file_path) corpus = CsvCorpus(dictionary, input_file_path) @@ -401,9 +407,9 @@ def train_vector_space_model(): @app.route("/query-vector-space-model", methods=["GET"]) def query_vector_space_model(): try: - model_path = request.headers.get("model_path") - document_id_one = request.headers.get("document_id_one") - document_id_two = request.headers.get("document_id_two") # can be None + model_path = request.headers.get("model-path") + document_id_one = request.headers.get("document-id-one") + document_id_two = request.headers.get("document-id-two") # can be None model = active_models.get(model_path) if model is None: @@ -675,10 +681,10 @@ def write_vectors_as_text_file(): boolean 'True' as string if operation was successful, else 'False' (as string). """ - model_path = request.headers.get("model_path") - vector_path = request.headers.get("vector_path") - file_to_write = request.headers.get("file_to_write") - entity_file = request.headers.get("entity_file") + model_path = request.headers.get("model-path") + vector_path = request.headers.get("vector-path") + file_to_write = request.headers.get("file-to-write") + entity_file = request.headers.get("entity-file") vectors = get_vectors(model_path=model_path, vector_path=vector_path) print("Writing the vectors as text file.") with open(file_to_write, "w+") as f: diff --git a/src/main/resources/test/test_python_server.py b/src/main/resources/test/test_python_server.py index c1e7c150a..ff6f930c0 100644 --- a/src/main/resources/test/test_python_server.py +++ b/src/main/resources/test/test_python_server.py @@ -9,7 +9,7 @@ uri_prefix = "http://localhost:1808/" - +base_dir = Path(__file__).resolve().parent.parent.parent.parent / "test" / "resources" class ServerThread(threading.Thread): def __init__(self, *args, **kwargs): @@ -40,45 +40,41 @@ def setup_module(module): def test_get_vector(): - test_model_vectors = "../../test/resources/test_model_vectors.kv" - vector_test_path = Path(test_model_vectors) + vector_test_path = (base_dir / "test_model_vectors.kv") assert vector_test_path.is_file() result = requests.get( uri_prefix + "get-vector", - headers={"concept": "Europe", "vector_path": test_model_vectors}, + headers={"concept": "Europe", "vector-path": str(vector_test_path)}, ) assert len(result.content.decode("utf-8").split(" ")) == 100 def test_is_in_vocabulary(): - test_model = "../../test/resources/test_model" - test_vectors = "../../test/resources/test_model_vectors.kv" - model_test_path = Path(test_model) - vector_test_path = Path(test_vectors) + model_test_path = (base_dir / "test_model") + vector_test_path = (base_dir / "test_model_vectors.kv") assert model_test_path.is_file() assert vector_test_path.is_file() result = requests.get( uri_prefix + "is-in-vocabulary", - headers={"concept": "Europe", "model_path": test_model}, + headers={"concept": "Europe", "model-path": str(model_test_path)}, ) assert result.content.decode("utf-8") == "True" result = requests.get( uri_prefix + "is-in-vocabulary", - headers={"concept": "Europe", "vector_path": test_vectors}, + headers={"concept": "Europe", "vector-path": str(vector_test_path)}, ) assert result.content.decode("utf-8") == "True" def test_get_similarity(): - test_model = "../../test/resources/test_model" - model_test_path = Path(test_model) + model_test_path = (base_dir / "test_model") assert model_test_path.is_file() result = requests.get( uri_prefix + "get-similarity", headers={ - "concept_1": "Europe", - "concept_2": "united", - "model_path": test_model}, + "concept-1": "Europe", + "concept-2": "united", + "model-path": str(model_test_path)}, ) result_str = result.content.decode("utf-8") assert float(result_str) > 0 From 702b2d52a11d2432306cba5c9c882d5270be270d Mon Sep 17 00:00:00 2001 From: Amos Dinh Date: Fri, 30 Aug 2024 12:25:15 +0200 Subject: [PATCH 2/4] fixed missed underscores --- .../informatik/dws/jrdf2vec/training/Gensim.java | 6 +++--- src/main/resources/python_server.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/de/uni_mannheim/informatik/dws/jrdf2vec/training/Gensim.java b/src/main/java/de/uni_mannheim/informatik/dws/jrdf2vec/training/Gensim.java index 9310f054c..c67935c51 100644 --- a/src/main/java/de/uni_mannheim/informatik/dws/jrdf2vec/training/Gensim.java +++ b/src/main/java/de/uni_mannheim/informatik/dws/jrdf2vec/training/Gensim.java @@ -175,14 +175,14 @@ public boolean trainWord2VecModel(String modelOrVectorPath, String trainingFileP request.addHeader("vector-path", modelOrVectorPath + ".kv"); } - request.addHeader("file_path", getCanonicalPath(trainingFilePath)); - request.addHeader("vector_dimension", "" + configuration.getVectorDimension()); + request.addHeader("file-path", getCanonicalPath(trainingFilePath)); + request.addHeader("vector-dimension", "" + configuration.getVectorDimension()); request.addHeader("number-of-threads", "" + configuration.getNumberOfThreads()); request.addHeader("window-size", "" + configuration.getWindowSize()); request.addHeader("iterations", "" + configuration.getIterations()); request.addHeader("negatives", "" + configuration.getNegatives()); request.addHeader("cbow-or-sg", configuration.getType().toString()); - request.addHeader("min_count", "" + configuration.getMinCount()); + request.addHeader("min-count", "" + configuration.getMinCount()); request.addHeader("sample", "" + configuration.getSample()); request.addHeader("epochs", "" + configuration.getEpochs()); request.addHeader("hierarchical-softmax", "" + configuration.isUseHierarchicalSoftmax()); diff --git a/src/main/resources/python_server.py b/src/main/resources/python_server.py index 209b9554e..ada188f40 100644 --- a/src/main/resources/python_server.py +++ b/src/main/resources/python_server.py @@ -186,14 +186,14 @@ def train_word_2_vec() -> str: vector_path = request.headers.get( "vector-path" ) # where the vector file will be stored - file_path = request.headers.get("file_path") - vector_dimension = request.headers.get("vector_dimension") + file_path = request.headers.get("file-path") + vector_dimension = request.headers.get("vector-dimension") number_of_threads = request.headers.get("number-of-threads") window_size = request.headers.get("window-size") iterations = request.headers.get("iterations") negatives = request.headers.get("negatives") cbow_or_sg = request.headers.get("cbow-or-sg") - min_count = request.headers.get("min_count") + min_count = request.headers.get("min-count") sample = request.headers.get("sample") epochs = request.headers.get("epochs") hs_string: str = request.headers.get("hierarchical-softmax") From 45eee5843aff97a60bf73b6e64c03ac26b3935cb Mon Sep 17 00:00:00 2001 From: Amos Dinh Date: Fri, 30 Aug 2024 12:31:58 +0200 Subject: [PATCH 3/4] added packaging as requirement --- src/main/resources/environment.yml | 3 ++- src/main/resources/requirements.txt | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/resources/environment.yml b/src/main/resources/environment.yml index f6df42cda..1573d9854 100644 --- a/src/main/resources/environment.yml +++ b/src/main/resources/environment.yml @@ -6,4 +6,5 @@ dependencies: - gensim>=4.0 - flask>=2.0 - numpy>=1.17 - - requests>=2.27 \ No newline at end of file + - packaging>=24.1 + - requests>=2.27 diff --git a/src/main/resources/requirements.txt b/src/main/resources/requirements.txt index 553090182..45d694377 100644 --- a/src/main/resources/requirements.txt +++ b/src/main/resources/requirements.txt @@ -1,4 +1,5 @@ gensim>=4.0 flask>=2.0 numpy>=1.17 -requests>=2.27 # only required for unit tests \ No newline at end of file +packaging>=24.1 +requests>=2.27 # only required for unit tests From 57fbb8a7c123dede22ad5e59d68ff71850433b1d Mon Sep 17 00:00:00 2001 From: Amos Dinh Date: Fri, 30 Aug 2024 12:43:50 +0200 Subject: [PATCH 4/4] fixed check requirements to ignore #comments --- src/main/resources/python_server.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/resources/python_server.py b/src/main/resources/python_server.py index ada188f40..b6698de84 100644 --- a/src/main/resources/python_server.py +++ b/src/main/resources/python_server.py @@ -58,6 +58,7 @@ def check_requirements() -> str: missing_requirements = [] for requirement in requirements: + requirement = requirement.split("#")[0].strip() req = Requirement(requirement) print(f"Checking {req}") try: