@@ -114,8 +114,8 @@ private Gensim() {
*/
public void trainVectorSpaceModel(String modelPath, String trainingFilePath) {
HttpGet request = new HttpGet(serverUrl + "/train-vector-space-model");
request.addHeader("input_file_path", getCanonicalPath(trainingFilePath));
request.addHeader("model_path", modelPath);
request.addHeader("input-file-path", getCanonicalPath(trainingFilePath));
request.addHeader("model-path", modelPath);

try (CloseableHttpResponse response = httpClient.execute(request)) {
HttpEntity entity = response.getEntity();
@@ -135,9 +135,9 @@ public void trainVectorSpaceModel(String modelPath, String trainingFilePath) {
*/
public double queryVectorSpaceModel(String modelPath, String documentIdOne, String documentIdTwo) throws Exception {
HttpGet request = new HttpGet(serverUrl + "/query-vector-space-model");
request.addHeader("model_path", modelPath);
request.addHeader("document_id_one", documentIdOne);
request.addHeader("document_id_two", documentIdTwo);
request.addHeader("model-path", modelPath);
request.addHeader("document-id-one", documentIdOne);
request.addHeader("document-id-two", documentIdTwo);

try (CloseableHttpResponse response = httpClient.execute(request)) {
HttpEntity entity = response.getEntity();
@@ -168,24 +168,24 @@ public double queryVectorSpaceModel(String modelPath, String documentIdOne, String documentIdTwo) throws Exception {
public boolean trainWord2VecModel(String modelOrVectorPath, String trainingFilePath, Word2VecConfiguration configuration) {
HttpGet request = new HttpGet(serverUrl + "/train-word2vec");
if (modelOrVectorPath.endsWith(".kv")) {
request.addHeader("vector_path", modelOrVectorPath);
request.addHeader("model_path", modelOrVectorPath.substring(0, modelOrVectorPath.length() - 3));
request.addHeader("vector-path", modelOrVectorPath);
request.addHeader("model-path", modelOrVectorPath.substring(0, modelOrVectorPath.length() - 3));
} else {
request.addHeader("model_path", modelOrVectorPath);
request.addHeader("vector_path", modelOrVectorPath + ".kv");
request.addHeader("model-path", modelOrVectorPath);
request.addHeader("vector-path", modelOrVectorPath + ".kv");
}

request.addHeader("file_path", getCanonicalPath(trainingFilePath));
request.addHeader("vector_dimension", "" + configuration.getVectorDimension());
request.addHeader("number_of_threads", "" + configuration.getNumberOfThreads());
request.addHeader("window_size", "" + configuration.getWindowSize());
request.addHeader("file-path", getCanonicalPath(trainingFilePath));
request.addHeader("vector-dimension", "" + configuration.getVectorDimension());
request.addHeader("number-of-threads", "" + configuration.getNumberOfThreads());
request.addHeader("window-size", "" + configuration.getWindowSize());
request.addHeader("iterations", "" + configuration.getIterations());
request.addHeader("negatives", "" + configuration.getNegatives());
request.addHeader("cbow_or_sg", configuration.getType().toString());
request.addHeader("min_count", "" + configuration.getMinCount());
request.addHeader("cbow-or-sg", configuration.getType().toString());
request.addHeader("min-count", "" + configuration.getMinCount());
request.addHeader("sample", "" + configuration.getSample());
request.addHeader("epochs", "" + configuration.getEpochs());
request.addHeader("hierarchical_softmax", "" + configuration.isUseHierarchicalSoftmax());
request.addHeader("hierarchical-softmax", "" + configuration.isUseHierarchicalSoftmax());

try (CloseableHttpResponse response = httpClient.execute(request)) {
HttpEntity entity = response.getEntity();
@@ -224,8 +224,8 @@ public double getSimilarity(String concept1, String concept2, String modelOrVectorPath) {
}
} else {
HttpGet request = new HttpGet(serverUrl + "/get-similarity");
request.addHeader("concept_1", concept1);
request.addHeader("concept_2", concept2);
request.addHeader("concept-1", concept1);
request.addHeader("concept-2", concept2);
addModelToRequest(request, modelOrVectorPath);
try (CloseableHttpResponse response = httpClient.execute(request)) {
HttpEntity entity = response.getEntity();
@@ -481,8 +481,8 @@ public int getVocabularySize(String modelOrVectorPath) {
*/
private void addModelToRequest(HttpGet request, String modelOrVectorPath) {
if (modelOrVectorPath.endsWith(".kv")) {
request.addHeader("vector_path", getCanonicalPath(modelOrVectorPath));
} else request.addHeader("model_path", getCanonicalPath(modelOrVectorPath));
request.addHeader("vector-path", getCanonicalPath(modelOrVectorPath));
} else request.addHeader("model-path", getCanonicalPath(modelOrVectorPath));
}

/**
@@ -573,7 +573,7 @@ public boolean checkRequirements() {
LOGGER.error("Could not find requirements file.");
return false;
}
request.addHeader("requirements_file", requirementsFile.getAbsolutePath());
request.addHeader("requirements-file", requirementsFile.getAbsolutePath());
try (CloseableHttpResponse response = httpClient.execute(request)) {
HttpEntity entity = response.getEntity();
String resultMessage = EntityUtils.toString(entity);
@@ -822,8 +822,8 @@ public void writeModelAsTextFile(String modelOrVectorPath, String fileToWrite) {
*/
public void convertW2vToKv(String w2vPath, String fileToWrite){
HttpGet request = new HttpGet(serverUrl + "/w2v-to-kv");
request.addHeader("w2v_path", w2vPath);
request.addHeader("new_file", fileToWrite);
request.addHeader("w2v-path", w2vPath);
request.addHeader("new-file", fileToWrite);
try (CloseableHttpResponse response = httpClient.execute(request)) {
HttpEntity entity = response.getEntity();
if (entity == null) {
@@ -852,9 +852,9 @@ public void writeModelAsTextFile(String modelOrVectorPath, String fileToWrite, String entityFile) {
HttpGet request = new HttpGet(serverUrl + "/write-model-as-text-file");
addModelToRequest(request, modelOrVectorPath);
if (entityFile != null) {
request.addHeader("entity_file", entityFile);
request.addHeader("entity-file", entityFile);
}
request.addHeader("file_to_write", fileToWrite);
request.addHeader("file-to-write", fileToWrite);
try (CloseableHttpResponse response = httpClient.execute(request)) {
HttpEntity entity = response.getEntity();
if (entity == null) {
@@ -959,4 +959,4 @@ public static void setPort(int port) {
public static String getServerUrl() {
return serverUrl;
}
}
}
src/main/resources/environment.yml (3 changes: 2 additions & 1 deletion)
@@ -6,4 +6,5 @@ dependencies:
- gensim>=4.0
- flask>=2.0
- numpy>=1.17
- requests>=2.27
- packaging>=24.1
- requests>=2.27
src/main/resources/python_server.py (99 changes: 53 additions & 46 deletions)
@@ -6,8 +6,8 @@
import os
import sys
import gzip
import pkg_resources
from pkg_resources import DistributionNotFound
from packaging.requirements import Requirement
from importlib.metadata import version, PackageNotFoundError
import pathlib


@@ -49,32 +49,39 @@ def check_requirements() -> str:
str
A message listing installed and potentially missing requirements.
"""
requirements_file = request.headers.get("requirements_file")
requirements_file = request.headers.get("requirements-file")
logging.info(f"received requirements file path: {requirements_file}")

with pathlib.Path(requirements_file).open() as requirements_txt:
requirements = pkg_resources.parse_requirements(requirements_txt)
requirements = requirements_txt.read().splitlines()
ok_requirements = []
missing_requirements = []

for requirement in requirements:
requirement = str(requirement)
print(f"Checking {requirement}")
requirement = requirement.split("#")[0].strip()
req = Requirement(requirement)
print(f"Checking {req}")
try:
pkg_resources.require(requirement)
ok_requirements.append(requirement)
except Exception as error:
missing = str(error)
missing_requirements.append(requirement)
installed_version = version(req.name)
if req.specifier.contains(installed_version):
ok_requirements.append(str(req))
else:
missing_requirements.append(f"{req} (installed: {installed_version})")
except PackageNotFoundError:
missing_requirements.append(str(req))

message = "Dependency Check"
if len(ok_requirements) > 0:
if ok_requirements:
message += "\nInstalled Requirements:"
for r in ok_requirements:
message += "\n\t" + r
if len(missing_requirements) > 0:
message += "\nMissing Requirements:"
if missing_requirements:
message += "\nMissing or Incompatible Requirements:"
for r in missing_requirements:
message += "\n\t" + r
else:
message += "\n=> Everything is installed. You are good to go!"

print(message)
logging.info(message)
return message
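
Note on the rewritten check above: it now parses each requirement line with packaging and resolves the installed version via importlib.metadata instead of the deprecated pkg_resources API. A minimal standalone sketch of the same logic outside the Flask endpoint (the helper name and the example requirement are illustrative, not taken from the PR):

# --- sketch, not part of the diff ---
from importlib.metadata import version, PackageNotFoundError
from packaging.requirements import Requirement

def check_requirement(line: str) -> str:
    # Drop inline comments such as "# only required for unit tests".
    req = Requirement(line.split("#")[0].strip())
    try:
        installed = version(req.name)
    except PackageNotFoundError:
        return f"{req}: not installed"
    if req.specifier.contains(installed):
        return f"{req}: OK (installed {installed})"
    return f"{req}: incompatible (installed {installed})"

print(check_requirement("gensim>=4.0"))
# --- end sketch ---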
@@ -153,8 +160,8 @@ def w2v_to_kv() -> str:
from gensim.models import KeyedVectors

try:
w2v_path = request.headers.get("w2v_path")
new_file = request.headers.get("new_file")
w2v_path = request.headers.get("w2v-path")
new_file = request.headers.get("new-file")
result = KeyedVectors.load_word2vec_format(w2v_path, unicode_errors="ignore")
result.save(new_file)
active_models[os.path.realpath(new_file)] = result
@@ -176,21 +183,21 @@ def train_word_2_vec() -> str:
'True' as string if operation was successful, else 'False' (as string).
"""
try:
model_path = request.headers.get("model_path") # where the model will be stored
model_path = request.headers.get("model-path") # where the model will be stored
vector_path = request.headers.get(
"vector_path"
"vector-path"
) # where the vector file will be stored
file_path = request.headers.get("file_path")
vector_dimension = request.headers.get("vector_dimension")
number_of_threads = request.headers.get("number_of_threads")
window_size = request.headers.get("window_size")
file_path = request.headers.get("file-path")
vector_dimension = request.headers.get("vector-dimension")
number_of_threads = request.headers.get("number-of-threads")
window_size = request.headers.get("window-size")
iterations = request.headers.get("iterations")
negatives = request.headers.get("negatives")
cbow_or_sg = request.headers.get("cbow_or_sg")
min_count = request.headers.get("min_count")
cbow_or_sg = request.headers.get("cbow-or-sg")
min_count = request.headers.get("min-count")
sample = request.headers.get("sample")
epochs = request.headers.get("epochs")
hs_string: str = request.headers.get("hierarchical_softmax")
hs_string: str = request.headers.get("hierarchical-softmax")

hs = 1 if hs_string == "true" else 0
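
The remainder of this handler is collapsed in the diff view; presumably the parsed header values are handed to gensim's Word2Vec. A rough sketch of how such a mapping could look in gensim 4.x (the argument mapping and the cbow/sg string values are assumptions, not code from the PR):

# --- sketch, assumed mapping, not part of the diff ---
from gensim.models import Word2Vec

model = Word2Vec(
    corpus_file=file_path,              # training file from the "file-path" header
    vector_size=int(vector_dimension),
    workers=int(number_of_threads),
    window=int(window_size),
    epochs=int(epochs),
    negative=int(negatives),
    sg=0 if cbow_or_sg.lower() == "cbow" else 1,  # assumed header values
    min_count=int(min_count),
    sample=float(sample),
    hs=hs,
)
model.save(model_path)       # "model-path" header
model.wv.save(vector_path)   # "vector-path" header (KeyedVectors, ".kv")
# --- end sketch ---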

@@ -259,16 +266,16 @@ def is_in_vocabulary():
True if concept in model vocabulary, else False.
"""
concept = request.headers.get("concept")
model_path = request.headers.get("model_path")
vector_path = request.headers.get("vector_path")
model_path = request.headers.get("model-path")
vector_path = request.headers.get("vector-path")
vectors = get_vectors(model_path, vector_path)
return str(concept in vectors.key_to_index)


@app.route("/get-vocabulary-size", methods=["GET"])
def get_vocab_size():
model_path = request.headers.get("model_path")
vector_path = request.headers.get("vector_path")
model_path = request.headers.get("model-path")
vector_path = request.headers.get("vector-path")
vectors = get_vectors(model_path, vector_path)
return str(len(vectors.key_to_index))

@@ -303,10 +310,10 @@ def get_similarity_given_model():
@app.route("/get-similarity", methods=["GET"])
def get_similarity_given_model():

concept_1 = request.headers.get("concept_1")
concept_2 = request.headers.get("concept_2")
model_path = request.headers.get("model_path")
vector_path = request.headers.get("vector_path")
concept_1 = request.headers.get("concept-1")
concept_2 = request.headers.get("concept-2")
model_path = request.headers.get("model-path")
vector_path = request.headers.get("vector-path")
vectors = get_vectors(model_path=model_path, vector_path=vector_path)

if vectors is None:
@@ -338,8 +345,8 @@ def get_similarity_given_model():

@app.route("/get-vocabulary-terms", methods=["GET"])
def get_vocabulary_terms():
model_path = request.headers.get("model_path")
vector_path = request.headers.get("vector_path")
model_path = request.headers.get("model-path")
vector_path = request.headers.get("vector-path")
vectors = get_vectors(model_path, vector_path)
result = ""
for word in vectors.key_to_index:
@@ -350,8 +357,8 @@ def get_vector_given_model():
@app.route("/get-vector", methods=["GET"])
def get_vector_given_model():
concept = request.headers.get("concept")
model_path = request.headers.get("model_path")
vector_path = request.headers.get("vector_path")
model_path = request.headers.get("model-path")
vector_path = request.headers.get("vector-path")
vectors = get_vectors(model_path=model_path, vector_path=vector_path)

if vectors is None:
@@ -381,8 +388,8 @@ def get_vector_given_model():

@app.route("/train-vector-space-model", methods=["GET"])
def train_vector_space_model():
input_file_path = request.headers.get("input_file_path")
model_path = request.headers.get("model_path")
input_file_path = request.headers.get("input-file-path")
model_path = request.headers.get("model-path")

dictionary = __createDictionary(input_file_path)
corpus = CsvCorpus(dictionary, input_file_path)
@@ -401,9 +408,9 @@ def train_vector_space_model():
@app.route("/query-vector-space-model", methods=["GET"])
def query_vector_space_model():
try:
model_path = request.headers.get("model_path")
document_id_one = request.headers.get("document_id_one")
document_id_two = request.headers.get("document_id_two") # can be None
model_path = request.headers.get("model-path")
document_id_one = request.headers.get("document-id-one")
document_id_two = request.headers.get("document-id-two") # can be None

model = active_models.get(model_path)
if model is None:
@@ -675,10 +682,10 @@ def write_vectors_as_text_file():
boolean
'True' as string if operation was successful, else 'False' (as string).
"""
model_path = request.headers.get("model_path")
vector_path = request.headers.get("vector_path")
file_to_write = request.headers.get("file_to_write")
entity_file = request.headers.get("entity_file")
model_path = request.headers.get("model-path")
vector_path = request.headers.get("vector-path")
file_to_write = request.headers.get("file-to-write")
entity_file = request.headers.get("entity-file")
vectors = get_vectors(model_path=model_path, vector_path=vector_path)
print("Writing the vectors as text file.")
with open(file_to_write, "w+") as f:
src/main/resources/requirements.txt (3 changes: 2 additions & 1 deletion)
@@ -1,4 +1,5 @@
gensim>=4.0
flask>=2.0
numpy>=1.17
requests>=2.27 # only required for unit tests
packaging>=24.1
requests>=2.27 # only required for unit tests