Skip to content

Commit

Permalink
Merge pull request #89 from GeorgeKontsevik/feat/tests
Browse files Browse the repository at this point in the history
Feat/tests
  • Loading branch information
Sandrro authored Aug 29, 2024
2 parents 5357e2b + 6616065 commit 8019e8c
Show file tree
Hide file tree
Showing 9 changed files with 90 additions and 167 deletions.
8 changes: 4 additions & 4 deletions sloyka/src/risks/emotion_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class EmotionRecognizer:
- text_column: The name of the column containing the text to be analyzed.
"""

def __init__(self, model_name=HuggingFaceModel.Text.Bert_Large, device='cpu'):
def __init__(self, model_name=HuggingFaceModel.Text.Bert_Tiny, device='cpu'):
self.device = device
self.model_name = model_name

Expand All @@ -46,11 +46,11 @@ def __init__(self, model_name=HuggingFaceModel.Text.Bert_Large, device='cpu'):
HuggingFaceModel.Text.Bert_Tiny2,
]

self.recognizer = None

def init_base_recognizer(self):
self.recognizer = TextRecognizer(model=self.model_name, device=self.device)

#def init_base_recognizer(self):
# self.recognizer = TextRecognizer(model=self.model_name, device=self.device)


def recognize_emotion(self, text):
"""
Expand Down
2 changes: 1 addition & 1 deletion sloyka/src/semantic_graph/keyword_extracter.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def extract_keywords(
texts_to_add = []

for j, text in zip(ids_text_to_extract, texts_to_extract):
extraction = self.model.extract_keywords(text, top_n=top_n, stop_words=RUS_STOPWORDS)
extraction = KeyBERT().extract_keywords(docs=text, top_n=top_n, stop_words=RUS_STOPWORDS)
if extraction:
score = extraction[0][1]
if score > semantic_key_filter:
Expand Down
22 changes: 0 additions & 22 deletions tests/test_area_matcher.py

This file was deleted.

58 changes: 21 additions & 37 deletions tests/test_classifiers.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,24 @@
# import pytest
# import torch
# import pandas as pd
# from sloyka import TextClassifiers
import pytest
import torch
import pandas as pd
from sloyka import TextClassifiers

# path_to_file = "sloyka/sample_data/raw/Адмиралтейский.csv"
@pytest.fixture
def sample_dataframe():
    """Return a two-row DataFrame of sample comments for the classifier test.

    The frame has a text column ('Текст комментария') and a 'message_id'
    column; both are keyed by the row labels 203 and 204.
    """
    comments = {
        203: 'На Чайковского 63 тоже идет кап.ремонт. В квартире у пенсионеров побили стекла. Куда им обратиться?',
        204: 'Вся улица Жуковского и Восстания заклеена рекламой! Почему не действует полиция и администрация с ЖСК-1 ?',
    }
    message_ids = {203: 195, 204: 196}
    return pd.DataFrame({'Текст комментария': comments, 'message_id': message_ids})

# @pytest.fixture
# def test_data():
# df_predict = pd.read_csv(path_to_file, sep=";")
# df_predict.rename(columns={"Текст комментария": "Текст"}, inplace=True)
# df_predict = df_predict.dropna(subset=["Текст"])
# df_predict = df_predict.head(3)
# return df_predict
@pytest.fixture
def model():
    """Provide a ``TextClassifiers`` instance running on the CPU.

    The classifier is created from the "Sandrro/text_to_function_v2"
    repository with ``number_of_categories=1``.
    """
    cpu = torch.device("cpu")
    classifier = TextClassifiers(
        repository_id="Sandrro/text_to_function_v2",
        number_of_categories=1,
        device_type=cpu,
    )
    return classifier

# @pytest.fixture
# def model():
# return TextClassifiers(
# repository_id="Sandrro/text_to_subfunction_v10",
# number_of_categories=1,
# device_type=torch.device("cpu"),
# )

# def test_cats_probs(model, test_data):
# expected_df = pd.DataFrame(
# {
# "cats": [
# "Вопросы граждан о проектах/планах/сроках/ходе проведения работ по благоустройству",
# "Не ЦУР",
# "Вопросы по оплате проезда в общественном транспорте",
# ],
# "probs": ["1.0", "0.999", "0.98"],
# }
# )

# test_data[["cats", "probs"]] = pd.DataFrame(
# test_data["Текст"].progress_map(lambda x: model.run_text_classifier_topics(x)).to_list()
# )
# assert test_data["cats"].equals(expected_df["cats"])
# assert test_data["probs"].equals(expected_df["probs"])
def test_cats_probs(model, sample_dataframe):
    """Check that the first sample comment is classified as "ЖКХ".

    Every comment is passed through ``model.run_text_classifier``; each
    result is unpacked into the ``cats`` and ``probs`` columns, so the
    classifier is expected to return a two-element result per text.
    """
    # Plain ``Series.map`` is used instead of tqdm's ``progress_map``: the
    # latter only exists after ``tqdm.pandas()`` has been called somewhere,
    # and this test module never registers it itself.
    predictions = sample_dataframe["Текст комментария"].map(model.run_text_classifier).to_list()
    sample_dataframe[["cats", "probs"]] = predictions
    print(sample_dataframe)
    assert sample_dataframe.iloc[0]["cats"] == "ЖКХ"
20 changes: 20 additions & 0 deletions tests/test_emotion_classifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import pytest
import torch
import pandas as pd
from sloyka import EmotionRecognizer

@pytest.fixture
def sample_dataframe():
    """Return a two-row DataFrame of sample comments for the emotion test.

    The frame has a text column ('Текст комментария') and a 'message_id'
    column; both are keyed by the row labels 203 and 204.
    """
    comments = {
        203: 'На Чайковского 63 тоже идет кап.ремонт. В квартире у пенсионеров побили стекла. Куда им обратиться?',
        204: 'Вся улица Жуковского и Восстания заклеена рекламой! Почему не действует полиция и администрация с ЖСК-1 ?',
    }
    message_ids = {203: 195, 204: 196}
    return pd.DataFrame({'Текст комментария': comments, 'message_id': message_ids})

@pytest.fixture
def model():
    """Provide an ``EmotionRecognizer`` built with its default arguments."""
    recognizer = EmotionRecognizer()
    return recognizer

def test_emotion_recognizer(model, sample_dataframe):
    """Check that the first sample comment is recognised as "neutral".

    Every comment is passed through ``model.recognize_emotion`` and the
    result is stored in a new ``emotion`` column.
    """
    # Plain ``Series.map`` is used instead of tqdm's ``progress_map``: the
    # latter only exists after ``tqdm.pandas()`` has been called somewhere,
    # and this test module never registers it itself.
    sample_dataframe["emotion"] = sample_dataframe["Текст комментария"].map(model.recognize_emotion)
    print(sample_dataframe)
    assert sample_dataframe.iloc[0]["emotion"] == "neutral"
31 changes: 0 additions & 31 deletions tests/test_events_modelling.py

This file was deleted.

29 changes: 0 additions & 29 deletions tests/test_geocoder_matcher.py

This file was deleted.

68 changes: 25 additions & 43 deletions tests/test_semantic_graph.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,26 @@
# import pandas as pd

# from sloyka import Semgraph


# sm = Semgraph()
# test_df = pd.read_feather("sloyka/sample_data/processed/df_strts.feather")[:20]
# text_column='Текст комментария'
# toponim_column='only_full_street_name'
# toponim_name_column='initial_street'
# toponim_type_column='Toponims'

# def test_extract_keywords():
# result = sm.extract_keywords(test_df,
# text_column,
# toponim_column,
# toponim_name_column,
# toponim_type_column,
# semantic_key_filter=0.6,
# top_n=5)

# assert len(result) == 6

# def test_get_semantic_closeness():
# df = pd.DataFrame([['TOPONIM_1', 'роза'], ['TOPONIM_2', 'куст']], columns=['toponims', 'words'])
# result = sm.get_semantic_closeness(df,
# column='words',
# similaryty_filter=0.5)

# check = round(float(result['SIMILARITY_SCORE'].iloc[0]), 3)

# assert check == round(0.655513, 3)

# def test_build_semantic_graph():
# result = sm.build_semantic_graph(test_df,
# text_column,
# toponim_column,
# toponim_name_column,
# toponim_type_column,
# key_score_filter=0.4,
# semantic_score_filter=0.6,
# top_n=5)
import geopandas as gpd
import pytest
from sloyka import Semgraph

@pytest.fixture
def sample_data():
    """Load the geocoded sample dataset and tag every row as a post.

    Returns:
        The frame read from the sample parquet file, with an added
        ``type`` column set to ``'post'`` for every row.
    """
    # Forward slashes are used on purpose: the previous backslash-separated
    # literal contained the invalid escape sequence ``\s`` and only resolved
    # on Windows. Forward slashes are accepted on every platform.
    gdf = gpd.read_parquet("sloyka/sample_data/sample_data_geocoded_emotioned.parquet")
    gdf['type'] = 'post'
    return gdf


def test_build_semantic_graph(sample_data):
    """Build a graph from the sample data and check its number of edges."""
    # Column names of the sample dataset, passed to ``build_graph`` as
    # keyword arguments.
    column_mapping = dict(
        id_column='message_id',
        text_column='Текст комментария',
        text_type_column="type",
        toponym_column='full_street_name',
        toponym_name_column='only_full_street_name',
        toponym_type_column='Toponyms',
        post_id_column="message_id",
        parents_stack_column="message_id",
        location_column='Location',
        geometry_column='geometry',
    )
    graph = Semgraph().build_graph(sample_data, **column_mapping)

    assert len(graph.edges) == 88
19 changes: 19 additions & 0 deletions tests/test_services_extractor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import pytest
import pandas as pd
from sloyka import City_services

@pytest.fixture
def sample_dataframe():
    """Return a two-row DataFrame of sample comments for the services test.

    The frame has a text column ('Текст комментария') and a 'message_id'
    column; both are keyed by the row labels 203 and 204.
    """
    comments = {
        203: 'Когда уже на Юго западе будет метро? Весь день в пути проводим!',
        204: 'Вся улица Жуковского и Восстания заклеена рекламой! Почему не действует полиция и администрация с ЖСК-1 ?',
    }
    message_ids = {203: 195, 204: 196}
    return pd.DataFrame({'Текст комментария': comments, 'message_id': message_ids})

@pytest.fixture
def model():
    """Provide a ``City_services`` extractor built with its default arguments."""
    extractor = City_services()
    return extractor

def test_services(model, sample_dataframe):
    """Check that "Метро" is the first service found in the first comment.

    ``model.run`` is called with the sample frame and the name of its text
    column; the first entry of the ``City_services`` value in the first
    result row is then compared with the expected service name.
    """
    extracted = model.run(sample_dataframe, "Текст комментария")
    print(extracted)
    first_row = extracted.iloc[0]
    assert first_row["City_services"][0] == "Метро"

0 comments on commit 8019e8c

Please sign in to comment.