From 76099782c46aa6b33928cd3c41df1fd3b26f99ad Mon Sep 17 00:00:00 2001
From: Uğur Çekmez
Date: Thu, 29 Feb 2024 21:02:07 +0300
Subject: [PATCH 1/6] added gpt-4-0125-preview into model catalog

---
 llmware/model_configs.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/llmware/model_configs.py b/llmware/model_configs.py
index f7452c7a..6119aca3 100644
--- a/llmware/model_configs.py
+++ b/llmware/model_configs.py
@@ -229,10 +229,14 @@
     {"model_name": "gpt-3.5-turbo-instruct", "display_name": "GPT-3.5-Instruct", "model_family": "OpenAIGenModel",
      "model_category": "generative-api", "model_location": "api", "context_window": 4000},
 
-    # new gpt-4 models announced in November 2023
+    # gpt-4 model announced in November 2023
     {"model_name": "gpt-4-1106-preview", "display_name": "GPT-4-Turbo-1106", "model_family": "OpenAIGenModel",
      "model_category": "generative-api", "model_location": "api", "context_window": 128000},
 
+    # gpt-4 model announced in January 2024
+    {"model_name": "gpt-4-0125-preview", "display_name": "GPT-4-Turbo-0125", "model_family": "OpenAIGenModel",
+     "model_category": "generative-api", "model_location": "api", "context_window": 128000},
+
     {"model_name": "gpt-3.5-turbo-1106", "display_name": "GPT-3.5-Turbo-1106", "model_family": "OpenAIGenModel",
      "model_category": "generative-api", "model_location": "api", "context_window": 16385},
     # end - gpt-4 model update

From 94016aaa2a45b4268d40c2bb6186169a4ee68032 Mon Sep 17 00:00:00 2001
From: Uğur Çekmez
Date: Thu, 29 Feb 2024 21:02:48 +0300
Subject: [PATCH 2/6] added gpt-4-0125-preview handler into models.py

---
 llmware/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llmware/models.py b/llmware/models.py
index e1e74d8c..fd4a2162 100644
--- a/llmware/models.py
+++ b/llmware/models.py
@@ -2194,7 +2194,7 @@ def inference(self, prompt, add_context=None, add_prompt_engineering=None, infer
 
         try:
 
-            if self.model_name in ["gpt-3.5-turbo","gpt-4","gpt-4-1106-preview","gpt-3.5-turbo-1106"]:
+            if self.model_name in ["gpt-3.5-turbo","gpt-4","gpt-4-1106-preview","gpt-3.5-turbo-1106", "gpt-4-0125-preview"]:
 
                 messages = self.prompt_engineer_chatgpt3(prompt_enriched, self.add_context, inference_dict)
 

From ec508b7a0c4c94a6760c37c6134696b54d263cf1 Mon Sep 17 00:00:00 2001
From: Uğur Çekmez
Date: Thu, 29 Feb 2024 21:23:45 +0300
Subject: [PATCH 3/6] added gpt-3.5-turbo-0125 into model configs

---
 llmware/model_configs.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/llmware/model_configs.py b/llmware/model_configs.py
index 6119aca3..024c2d71 100644
--- a/llmware/model_configs.py
+++ b/llmware/model_configs.py
@@ -233,13 +233,17 @@
     {"model_name": "gpt-4-1106-preview", "display_name": "GPT-4-Turbo-1106", "model_family": "OpenAIGenModel",
      "model_category": "generative-api", "model_location": "api", "context_window": 128000},
 
+    # gpt-3.5 model announced in November 2023
+    {"model_name": "gpt-3.5-turbo-1106", "display_name": "GPT-3.5-Turbo-1106", "model_family": "OpenAIGenModel",
+     "model_category": "generative-api", "model_location": "api", "context_window": 16385},
+
     # gpt-4 model announced in January 2024
     {"model_name": "gpt-4-0125-preview", "display_name": "GPT-4-Turbo-0125", "model_family": "OpenAIGenModel",
      "model_category": "generative-api", "model_location": "api", "context_window": 128000},
-
-    {"model_name": "gpt-3.5-turbo-1106", "display_name": "GPT-3.5-Turbo-1106", "model_family": "OpenAIGenModel",
+
+    # gpt-3.5 model announced in January 2024
+    {"model_name": "gpt-3.5-turbo-0125", "display_name": "GPT-3.5-Turbo-0125", "model_family": "OpenAIGenModel",
      "model_category": "generative-api", "model_location": "api", "context_window": 16385},
-    # end - gpt-4 model update
 
     # generative AIB models - aib-read-gpt - "main model"
     {"model_name": "aib-read-gpt", "display_name": "AIB-READ-GPT", "model_family": "AIBReadGPTModel",

From 1b511df8baabdf125796ae0b0d0acbcd786de81b Mon Sep 17 00:00:00 2001
From: Uğur Çekmez
Date: Thu, 29 Feb 2024 21:24:50 +0300
Subject: [PATCH 4/6] added gpt-3.5-turbo-0125 into models.py

---
 llmware/models.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llmware/models.py b/llmware/models.py
index fd4a2162..39608690 100644
--- a/llmware/models.py
+++ b/llmware/models.py
@@ -2194,7 +2194,8 @@ def inference(self, prompt, add_context=None, add_prompt_engineering=None, infer
 
         try:
 
-            if self.model_name in ["gpt-3.5-turbo","gpt-4","gpt-4-1106-preview","gpt-3.5-turbo-1106", "gpt-4-0125-preview"]:
+            if self.model_name in ["gpt-3.5-turbo","gpt-4","gpt-4-1106-preview","gpt-3.5-turbo-1106",
+                                   "gpt-4-0125-preview", "gpt-3.5-turbo-0125"]:
 
                 messages = self.prompt_engineer_chatgpt3(prompt_enriched, self.add_context, inference_dict)
 

From 729f36bcf00fc8e4fb57b4c16a54f923c082124d Mon Sep 17 00:00:00 2001
From: Uğur Çekmez
Date: Fri, 1 Mar 2024 18:21:17 +0300
Subject: [PATCH 5/6] zip parser bugfix

When extracting a zip file, the dictionary returned by the
zip_extract_handler function and the lines that process it do not use
matching key names, which raises the following error:

  File "/opt/homebrew/Caskroom/miniforge/base/lib/python3.10/site-packages/llmware/parsers.py", line 330, in _collator
    pdf_found += zip_work_order["pdf_found"]
  KeyError: 'pdf_found'
---
 llmware/parsers.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llmware/parsers.py b/llmware/parsers.py
index d48e832e..4eb36e91 100644
--- a/llmware/parsers.py
+++ b/llmware/parsers.py
@@ -327,11 +327,11 @@ def _collator(self, input_folder_path, dupe_check=False):
 
             # --inside zip_extract_handler- will update counters
             zip_work_order = self.zip_extract_handler()
-            pdf_found += zip_work_order["pdf_found"]
-            office_found += zip_work_order["office_found"]
-            text_found += zip_work_order["text_found"]
-            voice_found += zip_work_order["voice_found"]
-            ocr_found += zip_work_order["ocr_found"]
+            pdf_found += zip_work_order["pdf"]
+            office_found += zip_work_order["office"]
+            text_found += zip_work_order["text"]
+            voice_found += zip_work_order["voice"]
+            ocr_found += zip_work_order["ocr"]
 
         work_order = {"pdf": pdf_found,
                       "office": office_found,

From 4f6a7b5402b5a1bd3751292d0bcba79340da0e23 Mon Sep 17 00:00:00 2001
From: Uğur Çekmez
Date: Fri, 1 Mar 2024 18:57:53 +0300
Subject: [PATCH 6/6] fix on retrieval when key_term length is zero

The following error is fixed:

  if core_text[x].lower() == key_term[0].lower():
  IndexError: string index out of range
---
 llmware/retrieval.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/llmware/retrieval.py b/llmware/retrieval.py
index b0a0ed15..cee78d0b 100644
--- a/llmware/retrieval.py
+++ b/llmware/retrieval.py
@@ -1374,6 +1374,9 @@ def locate_query_match (self, query, core_text):
 
         for x in range(0, len(core_text)):
             match = 0
             for key_term in query_tokens:
+                if len(key_term) == 0:
+                    continue
+
                 if key_term.startswith('"'):
                     key_term = key_term[1:-1]
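
As a standalone illustration of the guard added in PATCH 6/6, the short sketch below reproduces the failure mode outside of llmware: an empty token reaching the inner loop makes key_term[0] raise IndexError, and skipping zero-length tokens avoids it. The split()-based tokenization, the count_matches helper, and the sample strings are illustrative assumptions, not llmware's actual query preprocessing; only the len(key_term) == 0 guard mirrors the patch.

# Minimal sketch of the PATCH 6/6 guard; the whitespace tokenization, the
# count_matches helper, and the sample text are assumptions for illustration,
# not llmware's actual retrieval pipeline.

def count_matches(query: str, core_text: str) -> int:
    """Count character positions in core_text that start with the first letter of any query token."""

    # splitting on a literal space can produce empty tokens, e.g. "red  apple".split(" ")
    query_tokens = query.split(" ")

    matches = 0
    for x in range(0, len(core_text)):
        for key_term in query_tokens:

            # the guard from the patch: an empty token has no key_term[0], so skip it
            if len(key_term) == 0:
                continue

            if core_text[x].lower() == key_term[0].lower():
                matches += 1
                break

    return matches


if __name__ == "__main__":
    # "red  apple" (note the double space) splits into ['red', '', 'apple'];
    # without the guard, key_term[0] on '' raises IndexError: string index out of range
    print(count_matches("red  apple", "a ripe red apple"))

In locate_query_match itself the guard serves the same purpose: whatever empty strings survive query tokenization are skipped before key_term[0] is read.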