diff --git a/llmware/model_configs.py b/llmware/model_configs.py index f7452c7a..024c2d71 100644 --- a/llmware/model_configs.py +++ b/llmware/model_configs.py @@ -229,13 +229,21 @@ {"model_name": "gpt-3.5-turbo-instruct", "display_name": "GPT-3.5-Instruct", "model_family": "OpenAIGenModel", "model_category": "generative-api", "model_location": "api", "context_window": 4000}, - # new gpt-4 models announced in November 2023 + # gpt-4 model announced in November 2023 {"model_name": "gpt-4-1106-preview", "display_name": "GPT-4-Turbo-1106", "model_family": "OpenAIGenModel", "model_category": "generative-api", "model_location": "api", "context_window": 128000}, + # gpt-3.5 model announced in November 2023 {"model_name": "gpt-3.5-turbo-1106", "display_name": "GPT-3.5-Turbo-1106", "model_family": "OpenAIGenModel", "model_category": "generative-api", "model_location": "api", "context_window": 16385}, - # end - gpt-4 model update + + # gpt-4 model announced in January 2024 + {"model_name": "gpt-4-0125-preview", "display_name": "GPT-4-Turbo-0125", "model_family": "OpenAIGenModel", + "model_category": "generative-api", "model_location": "api", "context_window": 128000}, + + # gpt-3.5 model announced in January 2024 + {"model_name": "gpt-3.5-turbo-0125", "display_name": "GPT-3.5-Turbo-0125", "model_family": "OpenAIGenModel", + "model_category": "generative-api", "model_location": "api", "context_window": 16385}, # generative AIB models - aib-read-gpt - "main model" {"model_name": "aib-read-gpt", "display_name": "AIB-READ-GPT", "model_family": "AIBReadGPTModel", diff --git a/llmware/models.py b/llmware/models.py index 2bb3c9c7..700059fe 100644 --- a/llmware/models.py +++ b/llmware/models.py @@ -2194,7 +2194,8 @@ def inference(self, prompt, add_context=None, add_prompt_engineering=None, infer try: - if self.model_name in ["gpt-3.5-turbo","gpt-4","gpt-4-1106-preview","gpt-3.5-turbo-1106"]: + if self.model_name in ["gpt-3.5-turbo","gpt-4","gpt-4-1106-preview","gpt-3.5-turbo-1106", + "gpt-4-0125-preview", "gpt-3.5-turbo-0125"]: messages = self.prompt_engineer_chatgpt3(prompt_enriched, self.add_context, inference_dict) diff --git a/llmware/parsers.py b/llmware/parsers.py index d48e832e..4eb36e91 100644 --- a/llmware/parsers.py +++ b/llmware/parsers.py @@ -327,11 +327,11 @@ def _collator(self, input_folder_path, dupe_check=False): # --inside zip_extract_handler- will update counters zip_work_order = self.zip_extract_handler() - pdf_found += zip_work_order["pdf_found"] - office_found += zip_work_order["office_found"] - text_found += zip_work_order["text_found"] - voice_found += zip_work_order["voice_found"] - ocr_found += zip_work_order["ocr_found"] + pdf_found += zip_work_order["pdf"] + office_found += zip_work_order["office"] + text_found += zip_work_order["text"] + voice_found += zip_work_order["voice"] + ocr_found += zip_work_order["ocr"] work_order = {"pdf": pdf_found, "office": office_found, diff --git a/llmware/retrieval.py b/llmware/retrieval.py index b0a0ed15..cee78d0b 100644 --- a/llmware/retrieval.py +++ b/llmware/retrieval.py @@ -1374,6 +1374,9 @@ def locate_query_match (self, query, core_text): for x in range(0, len(core_text)): match = 0 for key_term in query_tokens: + if len(key_term) == 0: + continue + if key_term.startswith('"'): key_term = key_term[1:-1]