From efee6d7cb0d4f6c45006d1299a962629c99ccfbc Mon Sep 17 00:00:00 2001
From: Dhruv Kadam <136492453+DhruvKadam-git@users.noreply.github.com>
Date: Tue, 29 Oct 2024 13:39:50 +0530
Subject: [PATCH 1/2] Update chat_models_gguf_fast_start.py

1. Error Handling: Added error handling for model loading and prompting so
   the program doesn't crash unexpectedly.
2. Logging: Replaced print statements with the logging module for better
   control over logging levels and outputs.
3. Type Annotations: Added type annotations to the function parameters for
   better clarity and type checking.
4. Docstrings: Included docstrings for the functions to describe their
   purpose and parameters.
---
 .../Models/chat_models_gguf_fast_start.py | 40 +++++++++++++------
 1 file changed, 28 insertions(+), 12 deletions(-)

diff --git a/examples/Models/chat_models_gguf_fast_start.py b/examples/Models/chat_models_gguf_fast_start.py
index bdd56ee3..00f91b6e 100644
--- a/examples/Models/chat_models_gguf_fast_start.py
+++ b/examples/Models/chat_models_gguf_fast_start.py
@@ -1,31 +1,47 @@
-
 """This example demonstrates several leading open source chat models running in 4-bit GGUF on local laptop."""
 
 import time
 import re
 from llmware.prompts import Prompt
+import logging
 
 
 # Run the benchmark test
 
-def run_test(model_name, prompt_list):
-
-    print(f"\n > Loading model '{model_name}'")
-
-    prompter = Prompt().load_model(model_name)
+def run_test(model_name: str, prompt_list: list[dict]) -> int:
+    """Run the benchmark test on the specified model with the given prompts.
+
+    Args:
+        model_name (str): The name of the model to load.
+        prompt_list (list[dict]): A list of prompts to test the model with.
+
+    Returns:
+        int: Status code (0 for success).
+    """
+    logging.basicConfig(level=logging.INFO)
+
+    logging.info(f"Loading model '{model_name}'")
+
+    try:
+        prompter = Prompt().load_model(model_name)
+    except Exception as e:
+        logging.error(f"Failed to load model: {e}")
+        return 1
 
     for i, entry in enumerate(prompt_list):
-
         start_time = time.time()
-        print("\n")
-        print(f"query - {i+1} - {entry['query']}")
+        logging.info(f"query - {i+1} - {entry['query']}")
 
-        response = prompter.prompt_main(entry["query"])
+        try:
+            response = prompter.prompt_main(entry["query"])
+        except Exception as e:
+            logging.error(f"Error during prompting: {e}")
+            continue
 
         # Print results
         time_taken = round(time.time() - start_time, 2)
         llm_response = re.sub("[\n\n]", "\n", response['llm_response'])
-        print(f"llm_response - {i+1} - {llm_response}")
-        print(f"time_taken - {i+1} - {time_taken}")
+        logging.info(f"llm_response - {i+1} - {llm_response}")
+        logging.info(f"time_taken - {i+1} - {time_taken}")
 
     return 0
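Patch 1 reduces to one pattern: configure logging once, wrap each fallible
call in try/except, log the failure, and then either bail out with a status
code or skip to the next item instead of crashing. The standalone sketch
below distills that pattern; load_model and answer are hypothetical stand-ins
for the llmware calls (Prompt().load_model and prompter.prompt_main), not the
real API.

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)


    def load_model(model_name: str) -> object:
        # Hypothetical stand-in for Prompt().load_model(); raises on unknown names.
        if model_name != "demo-model":
            raise ValueError(f"unknown model: {model_name}")
        return object()


    def answer(model: object, query: str) -> str:
        # Hypothetical stand-in for prompter.prompt_main(query).
        return f"echo: {query}"


    def run(model_name: str, queries: list[str]) -> int:
        logger.info("Loading model '%s'", model_name)
        try:
            model = load_model(model_name)
        except Exception as e:
            logger.error("Failed to load model: %s", e)
            return 1  # fail fast: nothing useful can run without a model
        for i, query in enumerate(queries):
            try:
                response = answer(model, query)
            except Exception as e:
                logger.error("Error during prompting: %s", e)
                continue  # skip the bad prompt, keep going on the rest
            logger.info("response - %d - %s", i + 1, response)
        return 0


    if __name__ == "__main__":
        raise SystemExit(run("demo-model", ["What is a GGUF model?"]))

The asymmetry is the design point: a load failure ends the run (return 1),
while a per-prompt failure only skips that prompt (continue), so one bad
query cannot sink a whole benchmark.
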
From a2edd18ec1ea99c4c02325d5b2885ed4ce88cf21 Mon Sep 17 00:00:00 2001
From: Dhruv Kadam <136492453+DhruvKadam-git@users.noreply.github.com>
Date: Tue, 29 Oct 2024 13:59:36 +0530
Subject: [PATCH 2/2] Update graph.py

1. Error Handling: Added try-except blocks to handle potential errors when
   loading stop words, reading files, and writing JSON.
2. Logging: Used the logging module to log errors instead of printing them,
   providing better control over log levels and outputs.
3. Type Annotations: Added type annotations to the bow_locator, build_graph,
   and get_unique_vocab_lookup methods for clarity.
4. Docstrings: Ensured that all methods have clear docstrings explaining
   their purpose and return types.
---
 llmware/graph.py | 68 ++++++++++++++++++++++++++++++++++--------------
 1 file changed, 48 insertions(+), 20 deletions(-)

diff --git a/llmware/graph.py b/llmware/graph.py
index 79e0dc6a..f1688197 100644
--- a/llmware/graph.py
+++ b/llmware/graph.py
@@ -1,4 +1,3 @@
-
 # Copyright 2023-2024 llmware
 
 # Licensed under the Apache License, Version 2.0 (the "License"); you
@@ -65,14 +64,22 @@ def __init__(self, library):
         self.pre_initialization_bow_data = {}
         self.post_initialization_bow_data = {}
 
-        # create stop words txt file in nlp path
-        self.stop_words = Utilities().load_stop_words_list(self.library.nlp_path)
+        # Load stop words with error handling
+        try:
+            self.stop_words = Utilities().load_stop_words_list(self.library.nlp_path)
+        except Exception as e:
+            logger.error(f"Failed to load stop words: {e}")
+            self.stop_words = []
 
-        # load graph c modules - note: if any issues loading module, will be captured in get_module_graph_functions()
-        self._mod_utility = Utilities().get_module_graph_functions()
+        # Load graph C modules with error handling
+        try:
+            self._mod_utility = Utilities().get_module_graph_functions()
+        except Exception as e:
+            logger.error(f"Failed to load graph utility module: {e}")
+            self._mod_utility = None
 
     # new method - used to track 'counter' inside the bow files for incremental read/write/analysis
-    def bow_locator(self):
+    def bow_locator(self) -> tuple:
 
         """ Internal utility method used to enable scalability across multiple underlying BOW (Bag-of-Word)
         files which are created by the graph module. """
@@ -103,16 +110,26 @@ def bow_locator(self):
                              f"{top_bow_file}")
                 bow_index = 0
 
-        fp = open(os.path.join(dataset_fp, top_bow_file), "r", encoding='utf-8')
-        fp.seek(0, 2)
-        bow_byte_index = fp.tell()
-        fp.seek(0, 0)  # rewind
-        bow_tokens = len(fp.read().split(","))
-        fp.close()
+        try:
+            fp = open(os.path.join(dataset_fp, top_bow_file), "r", encoding='utf-8')
+            fp.seek(0, 2)
+            bow_byte_index = fp.tell()
+            fp.seek(0, 0)  # rewind
+            bow_tokens = len(fp.read().split(","))
+            fp.close()
+        except FileNotFoundError:
+            logger.error(f"BOW file not found: {top_bow_file}")
+            return 0, 0, 0, [], True
+        except Exception as e:
+            logger.error(f"Error reading BOW file: {e}")
+            return 0, 0, 0, [], True
+        finally:
+            if 'fp' in locals():
+                fp.close()
 
         return bow_index, bow_byte_index, bow_tokens, bow_files, no_bow
 
-    def build_graph(self):
+    def build_graph(self) -> dict:
 
         """ Generates multiple valuable nlp artifacts in the library's /nlp folder path, with the
         primary objective of generating the co-occurrence matrix. """
@@ -186,9 +203,11 @@ def build_graph(self):
         graph_summary.update({"time_stamp": ts})
 
         # write to manifest.json for knowledge graph
-        json_dict = json.dumps(graph_summary,indent=2)
-        with open(os.path.join(self.library.nlp_path,"manifest.json"),"w", encoding='utf-8') as outfile:
-            outfile.write(json_dict)
+        try:
+            with open(os.path.join(self.library.nlp_path,"manifest.json"), "w", encoding='utf-8') as outfile:
+                outfile.write(json.dumps(graph_summary, indent=2))
+        except Exception as e:
+            logger.error(f"Failed to write manifest.json: {e}")
 
         return graph_summary
 
@@ -833,16 +852,25 @@ def get_unique_vocab_len(self):
 
         return len(self.get_unique_vocab_lookup())
 
-    def get_unique_vocab_lookup(self):
+    def get_unique_vocab_lookup(self) -> dict:
 
""" if self.library.get_knowledge_graph_status() != "yes": self.build_graph() - j = json.load(open(os.path.join(self.library.nlp_path,"vocab_lookup.json"), "r", encoding='utf-8')) - - return j + try: + with open(os.path.join(self.library.nlp_path, "vocab_lookup.json"), "r", encoding='utf-8') as file: + return json.load(file) + except FileNotFoundError: + logger.error("vocab_lookup.json file not found.") + return {} + except json.JSONDecodeError: + logger.error("Error decoding JSON from vocab_lookup.json.") + return {} + except Exception as e: + logger.error(f"Unexpected error: {e}") + return {} def get_unique_vocab_reverse_lookup(self):