From efee6d7cb0d4f6c45006d1299a962629c99ccfbc Mon Sep 17 00:00:00 2001
From: Dhruv Kadam <136492453+DhruvKadam-git@users.noreply.github.com>
Date: Tue, 29 Oct 2024 13:39:50 +0530
Subject: [PATCH 1/2] Update chat_models_gguf_fast_start.py

1. Error Handling: Added error handling for model loading and prompting so
   the program doesn't crash unexpectedly.
2. Logging: Replaced print statements with the logging module for better
   control over logging levels and outputs.
3. Type Annotations: Added type annotations to the function parameters for
   better clarity and type checking.
4. Docstrings: Included docstrings for the functions to describe their
   purpose and parameters.
---
 .../Models/chat_models_gguf_fast_start.py | 40 +++++++++++++------
 1 file changed, 28 insertions(+), 12 deletions(-)

diff --git a/examples/Models/chat_models_gguf_fast_start.py b/examples/Models/chat_models_gguf_fast_start.py
index bdd56ee3..00f91b6e 100644
--- a/examples/Models/chat_models_gguf_fast_start.py
+++ b/examples/Models/chat_models_gguf_fast_start.py
@@ -1,31 +1,47 @@
-
 """This example demonstrates several leading open source chat models running in 4-bit GGUF on local laptop."""
 
 import time
 import re
 from llmware.prompts import Prompt
+import logging
 
 
 # Run the benchmark test
 
-def run_test(model_name, prompt_list):
-
-    print(f"\n > Loading model '{model_name}'")
-
-    prompter = Prompt().load_model(model_name)
+def run_test(model_name: str, prompt_list: list[dict]) -> int:
+    """Run the benchmark test on the specified model with the given prompts.
+
+    Args:
+        model_name (str): The name of the model to load.
+        prompt_list (list[dict]): A list of prompts to test the model with.
+
+    Returns:
+        int: Status code (0 for success).
+    """
+    logging.basicConfig(level=logging.INFO)
+
+    logging.info(f"Loading model '{model_name}'")
+
+    try:
+        prompter = Prompt().load_model(model_name)
+    except Exception as e:
+        logging.error(f"Failed to load model: {e}")
+        return 1
 
     for i, entry in enumerate(prompt_list):
-
         start_time = time.time()
-        print("\n")
-        print(f"query - {i+1} - {entry['query']}")
+        logging.info(f"query - {i+1} - {entry['query']}")
 
-        response = prompter.prompt_main(entry["query"])
+        try:
+            response = prompter.prompt_main(entry["query"])
+        except Exception as e:
+            logging.error(f"Error during prompting: {e}")
+            continue
 
         # Print results
         time_taken = round(time.time() - start_time, 2)
         llm_response = re.sub("[\n\n]", "\n", response['llm_response'])
-        print(f"llm_response - {i+1} - {llm_response}")
-        print(f"time_taken - {i+1} - {time_taken}")
+        logging.info(f"llm_response - {i+1} - {llm_response}")
+        logging.info(f"time_taken - {i+1} - {time_taken}")
 
     return 0
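Patch 1 reduces to one pattern: configure logging once, wrap each fallible
call in try/except, log the failure, and then either bail out with a status
code or skip to the next item instead of crashing. The standalone sketch
below distills that pattern; load_model and answer are hypothetical stand-ins
for the llmware calls (Prompt().load_model and prompter.prompt_main), not the
real API.

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)


    def load_model(model_name: str) -> object:
        # Hypothetical stand-in for Prompt().load_model(); raises on unknown names.
        if model_name != "demo-model":
            raise ValueError(f"unknown model: {model_name}")
        return object()


    def answer(model: object, query: str) -> str:
        # Hypothetical stand-in for prompter.prompt_main(query).
        return f"echo: {query}"


    def run(model_name: str, queries: list[str]) -> int:
        logger.info("Loading model '%s'", model_name)
        try:
            model = load_model(model_name)
        except Exception as e:
            logger.error("Failed to load model: %s", e)
            return 1  # fail fast: nothing useful can run without a model
        for i, query in enumerate(queries):
            try:
                response = answer(model, query)
            except Exception as e:
                logger.error("Error during prompting: %s", e)
                continue  # skip the bad prompt, keep going on the rest
            logger.info("response - %d - %s", i + 1, response)
        return 0


    if __name__ == "__main__":
        raise SystemExit(run("demo-model", ["What is a GGUF model?"]))

The asymmetry is the design point: a load failure ends the run (return 1),
while a per-prompt failure only skips that prompt (continue), so one bad
query cannot sink a whole benchmark.
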
From a2edd18ec1ea99c4c02325d5b2885ed4ce88cf21 Mon Sep 17 00:00:00 2001
From: Dhruv Kadam <136492453+DhruvKadam-git@users.noreply.github.com>
Date: Tue, 29 Oct 2024 13:59:36 +0530
Subject: [PATCH 2/2] Update graph.py

1. Error Handling: Added try-except blocks to handle potential errors when
   loading stop words, reading files, and writing JSON.
2. Logging: Used the logging module to log errors instead of printing them,
   providing better control over log levels and outputs.
3. Type Annotations: Added type annotations to the bow_locator, build_graph,
   and get_unique_vocab_lookup methods for clarity.
4. Docstrings: Ensured that all methods have clear docstrings explaining
   their purpose and return types.
---
 llmware/graph.py | 68 ++++++++++++++++++++++++++++++++++--------------
 1 file changed, 48 insertions(+), 20 deletions(-)

diff --git a/llmware/graph.py b/llmware/graph.py
index 79e0dc6a..f1688197 100644
--- a/llmware/graph.py
+++ b/llmware/graph.py
@@ -1,4 +1,3 @@
-
 # Copyright 2023-2024 llmware
 
 # Licensed under the Apache License, Version 2.0 (the "License"); you
@@ -65,14 +64,22 @@ def __init__(self, library):
         self.pre_initialization_bow_data = {}
         self.post_initialization_bow_data = {}
 
-        # create stop words txt file in nlp path
-        self.stop_words = Utilities().load_stop_words_list(self.library.nlp_path)
+        # Load stop words with error handling
+        try:
+            self.stop_words = Utilities().load_stop_words_list(self.library.nlp_path)
+        except Exception as e:
+            logger.error(f"Failed to load stop words: {e}")
+            self.stop_words = []
 
-        # load graph c modules - note: if any issues loading module, will be captured in get_module_graph_functions()
-        self._mod_utility = Utilities().get_module_graph_functions()
+        # Load graph C modules with error handling
+        try:
+            self._mod_utility = Utilities().get_module_graph_functions()
+        except Exception as e:
+            logger.error(f"Failed to load graph utility module: {e}")
+            self._mod_utility = None
 
     # new method - used to track 'counter' inside the bow files for incremental read/write/analysis
-    def bow_locator(self):
+    def bow_locator(self) -> tuple:
 
         """ Internal utility method used to enable scalability across multiple underlying BOW (Bag-of-Word)
         files which are created by the graph module. """
@@ -103,16 +110,26 @@ def bow_locator(self):
                              f"{top_bow_file}")
                 bow_index = 0
 
-        fp = open(os.path.join(dataset_fp, top_bow_file), "r", encoding='utf-8')
-        fp.seek(0, 2)
-        bow_byte_index = fp.tell()
-        fp.seek(0, 0)  # rewind
-        bow_tokens = len(fp.read().split(","))
-        fp.close()
+        try:
+            fp = open(os.path.join(dataset_fp, top_bow_file), "r", encoding='utf-8')
+            fp.seek(0, 2)
+            bow_byte_index = fp.tell()
+            fp.seek(0, 0)  # rewind
+            bow_tokens = len(fp.read().split(","))
+            fp.close()
+        except FileNotFoundError:
+            logger.error(f"BOW file not found: {top_bow_file}")
+            return 0, 0, 0, [], True
+        except Exception as e:
+            logger.error(f"Error reading BOW file: {e}")
+            return 0, 0, 0, [], True
+        finally:
+            if 'fp' in locals():
+                fp.close()
 
         return bow_index, bow_byte_index, bow_tokens, bow_files, no_bow
 
-    def build_graph(self):
+    def build_graph(self) -> dict:
 
         """ Generates multiple valuable nlp artifacts in the library's /nlp folder path, with the
         primary objective of generating the co-occurrence matrix. """
@@ -186,9 +203,11 @@ def build_graph(self):
         graph_summary.update({"time_stamp": ts})
 
         # write to manifest.json for knowledge graph
-        json_dict = json.dumps(graph_summary,indent=2)
-        with open(os.path.join(self.library.nlp_path,"manifest.json"),"w", encoding='utf-8') as outfile:
-            outfile.write(json_dict)
+        try:
+            with open(os.path.join(self.library.nlp_path,"manifest.json"), "w", encoding='utf-8') as outfile:
+                outfile.write(json.dumps(graph_summary, indent=2))
+        except Exception as e:
+            logger.error(f"Failed to write manifest.json: {e}")
 
         return graph_summary
 
@@ -833,16 +852,25 @@ def get_unique_vocab_len(self):
 
         return len(self.get_unique_vocab_lookup())
 
-    def get_unique_vocab_lookup(self):
+    def get_unique_vocab_lookup(self) -> dict:
 
""" if self.library.get_knowledge_graph_status() != "yes": self.build_graph() - j = json.load(open(os.path.join(self.library.nlp_path,"vocab_lookup.json"), "r", encoding='utf-8')) - - return j + try: + with open(os.path.join(self.library.nlp_path, "vocab_lookup.json"), "r", encoding='utf-8') as file: + return json.load(file) + except FileNotFoundError: + logger.error("vocab_lookup.json file not found.") + return {} + except json.JSONDecodeError: + logger.error("Error decoding JSON from vocab_lookup.json.") + return {} + except Exception as e: + logger.error(f"Unexpected error: {e}") + return {} def get_unique_vocab_reverse_lookup(self):