[DOCS][LUMO]

kyegomez · Jan 27, 2025 · f1600d2 · f1600d2
1 parent 12109e3
commit f1600d2
Show file tree

Hide file tree

Showing 16 changed files with 252 additions and 94 deletions.
diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
@@ -210,10 +210,12 @@ nav:
       - OpenRouter: "swarms/examples/openrouter.md"
       - XAI: "swarms/examples/xai.md"
     - Swarms Tools: 
-      - Agent with HTX + CoinGecko: "swarms/examples/swarms_tools_htx.md"
-      - Agent with HTX + CoinGecko Function Calling: "swarms/examples/swarms_tools_htx_gecko.md"
       - Agent with Yahoo Finance: "swarms/examples/yahoo_finance.md"
       - Twitter Agents: "swarms_tools/twitter.md"
+      - Blockchain Agents:
+        - Agent with HTX + CoinGecko: "swarms/examples/swarms_tools_htx.md"
+        - Agent with HTX + CoinGecko Function Calling: "swarms/examples/swarms_tools_htx_gecko.md"
+        - Lumo: "swarms/examples/lumo.md"
     - Meme Agents:
       - Bob The Builder: "swarms/examples/bob_the_builder.md"
       - Meme Agent Builder: "swarms/examples/meme_agents.md"

diff --git a/docs/swarms/examples/lumo.md b/docs/swarms/examples/lumo.md
@@ -0,0 +1,63 @@
+# Lumo Example
+Introducing Lumo-70B-Instruct - the largest and most advanced AI model ever created for the Solana ecosystem. Built on Meta's groundbreaking Llama 3.3 70B Instruct foundation, this revolutionary model represents a quantum leap in blockchain-specific artificial intelligence. With an unprecedented 70 billion parameters and trained on the most comprehensive Solana documentation dataset ever assembled, Lumo-70B-Instruct sets a new standard for developer assistance in the blockchain space. The example below wraps the model in a small `Lumo` class that loads it with 4-bit quantization and exposes a `run(task)` method, then hands that wrapper to a Swarms `Agent`.
+
+
+- [Docs](https://huggingface.co/lumolabs-ai/Lumo-70B-Instruct)
+
+```python
+from swarms import Agent
+from transformers import LlamaForCausalLM, AutoTokenizer
+import torch
+from transformers import BitsAndBytesConfig
+
+class Lumo:
+    """
+    A class for generating text using the Lumo model with 4-bit quantization.
+    """
+    def __init__(self):
+        """
+        Initializes the Lumo model with 4-bit quantization and a tokenizer.
+        """
+        # Configure 4-bit quantization
+        bnb_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_quant_type="nf4",
+            bnb_4bit_compute_dtype=torch.float16,
+            llm_int8_enable_fp32_cpu_offload=True
+        )
+
+        self.model = LlamaForCausalLM.from_pretrained(
+            "lumolabs-ai/Lumo-70B-Instruct",
+            device_map="auto",
+            quantization_config=bnb_config,
+            use_cache=False,
+            attn_implementation="sdpa"
+        )
+        self.tokenizer = AutoTokenizer.from_pretrained("lumolabs-ai/Lumo-70B-Instruct")
+
+    def run(self, task: str) -> str:
+        """
+        Generates text based on the given prompt using the Lumo model.
+
+        Args:
+            task (str): The input prompt for the model.
+
+        Returns:
+            str: The generated text.
+        """
+        inputs = self.tokenizer(task, return_tensors="pt").to(self.model.device)
+        outputs = self.model.generate(**inputs, max_new_tokens=100)
+        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+
+
+
+Agent(
+    agent_name="Solana-Analysis-Agent",
+    model_name=Lumo(),
+    max_loops="auto",
+    interactive=True,
+    streaming_on=True,
+).run("How do i create a smart contract in solana?")
+
+```
diff --git a/example.py b/example.py
@@ -1,6 +1,3 @@
-import os
-
-from swarm_models import OpenAIChat
 from swarms import Agent
 from swarms.prompts.finance_agent_sys_prompt import (
     FINANCIAL_AGENT_SYS_PROMPT,
@@ -9,24 +6,14 @@
 
 load_dotenv()
 
-# Get the OpenAI API key from the environment variable
-api_key = os.getenv("GROQ_API_KEY")
-
-# Model
-model = OpenAIChat(
-    openai_api_base="https://api.groq.com/openai/v1",
-    openai_api_key=api_key,
-    model_name="llama-3.1-70b-versatile",
-    temperature=0.1,
-)
 
 # Initialize the agent
 agent = Agent(
     agent_name="Financial-Analysis-Agent",
     agent_description="Personal finance advisor agent",
     system_prompt=FINANCIAL_AGENT_SYS_PROMPT,
     max_loops=1,
-    llm=model,
+    model_name="gpt-4o",
     dynamic_temperature_enabled=True,
     user_name="swarms_corp",
     retry_attempts=3,

diff --git a/new_features_examples/lumo_example.py b/new_features_examples/lumo_example.py
@@ -0,0 +1,59 @@
+import torch
+from transformers import (
+    AutoTokenizer,
+    BitsAndBytesConfig,
+    LlamaForCausalLM,
+)
+
+from swarms import Agent
+
+
+class Lumo:
+    """
+    A class for generating text using the Lumo model with 4-bit quantization.
+    """
+    def __init__(self):
+        """
+        Initializes the Lumo model with 4-bit quantization and a tokenizer.
+        """
+        # Configure 4-bit quantization
+        bnb_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_quant_type="nf4",
+            bnb_4bit_compute_dtype=torch.float16,
+            llm_int8_enable_fp32_cpu_offload=True
+        )
+
+        self.model = LlamaForCausalLM.from_pretrained(
+            "lumolabs-ai/Lumo-70B-Instruct",
+            device_map="auto",
+            quantization_config=bnb_config,
+            use_cache=False,
+            attn_implementation="sdpa"
+        )
+        self.tokenizer = AutoTokenizer.from_pretrained("lumolabs-ai/Lumo-70B-Instruct")
+
+    def run(self, task: str) -> str:
+        """
+        Generates text based on the given prompt using the Lumo model.
+
+        Args:
+            task (str): The input prompt for the model.
+
+        Returns:
+            str: The generated text.
+        """
+        inputs = self.tokenizer(task, return_tensors="pt").to(self.model.device)
+        outputs = self.model.generate(**inputs, max_new_tokens=100)
+        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+
+
+
+Agent(
+    agent_name="Solana-Analysis-Agent",
+    model_name=Lumo(),
+    max_loops="auto",
+    interactive=True,
+    streaming_on=True,
+).run("How do i create a smart contract in solana?")
diff --git a/pyproject.toml b/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "swarms"
-version = "6.9.7"
+version = "6.9.8"
 description = "Swarms - TGSC"
 license = "MIT"
 authors = ["Kye Gomez <[email protected]>"]
@@ -62,7 +62,6 @@ python = ">=3.10,<4.0"
 asyncio = ">=3.4.3,<4.0"
 toml = "*"
 pypdf = "5.1.0"
-swarm-models = "*"
 loguru = "*"
 pydantic = "*"
 tenacity = "*"
@@ -76,7 +75,6 @@ aiofiles = "*"
 clusterops = "*"
 # chromadb = "*"
 rich = "*"
-pandas = "*"
 # sentence-transformers = "*"
 
 

diff --git a/swarms/structs/agent.py b/swarms/structs/agent.py
@@ -23,7 +23,6 @@
 import yaml
 from loguru import logger
 from pydantic import BaseModel
-from swarm_models.tiktoken_wrapper import TikTokenizer
 
 from swarms.agents.ape_agent import auto_generate_prompt
 from swarms.artifacts.main_artifact import Artifact
@@ -55,6 +54,7 @@
 )
 from swarms.telemetry.capture_sys_data import log_agent_data
 from swarms.agents.agent_print import agent_print
+from swarms.utils.litellm_tokenizer import count_tokens
 
 
 # Utils
@@ -439,7 +439,7 @@ def __init__(
         self.time_created = time_created
         self.data_memory = data_memory
         self.load_yaml_path = load_yaml_path
-        self.tokenizer = TikTokenizer()
+        self.tokenizer = tokenizer
         self.auto_generate_prompt = auto_generate_prompt
         self.rag_every_loop = rag_every_loop
         self.plan_enabled = plan_enabled
@@ -563,9 +563,7 @@ def __init__(
             max_loops=self.max_loops,
             steps=self.short_memory.to_dict(),
             full_history=self.short_memory.get_str(),
-            total_tokens=self.tokenizer.count_tokens(
-                self.short_memory.get_str()
-            ),
+            total_tokens=count_tokens(self.short_memory.get_str()),
             stopping_token=self.stopping_token,
             interactive=self.interactive,
             dynamic_temperature_enabled=self.dynamic_temperature_enabled,
@@ -1043,10 +1041,8 @@ def _run(
             self.agent_output.full_history = (
                 self.short_memory.get_str()
             )
-            self.agent_output.total_tokens = (
-                self.tokenizer.count_tokens(
-                    self.short_memory.get_str()
-                )
+            self.agent_output.total_tokens = count_tokens(
+                self.short_memory.get_str()
             )
 
             # Handle artifacts
@@ -1976,7 +1972,7 @@ def memory_query(self, task: str = None, *args, **kwargs) -> None:
                 )
 
                 # # Count the tokens
-                # memory_token_count = self.tokenizer.count_tokens(
+                # memory_token_count = count_tokens(
                 #     memory_retrieval
                 # )
                 # if memory_token_count > self.memory_chunk_size:
@@ -2065,7 +2061,7 @@ def stream_response(
     def check_available_tokens(self):
         # Log the amount of tokens left in the memory and in the task
         if self.tokenizer is not None:
-            tokens_used = self.tokenizer.count_tokens(
+            tokens_used = count_tokens(
                 self.short_memory.return_history_as_string()
             )
             logger.info(
@@ -2076,7 +2072,7 @@ def check_available_tokens(self):
 
     def tokens_checks(self):
         # Check the tokens available
-        tokens_used = self.tokenizer.count_tokens(
+        tokens_used = count_tokens(
             self.short_memory.return_history_as_string()
         )
         out = self.check_available_tokens()
@@ -2140,13 +2136,10 @@ def log_step_metadata(
 
         # Calculate token usage
         # full_memory = self.short_memory.return_history_as_string()
-        # prompt_tokens = self.tokenizer.count_tokens(full_memory)
-        # completion_tokens = self.tokenizer.count_tokens(response)
+        # prompt_tokens = count_tokens(full_memory)
+        # completion_tokens = count_tokens(response)
         # total_tokens = prompt_tokens + completion_tokens
-        total_tokens = (
-            self.tokenizer.count_tokens(task)
-            + self.tokenizer.count_tokens(response),
-        )
+        total_tokens = (count_tokens(task) + count_tokens(response),)
 
         # # Get memory responses
         # memory_responses = {

diff --git a/swarms/structs/agent_memory_manager.py b/swarms/structs/agent_memory_manager.py
@@ -7,7 +7,7 @@
 
 import yaml
 from pydantic import BaseModel
-from swarm_models.tiktoken_wrapper import TikTokenizer
+from swarms.utils.litellm_tokenizer import count_tokens
 
 logger = logging.getLogger(__name__)
 
@@ -60,7 +60,7 @@ def __init__(
         long_term_memory: Optional[Any] = None,
     ):
         self.config = config
-        self.tokenizer = tokenizer or TikTokenizer()
+        self.tokenizer = tokenizer
         self.long_term_memory = long_term_memory
 
         # Initialize memories
@@ -86,7 +86,7 @@ def create_memory_entry(
             agent_name=agent_name,
             session_id=session_id,
             memory_type=memory_type,
-            token_count=self.tokenizer.count_tokens(content),
+            token_count=count_tokens(content),
         )
         return MemoryEntry(content=content, metadata=metadata)
 
@@ -219,7 +219,7 @@ def truncate_to_token_limit(
         self, text: str, max_tokens: int
     ) -> str:
         """Truncate text to fit within token limit"""
-        current_tokens = self.tokenizer.count_tokens(text)
+        current_tokens = count_tokens(text)
 
         if current_tokens <= max_tokens:
             return text
@@ -230,7 +230,7 @@ def truncate_to_token_limit(
         current_count = 0
 
         for sentence in sentences:
-            sentence_tokens = self.tokenizer.count_tokens(sentence)
+            sentence_tokens = count_tokens(sentence)
             if current_count + sentence_tokens <= max_tokens:
                 result.append(sentence)
                 current_count += sentence_tokens
@@ -376,9 +376,7 @@ def search_memories(
                         agent_name="system",
                         session_id="long_term",
                         memory_type="long_term",
-                        token_count=self.tokenizer.count_tokens(
-                            content
-                        ),
+                        token_count=count_tokens(content),
                     )
                     results.append(
                         MemoryEntry(

diff --git a/swarms/structs/auto_swarm_builder.py b/swarms/structs/auto_swarm_builder.py
@@ -2,9 +2,11 @@
 from typing import List
 
 from pydantic import BaseModel, Field
-from swarm_models import OpenAIFunctionCaller, OpenAIChat
 
 from swarms.structs.agent import Agent
+from swarms.structs.meme_agent_persona_generator import (
+    OpenAIFunctionCaller,
+)
 from swarms.structs.swarm_router import SwarmRouter
 from swarms.utils.loguru_logger import initialize_logger
 from swarms.structs.agents_available import showcase_available_agents
@@ -64,15 +66,6 @@ class SwarmConfig(BaseModel):
     )
 
 
-# Get the OpenAI API key from the environment variable
-api_key = os.getenv("OPENAI_API_KEY")
-
-# Create an instance of the OpenAIChat class
-model = OpenAIChat(
-    openai_api_key=api_key, model_name="gpt-4o-mini", temperature=0.1
-)
-
-
 BOSS_SYSTEM_PROMPT = """
 Manage a swarm of worker agents to efficiently serve the user by deciding whether to create new agents or delegate tasks. Ensure operations are efficient and effective.
 
@@ -248,7 +241,7 @@ def build_agent(
             agent_name=agent_name,
             description=agent_description,
             system_prompt=agent_system_prompt,
-            llm=model,
+            model_name="gpt-4o",
             max_loops=max_loops,
             autosave=True,
             dashboard=False,