add more models

mixtral 8x22b
xtekky · Apr 13, 2024 · ed8afc2 · ed8afc2
1 parent 8c560b8
commit ed8afc2
Show file tree

Hide file tree

Showing 6 changed files with 49 additions and 13 deletions.
diff --git a/.gitignore b/.gitignore
@@ -59,4 +59,7 @@ node_modules
 models
 projects/windows/g4f
 doc.txt
-dist.py
+dist.py
+x.txt
+bench.py
+to-reverse.txt
diff --git a/README.md b/README.md
@@ -281,6 +281,15 @@ set G4F_PROXY=http://host:port
 | [beta.theb.ai](https://beta.theb.ai) | `g4f.Provider.Theb` | ✔️ | ✔️ | ✔️ | ![Unknown](https://img.shields.io/badge/Unknown-grey) | ❌ |
 | [you.com](https://you.com) | `g4f.Provider.You` | ✔️ | ✔️ | ✔️ | ![Unknown](https://img.shields.io/badge/Unknown-grey) | ❌ |
 
+## New Open-Source Models
+While we wait for GPT-5, here is a list of new open-source models that all outperform gpt-3.5-turbo; some of them rival gpt-4. Expect this list to grow.
+
+| Model | Provider | Parameters |
+| ------ | ------- | ------ |
+| [mixtral-8x22b](https://huggingface.co/mistral-community/Mixtral-8x22B-v0.1) | `g4f.Provider.DeepInfra` | 176B / 44B active |
+| [dbrx-instruct](https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm) | `g4f.Provider.DeepInfra` | 132B / 36B active |
+
+
 ### GPT-3.5
 
 | Website | Provider | GPT-3.5 | GPT-4 | Stream | Status | Auth |

diff --git a/g4f/Provider/DeepInfra.py b/g4f/Provider/DeepInfra.py
@@ -11,7 +11,7 @@ class DeepInfra(Openai):
     needs_auth = False
     supports_stream = True
     supports_message_history = True
-    default_model = 'meta-llama/Llama-2-70b-chat-hf'
+    default_model = 'HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1'
 
     @classmethod
     def get_models(cls):
@@ -32,6 +32,14 @@ def create_async_generator(
         max_tokens: int = 1028,
         **kwargs
     ) -> AsyncResult:
+
+        if not '/' in model:
+            models = {
+                'mixtral-8x22b': 'HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1',
+                'dbrx-instruct': 'databricks/dbrx-instruct',
+            }
+            model = models.get(model, model)
+
         headers = {
             'Accept-Encoding': 'gzip, deflate, br',
             'Accept-Language': 'en-US',

diff --git a/g4f/Provider/needs_auth/Openai.py b/g4f/Provider/needs_auth/Openai.py
@@ -51,6 +51,7 @@ async def create_async_generator(
                 stream=stream,
                 **extra_data
             )
+
             async with session.post(f"{api_base.rstrip('/')}/chat/completions", json=data) as response:
                 await raise_for_status(response)
                 if not stream:

diff --git a/g4f/client/service.py b/g4f/client/service.py
@@ -55,9 +55,10 @@ def get_model_and_provider(model    : Union[Model, str],
         provider = convert_to_provider(provider)
 
     if isinstance(model, str):
+
         if model in ModelUtils.convert:
             model = ModelUtils.convert[model]
-
+    
     if not provider:
         if isinstance(model, str):
             raise ModelNotFoundError(f'Model not found: {model}')

diff --git a/g4f/models.py b/g4f/models.py
@@ -162,11 +162,11 @@ def __all__() -> list[str]:
     best_provider = DeepInfra
 )
 
-# mixtral_8x22b = Model(
-#     name          = "mistralai/Mixtral-8x22B-v0.1",
-#     base_provider = "huggingface",
-#     best_provider = DeepInfra
-# )
+# Registered under the 'mixtral-8x22b' key; the model id sent to the provider
+# is the HuggingFaceH4 Zephyr ORPO 141B-A35B checkpoint, routed via DeepInfra.
+mixtral_8x22b = Model(
+    name="HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1",
+    base_provider="huggingface",
+    best_provider=DeepInfra,
+)
 
 # Misc models
 dolphin_mixtral_8x7b = Model(
@@ -266,6 +266,12 @@ def __all__() -> list[str]:
     best_provider = Pi
 )
 
+# DBRX Instruct is published under the `databricks/` namespace (see `name`),
+# so the base provider is Databricks rather than Mistral. Requests are routed
+# through DeepInfra.
+dbrx_instruct = Model(
+    name = 'databricks/dbrx-instruct',
+    base_provider = 'databricks',
+    best_provider = DeepInfra
+)
+
 class ModelUtils:
     """
     Utility class for mapping string identifiers to Model instances.
@@ -300,20 +306,28 @@ class ModelUtils:
         'gigachat'     : gigachat,
         'gigachat_plus': gigachat_plus,
         'gigachat_pro' : gigachat_pro,
-
+
+        # Mistral Opensource
         'mixtral-8x7b': mixtral_8x7b,
         'mistral-7b': mistral_7b,
         'mistral-7b-v02': mistral_7b_v02,
-        # 'mixtral-8x22b': mixtral_8x22b,
+        'mixtral-8x22b': mixtral_8x22b,
         'dolphin-mixtral-8x7b': dolphin_mixtral_8x7b,
-        'lzlv-70b': lzlv_70b,
-        'airoboros-70b': airoboros_70b,
-        'openchat_3.5': openchat_35,
+
+        # google gemini
         'gemini': gemini,
         'gemini-pro': gemini_pro,
+
+        # anthropic
         'claude-v2': claude_v2,
         'claude-3-opus': claude_3_opus,
         'claude-3-sonnet': claude_3_sonnet,
+
+        # other
+        'dbrx-instruct': dbrx_instruct,
+        'lzlv-70b': lzlv_70b,
+        'airoboros-70b': airoboros_70b,
+        'openchat_3.5': openchat_35,
         'pi': pi
     }