diff --git a/.gitignore b/.gitignore
index d14c4a0..059e3fc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,6 +11,7 @@ yarn-debug.log*
 yarn-error.log*
 yarn.lock*
 package-lock.json
+pnpm-lock.yaml
 Cargo.lock
 
 # Runtime data
diff --git a/README.md b/README.md
index 566df8d..980a377 100755
--- a/README.md
+++ b/README.md
@@ -36,7 +36,7 @@ This runs a Flask process, so you can add the typical flags such as setting a di
 ```sh
 $ git clone https://github.com/nat/openplayground
 $ cd app && npm install && npx parcel watch src/index.html --no-cache
-$ cd server && pip3 install -r requirements.txt && cd .. && python3 -m server.app
+$ cd server && pip3 install -r requirements.txt && cd .. && python3 -m server.app -m ./server/models.json
 ```
 
 ## Docker
@@ -75,28 +75,42 @@ You can add models in `server/models.json` with the following schema:
 
 #### Local inference
 
-For models running locally on your device you can add them to openplayground like the following (a minimal example):
+For models running locally on your device, add the llama-cpp-python dependency and set the **LLAMA-7B_MODEL_BIN_PATH** and **LLAMA-7B_MODEL_PROMPT_PATH** variables in the .env file. The **LLAMA-7B** part should match the model name in models.json (a sample .env is shown at the end of this section).
+
+The file referenced by LLAMA-7B_MODEL_PROMPT_PATH should match the model's prompt format. Here are some examples:
+
+##### Llama
+
+```
+Transcript of a dialog, where the User interacts with an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision.
+
+User: Hello, Bob.
+Bob: Hello. How may I help you today?
+User: Please tell me the largest city in Europe.
+Bob: Sure. The largest city in Europe is Moscow, the capital of Russia.
+User:{prompt}
+Bob:
+```
+
+##### Alpaca
+
+```
+### Instruction:
+{prompt}
+
+### Response:
 
-```json
-"llama": {
-    "api_key" : false,
-    "models" : {
-        "llama-70b": {
-            "parameters": {
-                "temperature": {
-                    "value": 0.5,
-                    "range": [
-                        0.1,
-                        1.0
-                    ]
-                },
-            }
-        }
-    }
-}
 ```
 
-Keep in mind you will need to add a generation method for your model in `server/app.py`. Take a look at `local_text_generation()` as an example.
+##### Vicuna
+
+```
+### Human:{prompt}
+### Assistant:
+```
+
+
+Keep in mind you will need to add a generation method for your model in `server/app.py`. Take a look at `local_text_generation_llama()` as an example.
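+
+For reference, a minimal `.env` for the setup described above might look like the following. The paths are illustrative — point them at wherever your model binaries and prompt template files actually live:
+
+```
+LLAMA-7B_MODEL_BIN_PATH=./models/llama-7b/ggml-model-q4_0.bin
+LLAMA-7B_MODEL_PROMPT_PATH=./models/llama-7b/prompt.txt
+ALPACA-7B_MODEL_BIN_PATH=./models/alpaca-7b/ggml-model-q4_0.bin
+ALPACA-7B_MODEL_PROMPT_PATH=./models/alpaca-7b/prompt.txt
+```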
#### API Provider Inference diff --git a/app/src/components/parameters-side-panel.tsx b/app/src/components/parameters-side-panel.tsx index 4127cf1..199cfdf 100644 --- a/app/src/components/parameters-side-panel.tsx +++ b/app/src/components/parameters-side-panel.tsx @@ -24,6 +24,7 @@ import {handleSelectModel} from "../lib/utils" const modelProviders = { forefront: "Forefront", + "llama-local": "Llama (Local)", "huggingface-local": "Hugging Face (Local)", huggingface: "Hugging Face", "aleph-alpha": "Aleph Alpha", diff --git a/app/src/lib/editor-styles.tsx b/app/src/lib/editor-styles.tsx index 846d249..f45bce5 100644 --- a/app/src/lib/editor-styles.tsx +++ b/app/src/lib/editor-styles.tsx @@ -57,6 +57,8 @@ export const styleMap = { return styles.openai; case "huggingface-local": return styles.huggingface_local; + case "llama-local": + return styles.huggingface_local; case "cohere": return styles.cohere; case "huggingface": diff --git a/server/app.py b/server/app.py index 3804504..560e88e 100755 --- a/server/app.py +++ b/server/app.py @@ -299,6 +299,8 @@ def text_generation(self, inference_request: InferenceRequest): if inference_request.model_provider == "openai": return self.inference_manager.openai_text_generation(provider_details, inference_request) + elif inference_request.model_provider == "llama-local": + return self.inference_manager.local_text_generation_llama(provider_details, inference_request) elif inference_request.model_provider == "cohere": return self.inference_manager.cohere_text_generation(provider_details, inference_request) elif inference_request.model_provider == "huggingface": diff --git a/server/lib/api/inference.py b/server/lib/api/inference.py index 3e30d46..f9311b3 100644 --- a/server/lib/api/inference.py +++ b/server/lib/api/inference.py @@ -124,6 +124,6 @@ def split_tasks_by_provider(tasks: List[InferenceRequest]) -> Tuple[List[Inferen local_tasks, remote_tasks = [], [] for task in tasks: - (local_tasks if task.model_provider == "huggingface-local" else remote_tasks).append(task) + (local_tasks if "-local" in task.model_provider else remote_tasks).append(task) return local_tasks, remote_tasks \ No newline at end of file diff --git a/server/lib/inference/__init__.py b/server/lib/inference/__init__.py index 57d4546..89a6110 100644 --- a/server/lib/inference/__init__.py +++ b/server/lib/inference/__init__.py @@ -1,3 +1,4 @@ +from pathlib import Path import anthropic import cachetools import math @@ -15,6 +16,7 @@ from dataclasses import dataclass from typing import Callable, Union from .huggingface.hf import HFInference +from llama_cpp import Llama logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -191,7 +193,7 @@ def __error_handler__(self, inference_fn: InferenceFunction, provider_details: P logger.error(f"Error parsing response from API: {e}") except Exception as e: infer_result.token = f"[ERROR] {e}" - logger.error(f"Error: {e}") + logger.exception(f"Error: {e}") finally: if infer_result.token is None: infer_result.token = "[COMPLETED]" @@ -601,6 +603,50 @@ def __local_text_generation__(self, provider_details: ProviderDetails, inference def local_text_generation(self, provider_details: ProviderDetails, inference_request: InferenceRequest): self.__error_handler__(self.__local_text_generation__, provider_details, inference_request) + def __local_text_generation_llama__(self, provider_details: ProviderDetails, inference_request: InferenceRequest): + cancelled = False + env_model_bin_path = inference_request.model_name.upper() + '_MODEL_BIN_PATH' + 
env_model_prompt_path = inference_request.model_name.upper() + '_MODEL_PROMPT_PATH'
+        llama_model_path = os.environ.get(env_model_bin_path)
+        llama_prompt_path = os.environ.get(env_model_prompt_path)
+
+        if not llama_model_path:
+            logger.error(f"Please add {env_model_bin_path} to the .env file or environment variables if you want to use this model!")
+            return
+        if not llama_prompt_path:
+            logger.warning(f"Please add {env_model_prompt_path} (the path to a prompt template file containing a {{prompt}} format string) to the .env file or environment variables if you want to use this model with a custom prompt format.")
+            llama_prompt_template = "{prompt}"
+        else:
+            with open(Path(llama_prompt_path)) as f:
+                llama_prompt_template = f.read()
+
+        llm = Llama(model_path=llama_model_path)
+        prompt_final = llama_prompt_template.format(prompt=inference_request.prompt)
+
+        stream = llm(
+            prompt_final,
+            max_tokens=inference_request.model_parameters['maximumLength'],
+            temperature=float(inference_request.model_parameters['temperature']),
+            top_p=float(inference_request.model_parameters['topP']),
+            repeat_penalty=float(inference_request.model_parameters['repetitionPenalty']),
+            stop=inference_request.model_parameters['stopSequences'],
+            stream=True,
+        )
+
+        for output in stream:
+            if cancelled: break
+            infer_response = InferenceResult(
+                uuid=inference_request.uuid,
+                model_name=inference_request.model_name,
+                model_tag=inference_request.model_tag,
+                model_provider=inference_request.model_provider,
+                token=output['choices'][0]['text'],
+                probability=None,
+                top_n_distribution=None
+            )
+            if not self.announcer.announce(infer_response, event="infer"):
+                cancelled = True
+                logger.info(f"Cancelled inference for {inference_request.uuid} - {inference_request.model_name}")
+
+    def local_text_generation_llama(self, provider_details: ProviderDetails, inference_request: InferenceRequest):
+        self.__error_handler__(self.__local_text_generation_llama__, provider_details, inference_request)
+
     def __anthropic_text_generation__(self, provider_details: ProviderDetails, inference_request: InferenceRequest):
         c = anthropic.Client(provider_details.api_key)
diff --git a/server/models.json b/server/models.json
index 22afeb7..8ae4408 100644
--- a/server/models.json
+++ b/server/models.json
@@ -1,14 +1,12 @@
 {
     "openai": {
-        "requiresAPIKey": true,
-        "remoteInference": true,
         "models": {
             "text-ada-001": {
-                "enabled": false,
-                "status": "ready",
                 "capabilities": [
                     "logprobs"
                 ],
+                "enabled": false,
+                "status": "ready",
                 "parameters": {
                     "temperature": {
                         "value": 0.5,
@@ -52,11 +50,11 @@
                 }
             },
             "text-babbage-001": {
-                "enabled": false,
-                "status": "ready",
                 "capabilities": [
                     "logprobs"
                 ],
+                "enabled": false,
+                "status": "ready",
                 "parameters": {
                     "temperature": {
                         "value": 0.5,
@@ -100,11 +98,11 @@
                 }
             },
             "text-curie-001": {
-                "enabled": false,
-                "status": "ready",
                 "capabilities": [
                     "logprobs"
                 ],
+                "enabled": false,
+                "status": "ready",
                 "parameters": {
                     "temperature": {
                         "value": 0.5,
@@ -148,11 +146,11 @@
                 }
             },
             "text-davinci-003": {
-                "enabled": false,
-                "status": "ready",
                 "capabilities": [
                     "logprobs"
                 ],
+                "enabled": false,
+                "status": "ready",
                 "parameters": {
                     "temperature": {
                         "value": 0.5,
@@ -196,11 +194,11 @@
                 }
             },
             "gpt-3.5-turbo": {
-                "enabled": false,
-                "status": "ready",
                 "capabilities": [
                     "logprobs"
                 ],
+                "enabled": false,
+                "status": "ready",
                 "parameters": {
                     "temperature": {
                         "value": 0.5,
@@ -244,11 +242,11 @@
                 }
             },
             "gpt-4": {
-                "enabled": false,
-                "status": "ready",
                 "capabilities": [
                     "logprobs"
                 ],
+                "enabled": false,
+                "status": "ready",
                 "parameters": {
"temperature": { "value": 0.5, @@ -291,16 +289,18 @@ } } } - } - }, - "cohere": { + }, "requiresAPIKey": true, "remoteInference": true, + "defaultParameters": null, + "searchURL": null + }, + "cohere": { "models": { "xlarge": { + "capabilities": [], "enabled": false, "status": "ready", - "capabilities": [], "parameters": { "temperature": { "value": 1, @@ -351,9 +351,9 @@ } }, "medium": { + "capabilities": [], "enabled": false, "status": "ready", - "capabilities": [], "parameters": { "temperature": { "value": 1, @@ -404,9 +404,9 @@ } }, "command-medium-nightly": { + "capabilities": [], "enabled": false, "status": "ready", - "capabilities": [], "parameters": { "temperature": { "value": 1, @@ -457,9 +457,9 @@ } }, "command-xlarge-nightly": { + "capabilities": [], "enabled": false, "status": "ready", - "capabilities": [], "parameters": { "temperature": { "value": 1, @@ -509,13 +509,16 @@ } } } - } + }, + "requiresAPIKey": true, + "remoteInference": true, + "defaultParameters": null, + "searchURL": null }, "huggingface": { + "models": {}, "requiresAPIKey": true, "remoteInference": true, - "searchURL": "https://huggingface.co/api/quicksearch?q={searchQuery}&type=model", - "defaultCapabilities": [], "defaultParameters": { "temperature": { "value": 1, @@ -557,82 +560,34 @@ "range": [] } }, - "models": {} + "searchURL": "https://huggingface.co/api/quicksearch?q={searchQuery}&type=model" }, "huggingface-local": { - "requiresAPIKey": false, - "remoteInference": false, - "searchURL": "https://huggingface.co/api/quicksearch?q={searchQuery}&type=model", - "defaultCapabilities": [], - "defaultParameters": { - "temperature": { - "value": 1, - "range": [ - 0.1, - 1 - ] - }, - "maximumLength": { - "value": 200, - "range": [ - 50, - 1024 - ] - }, - "topP": { - "value": 0.99, - "range": [ - 0.1, - 0.99 - ] - }, - "topK": { - "value": 1, - "range": [ - 1, - 500 - ] - }, - "repetitionPenalty": { - "value": 1, - "range": [ - 0.1, - 2 - ] - } - }, - "models": {} - }, - "anthropic": { - "requiresAPIKey": true, - "remoteInference": true, "models": { - "claude-instant-v1": { + "sobamchan/bart-large-scitldr-distilled-3-3": { + "capabilities": [], "enabled": false, "status": "ready", - "capabilities": [ - "logprobs" - ], "parameters": { "temperature": { "value": 1, "range": [ - 0, + 0.1, 1 ] }, "maximumLength": { - "value": 205, + "value": 200, "range": [ 50, 1024 ] }, "topP": { - "value": 1, + "value": 0.99, "range": [ 0.1, - 1 + 0.99 ] }, "topK": { @@ -642,37 +597,24 @@ 500 ] }, - "presencePenalty": { - "value": 1, - "range": [ - 0, - 1 - ] - }, - "frequencyPenalty": { + "repetitionPenalty": { "value": 1, "range": [ - 0, - 1 + 0.1, + 2 ] - }, - "stopSequences": { - "value": [], - "range": [] } } }, - "claude-v1": { + "openai/clip-vit-large-patch14": { + "capabilities": [], "enabled": false, "status": "ready", - "capabilities": [ - "logprobs" - ], "parameters": { "temperature": { "value": 1, "range": [ - 0, + 0.1, 1 ] }, @@ -684,10 +626,10 @@ ] }, "topP": { - "value": 1, + "value": 0.99, "range": [ 0.1, - 1 + 0.99 ] }, "topK": { @@ -697,43 +639,66 @@ 500 ] }, - "presencePenalty": { + "repetitionPenalty": { "value": 1, "range": [ - 0, + 0.1, + 2 + ] + } + } + }, + "shibing624/text2vec-base-chinese": { + "capabilities": [], + "enabled": false, + "status": "ready", + "parameters": { + "temperature": { + "value": 1, + "range": [ + 0.1, 1 ] }, - "frequencyPenalty": { + "maximumLength": { + "value": 200, + "range": [ + 50, + 1024 + ] + }, + "topP": { + "value": 0.99, + "range": [ + 0.1, + 0.99 + ] + }, + "topK": { "value": 
1, "range": [ - 0, - 1 + 1, + 500 ] }, - "stopSequences": { - "value": [], - "range": [] + "repetitionPenalty": { + "value": 1, + "range": [ + 0.1, + 2 + ] } } - } - } - }, - "aleph-alpha": { - "requiresAPIKey": true, - "remoteInference": true, - "models": { - "luminous-supreme-control": { + }, + "laion/CLIP-ViT-H-14-laion2B-s32B-b79K": { + "capabilities": [], "enabled": false, "status": "ready", - "capabilities": [ - "logprobs" - ], "parameters": { "temperature": { "value": 1, "range": [ - 0, + 0.1, 1 ] }, @@ -745,10 +710,10 @@ ] }, "topP": { - "value": 1, + "value": 0.99, "range": [ - 0, - 1 + 0.1, + 0.99 ] }, "topK": { @@ -762,26 +727,20 @@ "value": 1, "range": [ 0.1, - 1 + 2 ] - }, - "stopSequences": { - "value": [], - "range": [] } } }, - "luminous-base": { + "csebuetnlp/mT5_m2o_chinese_simplified_crossSum": { + "capabilities": [], "enabled": false, "status": "ready", - "capabilities": [ - "logprobs" - ], "parameters": { "temperature": { "value": 1, "range": [ - 0, + 0.1, 1 ] }, @@ -793,10 +752,10 @@ ] }, "topP": { - "value": 1, + "value": 0.99, "range": [ - 0, - 1 + 0.1, + 0.99 ] }, "topK": { @@ -810,26 +769,20 @@ "value": 1, "range": [ 0.1, - 1 + 2 ] - }, - "stopSequences": { - "value": [], - "range": [] } } }, - "luminous-supreme": { + "decapoda-research/llama-7b-hf": { + "capabilities": [], "enabled": false, "status": "ready", - "capabilities": [ - "logprobs" - ], "parameters": { "temperature": { "value": 1, "range": [ - 0, + 0.1, 1 ] }, @@ -841,10 +794,10 @@ ] }, "topP": { - "value": 1, + "value": 0.99, "range": [ - 0, - 1 + 0.1, + 0.99 ] }, "topK": { @@ -858,26 +811,20 @@ "value": 1, "range": [ 0.1, - 1 + 2 ] - }, - "stopSequences": { - "value": [], - "range": [] } } }, - "luminous-extended": { + "IDEA-CCNL/Randeng-BART-139M-SUMMARY": { + "capabilities": [], "enabled": false, "status": "ready", - "capabilities": [ - "logprobs" - ], "parameters": { "temperature": { "value": 1, "range": [ - 0, + 0.1, 1 ] }, @@ -889,10 +836,10 @@ ] }, "topP": { - "value": 1, + "value": 0.99, "range": [ - 0, - 1 + 0.1, + 0.99 ] }, "topK": { @@ -906,22 +853,866 @@ "value": 1, "range": [ 0.1, - 1 + 2 ] - }, - "stopSequences": { - "value": [], - "range": [] } } } - } + }, + "requiresAPIKey": false, + "remoteInference": false, + "defaultParameters": { + "temperature": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "maximumLength": { + "value": 200, + "range": [ + 50, + 1024 + ] + }, + "topP": { + "value": 0.99, + "range": [ + 0.1, + 0.99 + ] + }, + "topK": { + "value": 1, + "range": [ + 1, + 500 + ] + }, + "repetitionPenalty": { + "value": 1, + "range": [ + 0.1, + 2 + ] + } + }, + "searchURL": "https://huggingface.co/api/quicksearch?q={searchQuery}&type=model" + }, + "llama-local": { + "models": { + "llama-7b": { + "capabilities": [], + "enabled": true, + "status": "ready", + "parameters": { + "temperature": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "maximumLength": { + "value": 200, + "range": [ + 50, + 1024 + ] + }, + "topP": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "repetitionPenalty": { + "value": 1, + "range": [ + 0, + 2 + ] + }, + "stopSequences": { + "value": [ + "Question:", + "User:", + "Bob:", + "Joke:" + ], + "range": [] + } + } + }, + "llama-13b": { + "capabilities": [], + "enabled": true, + "status": "ready", + "parameters": { + "temperature": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "maximumLength": { + "value": 200, + "range": [ + 50, + 1024 + ] + }, + "topP": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "repetitionPenalty": { + "value": 1, + 
"range": [ + 0, + 2 + ] + }, + "stopSequences": { + "value": [ + "Question:", + "User:", + "Bob:", + "Joke:" + ], + "range": [] + } + } + }, + "llama-30b": { + "capabilities": [], + "enabled": true, + "status": "ready", + "parameters": { + "temperature": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "maximumLength": { + "value": 200, + "range": [ + 50, + 1024 + ] + }, + "topP": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "repetitionPenalty": { + "value": 1, + "range": [ + 0, + 2 + ] + }, + "stopSequences": { + "value": [ + "Question:", + "User:", + "Bob:", + "Joke:" + ], + "range": [] + } + } + }, + "alpaca-7b": { + "capabilities": [], + "enabled": true, + "status": "ready", + "parameters": { + "temperature": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "maximumLength": { + "value": 200, + "range": [ + 50, + 1024 + ] + }, + "topP": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "repetitionPenalty": { + "value": 1, + "range": [ + 0, + 2 + ] + }, + "stopSequences": { + "value": [ + "Question:", + "User:", + "Bob:", + "Joke:", + "### " + ], + "range": [] + } + } + }, + "alpaca-13b": { + "capabilities": [], + "enabled": true, + "status": "ready", + "parameters": { + "temperature": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "maximumLength": { + "value": 200, + "range": [ + 50, + 1024 + ] + }, + "topP": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "repetitionPenalty": { + "value": 1, + "range": [ + 0, + 2 + ] + }, + "stopSequences": { + "value": [ + "Question:", + "User:", + "Bob:", + "Joke:", + "### " + ], + "range": [] + } + } + }, + "alpaca-30b": { + "capabilities": [], + "enabled": true, + "status": "ready", + "parameters": { + "temperature": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "maximumLength": { + "value": 200, + "range": [ + 50, + 1024 + ] + }, + "topP": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "repetitionPenalty": { + "value": 1, + "range": [ + 0, + 2 + ] + }, + "stopSequences": { + "value": [ + "Question:", + "User:", + "Bob:", + "Joke:", + "### " + ], + "range": [] + } + } + }, + "alpacacn-7b": { + "capabilities": [], + "enabled": true, + "status": "ready", + "parameters": { + "temperature": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "maximumLength": { + "value": 200, + "range": [ + 50, + 1024 + ] + }, + "topP": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "repetitionPenalty": { + "value": 1, + "range": [ + 0, + 2 + ] + }, + "stopSequences": { + "value": [ + "Question:", + "User:", + "Bob:", + "Joke:", + "### " + ], + "range": [] + } + } + }, + "gpt4all-7b": { + "capabilities": [], + "enabled": true, + "status": "ready", + "parameters": { + "temperature": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "maximumLength": { + "value": 200, + "range": [ + 50, + 1024 + ] + }, + "topP": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "repetitionPenalty": { + "value": 1, + "range": [ + 0, + 2 + ] + }, + "stopSequences": { + "value": [ + "Question:", + "User:", + "Bob:", + "Joke:", + "### " + ], + "range": [] + } + } + }, + "vicuna-13b": { + "capabilities": [], + "enabled": true, + "status": "ready", + "parameters": { + "temperature": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "maximumLength": { + "value": 200, + "range": [ + 50, + 1024 + ] + }, + "topP": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "repetitionPenalty": { + "value": 1, + "range": [ + 0, + 2 + ] + }, + "stopSequences": { + "value": [ + "Question:", + "User:", + "Bob:", + "Joke:", + "### " + ], + "range": [] + } + } + }, + "koala-7b": { 
+ "capabilities": [], + "enabled": true, + "status": "ready", + "parameters": { + "temperature": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "maximumLength": { + "value": 200, + "range": [ + 50, + 1024 + ] + }, + "topP": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "repetitionPenalty": { + "value": 1, + "range": [ + 0, + 2 + ] + }, + "stopSequences": { + "value": [ + "Question:", + "User:", + "Bob:", + "Joke:", + "### " + ], + "range": [] + } + } + } + }, + "requiresAPIKey": false, + "remoteInference": false, + "defaultParameters": { + "temperature": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "maximumLength": { + "value": 200, + "range": [ + 50, + 1024 + ] + }, + "topP": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "repetitionPenalty": { + "value": 1, + "range": [ + 0, + 2 + ] + }, + "stopSequences": { + "value": [ + "Question:", + "\n" + ], + "range": [] + } + }, + "searchURL": null + }, + "anthropic": { + "models": { + "claude-instant-v1": { + "capabilities": [ + "logprobs" + ], + "enabled": false, + "status": "ready", + "parameters": { + "temperature": { + "value": 1, + "range": [ + 0, + 1 + ] + }, + "maximumLength": { + "value": 205, + "range": [ + 50, + 1024 + ] + }, + "topP": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "topK": { + "value": 1, + "range": [ + 1, + 500 + ] + }, + "presencePenalty": { + "value": 1, + "range": [ + 0, + 1 + ] + }, + "frequencyPenalty": { + "value": 1, + "range": [ + 0, + 1 + ] + }, + "stopSequences": { + "value": [], + "range": [] + } + } + }, + "claude-v1": { + "capabilities": [ + "logprobs" + ], + "enabled": false, + "status": "ready", + "parameters": { + "temperature": { + "value": 1, + "range": [ + 0, + 1 + ] + }, + "maximumLength": { + "value": 200, + "range": [ + 50, + 1024 + ] + }, + "topP": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "topK": { + "value": 1, + "range": [ + 1, + 500 + ] + }, + "presencePenalty": { + "value": 1, + "range": [ + 0, + 1 + ] + }, + "frequencyPenalty": { + "value": 1, + "range": [ + 0, + 1 + ] + }, + "stopSequences": { + "value": [], + "range": [] + } + } + } + }, + "requiresAPIKey": true, + "remoteInference": true, + "defaultParameters": null, + "searchURL": null + }, + "aleph-alpha": { + "models": { + "luminous-supreme-control": { + "capabilities": [ + "logprobs" + ], + "enabled": false, + "status": "ready", + "parameters": { + "temperature": { + "value": 1, + "range": [ + 0, + 1 + ] + }, + "maximumLength": { + "value": 200, + "range": [ + 50, + 1024 + ] + }, + "topP": { + "value": 1, + "range": [ + 0, + 1 + ] + }, + "topK": { + "value": 1, + "range": [ + 1, + 500 + ] + }, + "repetitionPenalty": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "stopSequences": { + "value": [], + "range": [] + } + } + }, + "luminous-base": { + "capabilities": [ + "logprobs" + ], + "enabled": false, + "status": "ready", + "parameters": { + "temperature": { + "value": 1, + "range": [ + 0, + 1 + ] + }, + "maximumLength": { + "value": 200, + "range": [ + 50, + 1024 + ] + }, + "topP": { + "value": 1, + "range": [ + 0, + 1 + ] + }, + "topK": { + "value": 1, + "range": [ + 1, + 500 + ] + }, + "repetitionPenalty": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "stopSequences": { + "value": [], + "range": [] + } + } + }, + "luminous-supreme": { + "capabilities": [ + "logprobs" + ], + "enabled": false, + "status": "ready", + "parameters": { + "temperature": { + "value": 1, + "range": [ + 0, + 1 + ] + }, + "maximumLength": { + "value": 200, + "range": [ + 50, + 1024 + ] + }, + "topP": { + "value": 1, + 
"range": [ + 0, + 1 + ] + }, + "topK": { + "value": 1, + "range": [ + 1, + 500 + ] + }, + "repetitionPenalty": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "stopSequences": { + "value": [], + "range": [] + } + } + }, + "luminous-extended": { + "capabilities": [ + "logprobs" + ], + "enabled": false, + "status": "ready", + "parameters": { + "temperature": { + "value": 1, + "range": [ + 0, + 1 + ] + }, + "maximumLength": { + "value": 200, + "range": [ + 50, + 1024 + ] + }, + "topP": { + "value": 1, + "range": [ + 0, + 1 + ] + }, + "topK": { + "value": 1, + "range": [ + 1, + 500 + ] + }, + "repetitionPenalty": { + "value": 1, + "range": [ + 0.1, + 1 + ] + }, + "stopSequences": { + "value": [], + "range": [] + } + } + } + }, + "requiresAPIKey": true, + "remoteInference": true, + "defaultParameters": null, + "searchURL": null }, "openplayground": { + "models": {}, "requiresAPIKey": false, "remoteInference": false, - "searchURL": "https://openplayground.filler/api/search?q={searchQuery}", - "defaultCapabilities": [], "defaultParameters": { "temperature": { "value": 1, @@ -959,6 +1750,6 @@ ] } }, - "models": {} + "searchURL": "https://openplayground.filler/api/search?q={searchQuery}" } } \ No newline at end of file diff --git a/server/requirements.txt b/server/requirements.txt index 12a741f..5806f6f 100755 --- a/server/requirements.txt +++ b/server/requirements.txt @@ -14,3 +14,4 @@ six==1.16.0 sseclient==0.0.27 torch==2.0.0 transformers==4.27.1 +llama-cpp-python==0.1.19