diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6769e21 --- /dev/null +++ b/.gitignore @@ -0,0 +1,160 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. 
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ \ No newline at end of file diff --git a/evaluate_model.py b/evaluate_model.py new file mode 100644 index 0000000..cdd5b68 --- /dev/null +++ b/evaluate_model.py @@ -0,0 +1,72 @@ +import pandas as pd +import requests +import time +import os +from dotenv import load_dotenv +from vllm_api_client import post_http_request, get_response +load_dotenv() + +only_prompts = pd.read_json("/dataset/only_prompts.json")[0] +N_PROMPTS_TO_PROCESS = 5 + +BASE_URL = "http://213.173.102.136:10400" + +VERIFY_TOKEN = os.getenv("VERIFY_TOKEN") + +def get_evaluation(json_payload): + response = requests.post(BASE_URL, json=json_payload) + return response.json() + +def get_model_response(prompt): + start_time = time.time() + host = "localhost" + port = 8000 + api_url = f"http://{host}:{port}/generate" + n = 4 + stream = False + response = post_http_request(prompt, api_url, n, stream) + output = get_response(response) + end_time = time.time() + elapsed_time = end_time - start_time + return elapsed_time, output + +def generate_evaluation_json_list(prompts_df): + evaluations_json_list = [] + for prompt in prompts_df[:N_PROMPTS_TO_PROCESS]: + elapsed_time, text_response = get_model_response(prompt) + json_payload = { + "verify_token": VERIFY_TOKEN, + "prompt": prompt, + "responses": [text_response,] + } + evaluation_json = get_evaluation(json_payload) + evaluation_json["time_elapsed_in_seconds"] = elapsed_time + evaluations_json_list.append(evaluation_json) + return evaluations_json_list + +def generate_evaluation_df_from_json_list(evaluations_json_list): + evaluation_data_list = [] + for evaluation_json in evaluations_json_list: + rewards_details = evaluation_json["reward_details"] + reciprocate_reward_model=rewards_details["reciprocate_reward_model"][0] + relevance_filter=rewards_details["relevance_filter"][0] + rlhf_reward_model=rewards_details["rlhf_reward_model"][0] + evaluation_data_list.append( + [ + reciprocate_reward_model, + relevance_filter, + rlhf_reward_model, + 
evaluation_json["rewards"][0], + evaluation_json["time_elapsed_in_seconds"] + ] + ) + evaluation_df= pd.DataFrame(evaluation_data_list, columns=["reciprocate_reward_model", "relevance_filter", "rlhf_reward_model", "rewards", "time_elapsed_in_seconds"]) + return evaluation_df + + +if __name__ == "__main__": + prompts = pd.read_json("/dataset/only_prompts.json")[0] + evaluations_json_list = generate_evaluation_json_list(prompts) + evaluation_df = generate_evaluation_df_from_json_list(evaluations_json_list) + evaluation_df.to_csv("/dataset/evaluation.csv") + diff --git a/run_server.sh b/run_server.sh new file mode 100644 index 0000000..0a2eb36 --- /dev/null +++ b/run_server.sh @@ -0,0 +1,2 @@ +python -m vllm.entrypoints.api_server \ + --model TheBloke/vicuna-13B-1.1-GPTQ-4bit-128g \ No newline at end of file diff --git a/sync_files_with_vm.sh b/sync_files_with_vm.sh new file mode 100644 index 0000000..dfa00de --- /dev/null +++ b/sync_files_with_vm.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +GREEN=$(tput setaf 2) +RED=$(tput setaf 1) +NC=$(tput sgr0) + +PROJECT_ID="salto-gpt" +ZONE="us-central1-a" +INSTANCE_NAME="salto-gpt" +EXTERNAL_IP_ADRESS="34.31.175.223" + +echo "${GREEN}Deploying to production...${NC}" + +echo "${GREEN} Pushing last changes...${NC}" +rsync -av --delete --exclude-from=$(pwd)'/.gitignore' $PWD/. 
mateusnobre@$EXTERNAL_IP_ADRESS:~/app diff --git a/test_models/build_evaluator.ipynb b/test_models/build_evaluator.ipynb new file mode 100644 index 0000000..8b895c4 --- /dev/null +++ b/test_models/build_evaluator.ipynb @@ -0,0 +1,146 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 31, + "id": "eb740c69-2468-449a-84aa-d273e1c1902b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import requests\n", + "import json\n", + "import time\n", + "import openai" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "4a77e000-00c1-4420-a0de-6e31b7be1662", + "metadata": {}, + "outputs": [], + "source": [ + "only_prompts = pd.read_json(\"../dataset/only_prompts.json\")[0]\n", + "N_PROMPTS_TO_PROCESS = 5" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "6fab9874-65f6-4455-a8bc-5d175a21e758", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "BASE_URL = \"http://213.173.102.136:10400\"\n", + "\n", + "VERIFY_TOKEN = \"SjhSXuEmZoW#%SD@#nAsd123bash#$%&@n\"\n", + "def get_evaluation(json_payload):\n", + " response = requests.post(BASE_URL, json=json_payload)\n", + " return response.json()" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "d8349577-9bd6-4d29-9b9a-145281c445e9", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def get_model_response(prompt, model_name=None):\n", + " start_time = time.time()\n", + " time.sleep(0.1)\n", + " response = \"error\"\n", + " end_time = time.time()\n", + " elapsed_time = end_time - start_time\n", + " return elapsed_time, response\n", + "def generate_evaluation_json_list(prompts_df):\n", + " evaluations_json_list = []\n", + " for prompt in prompts_df[:N_PROMPTS_TO_PROCESS]:\n", + " elapsed_time, text_response = get_model_response(prompt)\n", + " json_payload = {\n", + " \"verify_token\": VERIFY_TOKEN,\n", + " \"prompt\": prompt,\n", + " \"responses\": [text_response,]\n", + " }\n", + " 
evaluation_json = get_evaluation(json_payload)\n", + " evaluation_json[\"time_elapsed_in_seconds\"] = elapsed_time\n", + " evaluations_json_list.append(evaluation_json)\n", + " return evaluations_json_list\n", + "def generate_evaluation_df_from_json_list(evaluations_json_list):\n", + " evaluation_data_list = []\n", + " for evaluation_json in evaluations_json_list:\n", + " rewards_details = evaluation_json[\"reward_details\"]\n", + " reciprocate_reward_model=rewards_details[\"reciprocate_reward_model\"][0]\n", + " relevance_filter=rewards_details[\"relevance_filter\"][0]\n", + " rlhf_reward_model=rewards_details[\"rlhf_reward_model\"][0]\n", + " evaluation_data_list.append(\n", + " [\n", + " reciprocate_reward_model,\n", + " relevance_filter,\n", + " rlhf_reward_model,\n", + " evaluation_json[\"rewards\"][0],\n", + " evaluation_json[\"time_elapsed_in_seconds\"]\n", + " ]\n", + " )\n", + " evaluation_df= pd.DataFrame(evaluation_data_list, columns=[\"reciprocate_reward_model\", \"relevance_filter\", \"rlhf_reward_model\", \"rewards\", \"time_elapsed_in_seconds\"])\n", + " return evaluation_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "822fba8b-0aba-464b-a074-b4085731cb98", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "4c7d04a5-74f1-4004-b82a-adddcd1f9f50", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c0b05b7-0105-4fd3-a248-15bdd593b2db", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + 
} + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/test_models/test_reward_endpoint.json b/test_models/test_reward_endpoint.json new file mode 100644 index 0000000..83fafa8 --- /dev/null +++ b/test_models/test_reward_endpoint.json @@ -0,0 +1 @@ +{"reward_details": {"reciprocate_reward_model": [0.596572995185852, 0.4419805407524109, 0.596572995185852, 0.4419805407524109], "relevance_filter": [1.0, 0.0, 1.0, 0.0], "rlhf_reward_model": [0.605496883392334, 0.48980969190597534, 0.605496883392334, 0.48980969190597534]}, "rewards": [0.6019273400306702, 0.0, 0.6019273400306702, 0.0]} \ No newline at end of file diff --git a/test_models/test_reward_endpoint.py b/test_models/test_reward_endpoint.py index ba0edd2..ae60ed5 100644 --- a/test_models/test_reward_endpoint.py +++ b/test_models/test_reward_endpoint.py @@ -4,7 +4,7 @@ # Endpoint URL # url = "http://url:port" url = "http://213.173.102.136:10400" - +# "http://213.173.102.136:10400", "http://213.173.102.136:10401", "http://213.173.102.136:10402" # Data to send data = { "verify_token": "SjhSXuEmZoW#%SD@#nAsd123bash#$%&@n", # Your authentication token @@ -17,9 +17,18 @@ Experimental Confirmation: Theoretical physics is full of brilliant ideas, but not all of them are correct. It often takes considerable time to gather the experimental evidence needed to confirm or refute theoretical predictions. In Nambu's case, his theories were eventually confirmed by the discovery of the Higgs boson in 2012, which required the construction of the Large Hadron Collider, a project of unprecedented scale in experimental physics. Nobel Selection Process: The process for awarding the Nobel Prize is notoriously selective and conservative. It often takes many years, sometimes decades, for a discovery to be deemed "Nobel worthy". This is in part because the committee tends to award those whose contributions have stood the test of time and have been thoroughly vetted by the scientific community. 
Given the above, it's not unusual that it took nearly 50 years for Nambu's work to be recognized with a Nobel Prize. +As for a related and insightful question: Can you elaborate on the specific contributions Nambu made in the field of spontaneous symmetry breaking, and how these contributions influenced the work of other physicists like Higgs and Englert? + ''', + 'This is a bad answer that does not relate to the prompt so it will receive lower scores for both reward models and a 0 for the relevance filter, making the total score 0.', + ''' +Groundbreaking Work: The initial work by Nambu on spontaneous symmetry breaking was indeed groundbreaking. However, groundbreaking work is often ahead of its time, and it can take years, sometimes decades, for the full impact of such work to be understood and appreciated by the wider scientific community. +Experimental Confirmation: Theoretical physics is full of brilliant ideas, but not all of them are correct. It often takes considerable time to gather the experimental evidence needed to confirm or refute theoretical predictions. In Nambu's case, his theories were eventually confirmed by the discovery of the Higgs boson in 2012, which required the construction of the Large Hadron Collider, a project of unprecedented scale in experimental physics. +Nobel Selection Process: The process for awarding the Nobel Prize is notoriously selective and conservative. It often takes many years, sometimes decades, for a discovery to be deemed "Nobel worthy". This is in part because the committee tends to award those whose contributions have stood the test of time and have been thoroughly vetted by the scientific community. +Given the above, it's not unusual that it took nearly 50 years for Nambu's work to be recognized with a Nobel Prize. 
As for a related and insightful question: Can you elaborate on the specific contributions Nambu made in the field of spontaneous symmetry breaking, and how these contributions influenced the work of other physicists like Higgs and Englert? ''', 'This is a bad answer that does not relate to the prompt so it will receive lower scores for both reward models and a 0 for the relevance filter, making the total score 0.' + ] } @@ -27,4 +36,7 @@ response = requests.post(url, json=data) # Print the response -print(json.dumps(response.json())) \ No newline at end of file +print(json.dumps(response.json())) + +with open('test_reward_endpoint.json', 'w') as f: + json.dump(response.json(), f) \ No newline at end of file diff --git a/vllm_api_client.py b/vllm_api_client.py new file mode 100644 index 0000000..14b4fdb --- /dev/null +++ b/vllm_api_client.py @@ -0,0 +1,77 @@ +"""Example Python client for vllm.entrypoints.api_server""" + +import argparse +import json +from typing import Iterable, List + +import requests + + +def clear_line(n: int = 1) -> None: + LINE_UP = '\033[1A' + LINE_CLEAR = '\x1b[2K' + for _ in range(n): + print(LINE_UP, end=LINE_CLEAR, flush=True) + + +def post_http_request(prompt: str, + api_url: str, + n: int = 1, + stream: bool = False) -> requests.Response: + headers = {"User-Agent": "Test Client"} + pload = { + "prompt": prompt, + "n": n, + "use_beam_search": True, + "temperature": 0.0, + "max_tokens": 16, + "stream": stream, + } + response = requests.post(api_url, headers=headers, json=pload, stream=True) + return response + + +def get_streaming_response(response: requests.Response) -> Iterable[List[str]]: + for chunk in response.iter_lines(chunk_size=8192, + decode_unicode=False, + delimiter=b"\0"): + if chunk: + data = json.loads(chunk.decode("utf-8")) + output = data["text"] + yield output + + +def get_response(response: requests.Response) -> List[str]: + data = json.loads(response.content) + output = data["text"] + return output + + +if __name__ == 
"__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--host", type=str, default="localhost") + parser.add_argument("--port", type=int, default=8000) + parser.add_argument("--n", type=int, default=4) + parser.add_argument("--prompt", type=str, default="San Francisco is a") + parser.add_argument("--stream", action="store_true") + args = parser.parse_args() + prompt = args.prompt + api_url = f"http://{args.host}:{args.port}/generate" + n = args.n + stream = args.stream + + print(f"Prompt: {prompt!r}\n", flush=True) + response = post_http_request(prompt, api_url, n, stream) + + if stream: + num_printed_lines = 0 + for h in get_streaming_response(response): + clear_line(num_printed_lines) + num_printed_lines = 0 + for i, line in enumerate(h): + num_printed_lines += 1 + print(f"Beam candidate {i}: {line!r}", flush=True) + else: + output = get_response(response) + for i, line in enumerate(output): + print(f"Beam candidate {i}: {line!r}", flush=True) \ No newline at end of file