diff --git a/.gitignore b/.gitignore index a262569f..bcc67868 100644 --- a/.gitignore +++ b/.gitignore @@ -8,9 +8,4 @@ __pycache__ env runtime .idea -output -logs -reference -GPT_weights -SoVITS_weights -TEMP \ No newline at end of file +dist/ \ No newline at end of file diff --git a/README.md b/README.md index d59f6eeb..47c8a0aa 100644 --- a/README.md +++ b/README.md @@ -24,42 +24,4 @@ I do not add the packages related to torch to the dependencies of GPT-SoVITS-Inf ## Usage Example -Download the pretrained model from [HuggingFace](https://huggingface.co/lj1995/GPT-SoVITS) to `pretrained_models` folder (or any place you like): - -``` -git clone https://huggingface.co/lj1995/GPT-SoVITS pretrained_models -``` - -Create a `playground` folder (or any place you like) to save prompt audio and outputs: - -``` -mkdir playground -wget -P playground https://huggingface.co/datasets/BeautyyuYanli/sample_files/resolve/main/%E4%BD%A0%E5%A5%BD%20ChatGPT%EF%BC%8C%E8%AF%B7%E9%97%AE%E4%BD%A0%E7%9F%A5%E9%81%93%E4%B8%BA%E4%BB%80%E4%B9%88%E9%B2%81%E8%BF%85%E6%9A%B4%E6%89%93%E5%91%A8%E6%A0%91%E4%BA%BA%E5%90%97%EF%BC%9F.wav -``` - -The prompt audio should be about 3-10 seconds. - -Generate the output: - -```python -from scipy.io import wavfile - -inference = GPTSoVITSInference( - bert_path="pretrained_models/chinese-roberta-wwm-ext-large", - cnhubert_base_path="pretrained_models/chinese-hubert-base", -) -inference.load_sovits("pretrained_models/s2G488k.pth") -inference.load_gpt( - "pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt" -) -prompt_text = "你好 ChatGPT,请问你知道为什么鲁迅暴打周树人吗?" -inference.set_prompt_audio( - prompt_audio_path=f"playground/{prompt_text}.wav", - prompt_text=prompt_text, -) - -sample_rate, data = inference.get_tts_wav( - text="鲁迅为什么暴打周树人???这是一个问题\n\n自古以来,文人相轻,鲁迅和周树人也不例外。鲁迅和周树人是中国现代文学史上的两位伟大作家,他们的文学成就都是不可磨灭的。但是,鲁迅和周树人之间的关系并不和谐,两人之间曾经发生过一次激烈的冲突,甚至还打了起来。那么,鲁迅为什么会暴打周树人呢?这是一个问题。 ", -) -wavfile.write(f"playground/output.wav", sample_rate, data) -``` \ No newline at end of file +Check out the [example](example.ipynb) notebook for a quick start. \ No newline at end of file diff --git a/example.ipynb b/example.ipynb new file mode 100644 index 00000000..60a657ab --- /dev/null +++ b/example.ipynb @@ -0,0 +1,168 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Example Usage of GPT-SoVITS-Infer\n", + "\n", + "Prepare environments:" + ], + "metadata": { + "id": "8LZrsYDHOdPc" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YoaU2LKfMlQ0" + }, + "outputs": [], + "source": [ + "%pip install torch torchvision torchaudio pytorch-lightning" + ] + }, + { + "cell_type": "code", + "source": [ + "!apt install ffmpeg libsox-dev" + ], + "metadata": { + "id": "dtW6fe-3MvQD" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "%pip install GPT-SoVITS-Infer" + ], + "metadata": { + "id": "96sF4CkeN5yw" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Download the pretrained model from [HuggingFace](https://huggingface.co/lj1995/GPT-SoVITS) to `pretrained_models` folder (or any place you like):" + ], + "metadata": { + "id": "5UGpX8PJOpUX" + } + }, + { + "cell_type": "code", + "source": [ + "!git clone https://huggingface.co/lj1995/GPT-SoVITS pretrained_models" + ], + "metadata": { + "id": "3kXPwEe1OJ11" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Create a `playground` folder (or any place you like) to save prompt audio and outputs.\n", + "\n", + "The prompt audio should be about 3-10 seconds." + ], + "metadata": { + "id": "_6Gpwe_9Ovhy" + } + }, + { + "cell_type": "code", + "source": [ + "!mkdir playground\n", + "!wget -P playground https://huggingface.co/datasets/BeautyyuYanli/sample_files/resolve/main/%E4%BD%A0%E5%A5%BD%20ChatGPT%EF%BC%8C%E8%AF%B7%E9%97%AE%E4%BD%A0%E7%9F%A5%E9%81%93%E4%B8%BA%E4%BB%80%E4%B9%88%E9%B2%81%E8%BF%85%E6%9A%B4%E6%89%93%E5%91%A8%E6%A0%91%E4%BA%BA%E5%90%97%EF%BC%9F.wav" + ], + "metadata": { + "id": "-zouFhW8OOj9" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Generate the output:" + ], + "metadata": { + "id": "ckGLyuIWO2J_" + } + }, + { + "cell_type": "code", + "source": [ + "from gpt_sovits.infer import GPTSoVITSInference\n", + "from scipy.io import wavfile\n", + "\n", + "inference = GPTSoVITSInference(\n", + " bert_path=\"pretrained_models/chinese-roberta-wwm-ext-large\",\n", + " cnhubert_base_path=\"pretrained_models/chinese-hubert-base\",\n", + " is_half=False, # Seems not support for the Colab environment\n", + ")\n", + "inference.load_sovits(\"pretrained_models/s2G488k.pth\")\n", + "inference.load_gpt(\n", + " \"pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt\"\n", + ")\n", + "prompt_text = \"你好 ChatGPT,请问你知道为什么鲁迅暴打周树人吗?\"\n", + "inference.set_prompt_audio(\n", + " prompt_audio_path=f\"playground/{prompt_text}.wav\",\n", + " prompt_text=prompt_text,\n", + ")\n", + "\n", + "sample_rate, data = inference.get_tts_wav(\n", + " text=\"鲁迅为什么暴打周树人???这是一个问题\\n\\n自古以来,文人相轻,鲁迅和周树人也不例外。鲁迅和周树人是中国现代文学史上的两位伟大作家,他们的文学成就都是不可磨灭的。但是,鲁迅和周树人之间的关系并不和谐,两人之间曾经发生过一次激烈的冲突,甚至还打了起来。那么,鲁迅为什么会暴打周树人呢?这是一个问题。 \",\n", + ")\n", + "wavfile.write(f\"playground/output.wav\", sample_rate, data)" + ], + "metadata": { + "id": "2MW_r47sOUGD" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Now you can play the audio by the following code. Or find the file in `playground/output`" + ], + "metadata": { + "id": "8kUL_vjNQYaH" + } + }, + { + "cell_type": "code", + "source": [ + "from IPython.display import Audio, display\n", + "\n", + "display(Audio(data=data, rate=sample_rate, autoplay=True))" + ], + "metadata": { + "id": "JqYPlLezPwsM" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file