add example

BeautyyuYanli · Jun 5, 2024 · 03894ac · 03894ac
1 parent ef91575
commit 03894ac
Show file tree

Hide file tree

Showing 3 changed files with 170 additions and 45 deletions.
diff --git a/.gitignore b/.gitignore
@@ -8,9 +8,4 @@ __pycache__
 env
 runtime
 .idea
-output
-logs
-reference
-GPT_weights
-SoVITS_weights
-TEMP
+dist/
diff --git a/README.md b/README.md
@@ -24,42 +24,4 @@ I do not add the packages related to torch to the dependencies of GPT-SoVITS-Inf
 
 ## Usage Example
 
-Download the pretrained model from [HuggingFace](https://huggingface.co/lj1995/GPT-SoVITS) to `pretrained_models` folder (or any place you like):
-
-```
-git clone https://huggingface.co/lj1995/GPT-SoVITS pretrained_models
-```
-
-Create a `playground` folder (or any place you like) to save prompt audio and outputs:
-
-```
-mkdir playground
-wget -P playground https://huggingface.co/datasets/BeautyyuYanli/sample_files/resolve/main/%E4%BD%A0%E5%A5%BD%20ChatGPT%EF%BC%8C%E8%AF%B7%E9%97%AE%E4%BD%A0%E7%9F%A5%E9%81%93%E4%B8%BA%E4%BB%80%E4%B9%88%E9%B2%81%E8%BF%85%E6%9A%B4%E6%89%93%E5%91%A8%E6%A0%91%E4%BA%BA%E5%90%97%EF%BC%9F.wav
-```
-
-The prompt audio should be about 3-10 seconds.
-
-Generate the output:
-
-```python
-from scipy.io import wavfile
-
-inference = GPTSoVITSInference(
-    bert_path="pretrained_models/chinese-roberta-wwm-ext-large",
-    cnhubert_base_path="pretrained_models/chinese-hubert-base",
-)
-inference.load_sovits("pretrained_models/s2G488k.pth")
-inference.load_gpt(
-    "pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"
-)
-prompt_text = "你好 ChatGPT，请问你知道为什么鲁迅暴打周树人吗？"
-inference.set_prompt_audio(
-    prompt_audio_path=f"playground/{prompt_text}.wav",
-    prompt_text=prompt_text,
-)
-
-sample_rate, data = inference.get_tts_wav(
-    text="鲁迅为什么暴打周树人？？？这是一个问题\n\n自古以来，文人相轻，鲁迅和周树人也不例外。鲁迅和周树人是中国现代文学史上的两位伟大作家，他们的文学成就都是不可磨灭的。但是，鲁迅和周树人之间的关系并不和谐，两人之间曾经发生过一次激烈的冲突，甚至还打了起来。那么，鲁迅为什么会暴打周树人呢？这是一个问题。  ",
-)
-wavfile.write(f"playground/output.wav", sample_rate, data)
-```
+Check out the [example](example.ipynb) notebook for a quick start.
diff --git a/example.ipynb b/example.ipynb
@@ -0,0 +1,168 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Example Usage of GPT-SoVITS-Infer\n",
+        "\n",
+        "Prepare environments:"
+      ],
+      "metadata": {
+        "id": "8LZrsYDHOdPc"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "YoaU2LKfMlQ0"
+      },
+      "outputs": [],
+      "source": [
+        "%pip install torch torchvision torchaudio pytorch-lightning"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!apt install ffmpeg libsox-dev"
+      ],
+      "metadata": {
+        "id": "dtW6fe-3MvQD"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "%pip install GPT-SoVITS-Infer"
+      ],
+      "metadata": {
+        "id": "96sF4CkeN5yw"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Download the pretrained model from [HuggingFace](https://huggingface.co/lj1995/GPT-SoVITS) to `pretrained_models` folder (or any place you like):"
+      ],
+      "metadata": {
+        "id": "5UGpX8PJOpUX"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!git clone https://huggingface.co/lj1995/GPT-SoVITS pretrained_models"
+      ],
+      "metadata": {
+        "id": "3kXPwEe1OJ11"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Create a `playground` folder (or any place you like) to save prompt audio and outputs.\n",
+        "\n",
+        "The prompt audio should be about 3-10 seconds."
+      ],
+      "metadata": {
+        "id": "_6Gpwe_9Ovhy"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!mkdir playground\n",
+        "!wget -P playground https://huggingface.co/datasets/BeautyyuYanli/sample_files/resolve/main/%E4%BD%A0%E5%A5%BD%20ChatGPT%EF%BC%8C%E8%AF%B7%E9%97%AE%E4%BD%A0%E7%9F%A5%E9%81%93%E4%B8%BA%E4%BB%80%E4%B9%88%E9%B2%81%E8%BF%85%E6%9A%B4%E6%89%93%E5%91%A8%E6%A0%91%E4%BA%BA%E5%90%97%EF%BC%9F.wav"
+      ],
+      "metadata": {
+        "id": "-zouFhW8OOj9"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Generate the output:"
+      ],
+      "metadata": {
+        "id": "ckGLyuIWO2J_"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from gpt_sovits.infer import GPTSoVITSInference\n",
+        "from scipy.io import wavfile\n",
+        "\n",
+        "inference = GPTSoVITSInference(\n",
+        "    bert_path=\"pretrained_models/chinese-roberta-wwm-ext-large\",\n",
+        "    cnhubert_base_path=\"pretrained_models/chinese-hubert-base\",\n",
+        "    is_half=False, # Seems not support for the Colab environment\n",
+        ")\n",
+        "inference.load_sovits(\"pretrained_models/s2G488k.pth\")\n",
+        "inference.load_gpt(\n",
+        "    \"pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt\"\n",
+        ")\n",
+        "prompt_text = \"你好 ChatGPT，请问你知道为什么鲁迅暴打周树人吗？\"\n",
+        "inference.set_prompt_audio(\n",
+        "    prompt_audio_path=f\"playground/{prompt_text}.wav\",\n",
+        "    prompt_text=prompt_text,\n",
+        ")\n",
+        "\n",
+        "sample_rate, data = inference.get_tts_wav(\n",
+        "    text=\"鲁迅为什么暴打周树人？？？这是一个问题\\n\\n自古以来，文人相轻，鲁迅和周树人也不例外。鲁迅和周树人是中国现代文学史上的两位伟大作家，他们的文学成就都是不可磨灭的。但是，鲁迅和周树人之间的关系并不和谐，两人之间曾经发生过一次激烈的冲突，甚至还打了起来。那么，鲁迅为什么会暴打周树人呢？这是一个问题。  \",\n",
+        ")\n",
+        "wavfile.write(f\"playground/output.wav\", sample_rate, data)"
+      ],
+      "metadata": {
+        "id": "2MW_r47sOUGD"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Now you can play the audio by the following code. Or find the file in `playground/output`"
+      ],
+      "metadata": {
+        "id": "8kUL_vjNQYaH"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from IPython.display import Audio, display\n",
+        "\n",
+        "display(Audio(data=data, rate=sample_rate, autoplay=True))"
+      ],
+      "metadata": {
+        "id": "JqYPlLezPwsM"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}