Skip to content

Commit

Permalink
enhance text split
Browse files Browse the repository at this point in the history
  • Loading branch information
BeautyyuYanli committed Jun 7, 2024
1 parent 55a3be0 commit 64a0093
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 15 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "GPT-SoVITS-Infer"
version = "0.2.2"
version = "0.2.3"
description = "Inference code for GPT-SoVITS"
authors = [
{name = "Yanli",email = "[email protected]"},
Expand Down
4 changes: 2 additions & 2 deletions src/gpt_sovits/infer/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from gpt_sovits.text.cleaner import clean_text
from gpt_sovits.module.mel_processing import spectrogram_torch

from gpt_sovits.infer.text_utils import splits, clean_and_cut_text
from gpt_sovits.infer.text_utils import clean_and_cut_text, full_splits


class DictToAttrRecursive(dict):
Expand Down Expand Up @@ -291,7 +291,7 @@ def set_prompt_audio(
):
if prompt_text:
prompt_text = prompt_text.strip("\n")
if prompt_text[-1] not in splits:
if prompt_text[-1] not in full_splits:
prompt_text += "."
self.prompt_text = prompt_text

Expand Down
55 changes: 43 additions & 12 deletions src/gpt_sovits/infer/text_utils.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,60 @@
import re
from typing import List
from typing import List, Set


splits = {
",",
tier1_splits = {
"。",
"?",
"!",
",",
".",
"?",
"!",
"~",
}

tier2_splits = {
",",
",",
":",
":",
"—",
"…",
"~",
"、",
";",
";",
"(",
"(",
")",
")",
"《",
"》",
"“",
"”",
"‘",
"’",
'"',
"'",
"【",
"】",
"[",
"]",
"「",
"」",
"『",
"』",
"<",
">",
}

# Union of the tier-1 and tier-2 delimiter sets.
full_splits = tier1_splits | tier2_splits

def cut5(inp: str):

def cut5(inp: str, splits: Set[str], append_dot: str):
"""Cut one line of text into pieces."""
items = re.split(f"([{''.join(splits)}])", inp)
items = re.split(f"([{''.join(re.escape(x) for x in splits)}])", inp)
if items[-1] == "":
items = items[:-1]
if len(items) % 2 == 1:
items.append(".")
items.append(append_dot)

mergeitems: List[str] = [items[0]]
for item in items[1:]:
Expand Down Expand Up @@ -55,11 +84,13 @@ def merge_short_texts(texts: List[str], threshold: int = 32):

def clean_and_cut_text(text: str) -> List[str]:
lines = [line.strip() for line in text.split("\n") if line.strip()]
sents = [
sent for line in lines for sent in cut5(line, tier1_splits, ".") if sent.strip()
]
texts = [
merged.strip()
for line in lines
for merged in merge_short_texts(cut5(line))
if not all(char in splits for char in merged.strip())
for sent in sents
for merged in merge_short_texts(cut5(sent, tier2_splits, ""))
]
texts = [("." + text) if len(text) < 5 else text for text in texts]
return texts

0 comments on commit 64a0093

Please sign in to comment.