Skip to content

Commit

Permalink
enhance text
Browse files: browse the repository at this point in the history
  • Loading branch information
BeautyyuYanli committed Jun 8, 2024
1 parent 64a0093 commit 3512b85
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 70 deletions.
2 changes: 1 addition & 1 deletion pdm.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 1 addition & 3 deletions src/gpt_sovits/infer/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from gpt_sovits.text.cleaner import clean_text
from gpt_sovits.module.mel_processing import spectrogram_torch

from gpt_sovits.infer.text_utils import clean_and_cut_text, full_splits
from gpt_sovits.infer.text_utils import clean_and_cut_text


class DictToAttrRecursive(dict):
Expand Down Expand Up @@ -291,8 +291,6 @@ def set_prompt_audio(
):
if prompt_text:
prompt_text = prompt_text.strip("\n")
if prompt_text[-1] not in full_splits:
prompt_text += "."
self.prompt_text = prompt_text

if prompt_audio_path:
Expand Down
87 changes: 21 additions & 66 deletions src/gpt_sovits/infer/text_utils.py
Original file line number Diff line number Diff line change
@@ -1,70 +1,18 @@
import re
from typing import List, Set
from typing import List, Sequence

tier1_splits = {
"。",
"?",
"!",
".",
"?",
"!",
}
tier1_punc = ".?!。?!"
tier2_punc = ",.:;…~-—,、:;"
tier3_punc = "\"'“”‘’()《》【】[]「」『』<>·"

tier2_splits = {
",",
",",
":",
":",
"—",
"…",
"~",
"、",
";",
";",
"(",
"(",
")",
")",
"《",
"》",
"“",
"”",
"‘",
"’",
'"',
"'",
"【",
"】",
"[",
"]",
"「",
"」",
"『",
"』",
"<",
">",
}

full_splits = tier1_splits | tier2_splits


def cut5(inp: str, splits: Set[str], append_dot: str):
def cut5(inp: str, splits: Sequence[str]):
"""Cut one line of text into pieces."""
items = re.split(f"([{''.join(re.escape(x) for x in splits)}])", inp)
if items[-1] == "":
items = items[:-1]
items = [item.strip() for item in items if item.strip()]
if len(items) % 2 == 1:
items.append(append_dot)

mergeitems: List[str] = [items[0]]
for item in items[1:]:
if item == "":
continue
if item not in splits:
mergeitems.append(item)
else:
mergeitems[-1] += item

items.append("")
mergeitems = [a + b for a, b in zip(items[0::2], items[1::2])]
return mergeitems


Expand All @@ -83,14 +31,21 @@ def merge_short_texts(texts: List[str], threshold: int = 32):


def clean_and_cut_text(text: str) -> List[str]:
new_text = ""
for char in text:
if char in tier2_punc:
new_text += ","
elif char in tier3_punc:
new_text += " "
else:
new_text += char
text = new_text

lines = [line.strip() for line in text.split("\n") if line.strip()]
sents = [
sent for line in lines for sent in cut5(line, tier1_splits, ".") if sent.strip()
]
sents = [sent for line in lines for sent in cut5(line, tier1_punc)]
texts = [
merged.strip()
"," + merged.strip()
for sent in sents
for merged in merge_short_texts(cut5(sent, tier2_splits, ""))
for merged in merge_short_texts(cut5(sent, ","))
]
texts = [("." + text) if len(text) < 5 else text for text in texts]
return texts

0 comments on commit 3512b85

Please sign in to comment.