From 1295c6a9d30b1f94185156ec8d9ca7c893c4f6c6 Mon Sep 17 00:00:00 2001 From: Yanli Date: Thu, 13 Jun 2024 16:02:50 +0800 Subject: [PATCH] update clean text --- src/gpt_sovits/infer/text_utils.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/gpt_sovits/infer/text_utils.py b/src/gpt_sovits/infer/text_utils.py index 8edd312d..9a85da3a 100644 --- a/src/gpt_sovits/infer/text_utils.py +++ b/src/gpt_sovits/infer/text_utils.py @@ -16,7 +16,7 @@ def cut5(inp: str, splits: Sequence[str]): return mergeitems -def merge_short_texts(texts: List[str], threshold: int = 32): +def merge_short_texts(texts: List[str], threshold: int = 36): """Merge short texts to longer ones. Texts are generated by cut5.""" result: List[str] = [] text = "" @@ -42,10 +42,15 @@ def clean_and_cut_text(text: str) -> List[str]: text = new_text lines = [line.strip() for line in text.split("\n") if line.strip()] - sents = [sent for line in lines for sent in cut5(line, tier1_punc)] - texts = [ - "," + merged.strip() - for sent in sents - for merged in merge_short_texts(cut5(sent, ",")) - ] - return texts + res: List[str] = [] + for line in lines: + sents = [sent for sent in cut5(line, tier1_punc)] + texts = [ + merged.strip() + for sent in sents + for merged in merge_short_texts(cut5(sent, ",")) + ] + texts = merge_short_texts(texts) + res.extend(texts) + res = [("。" + x) if x[0] != "。" else x for x in res] + return res