Skip to content

Commit

Permalink
update clean text
Browse files: browse the repository at this point in the history.
  • Loading branch information
BeautyyuYanli committed Jun 13, 2024
1 parent 3512b85 commit 1295c6a
Showing 1 changed file with 13 additions and 8 deletions.
21 changes: 13 additions & 8 deletions src/gpt_sovits/infer/text_utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -16,7 +16,7 @@ def cut5(inp: str, splits: Sequence[str]):
return mergeitems


def merge_short_texts(texts: List[str], threshold: int = 32):
def merge_short_texts(texts: List[str], threshold: int = 36):
"""Merge short texts to longer ones. Texts are generated by cut5."""
result: List[str] = []
text = ""
Expand All @@ -42,10 +42,15 @@ def clean_and_cut_text(text: str) -> List[str]:
text = new_text

lines = [line.strip() for line in text.split("\n") if line.strip()]
sents = [sent for line in lines for sent in cut5(line, tier1_punc)]
texts = [
"," + merged.strip()
for sent in sents
for merged in merge_short_texts(cut5(sent, ","))
]
return texts
res: List[str] = []
for line in lines:
sents = [sent for sent in cut5(line, tier1_punc)]
texts = [
merged.strip()
for sent in sents
for merged in merge_short_texts(cut5(sent, ","))
]
texts = merge_short_texts(texts)
res.extend(texts)
res = [("。" + x) if x[0] != "。" else x for x in res]
return res

0 comments on commit 1295c6a

Please sign in to comment.