forked from RVC-Boss/GPT-SoVITS
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
fa05041
commit 7f5c29b
Showing
4 changed files
with
97 additions
and
83 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
from gpt_sovits.infer.inference import GPTSoVITSInference | ||
from gpt_sovits.infer.worker import GPTSoVITSInference | ||
|
||
__all__ = ["GPTSoVITSInference"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
import re | ||
from typing import List | ||
|
||
|
||
# Punctuation marks (full-width and ASCII) that end a piece of text.
splits = {
    ",",
    "。",
    "?",
    "!",
    ",",
    ".",
    "?",
    "!",
    "~",
    ":",
    ":",
    "—",
    "…",
}

# Compiled once at import time. Every mark is escaped so that adding a
# regex-special character (e.g. "-", "]" or "^") to ``splits`` later cannot
# silently corrupt the character class. Sorting keeps the pattern stable
# across runs (set iteration order is not).
_SPLIT_PATTERN = re.compile(
    "([" + "".join(re.escape(mark) for mark in sorted(splits)) + "])"
)


def cut5(inp: str) -> List[str]:
    """Cut one line of text into pieces, each ending with its punctuation.

    The line is split at every character found in ``splits``; each mark is
    glued back onto the text that precedes it, and runs of consecutive marks
    stay together on the same piece. When the line does not end with a mark,
    a "." is appended to the last piece.

    Args:
        inp: A single line of text.

    Returns:
        The pieces in their original order. An empty ``inp`` yields an
        empty list.
    """
    if not inp:
        # Splitting "" gives [""], which the trailing-empty trim below would
        # reduce to [] and then fail on ``pieces[0]``.
        return []

    # With a capturing group the result alternates text, mark, text, ...
    # and always has odd length.
    pieces = _SPLIT_PATTERN.split(inp)
    if pieces[-1] == "":
        # The line ended with a mark: drop the empty tail.
        pieces.pop()
    if len(pieces) % 2 == 1:
        # The line ended with plain text: terminate it.
        pieces.append(".")

    merged: List[str] = [pieces[0]]
    for piece in pieces[1:]:
        if piece == "":
            # Empty text between two adjacent marks.
            continue
        if piece in splits:
            merged[-1] += piece
        else:
            merged.append(piece)

    return merged
|
||
|
||
def merge_short_texts(texts: List[str], threshold: int = 6):
    """Concatenate consecutive texts until each chunk reaches ``threshold``.

    Texts are consumed in order and accumulated; as soon as the accumulated
    length is at least ``threshold`` characters the chunk is emitted and a
    new one is started. A non-empty remainder left at the end is emitted as
    its own (possibly shorter) chunk. Input is expected to come from cut5.
    """
    merged: List[str] = []
    pending: List[str] = []
    pending_len = 0

    for piece in texts:
        pending.append(piece)
        pending_len += len(piece)
        if pending_len < threshold:
            continue
        merged.append("".join(pending))
        pending = []
        pending_len = 0

    # Flush whatever did not reach the threshold, unless it is empty.
    if pending_len:
        merged.append("".join(pending))
    return merged
|
||
|
||
def clean_and_cut_text(text: str) -> List[str]:
    """Split multi-line text into cleaned chunks.

    Each non-blank line is stripped, cut with cut5 and regrouped with
    merge_short_texts. Chunks are stripped, and chunks that are empty or
    consist only of characters from ``splits`` are dropped. Any remaining
    chunk shorter than five characters gets a "." prepended.
    """
    chunks: List[str] = []
    for raw_line in text.split("\n"):
        line = raw_line.strip()
        if not line:
            continue
        for merged in merge_short_texts(cut5(line)):
            candidate = merged.strip()
            # Keep only chunks with at least one non-punctuation character.
            if any(char not in splits for char in candidate):
                chunks.append(candidate)

    return [chunk if len(chunk) >= 5 else "." + chunk for chunk in chunks]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters