mirror of
https://github.com/myshell-ai/OpenVoice.git
synced 2025-12-24 04:09:33 +01:00
add chinese model
This commit is contained in:
58
utils.py
58
utils.py
@@ -75,6 +75,13 @@ def bits_to_string(bits_array):
|
||||
return output_string
|
||||
|
||||
|
||||
def split_sentence(text, min_len=10, language_str='[EN]'):
    """Split ``text`` into chunks using the splitter for its language.

    Args:
        text: input text to split.
        min_len: forwarded unchanged to the language-specific splitter.
        language_str: language tag. ``'EN'`` and its bracketed spelling
            ``'[EN]'`` (this function's own default) select the Latin
            splitter; every other value selects the Chinese splitter.

    Returns:
        The value returned by the selected splitter.
    """
    # The default tag is spelled '[EN]' while the dispatch compares against
    # 'EN'. Strip one surrounding bracket pair so both spellings match and the
    # default no longer falls through to the Chinese splitter.
    tag = language_str
    if isinstance(tag, str) and tag.startswith('[') and tag.endswith(']'):
        tag = tag[1:-1]

    if tag == 'EN':
        return split_sentences_latin(text, min_len=min_len)
    return split_sentences_zh(text, min_len=min_len)
|
||||
|
||||
def split_sentences_latin(text, min_len=10):
|
||||
"""Split Long sentences into list of short ones
|
||||
|
||||
@@ -133,4 +140,55 @@ def merge_short_sentences_latin(sens):
|
||||
sens_out.pop(-1)
|
||||
except:
|
||||
pass
|
||||
return sens_out
|
||||
|
||||
def split_sentences_zh(text, min_len=10):
    """Split Chinese (or mixed) text into chunks longer than ``min_len``.

    The text is first normalised: sentence-ending marks become ``.``,
    full-width commas become ``,`` and runs of newlines/tabs/spaces collapse
    to a single space. It is then cut after every punctuation mark, and the
    resulting pieces are accumulated (joined with a space) until the running
    character count exceeds ``min_len``. The final piece always closes the
    current chunk. The chunks are finally passed through
    ``merge_short_sentences_zh``.

    Args:
        text: input text.
        min_len: a chunk is closed once its accumulated length is strictly
            greater than this value.

    Returns:
        List[str]: the result of ``merge_short_sentences_zh`` on the chunks.
    """
    # Sentence-ending marks, full-width (。!?;) and ASCII (!?;), all
    # collapse to '.'. Escapes are used for the full-width forms so the
    # pattern cannot be silently folded to ASCII by Unicode normalisation
    # of the source file.
    text = re.sub('[\u3002\uff01\uff1f\uff1b!?;]', '.', text)
    # Full-width comma -> ASCII comma.
    text = re.sub('\uff0c', ',', text)
    # Collapse newlines, tabs and spaces into a single space.
    text = re.sub('[\n\t ]+', ' ', text)
    # Append a split marker after every punctuation mark.
    text = re.sub('([,.!?;])', r'\1 $#!', text)
    # Cut at the markers and trim surrounding whitespace.
    sentences = [piece.strip() for piece in text.split('$#!')]
    # str.split always yields at least one item; text ending in punctuation
    # (or empty text) leaves an empty trailing piece, which is dropped.
    if not sentences[-1]:
        sentences.pop()

    new_sentences = []
    pending = []
    count_len = 0
    last_index = len(sentences) - 1
    for ind, sent in enumerate(sentences):
        pending.append(sent)
        count_len += len(sent)
        # Close the chunk once it is long enough, or when input is exhausted.
        if count_len > min_len or ind == last_index:
            new_sentences.append(' '.join(pending))
            pending = []
            count_len = 0
    return merge_short_sentences_zh(new_sentences)
|
||||
|
||||
|
||||
def merge_short_sentences_zh(sens):
    """Avoid very short sentences by merging them with a neighbour.

    A sentence of at most 2 characters is joined (with a single space) to the
    sentence that follows it. If the last sentence of the result is still that
    short, it is appended to the one before it instead.

    Args:
        sens (List[str]): list of input sentences.

    Returns:
        List[str]: list of output sentences.
    """
    sens_out = []
    for s in sens:
        # If the previous sentence is too short, merge the current one
        # into it; otherwise start a new entry.
        if sens_out and len(sens_out[-1]) <= 2:
            sens_out[-1] = sens_out[-1] + " " + s
        else:
            sens_out.append(s)

    # A short trailing sentence has nothing after it, so fold it backwards.
    # The explicit length check covers the empty and single-item cases that
    # were previously handled by swallowing every exception.
    if len(sens_out) >= 2 and len(sens_out[-1]) <= 2:
        tail = sens_out.pop()
        sens_out[-1] = sens_out[-1] + " " + tail
    return sens_out
|
||||
Reference in New Issue
Block a user