diff --git a/modelscope/models/audio/tts/frontend/generic_text_to_speech_frontend.py b/modelscope/models/audio/tts/frontend/generic_text_to_speech_frontend.py index ed34143f..c6aabf75 100644 --- a/modelscope/models/audio/tts/frontend/generic_text_to_speech_frontend.py +++ b/modelscope/models/audio/tts/frontend/generic_text_to_speech_frontend.py @@ -2,8 +2,6 @@ import os import zipfile from typing import Any, Dict, List -import ttsfrd - from modelscope.models.base import Model from modelscope.models.builder import MODELS from modelscope.utils.audio.tts_exceptions import ( @@ -20,6 +18,8 @@ class GenericTtsFrontend(Model): def __init__(self, model_dir='.', lang_type='pinyin', *args, **kwargs): super().__init__(model_dir, *args, **kwargs) + import ttsfrd + frontend = ttsfrd.TtsFrontendEngine() zip_file = os.path.join(model_dir, 'resource.zip') self._res_path = os.path.join(model_dir, 'resource') diff --git a/modelscope/models/audio/tts/vocoder/models/models.py b/modelscope/models/audio/tts/vocoder/models/models.py index 83fc7dc2..c46a9204 100755 --- a/modelscope/models/audio/tts/vocoder/models/models.py +++ b/modelscope/models/audio/tts/vocoder/models/models.py @@ -3,7 +3,6 @@ from distutils.version import LooseVersion import torch import torch.nn as nn import torch.nn.functional as F -from pytorch_wavelets import DWT1DForward from torch.nn import AvgPool1d, Conv1d, Conv2d, ConvTranspose1d from torch.nn.utils import remove_weight_norm, spectral_norm, weight_norm @@ -357,6 +356,7 @@ class MultiScaleDiscriminator(torch.nn.Module): DiscriminatorS(), DiscriminatorS(), ]) + from pytorch_wavelets import DWT1DForward self.meanpools = nn.ModuleList( [DWT1DForward(wave='db3', J=1), DWT1DForward(wave='db3', J=1)]) diff --git a/modelscope/preprocessors/text_to_speech.py b/modelscope/preprocessors/text_to_speech.py index fd41b752..8b8dae14 100644 --- a/modelscope/preprocessors/text_to_speech.py +++ b/modelscope/preprocessors/text_to_speech.py @@ -2,8 +2,6 @@ import io from typing import Any, Dict, Union -import ttsfrd - from modelscope.fileio import File from modelscope.models.audio.tts.frontend import GenericTtsFrontend from modelscope.models.base import Model