diff --git a/TTS/tts/utils/text/belarusian/__init__.py b/TTS/tts/utils/text/belarusian/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/TTS/tts/utils/text/belarusian/phonemizer.py b/TTS/tts/utils/text/belarusian/phonemizer.py new file mode 100644 index 00000000..3c07a209 --- /dev/null +++ b/TTS/tts/utils/text/belarusian/phonemizer.py @@ -0,0 +1,34 @@ +import os + +finder = None + + +def init(): + try: + import jpype + import jpype.imports + except ModuleNotFoundError: + raise ModuleNotFoundError("Belarusian phonemizer requires to install module 'jpype1' manually. Try `pip install jpype1`.") + + try: + jar_path = os.environ["BEL_FANETYKA_JAR"] + except KeyError: + raise KeyError("You need to define 'BEL_FANETYKA_JAR' environment variable as path to the fanetyka.jar file") + + jpype.startJVM(classpath=[jar_path]) + + # import the Java modules + from org.alex73.korpus.base import GrammarDB2, GrammarFinder + + grammar_db = GrammarDB2.initializeFromJar() + global finder + finder = GrammarFinder(grammar_db) + + +def belarusian_text_to_phonemes(text: str) -> str: + # Initialize only on first run + if finder is None: + init() + + from org.alex73.fanetyka.impl import FanetykaText + return str(FanetykaText(finder, text).ipa) diff --git a/TTS/tts/utils/text/phonemizers/__init__.py b/TTS/tts/utils/text/phonemizers/__init__.py index 5c285731..638184fd 100644 --- a/TTS/tts/utils/text/phonemizers/__init__.py +++ b/TTS/tts/utils/text/phonemizers/__init__.py @@ -1,4 +1,5 @@ from TTS.tts.utils.text.phonemizers.bangla_phonemizer import BN_Phonemizer +from TTS.tts.utils.text.phonemizers.belarusian_phonemizer import BEL_Phonemizer from TTS.tts.utils.text.phonemizers.base import BasePhonemizer from TTS.tts.utils.text.phonemizers.espeak_wrapper import ESpeak from TTS.tts.utils.text.phonemizers.gruut_wrapper import Gruut @@ -35,6 +36,7 @@ DEF_LANG_TO_PHONEMIZER["en"] = DEF_LANG_TO_PHONEMIZER["en-us"] DEF_LANG_TO_PHONEMIZER["zh-cn"] = ZH_CN_Phonemizer.name() DEF_LANG_TO_PHONEMIZER["ko-kr"] = KO_KR_Phonemizer.name() DEF_LANG_TO_PHONEMIZER["bn"] = BN_Phonemizer.name() +DEF_LANG_TO_PHONEMIZER["be"] = BEL_Phonemizer.name() # JA phonemizer has deal breaking dependencies like MeCab for some systems. @@ -68,6 +70,8 @@ def get_phonemizer_by_name(name: str, **kwargs) -> BasePhonemizer: return KO_KR_Phonemizer(**kwargs) if name == "bn_phonemizer": return BN_Phonemizer(**kwargs) + if name == "be_phonemizer": + return BEL_Phonemizer(**kwargs) raise ValueError(f"Phonemizer {name} not found") diff --git a/TTS/tts/utils/text/phonemizers/belarusian_phonemizer.py b/TTS/tts/utils/text/phonemizers/belarusian_phonemizer.py new file mode 100644 index 00000000..fb620766 --- /dev/null +++ b/TTS/tts/utils/text/phonemizers/belarusian_phonemizer.py @@ -0,0 +1,55 @@ +from typing import Dict + +from TTS.tts.utils.text.phonemizers.base import BasePhonemizer +from TTS.tts.utils.text.belarusian.phonemizer import belarusian_text_to_phonemes + +_DEF_BE_PUNCS = ",!." # TODO + + +class BEL_Phonemizer(BasePhonemizer): + """🐸TTS be phonemizer using functions in `TTS.tts.utils.text.belarusian.phonemizer` + + Args: + punctuations (str): + Set of characters to be treated as punctuation. Defaults to `_DEF_BE_PUNCS`. + + keep_puncs (bool): + If True, keep the punctuations after phonemization. Defaults to False. + """ + + language = "be" + + def __init__(self, punctuations=_DEF_BE_PUNCS, keep_puncs=True, **kwargs): # pylint: disable=unused-argument + super().__init__(self.language, punctuations=punctuations, keep_puncs=keep_puncs) + + @staticmethod + def name(): + return "be_phonemizer" + + @staticmethod + def phonemize_be(text: str, separator: str = "|") -> str: # pylint: disable=unused-argument + return belarusian_text_to_phonemes(text) + + def _phonemize(self, text, separator): + return self.phonemize_be(text, separator) + + @staticmethod + def supported_languages() -> Dict: + return {"be": "Belarusian"} + + def version(self) -> str: + return "0.0.1" + + def is_available(self) -> bool: + return True + + +if __name__ == "__main__": + txt = "тэст" + e = BEL_Phonemizer() + print(e.supported_languages()) + print(e.version()) + print(e.language) + print(e.name()) + print(e.is_available()) + print("`" + e.phonemize(txt) + "`") diff --git a/recipes/bel-alex73/train_glowtts.py b/recipes/bel-alex73/train_glowtts.py index e0827cdc..24b62d79 100644 --- a/recipes/bel-alex73/train_glowtts.py +++ b/recipes/bel-alex73/train_glowtts.py @@ -60,7 +60,7 @@ config = GlowTTSConfig( output_path=output_path, add_blank=True, datasets=[dataset_config], - characters=characters, +# characters=characters, enable_eos_bos_chars=True, mixed_precision=False, save_step=10000, @@ -69,6 +69,8 @@ config = GlowTTSConfig( text_cleaner="no_cleaners", audio=audio_config, test_sentences=[], + use_phonemes=True, + phoneme_language="be", ) if __name__ == "__main__": diff --git a/tests/text_tests/test_belarusian_phonemizer.py b/tests/text_tests/test_belarusian_phonemizer.py new file mode 100644 index 00000000..278ee8be --- /dev/null +++ b/tests/text_tests/test_belarusian_phonemizer.py @@ -0,0 +1,29 @@ +import os +import warnings +import unittest + +from TTS.tts.utils.text.belarusian.phonemizer import belarusian_text_to_phonemes + +_TEST_CASES = """ +Фанетычны канвертар/fanʲɛˈtɨt͡ʂnɨ kanˈvʲɛrtar +Гэтак мы працавалі/ˈɣɛtak ˈmɨ prat͡saˈvalʲi +""" + + +class TestText(unittest.TestCase): + def test_belarusian_text_to_phonemes(self): + try: + os.environ["BEL_FANETYKA_JAR"] + except KeyError: + warnings.warn( + "You need to define 'BEL_FANETYKA_JAR' environment variable as path to the fanetyka.jar file to test Belarusian phonemizer", + Warning) + return + + for line in _TEST_CASES.strip().split("\n"): + text, phonemes = line.split("/") + self.assertEqual(belarusian_text_to_phonemes(text), phonemes) + + +if __name__ == "__main__": + unittest.main()