diff --git a/TTS/VERSION b/TTS/VERSION index ceddfb28..e3b86dd9 100644 --- a/TTS/VERSION +++ b/TTS/VERSION @@ -1 +1 @@ -0.0.15 +0.0.16 diff --git a/TTS/__init__.py b/TTS/__init__.py index da35faf8..5162d4ec 100644 --- a/TTS/__init__.py +++ b/TTS/__init__.py @@ -1,6 +1,5 @@ import os - with open(os.path.join(os.path.dirname(__file__), "VERSION")) as f: version = f.read().strip() diff --git a/TTS/bin/extract_tts_spectrograms.py b/TTS/bin/extract_tts_spectrograms.py index 4eb79d76..a0551484 100755 --- a/TTS/bin/extract_tts_spectrograms.py +++ b/TTS/bin/extract_tts_spectrograms.py @@ -299,5 +299,6 @@ if __name__ == "__main__": args = parser.parse_args() c = load_config(args.config_path) - c.audio['do_trim_silence'] = False # IMPORTANT!!!!!!!!!!!!!!! disable to align mel + c.audio["do_trim_silence"] = False # IMPORTANT!!!!!!!!!!!!!!! disable to align mel + main(args) diff --git a/TTS/tts/configs/shared_configs.py b/TTS/tts/configs/shared_configs.py index 4690e76f..a501a880 100644 --- a/TTS/tts/configs/shared_configs.py +++ b/TTS/tts/configs/shared_configs.py @@ -97,6 +97,8 @@ class BaseTTSConfig(BaseTrainingConfig): Audio processor config object instance. use_phonemes (bool): enable / disable phoneme use. + use_espeak_phonemes (bool): + enable / disable eSpeak-compatible phonemes (only if use_phonemes = `True`). compute_input_seq_cache (bool): enable / disable precomputation of the phoneme sequences. 
At the expense of some delay at the beginning of the training, It allows faster data loader time and precise limitation with `max_seq_len` and @@ -136,6 +138,7 @@ class BaseTTSConfig(BaseTrainingConfig): audio: BaseAudioConfig = field(default_factory=BaseAudioConfig) # phoneme settings use_phonemes: bool = False + use_espeak_phonemes: bool = True phoneme_language: str = None compute_input_seq_cache: bool = False text_cleaner: str = MISSING diff --git a/TTS/tts/utils/synthesis.py b/TTS/tts/utils/synthesis.py index 9f417a1d..0ddf7ebe 100644 --- a/TTS/tts/utils/synthesis.py +++ b/TTS/tts/utils/synthesis.py @@ -25,6 +25,7 @@ def text_to_seqvec(text, CONFIG): CONFIG.enable_eos_bos_chars, tp=CONFIG.characters, add_blank=CONFIG.add_blank, + use_espeak_phonemes=CONFIG.use_espeak_phonemes, ), dtype=np.int32, ) diff --git a/TTS/tts/utils/text/__init__.py b/TTS/tts/utils/text/__init__.py index f9f44167..787394b5 100644 --- a/TTS/tts/utils/text/__init__.py +++ b/TTS/tts/utils/text/__init__.py @@ -1,7 +1,9 @@ # -*- coding: utf-8 -*- import re +import unicodedata +import gruut from packaging import version from TTS.tts.utils.text import cleaners @@ -25,8 +27,11 @@ _CURLY_RE = re.compile(r"(.*?)\{(.+?)\}(.*)") # Regular expression matching punctuations, ignoring empty space PHONEME_PUNCTUATION_PATTERN = r"[" + _punctuations.replace(" ", "") + "]+" +# Table for str.translate to fix gruut/TTS phoneme mismatch +GRUUT_TRANS_TABLE = str.maketrans("g", "ɡ") -def text2phone(text, language): + +def text2phone(text, language, use_espeak_phonemes=False): """Convert graphemes to phonemes. Parameters: text (str): text to phonemize @@ -39,10 +44,43 @@ def text2phone(text, language): # TO REVIEW : How to have a good implementation for this? 
if language == "zh-CN": ph = chinese_text_to_phonemes(text) + print(" > Phonemes: {}".format(ph)) return ph if language == "ja-jp": ph = japanese_text_to_phonemes(text) + print(" > Phonemes: {}".format(ph)) + return ph + + if gruut.is_language_supported(language): + # Use gruut for phonemization + phonemizer_args = { + "remove_stress": True, + "ipa_minor_breaks": False, # don't replace commas/semi-colons with IPA | + "ipa_major_breaks": False, # don't replace periods with IPA ‖ + } + + if use_espeak_phonemes: + # Use a lexicon/g2p model trained on eSpeak IPA instead of gruut IPA. + # This is intended for backwards compatibility with TTS<=v0.0.13 + # pre-trained models. + phonemizer_args["model_prefix"] = "espeak" + + ph_list = gruut.text_to_phonemes( + text, + lang=language, + return_format="word_phonemes", + phonemizer_args=phonemizer_args, + ) + + # Join and re-split to break apart diphthongs, suprasegmentals, etc. + ph_words = ["|".join(word_phonemes) for word_phonemes in ph_list] + ph = "| ".join(ph_words) + + # Fix a few phonemes + ph = ph.translate(GRUUT_TRANS_TABLE) + + print(" > Phonemes: {}".format(ph)) return ph raise ValueError(f" [!] 
Language {language} is not supported for phonemization.") @@ -66,7 +104,9 @@ def pad_with_eos_bos(phoneme_sequence, tp=None): return [_phonemes_to_id[_bos]] + list(phoneme_sequence) + [_phonemes_to_id[_eos]] -def phoneme_to_sequence(text, cleaner_names, language, enable_eos_bos=False, tp=None, add_blank=False): +def phoneme_to_sequence( + text, cleaner_names, language, enable_eos_bos=False, tp=None, add_blank=False, use_espeak_phonemes=False +): # pylint: disable=global-statement global _phonemes_to_id, _phonemes if tp: @@ -75,7 +115,7 @@ def phoneme_to_sequence(text, cleaner_names, language, enable_eos_bos=False, tp= sequence = [] clean_text = _clean_text(text, cleaner_names) - to_phonemes = text2phone(clean_text, language) + to_phonemes = text2phone(clean_text, language, use_espeak_phonemes=use_espeak_phonemes) if to_phonemes is None: print("!! After phoneme conversion the result is None. -- {} ".format(clean_text)) # iterate by skipping empty strings - NOTE: might be useful to keep it to have a better intonation. @@ -86,6 +126,7 @@ def phoneme_to_sequence(text, cleaner_names, language, enable_eos_bos=False, tp= sequence = pad_with_eos_bos(sequence, tp=tp) if add_blank: sequence = intersperse(sequence, len(_phonemes)) # add a blank token (new), whose id number is len(_phonemes) + return sequence diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py index cf7df7de..f5165079 100644 --- a/TTS/utils/manage.py +++ b/TTS/utils/manage.py @@ -102,10 +102,10 @@ class ModelManager(object): output_model_path = os.path.join(output_path, "model_file.pth.tar") output_config_path = os.path.join(output_path, "config.json") # NOTE : band-aid for removing phoneme support - if "needs_phonemizer" in model_item and model_item["needs_phonemizer"]: - raise RuntimeError( - " [!] Use 🐸TTS <= v0.0.13 for this model. Current version does not support phoneme based models." - ) + # if "needs_phonemizer" in model_item and model_item["needs_phonemizer"]: + # raise RuntimeError( + # " [!] 
Use 🐸TTS <= v0.0.13 for this model. Current version does not support phoneme based models." + # ) if os.path.exists(output_path): print(f" > {model_name} is already downloaded.") else: diff --git a/requirements.txt b/requirements.txt index fde48978..046139d0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,3 +22,5 @@ coqpit # japanese g2p deps mecab-python3==1.0.3 unidic-lite==1.0.8 +# gruut+supported langs +gruut[cs,de,es,fr,it,nl,pt,ru,sv]~=1.2.0 diff --git a/tests/test_text_processing.py b/tests/test_text_processing.py new file mode 100644 index 00000000..3c424a15 --- /dev/null +++ b/tests/test_text_processing.py @@ -0,0 +1,104 @@ +"""Tests for text to phoneme conversion""" +import unittest + +from TTS.tts.utils.text import phoneme_to_sequence, sequence_to_phoneme, text2phone + +# ----------------------------------------------------------------------------- + +LANG = "en-us" + +EXAMPLE_TEXT = "Recent research at Harvard has shown meditating for as little as 8 weeks can actually increase, the grey matter in the parts of the brain responsible for emotional regulation and learning!" + +EXPECTED_PHONEMES = "ɹ|iː|s|ə|n|t| ɹ|ᵻ|s|ɜː|tʃ| æ|t| h|ɑːɹ|v|ɚ|d| h|æ|z| ʃ|oʊ|n| m|ɛ|d|ᵻ|t|eɪ|ɾ|ɪ|ŋ| f|ɔːɹ| æ|z| l|ɪ|ɾ|əl| æ|z| eɪ|t| w|iː|k|s| k|æ|n| æ|k|tʃ|uː|əl|i| ɪ|ŋ|k|ɹ|iː|s| ,| ð|ə| ɡ|ɹ|eɪ| m|æ|ɾ|ɚ| ɪ|n| ð|ə| p|ɑːɹ|t|s| ʌ|v| ð|ə| b|ɹ|eɪ|n| ɹ|ᵻ|s|p|ɑː|n|s|ᵻ|b|əl| f|ɔːɹ| ɪ|m|oʊ|ʃ|ə|n|əl| ɹ|ɛ|ɡ|j|ʊ|l|eɪ|ʃ|ə|n| æ|n|d| l|ɜː|n|ɪ|ŋ| !" 
+ +# ----------------------------------------------------------------------------- + + +class TextProcessingTextCase(unittest.TestCase): + """Tests for text to phoneme conversion""" + + def test_phoneme_to_sequence(self): + """Verify en-us sentence phonemes without blank token""" + self._test_phoneme_to_sequence(add_blank=False) + + def test_phoneme_to_sequence_with_blank_token(self): + """Verify en-us sentence phonemes with blank token""" + self._test_phoneme_to_sequence(add_blank=True) + + def _test_phoneme_to_sequence(self, add_blank): + """Verify en-us sentence phonemes""" + text_cleaner = ["phoneme_cleaners"] + sequence = phoneme_to_sequence(EXAMPLE_TEXT, text_cleaner, LANG, add_blank=add_blank, use_espeak_phonemes=True) + text_hat = sequence_to_phoneme(sequence) + text_hat_with_params = sequence_to_phoneme(sequence) + gt = EXPECTED_PHONEMES.replace("|", "") + self.assertEqual(text_hat, text_hat_with_params) + self.assertEqual(text_hat, gt) + + # multiple punctuations + text = "Be a voice, not an! echo?" + sequence = phoneme_to_sequence(text, text_cleaner, LANG, add_blank=add_blank, use_espeak_phonemes=True) + text_hat = sequence_to_phoneme(sequence) + text_hat_with_params = sequence_to_phoneme(sequence) + gt = "biː ɐ vɔɪs , nɑːt ɐn ! ɛkoʊ ?" + print(text_hat) + print(len(sequence)) + self.assertEqual(text_hat, text_hat_with_params) + self.assertEqual(text_hat, gt) + + # not ending with punctuation + text = "Be a voice, not an! echo" + sequence = phoneme_to_sequence(text, text_cleaner, LANG, add_blank=add_blank, use_espeak_phonemes=True) + text_hat = sequence_to_phoneme(sequence) + text_hat_with_params = sequence_to_phoneme(sequence) + gt = "biː ɐ vɔɪs , nɑːt ɐn ! ɛkoʊ" + print(text_hat) + print(len(sequence)) + self.assertEqual(text_hat, text_hat_with_params) + self.assertEqual(text_hat, gt) + + # original + text = "Be a voice, not an echo!" 
+ sequence = phoneme_to_sequence(text, text_cleaner, LANG, add_blank=add_blank, use_espeak_phonemes=True) + text_hat = sequence_to_phoneme(sequence) + text_hat_with_params = sequence_to_phoneme(sequence) + gt = "biː ɐ vɔɪs , nɑːt ɐn ɛkoʊ !" + print(text_hat) + print(len(sequence)) + self.assertEqual(text_hat, text_hat_with_params) + self.assertEqual(text_hat, gt) + + # extra space after the sentence + text = "Be a voice, not an! echo. " + sequence = phoneme_to_sequence(text, text_cleaner, LANG, add_blank=add_blank, use_espeak_phonemes=True) + text_hat = sequence_to_phoneme(sequence) + text_hat_with_params = sequence_to_phoneme(sequence) + gt = "biː ɐ vɔɪs , nɑːt ɐn ! ɛkoʊ ." + print(text_hat) + print(len(sequence)) + self.assertEqual(text_hat, text_hat_with_params) + self.assertEqual(text_hat, gt) + + # extra space after the sentence + text = "Be a voice, not an! echo. " + sequence = phoneme_to_sequence( + text, text_cleaner, LANG, enable_eos_bos=True, add_blank=add_blank, use_espeak_phonemes=True + ) + text_hat = sequence_to_phoneme(sequence) + text_hat_with_params = sequence_to_phoneme(sequence) + gt = "^biː ɐ vɔɪs , nɑːt ɐn ! 
ɛkoʊ .~" + print(text_hat) + print(len(sequence)) + self.assertEqual(text_hat, text_hat_with_params) + self.assertEqual(text_hat, gt) + + def test_text2phone(self): + """Verify phones directly (with |)""" + ph = text2phone(EXAMPLE_TEXT, LANG, use_espeak_phonemes=True) + self.assertEqual(ph, EXPECTED_PHONEMES) + + +# ----------------------------------------------------------------------------- + +if __name__ == "__main__": + unittest.main() diff --git a/tests/tts_tests/test_glow_tts_train.py b/tests/tts_tests/test_glow_tts_train.py index 2e675d13..e44f6365 100644 --- a/tests/tts_tests/test_glow_tts_train.py +++ b/tests/tts_tests/test_glow_tts_train.py @@ -16,7 +16,8 @@ config = GlowTTSConfig( num_val_loader_workers=0, text_cleaner="english_cleaners", use_phonemes=True, - phoneme_language="zh-CN", + use_espeak_phonemes=True, + phoneme_language="en-us", phoneme_cache_path="tests/data/ljspeech/phoneme_cache/", run_eval=True, test_delay_epochs=-1, diff --git a/tests/tts_tests/test_speedy_speech_train.py b/tests/tts_tests/test_speedy_speech_train.py index 3f508117..9dcf0ad8 100644 --- a/tests/tts_tests/test_speedy_speech_train.py +++ b/tests/tts_tests/test_speedy_speech_train.py @@ -16,7 +16,7 @@ config = SpeedySpeechConfig( num_val_loader_workers=0, text_cleaner="english_cleaners", use_phonemes=True, - phoneme_language="zh-CN", + phoneme_language="en-us", phoneme_cache_path="tests/data/ljspeech/phoneme_cache/", run_eval=True, test_delay_epochs=-1, diff --git a/tests/vocoder_tests/test_multiband_melgan_train.py b/tests/vocoder_tests/test_multiband_melgan_train.py index 081fb40e..ef362414 100644 --- a/tests/vocoder_tests/test_multiband_melgan_train.py +++ b/tests/vocoder_tests/test_multiband_melgan_train.py @@ -20,6 +20,7 @@ config = MultibandMelganConfig( eval_split_size=1, print_step=1, print_eval=True, + discriminator_model_params={"base_channels": 16, "max_channels": 128, "downsample_factors": [4, 4, 4]}, data_path="tests/data/ljspeech", output_path=output_path, )