diff --git a/TTS/speaker_encoder/utils/prepare_voxceleb.py b/TTS/speaker_encoder/utils/prepare_voxceleb.py
index 7bcbaf95..1901a21c 100644
--- a/TTS/speaker_encoder/utils/prepare_voxceleb.py
+++ b/TTS/speaker_encoder/utils/prepare_voxceleb.py
@@ -25,12 +25,11 @@ import subprocess
 import sys
 import zipfile
 
+import pandas
 import soundfile as sf
 import tensorflow as tf
 from absl import logging
 
-import pandas
-
 gfile = tf.compat.v1.gfile
 
 SUBSETS = {
diff --git a/TTS/tts/layers/glow_tts/monotonic_align/core.pyx b/TTS/tts/layers/glow_tts/monotonic_align/core.pyx
index 6aabccc4..091fcc3a 100644
--- a/TTS/tts/layers/glow_tts/monotonic_align/core.pyx
+++ b/TTS/tts/layers/glow_tts/monotonic_align/core.pyx
@@ -1,6 +1,8 @@
 import numpy as np
-cimport numpy as np
+
 cimport cython
+cimport numpy as np
+
 from cython.parallel import prange
 
 
diff --git a/notebooks/dataset_analysis/analyze.py b/notebooks/dataset_analysis/analyze.py
index 66d008cd..6c6bc582 100644
--- a/notebooks/dataset_analysis/analyze.py
+++ b/notebooks/dataset_analysis/analyze.py
@@ -6,13 +6,12 @@ import random
 from statistics import StatisticsError, mean, median, mode, stdev
 
 import matplotlib.pyplot as plt
-
 import seaborn as sns
 from text.cmudict import CMUDict
 
 
 def get_audio_seconds(frames):
-    return (frames*12.5)/1000
+    return (frames * 12.5) / 1000
 
 
 def append_data_statistics(meta_data):
@@ -29,9 +28,7 @@ def append_data_statistics(meta_data):
         median_audio_len = median(audio_len_list)
 
         try:
-            std = stdev(
-                d["audio_len"] for d in data
-            )
+            std = stdev(d["audio_len"] for d in data)
         except StatisticsError:
             std = 0
 
@@ -46,24 +43,22 @@ def process_meta_data(path):
     meta_data = {}
 
     # load meta data
-    with open(path, 'r') as f:
-        data = csv.reader(f, delimiter='|')
+    with open(path, "r") as f:
+        data = csv.reader(f, delimiter="|")
         for row in data:
             frames = int(row[2])
             utt = row[3]
             audio_len = get_audio_seconds(frames)
             char_count = len(utt)
             if not meta_data.get(char_count):
-                meta_data[char_count] = {
-                    "data": []
-                }
+                meta_data[char_count] = {"data": []}
             meta_data[char_count]["data"].append(
                 {
                     "utt": utt,
                     "frames": frames,
                     "audio_len": audio_len,
-                    "row": "{}|{}|{}|{}".format(row[0], row[1], row[2], row[3])
+                    "row": "{}|{}|{}|{}".format(row[0], row[1], row[2], row[3]),
                 }
             )
 
@@ -74,30 +69,30 @@ def get_data_points(meta_data):
     x = meta_data
-    y_avg = [meta_data[d]['mean'] for d in meta_data]
-    y_mode = [meta_data[d]['mode'] for d in meta_data]
-    y_median = [meta_data[d]['median'] for d in meta_data]
-    y_std = [meta_data[d]['std'] for d in meta_data]
-    y_num_samples = [len(meta_data[d]['data']) for d in meta_data]
+    y_avg = [meta_data[d]["mean"] for d in meta_data]
+    y_mode = [meta_data[d]["mode"] for d in meta_data]
+    y_median = [meta_data[d]["median"] for d in meta_data]
+    y_std = [meta_data[d]["std"] for d in meta_data]
+    y_num_samples = [len(meta_data[d]["data"]) for d in meta_data]
     return {
         "x": x,
         "y_avg": y_avg,
         "y_mode": y_mode,
         "y_median": y_median,
         "y_std": y_std,
-        "y_num_samples": y_num_samples
+        "y_num_samples": y_num_samples,
     }
 
 
 def save_training(file_path, meta_data):
     rows = []
     for char_cnt in meta_data:
-        data = meta_data[char_cnt]['data']
+        data = meta_data[char_cnt]["data"]
         for d in data:
-            rows.append(d['row'] + "\n")
+            rows.append(d["row"] + "\n")
     random.shuffle(rows)
-    with open(file_path, 'w+') as f:
+    with open(file_path, "w+") as f:
         for row in rows:
             f.write(row)
 
@@ -108,15 +103,15 @@ def plot(meta_data, save_path=None):
         save = True
 
     graph_data = get_data_points(meta_data)
-    x = graph_data['x']
-    y_avg = graph_data['y_avg']
-    y_std = graph_data['y_std']
-    y_mode = graph_data['y_mode']
-    y_median = graph_data['y_median']
-    y_num_samples = graph_data['y_num_samples']
+    x = graph_data["x"]
+    y_avg = graph_data["y_avg"]
+    y_std = graph_data["y_std"]
+    y_mode = graph_data["y_mode"]
+    y_median = graph_data["y_median"]
+    y_num_samples = graph_data["y_num_samples"]
 
     plt.figure()
-    plt.plot(x, y_avg, 'ro')
+    plt.plot(x, y_avg, "ro")
     plt.xlabel("character lengths", fontsize=30)
     plt.ylabel("avg seconds", fontsize=30)
     if save:
@@ -124,7 +119,7 @@ def plot(meta_data, save_path=None):
         plt.savefig(os.path.join(save_path, name))
 
     plt.figure()
-    plt.plot(x, y_mode, 'ro')
+    plt.plot(x, y_mode, "ro")
     plt.xlabel("character lengths", fontsize=30)
     plt.ylabel("mode seconds", fontsize=30)
     if save:
@@ -132,7 +127,7 @@ def plot(meta_data, save_path=None):
         plt.savefig(os.path.join(save_path, name))
 
     plt.figure()
-    plt.plot(x, y_median, 'ro')
+    plt.plot(x, y_median, "ro")
    plt.xlabel("character lengths", fontsize=30)
     plt.ylabel("median seconds", fontsize=30)
     if save:
@@ -140,7 +135,7 @@ def plot(meta_data, save_path=None):
         plt.savefig(os.path.join(save_path, name))
 
     plt.figure()
-    plt.plot(x, y_std, 'ro')
+    plt.plot(x, y_std, "ro")
     plt.xlabel("character lengths", fontsize=30)
     plt.ylabel("standard deviation", fontsize=30)
     if save:
@@ -148,7 +143,7 @@ def plot(meta_data, save_path=None):
         plt.savefig(os.path.join(save_path, name))
 
     plt.figure()
-    plt.plot(x, y_num_samples, 'ro')
+    plt.plot(x, y_num_samples, "ro")
     plt.xlabel("character lengths", fontsize=30)
     plt.ylabel("number of samples", fontsize=30)
     if save:
@@ -161,8 +156,8 @@ def plot_phonemes(train_path, cmu_dict_path, save_path):
 
     phonemes = {}
 
-    with open(train_path, 'r') as f:
-        data = csv.reader(f, delimiter='|')
+    with open(train_path, "r") as f:
+        data = csv.reader(f, delimiter="|")
         phonemes["None"] = 0
         for row in data:
             words = row[3].split()
@@ -194,15 +189,12 @@ def plot_phonemes(train_path, cmu_dict_path, save_path):
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument(
-        '--train_file_path', required=True,
-        help='this is the path to the train.txt file that the preprocess.py script creates'
-    )
-    parser.add_argument(
-        '--save_to', help='path to save charts of data to'
-    )
-    parser.add_argument(
-        '--cmu_dict_path', help='give cmudict-0.7b to see phoneme distribution'
+        "--train_file_path",
+        required=True,
+        help="this is the path to the train.txt file that the preprocess.py script creates",
     )
+    parser.add_argument("--save_to", help="path to save charts of data to")
+    parser.add_argument("--cmu_dict_path", help="give cmudict-0.7b to see phoneme distribution")
     args = parser.parse_args()
     meta_data = process_meta_data(args.train_file_path)
     plt.rcParams["figure.figsize"] = (10, 5)
@@ -213,5 +205,6 @@ def main():
         plt.show()
 
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     main()
diff --git a/pyproject.toml b/pyproject.toml
index 335303d1..5c742966 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,3 +26,8 @@ exclude = '''
   # the root of the project
 )
 '''
+
+[tool.isort]
+line_length = 120
+profile = "black"
+multi_line_output = 3
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 42544666..c479da9b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -18,10 +18,12 @@ bokeh==1.4.0
 pysbd
 # pyworld
 soundfile
-nose==1.3.7
-cardboardlint==1.3.0
-pylint==2.5.3
 gdown
 umap-learn==0.4.6
 cython
-pyyaml
\ No newline at end of file
+pyyaml
+# quality and style
+nose
+black
+isort
+pylint==2.7.4
\ No newline at end of file
diff --git a/tests/test_audio.py b/tests/test_audio.py
index 75141730..8065383e 100644
--- a/tests/test_audio.py
+++ b/tests/test_audio.py
@@ -10,7 +10,7 @@ OUT_PATH = os.path.join(get_tests_output_path(), "audio_tests")
 WAV_FILE = os.path.join(get_tests_input_path(), "example_1.wav")
 
 os.makedirs(OUT_PATH, exist_ok=True)
-conf = load_config(os.path.join(get_tests_input_path(), 'test_config.json'))
+conf = load_config(os.path.join(get_tests_input_path(), "test_config.json"))
 
 
 # pylint: disable=protected-access
@@ -20,10 +20,10 @@ class TestAudio(unittest.TestCase):
         self.ap = AudioProcessor(**conf.audio)
 
     def test_audio_synthesis(self):
-        """ 1. load wav
-            2. set normalization parameters
-            3. extract mel-spec
-            4. invert to wav and save the output
+        """1. load wav
+        2. set normalization parameters
+        3. extract mel-spec
+        4. invert to wav and save the output
         """
         print(" > Sanity check for the process wav -> mel -> wav")
 
@@ -35,23 +35,24 @@ class TestAudio(unittest.TestCase):
             wav = self.ap.load_wav(WAV_FILE)
             mel = self.ap.melspectrogram(wav)
             wav_ = self.ap.inv_melspectrogram(mel)
-            file_name = "/audio_test-melspec_max_norm_{}-signal_norm_{}-symmetric_{}-clip_norm_{}.wav"\
-                .format(max_norm, signal_norm, symmetric_norm, clip_norm)
+            file_name = "/audio_test-melspec_max_norm_{}-signal_norm_{}-symmetric_{}-clip_norm_{}.wav".format(
+                max_norm, signal_norm, symmetric_norm, clip_norm
+            )
             print(" | > Creating wav file at : ", file_name)
             self.ap.save_wav(wav_, OUT_PATH + file_name)
 
         # maxnorm = 1.0
-        _test(1., False, False, False)
-        _test(1., True, False, False)
-        _test(1., True, True, False)
-        _test(1., True, False, True)
-        _test(1., True, True, True)
+        _test(1.0, False, False, False)
+        _test(1.0, True, False, False)
+        _test(1.0, True, True, False)
+        _test(1.0, True, False, True)
+        _test(1.0, True, True, True)
         # maxnorm = 4.0
-        _test(4., False, False, False)
-        _test(4., True, False, False)
-        _test(4., True, True, False)
-        _test(4., True, False, True)
-        _test(4., True, True, True)
+        _test(4.0, False, False, False)
+        _test(4.0, True, False, False)
+        _test(4.0, True, True, False)
+        _test(4.0, True, False, True)
+        _test(4.0, True, True, True)
 
     def test_normalize(self):
         """Check normalization and denormalization for range values and consistency """
@@ -67,7 +68,9 @@ class TestAudio(unittest.TestCase):
         self.ap.clip_norm = False
         self.ap.max_norm = 4.0
         x_norm = self.ap.normalize(x)
-        print(f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}")
+        print(
+            f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}"
+        )
         assert (x_old - x).sum() == 0
         # check value range
         assert x_norm.max() <= self.ap.max_norm + 1, x_norm.max()
@@ -81,8 +84,9 @@ class TestAudio(unittest.TestCase):
         self.ap.clip_norm = True
         self.ap.max_norm = 4.0
         x_norm = self.ap.normalize(x)
-        print(f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}")
-
+        print(
+            f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}"
+        )
         assert (x_old - x).sum() == 0
         # check value range
@@ -97,13 +101,14 @@ class TestAudio(unittest.TestCase):
         self.ap.clip_norm = False
         self.ap.max_norm = 4.0
         x_norm = self.ap.normalize(x)
-        print(f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}")
-
+        print(
+            f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}"
+        )
         assert (x_old - x).sum() == 0
         # check value range
         assert x_norm.max() <= self.ap.max_norm + 1, x_norm.max()
-        assert x_norm.min() >= -self.ap.max_norm - 2, x_norm.min()  #pylint: disable=invalid-unary-operand-type
+        assert x_norm.min() >= -self.ap.max_norm - 2, x_norm.min()  # pylint: disable=invalid-unary-operand-type
         assert x_norm.min() <= 0, x_norm.min()
         # check denorm.
         x_ = self.ap.denormalize(x_norm)
@@ -114,13 +119,14 @@ class TestAudio(unittest.TestCase):
         self.ap.clip_norm = True
         self.ap.max_norm = 4.0
         x_norm = self.ap.normalize(x)
-        print(f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}")
-
+        print(
+            f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}"
+        )
         assert (x_old - x).sum() == 0
         # check value range
         assert x_norm.max() <= self.ap.max_norm, x_norm.max()
-        assert x_norm.min() >= -self.ap.max_norm, x_norm.min()  #pylint: disable=invalid-unary-operand-type
+        assert x_norm.min() >= -self.ap.max_norm, x_norm.min()  # pylint: disable=invalid-unary-operand-type
         assert x_norm.min() <= 0, x_norm.min()
         # check denorm.
         x_ = self.ap.denormalize(x_norm)
@@ -130,8 +136,9 @@ class TestAudio(unittest.TestCase):
         self.ap.symmetric_norm = False
         self.ap.max_norm = 1.0
         x_norm = self.ap.normalize(x)
-        print(f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}")
-
+        print(
+            f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}"
+        )
         assert (x_old - x).sum() == 0
 
         assert x_norm.max() <= self.ap.max_norm, x_norm.max()
@@ -143,22 +150,23 @@ class TestAudio(unittest.TestCase):
         self.ap.symmetric_norm = True
         self.ap.max_norm = 1.0
         x_norm = self.ap.normalize(x)
-        print(f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}")
-
+        print(
+            f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}"
+        )
         assert (x_old - x).sum() == 0
 
         assert x_norm.max() <= self.ap.max_norm, x_norm.max()
-        assert x_norm.min() >= -self.ap.max_norm, x_norm.min()  #pylint: disable=invalid-unary-operand-type
+        assert x_norm.min() >= -self.ap.max_norm, x_norm.min()  # pylint: disable=invalid-unary-operand-type
         assert x_norm.min() < 0, x_norm.min()
         x_ = self.ap.denormalize(x_norm)
         assert (x - x_).sum() < 1e-3
 
     def test_scaler(self):
-        scaler_stats_path = os.path.join(get_tests_input_path(), 'scale_stats.npy')
-        conf.audio['stats_path'] = scaler_stats_path
-        conf.audio['preemphasis'] = 0.0
-        conf.audio['do_trim_silence'] = True
-        conf.audio['signal_norm'] = True
+        scaler_stats_path = os.path.join(get_tests_input_path(), "scale_stats.npy")
+        conf.audio["stats_path"] = scaler_stats_path
+        conf.audio["preemphasis"] = 0.0
+        conf.audio["do_trim_silence"] = True
+        conf.audio["signal_norm"] = True
 
         ap = AudioProcessor(**conf.audio)
         mel_mean, mel_std, linear_mean, linear_std, _ = ap.load_stats(scaler_stats_path)
diff --git a/tests/test_feed_forward_layers.py b/tests/test_feed_forward_layers.py
index a19e808c..1db980a3 100644
--- a/tests/test_feed_forward_layers.py
+++ b/tests/test_feed_forward_layers.py
@@ -9,99 +9,99 @@ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 def test_encoder():
     input_dummy = torch.rand(8, 14, 37).to(device)
-    input_lengths = torch.randint(31, 37, (8, )).long().to(device)
+    input_lengths = torch.randint(31, 37, (8,)).long().to(device)
     input_lengths[-1] = 37
-    input_mask = torch.unsqueeze(
-        sequence_mask(input_lengths, input_dummy.size(2)), 1).to(device)
+    input_mask = torch.unsqueeze(sequence_mask(input_lengths, input_dummy.size(2)), 1).to(device)
     # relative positional transformer encoder
-    layer = Encoder(out_channels=11,
-                    in_hidden_channels=14,
-                    encoder_type='relative_position_transformer',
-                    encoder_params={
-                        'hidden_channels_ffn': 768,
-                        'num_heads': 2,
-                        "kernel_size": 3,
-                        "dropout_p": 0.1,
-                        "num_layers": 6,
-                        "rel_attn_window_size": 4,
-                        "input_length": None
-                    }).to(device)
+    layer = Encoder(
+        out_channels=11,
+        in_hidden_channels=14,
+        encoder_type="relative_position_transformer",
+        encoder_params={
+            "hidden_channels_ffn": 768,
+            "num_heads": 2,
+            "kernel_size": 3,
+            "dropout_p": 0.1,
+            "num_layers": 6,
+            "rel_attn_window_size": 4,
+            "input_length": None,
+        },
+    ).to(device)
     output = layer(input_dummy, input_mask)
     assert list(output.shape) == [8, 11, 37]
     # residual conv bn encoder
-    layer = Encoder(out_channels=11,
-                    in_hidden_channels=14,
-                    encoder_type='residual_conv_bn',
-                    encoder_params={
-                        "kernel_size": 4,
-                        "dilations": 4 * [1, 2, 4] + [1],
-                        "num_conv_blocks": 2,
-                        "num_res_blocks": 13
-                    }).to(device)
+    layer = Encoder(
+        out_channels=11,
+        in_hidden_channels=14,
+        encoder_type="residual_conv_bn",
+        encoder_params={"kernel_size": 4, "dilations": 4 * [1, 2, 4] + [1], "num_conv_blocks": 2, "num_res_blocks": 13},
+    ).to(device)
     output = layer(input_dummy, input_mask)
     assert list(output.shape) == [8, 11, 37]
     # FFTransformer encoder
-    layer = Encoder(out_channels=14,
-                    in_hidden_channels=14,
-                    encoder_type='fftransformer',
-                    encoder_params={
-                        "hidden_channels_ffn": 31,
-                        "num_heads": 2,
-                        "num_layers": 2,
-                        "dropout_p": 0.1
-                    }).to(device)
+    layer = Encoder(
+        out_channels=14,
+        in_hidden_channels=14,
+        encoder_type="fftransformer",
+        encoder_params={"hidden_channels_ffn": 31, "num_heads": 2, "num_layers": 2, "dropout_p": 0.1},
+    ).to(device)
     output = layer(input_dummy, input_mask)
     assert list(output.shape) == [8, 14, 37]
 
 
 def test_decoder():
     input_dummy = torch.rand(8, 128, 37).to(device)
-    input_lengths = torch.randint(31, 37, (8, )).long().to(device)
+    input_lengths = torch.randint(31, 37, (8,)).long().to(device)
     input_lengths[-1] = 37
-    input_mask = torch.unsqueeze(
-        sequence_mask(input_lengths, input_dummy.size(2)), 1).to(device)
+    input_mask = torch.unsqueeze(sequence_mask(input_lengths, input_dummy.size(2)), 1).to(device)
     # residual bn conv decoder
     layer = Decoder(out_channels=11, in_hidden_channels=128).to(device)
     output = layer(input_dummy, input_mask)
     assert list(output.shape) == [8, 11, 37]
     # transformer decoder
-    layer = Decoder(out_channels=11,
-                    in_hidden_channels=128,
-                    decoder_type='relative_position_transformer',
-                    decoder_params={
-                        'hidden_channels_ffn': 128,
-                        'num_heads': 2,
-                        "kernel_size": 3,
-                        "dropout_p": 0.1,
-                        "num_layers": 8,
-                        "rel_attn_window_size": 4,
-                        "input_length": None
-                    }).to(device)
+    layer = Decoder(
+        out_channels=11,
+        in_hidden_channels=128,
+        decoder_type="relative_position_transformer",
+        decoder_params={
+            "hidden_channels_ffn": 128,
+            "num_heads": 2,
+            "kernel_size": 3,
+            "dropout_p": 0.1,
+            "num_layers": 8,
+            "rel_attn_window_size": 4,
+            "input_length": None,
+        },
+    ).to(device)
     output = layer(input_dummy, input_mask)
     assert list(output.shape) == [8, 11, 37]
     # wavenet decoder
-    layer = Decoder(out_channels=11,
-                    in_hidden_channels=128,
-                    decoder_type='wavenet',
-                    decoder_params={
-                        "num_blocks": 12,
-                        "hidden_channels": 192,
-                        "kernel_size": 5,
-                        "dilation_rate": 1,
-                        "num_layers": 4,
-                        "dropout_p": 0.05
-                    }).to(device)
+    layer = Decoder(
+        out_channels=11,
+        in_hidden_channels=128,
+        decoder_type="wavenet",
+        decoder_params={
+            "num_blocks": 12,
+            "hidden_channels": 192,
+            "kernel_size": 5,
+            "dilation_rate": 1,
+            "num_layers": 4,
+            "dropout_p": 0.05,
+        },
+    ).to(device)
     output = layer(input_dummy, input_mask)
     # FFTransformer decoder
-    layer = Decoder(out_channels=11,
-                    in_hidden_channels=128,
-                    decoder_type='fftransformer',
-                    decoder_params={
-                        'hidden_channels_ffn': 31,
-                        'num_heads': 2,
-                        "dropout_p": 0.1,
-                        "num_layers": 2,
-                    }).to(device)
+    layer = Decoder(
+        out_channels=11,
+        in_hidden_channels=128,
+        decoder_type="fftransformer",
+        decoder_params={
+            "hidden_channels_ffn": 31,
+            "num_heads": 2,
+            "dropout_p": 0.1,
+            "num_layers": 2,
+        },
+    ).to(device)
     output = layer(input_dummy, input_mask)
     assert list(output.shape) == [8, 11, 37]
diff --git a/tests/test_glow_tts.py b/tests/test_glow_tts.py
index 66d594e2..8e699faf 100644
--- a/tests/test_glow_tts.py
+++ b/tests/test_glow_tts.py
@@ -11,13 +11,13 @@ from TTS.tts.models.glow_tts import GlowTTS
 from TTS.utils.audio import AudioProcessor
 from TTS.utils.io import load_config
 
-#pylint: disable=unused-variable
+# pylint: disable=unused-variable
 
 torch.manual_seed(1)
 use_cuda = torch.cuda.is_available()
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
-c = load_config(os.path.join(get_tests_input_path(), 'test_config.json'))
+c = load_config(os.path.join(get_tests_input_path(), "test_config.json"))
 
 ap = AudioProcessor(**c.audio)
 WAV_FILE = os.path.join(get_tests_input_path(), "example_1.wav")
@@ -32,11 +32,11 @@ class GlowTTSTrainTest(unittest.TestCase):
     @staticmethod
     def test_train_step():
         input_dummy = torch.randint(0, 24, (8, 128)).long().to(device)
-        input_lengths = torch.randint(100, 129, (8, )).long().to(device)
+        input_lengths = torch.randint(100, 129, (8,)).long().to(device)
         input_lengths[-1] = 128
-        mel_spec = torch.rand(8, c.audio['num_mels'], 30).to(device)
-        mel_lengths = torch.randint(20, 30, (8, )).long().to(device)
-        speaker_ids = torch.randint(0, 5, (8, )).long().to(device)
+        mel_spec = torch.rand(8, c.audio["num_mels"], 30).to(device)
+        mel_lengths = torch.randint(20, 30, (8,)).long().to(device)
+        speaker_ids = torch.randint(0, 5, (8,)).long().to(device)
 
         criterion = GlowTTSLoss()
 
@@ -47,27 +47,28 @@ class GlowTTSTrainTest(unittest.TestCase):
             hidden_channels_dec=48,
             hidden_channels_dp=32,
             out_channels=80,
-            encoder_type='rel_pos_transformer',
+            encoder_type="rel_pos_transformer",
             encoder_params={
-                'kernel_size': 3,
-                'dropout_p': 0.1,
-                'num_layers': 6,
-                'num_heads': 2,
-                'hidden_channels_ffn': 16,  # 4 times the hidden_channels
-                'input_length': None
+                "kernel_size": 3,
+                "dropout_p": 0.1,
+                "num_layers": 6,
+                "num_heads": 2,
+                "hidden_channels_ffn": 16,  # 4 times the hidden_channels
+                "input_length": None,
             },
             use_encoder_prenet=True,
             num_flow_blocks_dec=12,
             kernel_size_dec=5,
             dilation_rate=1,
             num_block_layers=4,
-            dropout_p_dec=0.,
+            dropout_p_dec=0.0,
             num_speakers=0,
             c_in_channels=0,
             num_splits=4,
             num_squeeze=1,
             sigmoid_scale=False,
-            mean_only=False).to(device)
+            mean_only=False,
+        ).to(device)
 
         # reference model to compare model weights
         model_ref = GlowTTS(
@@ -76,38 +77,37 @@ class GlowTTSTrainTest(unittest.TestCase):
            hidden_channels_dec=48,
             hidden_channels_dp=32,
             out_channels=80,
-            encoder_type='rel_pos_transformer',
+            encoder_type="rel_pos_transformer",
             encoder_params={
-                'kernel_size': 3,
-                'dropout_p': 0.1,
-                'num_layers': 6,
-                'num_heads': 2,
-                'hidden_channels_ffn': 16,  # 4 times the hidden_channels
-                'input_length': None
+                "kernel_size": 3,
+                "dropout_p": 0.1,
+                "num_layers": 6,
+                "num_heads": 2,
+                "hidden_channels_ffn": 16,  # 4 times the hidden_channels
+                "input_length": None,
             },
             use_encoder_prenet=True,
             num_flow_blocks_dec=12,
             kernel_size_dec=5,
             dilation_rate=1,
             num_block_layers=4,
-            dropout_p_dec=0.,
+            dropout_p_dec=0.0,
             num_speakers=0,
             c_in_channels=0,
             num_splits=4,
             num_squeeze=1,
             sigmoid_scale=False,
-            mean_only=False).to(device)
+            mean_only=False,
+        ).to(device)
 
         model.train()
-        print(" > Num parameters for GlowTTS model:%s" %
-              (count_parameters(model)))
+        print(" > Num parameters for GlowTTS model:%s" % (count_parameters(model)))
 
         # pass the state to ref model
         model_ref.load_state_dict(copy.deepcopy(model.state_dict()))
 
         count = 0
-        for param, param_ref in zip(model.parameters(),
-                                    model_ref.parameters()):
+        for param, param_ref in zip(model.parameters(), model_ref.parameters()):
             assert (param - param_ref).sum() == 0, param
             count += 1
 
@@ -115,18 +115,17 @@ class GlowTTSTrainTest(unittest.TestCase):
         for _ in range(5):
             optimizer.zero_grad()
             z, logdet, y_mean, y_log_scale, alignments, o_dur_log, o_total_dur = model.forward(
-                input_dummy, input_lengths, mel_spec, mel_lengths, None)
-            loss_dict = criterion(z, y_mean, y_log_scale, logdet, mel_lengths,
-                                  o_dur_log, o_total_dur, input_lengths)
-            loss = loss_dict['loss']
+                input_dummy, input_lengths, mel_spec, mel_lengths, None
+            )
+            loss_dict = criterion(z, y_mean, y_log_scale, logdet, mel_lengths, o_dur_log, o_total_dur, input_lengths)
+            loss = loss_dict["loss"]
             loss.backward()
             optimizer.step()
 
         # check parameter changes
         count = 0
-        for param, param_ref in zip(model.parameters(),
-                                    model_ref.parameters()):
-            assert (param != param_ref).any(
-            ), "param {} with shape {} not updated!! \n{}\n{}".format(
-                count, param.shape, param, param_ref)
+        for param, param_ref in zip(model.parameters(), model_ref.parameters()):
+            assert (param != param_ref).any(), "param {} with shape {} not updated!! \n{}\n{}".format(
+                count, param.shape, param, param_ref
+            )
             count += 1
diff --git a/tests/test_layers.py b/tests/test_layers.py
index 9224c673..9b89e645 100644
--- a/tests/test_layers.py
+++ b/tests/test_layers.py
@@ -10,7 +10,7 @@ from TTS.tts.utils.generic_utils import sequence_mask
 
 
 class PrenetTests(unittest.TestCase):
-    def test_in_out(self):  #pylint: disable=no-self-use
+    def test_in_out(self):  # pylint: disable=no-self-use
         layer = Prenet(128, out_features=[256, 128])
         dummy_input = T.rand(4, 128)
 
@@ -22,7 +22,7 @@ class PrenetTests(unittest.TestCase):
 
 class CBHGTests(unittest.TestCase):
     def test_in_out(self):
-        #pylint: disable=attribute-defined-outside-init
+        # pylint: disable=attribute-defined-outside-init
         layer = self.cbhg = CBHG(
             128,
             K=8,
@@ -30,7 +30,8 @@ class CBHGTests(unittest.TestCase):
             conv_projections=[160, 128],
             highway_features=80,
             gru_features=80,
-            num_highways=4)
+            num_highways=4,
+        )
         # B x D x T
         dummy_input = T.rand(4, 128, 8)
 
@@ -53,26 +54,27 @@ class DecoderTests(unittest.TestCase):
             attn_norm="sigmoid",
             attn_K=5,
             attn_type="original",
-            prenet_type='original',
+            prenet_type="original",
             prenet_dropout=True,
             forward_attn=True,
             trans_agent=True,
             forward_attn_mask=True,
             location_attn=True,
-            separate_stopnet=True)
+            separate_stopnet=True,
+        )
         dummy_input = T.rand(4, 8, 256)
         dummy_memory = T.rand(4, 2, 80)
 
-        output, alignment, stop_tokens = layer(
-            dummy_input, dummy_memory, mask=None)
+        output, alignment, stop_tokens = layer(dummy_input, dummy_memory, mask=None)
 
         assert output.shape[0] == 4
         assert output.shape[1] == 80, "size not {}".format(output.shape[1])
         assert output.shape[2] == 2, "size not {}".format(output.shape[2])
         assert stop_tokens.shape[0] == 4
 
+
 class EncoderTests(unittest.TestCase):
-    def test_in_out(self):  #pylint: disable=no-self-use
+    def test_in_out(self):  # pylint: disable=no-self-use
         layer = Encoder(128)
         dummy_input = T.rand(4, 8, 128)
 
@@ -85,7 +87,7 @@ class EncoderTests(unittest.TestCase):
 
 
 class L1LossMaskedTests(unittest.TestCase):
-    def test_in_out(self):  #pylint: disable=no-self-use
+    def test_in_out(self):  # pylint: disable=no-self-use
         # test input == target
         layer = L1LossMasked(seq_len_norm=False)
         dummy_input = T.ones(4, 8, 128).float()
@@ -105,16 +107,14 @@ class L1LossMaskedTests(unittest.TestCase):
         dummy_input = T.ones(4, 8, 128).float()
         dummy_target = T.zeros(4, 8, 128).float()
         dummy_length = (T.arange(5, 9)).long()
-        mask = (
-            (sequence_mask(dummy_length).float() - 1.0) * 100.0).unsqueeze(2)
+        mask = ((sequence_mask(dummy_length).float() - 1.0) * 100.0).unsqueeze(2)
         output = layer(dummy_input + mask, dummy_target, dummy_length)
         assert output.item() == 1.0, "1.0 vs {}".format(output.item())
 
         dummy_input = T.rand(4, 8, 128).float()
         dummy_target = dummy_input.detach()
         dummy_length = (T.arange(5, 9)).long()
-        mask = (
-            (sequence_mask(dummy_length).float() - 1.0) * 100.0).unsqueeze(2)
+        mask = ((sequence_mask(dummy_length).float() - 1.0) * 100.0).unsqueeze(2)
         output = layer(dummy_input + mask, dummy_target, dummy_length)
         assert output.item() == 0, "0 vs {}".format(output.item())
 
@@ -138,22 +138,20 @@ class L1LossMaskedTests(unittest.TestCase):
         dummy_input = T.ones(4, 8, 128).float()
         dummy_target = T.zeros(4, 8, 128).float()
         dummy_length = (T.arange(5, 9)).long()
-        mask = (
-            (sequence_mask(dummy_length).float() - 1.0) * 100.0).unsqueeze(2)
+        mask = ((sequence_mask(dummy_length).float() - 1.0) * 100.0).unsqueeze(2)
         output = layer(dummy_input + mask, dummy_target, dummy_length)
         assert abs(output.item() - 1.0) < 1e-5, "1.0 vs {}".format(output.item())
 
         dummy_input = T.rand(4, 8, 128).float()
         dummy_target = dummy_input.detach()
         dummy_length = (T.arange(5, 9)).long()
-        mask = (
-            (sequence_mask(dummy_length).float() - 1.0) * 100.0).unsqueeze(2)
+        mask = ((sequence_mask(dummy_length).float() - 1.0) * 100.0).unsqueeze(2)
         output = layer(dummy_input + mask, dummy_target, dummy_length)
         assert output.item() == 0, "0 vs {}".format(output.item())
 
 
 class SSIMLossTests(unittest.TestCase):
-    def test_in_out(self):  #pylint: disable=no-self-use
+    def test_in_out(self):  # pylint: disable=no-self-use
         # test input == target
         layer = SSIMLoss()
         dummy_input = T.ones(4, 8, 128).float()
@@ -173,16 +171,14 @@ class SSIMLossTests(unittest.TestCase):
         dummy_input = T.ones(4, 8, 128).float()
         dummy_target = T.zeros(4, 8, 128).float()
         dummy_length = (T.arange(5, 9)).long()
-        mask = (
-            (sequence_mask(dummy_length).float() - 1.0) * 100.0).unsqueeze(2)
+        mask = ((sequence_mask(dummy_length).float() - 1.0) * 100.0).unsqueeze(2)
         output = layer(dummy_input + mask, dummy_target, dummy_length)
         assert abs(output.item() - 1.0) < 1e-4, "1.0 vs {}".format(output.item())
 
         dummy_input = T.rand(4, 8, 128).float()
         dummy_target = dummy_input.detach()
         dummy_length = (T.arange(5, 9)).long()
-        mask = (
-            (sequence_mask(dummy_length).float() - 1.0) * 100.0).unsqueeze(2)
+        mask = ((sequence_mask(dummy_length).float() - 1.0) * 100.0).unsqueeze(2)
         output = layer(dummy_input + mask, dummy_target, dummy_length)
         assert output.item() == 0, "0 vs {}".format(output.item())
 
@@ -206,15 +202,13 @@ class SSIMLossTests(unittest.TestCase):
         dummy_input = T.ones(4, 8, 128).float()
         dummy_target = T.zeros(4, 8, 128).float()
         dummy_length = (T.arange(5, 9)).long()
-        mask = (
-            (sequence_mask(dummy_length).float() - 1.0) * 100.0).unsqueeze(2)
+        mask = ((sequence_mask(dummy_length).float() - 1.0) * 100.0).unsqueeze(2)
         output = layer(dummy_input + mask, dummy_target, dummy_length)
         assert abs(output.item() - 1.0) < 1e-5, "1.0 vs {}".format(output.item())
 
         dummy_input = T.rand(4, 8, 128).float()
         dummy_target = dummy_input.detach()
         dummy_length = (T.arange(5, 9)).long()
-        mask = (
-            (sequence_mask(dummy_length).float() - 1.0) * 100.0).unsqueeze(2)
+        mask = ((sequence_mask(dummy_length).float() - 1.0) * 100.0).unsqueeze(2)
         output = layer(dummy_input + mask, dummy_target, dummy_length)
         assert output.item() == 0, "0 vs {}".format(output.item())
diff --git a/tests/test_loader.py b/tests/test_loader.py
index e711cc03..6174865b 100644
--- a/tests/test_loader.py
+++ b/tests/test_loader.py
@@ -12,11 +12,11 @@ from TTS.tts.datasets.preprocess import ljspeech
 from TTS.utils.audio import AudioProcessor
 from TTS.utils.io import load_config
 
-#pylint: disable=unused-variable
+# pylint: disable=unused-variable
 
 OUTPATH = os.path.join(get_tests_output_path(), "loader_tests/")
 os.makedirs(OUTPATH, exist_ok=True)
 
-c = load_config(os.path.join(get_tests_input_path(), 'test_config.json'))
+c = load_config(os.path.join(get_tests_input_path(), "test_config.json"))
 ok_ljspeech = os.path.exists(c.data_path)
 
 DATA_EXIST = True
@@ -33,25 +33,27 @@ class TestTTSDataset(unittest.TestCase):
         self.ap = AudioProcessor(**c.audio)
 
     def _create_dataloader(self, batch_size, r, bgs):
-        items = ljspeech(c.data_path, 'metadata.csv')
+        items = ljspeech(c.data_path, "metadata.csv")
         dataset = TTSDataset.MyDataset(
             r,
             c.text_cleaner,
             compute_linear_spec=True,
             ap=self.ap,
             meta_data=items,
-            tp=c.characters if 'characters' in c.keys() else None,
+            tp=c.characters if "characters" in c.keys() else None,
             batch_group_size=bgs,
             min_seq_len=c.min_seq_len,
             max_seq_len=float("inf"),
-            use_phonemes=False)
+            use_phonemes=False,
+        )
         dataloader = DataLoader(
             dataset,
             batch_size=batch_size,
             shuffle=False,
             collate_fn=dataset.collate_fn,
             drop_last=True,
-            num_workers=c.num_loader_workers)
+            num_workers=c.num_loader_workers,
+        )
         return dataloader, dataset
 
     def test_loader(self):
@@ -72,18 +74,17 @@ class TestTTSDataset(unittest.TestCase):
 
                 neg_values = text_input[text_input < 0]
                 check_count = len(neg_values)
-                assert check_count == 0, \
-                    " !! Negative values in text_input: {}".format(check_count)
+                assert check_count == 0, " !! Negative values in text_input: {}".format(check_count)
                 # TODO: more assertion here
                 assert isinstance(speaker_name[0], str)
                 assert linear_input.shape[0] == c.batch_size
                 assert linear_input.shape[2] == self.ap.fft_size // 2 + 1
                 assert mel_input.shape[0] == c.batch_size
-                assert mel_input.shape[2] == c.audio['num_mels']
+                assert mel_input.shape[2] == c.audio["num_mels"]
                 # check normalization ranges
                 if self.ap.symmetric_norm:
                     assert mel_input.max() <= self.ap.max_norm
-                    assert mel_input.min() >= -self.ap.max_norm  #pylint: disable=invalid-unary-operand-type
+                    assert mel_input.min() >= -self.ap.max_norm  # pylint: disable=invalid-unary-operand-type
                     assert mel_input.min() < 0
                 else:
                     assert mel_input.max() <= self.ap.max_norm
@@ -134,7 +135,7 @@ class TestTTSDataset(unittest.TestCase):
 
                 # check mel_spec consistency
                 wav = np.asarray(self.ap.load_wav(item_idx[0]), dtype=np.float32)
-                mel = self.ap.melspectrogram(wav).astype('float32')
+                mel = self.ap.melspectrogram(wav).astype("float32")
                 mel = torch.FloatTensor(mel).contiguous()
                 mel_dl = mel_input[0]
                 # NOTE: Below needs to check == 0 but due to an unknown reason
@@ -145,15 +146,14 @@ class TestTTSDataset(unittest.TestCase):
                 # check mel-spec correctness
                 mel_spec = mel_input[0].cpu().numpy()
                 wav = self.ap.inv_melspectrogram(mel_spec.T)
-                self.ap.save_wav(wav, OUTPATH + '/mel_inv_dataloader.wav')
-                shutil.copy(item_idx[0], OUTPATH + '/mel_target_dataloader.wav')
+                self.ap.save_wav(wav, OUTPATH + "/mel_inv_dataloader.wav")
+                shutil.copy(item_idx[0], OUTPATH + "/mel_target_dataloader.wav")
 
                 # check linear-spec
                 linear_spec = linear_input[0].cpu().numpy()
                 wav = self.ap.inv_spectrogram(linear_spec.T)
-                self.ap.save_wav(wav, OUTPATH + '/linear_inv_dataloader.wav')
-                shutil.copy(item_idx[0],
-                            OUTPATH + '/linear_target_dataloader.wav')
+                self.ap.save_wav(wav, OUTPATH + "/linear_inv_dataloader.wav")
+                shutil.copy(item_idx[0], OUTPATH + "/linear_target_dataloader.wav")
 
                 # check the last time step to be zero padded
                 assert linear_input[0, -1].sum() != 0
@@ -202,8 +202,8 @@ class TestTTSDataset(unittest.TestCase):
                 # check the second item in the batch
                 assert linear_input[1 - idx, -1].sum() == 0
                 assert mel_input[1 - idx, -1].sum() == 0
-                assert stop_target[1, mel_lengths[1]-1] == 1
-                assert stop_target[1, mel_lengths[1]:].sum() == 0
+                assert stop_target[1, mel_lengths[1] - 1] == 1
+                assert stop_target[1, mel_lengths[1] :].sum() == 0
                 assert len(mel_lengths.shape) == 1
 
                 # check batch zero-frame conditions (zero-frame disabled)
diff --git a/tests/test_preprocessors.py b/tests/test_preprocessors.py
index c120018d..968e2a29 100644
--- a/tests/test_preprocessors.py
+++ b/tests/test_preprocessors.py
@@ -6,12 +6,11 @@ from TTS.tts.datasets.preprocess import common_voice
 
 
 class TestPreprocessors(unittest.TestCase):
-
-    def test_common_voice_preprocessor(self):  #pylint: disable=no-self-use
+    def test_common_voice_preprocessor(self):  # pylint: disable=no-self-use
         root_path = get_tests_input_path()
         meta_file = "common_voice.tsv"
         items = common_voice(root_path, meta_file)
-        assert items[0][0] == 'The applicants are invited for coffee and visa is given immediately.'
+        assert items[0][0] == "The applicants are invited for coffee and visa is given immediately."
         assert items[0][1] == os.path.join(get_tests_input_path(), "clips", "common_voice_en_20005954.wav")
 
         assert items[-1][0] == "Competition for limited resources has also resulted in some local conflicts."
diff --git a/tests/test_speaker_encoder.py b/tests/test_speaker_encoder.py
index 77f3b54c..32ba2924 100644
--- a/tests/test_speaker_encoder.py
+++ b/tests/test_speaker_encoder.py
@@ -17,9 +17,7 @@ class SpeakerEncoderTests(unittest.TestCase):
     def test_in_out(self):
         dummy_input = T.rand(4, 20, 80)  # B x T x D
         dummy_hidden = [T.rand(2, 4, 128), T.rand(2, 4, 128)]
-        model = SpeakerEncoder(
-            input_dim=80, proj_dim=256, lstm_dim=768, num_lstm_layers=3
-        )
+        model = SpeakerEncoder(input_dim=80, proj_dim=256, lstm_dim=768, num_lstm_layers=3)
         # computing d vectors
         output = model.forward(dummy_input)
         assert output.shape[0] == 4
@@ -36,9 +34,7 @@ class SpeakerEncoderTests(unittest.TestCase):
         output_norm = T.nn.functional.normalize(output, dim=1, p=2)
         assert_diff = (output_norm - output).sum().item()
         assert output.type() == "torch.FloatTensor"
-        assert (
-            abs(assert_diff) < 1e-4
-        ), f" [!] output_norm has wrong values - {assert_diff}"
+        assert abs(assert_diff) < 1e-4, f" [!] output_norm has wrong values - {assert_diff}"
         # compute d for a given batch
         dummy_input = T.rand(1, 240, 80)  # B x T x D
         output = model.compute_embedding(dummy_input, num_frames=160, overlap=0.5)
@@ -74,6 +70,7 @@ class GE2ELossTests(unittest.TestCase):
         output = loss.forward(dummy_input)
         assert output.item() < 0.005
 
+
 class AngleProtoLossTests(unittest.TestCase):
     # pylint: disable=R0201
     def test_in_out(self):
@@ -103,6 +100,7 @@ class AngleProtoLossTests(unittest.TestCase):
         output = loss.forward(dummy_input)
         assert output.item() < 0.005
 
+
 # class LoaderTest(unittest.TestCase):
 #     def test_output(self):
 #         items = libri_tts("/home/erogol/Data/Libri-TTS/train-clean-360/")
diff --git a/tests/test_speedy_speech_layers.py b/tests/test_speedy_speech_layers.py
index 51a2450a..3473769b 100644
--- a/tests/test_speedy_speech_layers.py
+++ b/tests/test_speedy_speech_layers.py
@@ -10,11 +10,10 @@ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 def test_duration_predictor():
     input_dummy = torch.rand(8, 128, 27).to(device)
-    input_lengths = torch.randint(20, 27, (8, )).long().to(device)
+    input_lengths = torch.randint(20, 27, (8,)).long().to(device)
     input_lengths[-1] = 27
 
-    x_mask = torch.unsqueeze(sequence_mask(input_lengths, input_dummy.size(2)),
-                             1).to(device)
+    x_mask = torch.unsqueeze(sequence_mask(input_lengths, input_dummy.size(2)), 1).to(device)
 
     layer = DurationPredictor(hidden_channels=128).to(device)
 
@@ -29,7 +28,7 @@ def test_speedy_speech():
     T_de = 74
 
     x_dummy = torch.randint(0, 7, (B, T_en)).long().to(device)
-    x_lengths = torch.randint(31, T_en, (B, )).long().to(device)
+    x_lengths = torch.randint(31, T_en, (B,)).long().to(device)
     x_lengths[-1] = T_en
 
     # set durations. max total duration should be equal to T_de
@@ -53,34 +52,18 @@ def test_speedy_speech():
     assert list(o_dr.shape) == [B, T_en]
 
     # with speaker embedding
-    model = SpeedySpeech(num_chars,
-                         out_channels=80,
-                         hidden_channels=128,
-                         num_speakers=10,
-                         c_in_channels=256).to(device)
-    model.forward(x_dummy,
-                  x_lengths,
-                  y_lengths,
-                  durations,
-                  g=torch.randint(0, 10, (B,)).to(device))
+    model = SpeedySpeech(num_chars, out_channels=80, hidden_channels=128, num_speakers=10, c_in_channels=256).to(device)
+    model.forward(x_dummy, x_lengths, y_lengths, durations, g=torch.randint(0, 10, (B,)).to(device))
 
     assert list(o_de.shape) == [B, 80, T_de], f"{list(o_de.shape)}"
     assert list(attn.shape) == [B, T_de, T_en]
     assert list(o_dr.shape) == [B, T_en]
 
-    # with speaker external embedding
-    model = SpeedySpeech(num_chars,
-                         out_channels=80,
-                         hidden_channels=128,
-                         num_speakers=10,
-                         external_c=True,
-                         c_in_channels=256).to(device)
-    model.forward(x_dummy,
-                  x_lengths,
-                  y_lengths,
-                  durations,
-                  g=torch.rand((B, 256)).to(device))
+    model = SpeedySpeech(
+        num_chars, out_channels=80, hidden_channels=128, num_speakers=10, external_c=True, c_in_channels=256
+    ).to(device)
+    model.forward(x_dummy, x_lengths, y_lengths, durations, g=torch.rand((B, 256)).to(device))
 
     assert list(o_de.shape) == [B, 80, T_de], f"{list(o_de.shape)}"
     assert list(attn.shape) == [B, T_de, T_en]
diff --git a/tests/test_symbols.py b/tests/test_symbols.py
index 0c24f124..49b25986 100644
--- a/tests/test_symbols.py
+++ b/tests/test_symbols.py
@@ -4,5 +4,5 @@ from TTS.tts.utils.text import phonemes
 
 
 class SymbolsTest(unittest.TestCase):
-    def test_uniqueness(self):  #pylint: disable=no-self-use
+    def test_uniqueness(self):  # pylint: disable=no-self-use
         assert sorted(phonemes) == sorted(list(set(phonemes))), " {} vs {} ".format(len(phonemes), len(set(phonemes)))
diff --git a/tests/test_synthesizer.py b/tests/test_synthesizer.py
index 1c2c23b2..46b9ab74 100644
--- a/tests/test_synthesizer.py
+++ b/tests/test_synthesizer.py
@@ -14,8 +14,8 @@ class SynthesizerTest(unittest.TestCase):
     def _create_random_model(self):
         # pylint: disable=global-statement
         global symbols, phonemes
-        config = load_config(os.path.join(get_tests_output_path(), 'dummy_model_config.json'))
-        if 'characters' in config.keys():
+        config = load_config(os.path.join(get_tests_output_path(), "dummy_model_config.json"))
+        if "characters" in config.keys():
             symbols, phonemes = make_symbols(**config.characters)
 
         num_chars = len(phonemes) if config.use_phonemes else len(symbols)
@@ -25,11 +25,11 @@ class SynthesizerTest(unittest.TestCase):
 
     def test_in_out(self):
         self._create_random_model()
-        config = load_config(os.path.join(get_tests_input_path(), 'server_config.json'))
+        config = load_config(os.path.join(get_tests_input_path(), "server_config.json"))
         tts_root_path = get_tests_output_path()
-        config['tts_checkpoint'] = os.path.join(tts_root_path, config['tts_checkpoint'])
-        config['tts_config'] = os.path.join(tts_root_path, config['tts_config'])
-        synthesizer = Synthesizer(config['tts_checkpoint'], config['tts_config'], None, None)
+        config["tts_checkpoint"] = os.path.join(tts_root_path, config["tts_checkpoint"])
+        config["tts_config"] = os.path.join(tts_root_path, config["tts_config"])
+        synthesizer = Synthesizer(config["tts_checkpoint"], config["tts_config"], None, None)
         synthesizer.tts("Better this test works!!")
 
     def test_split_into_sentences(self):
@@ -38,20 +38,48 @@ class SynthesizerTest(unittest.TestCase):
         # pylint: disable=attribute-defined-outside-init
         self.seg = Synthesizer.get_segmenter("en")
         sis = Synthesizer.split_into_sentences
-        assert sis(self, 'Hello. Two sentences') == ['Hello.', 'Two sentences']
-        assert sis(self, 'He went to meet the adviser from Scott, Waltman & Co. next morning.') == ['He went to meet the adviser from Scott, Waltman & Co. next morning.']
-        assert sis(self, 'Let\'s run it past Sarah and co. They\'ll want to see this.') == ['Let\'s run it past Sarah and co.', 'They\'ll want to see this.']
-        assert sis(self, 'Where is Bobby Jr.\'s rabbit?') == ['Where is Bobby Jr.\'s rabbit?']
-        assert sis(self, 'Please inform the U.K. authorities right away.') == ['Please inform the U.K. authorities right away.']
-        assert sis(self, 'Were David and co. at the event?') == ['Were David and co. at the event?']
-        assert sis(self, 'paging dr. green, please come to theatre four immediately.') == ['paging dr. green, please come to theatre four immediately.']
-        assert sis(self, 'The email format is Firstname.Lastname@example.com. I think you reversed them.') == ['The email format is Firstname.Lastname@example.com.', 'I think you reversed them.']
-        assert sis(self, 'The demo site is: https://top100.example.com/subsection/latestnews.html. Please send us your feedback.') == ['The demo site is: https://top100.example.com/subsection/latestnews.html.', 'Please send us your feedback.']
-        assert sis(self, 'Scowling at him, \'You are not done yet!\' she yelled.') == ['Scowling at him, \'You are not done yet!\' she yelled.']  # with the final lowercase "she" we see it's all one sentence
-        assert sis(self, 'Hey!! So good to see you.') == ['Hey!!', 'So good to see you.']
-        assert sis(self, 'He went to Yahoo! but I don\'t know the division.') == ['He went to Yahoo! but I don\'t know the division.']
-        assert sis(self, 'If you can\'t remember a quote, “at least make up a memorable one that\'s plausible..."') == ['If you can\'t remember a quote, “at least make up a memorable one that\'s plausible..."']
-        assert sis(self, 'The address is not google.com.') == ['The address is not google.com.']
-        assert sis(self, '1.) The first item 2.) The second item') == ['1.) The first item', '2.) The second item']
-        assert sis(self, '1) The first item 2) The second item') == ['1) The first item', '2) The second item']
-        assert sis(self, 'a. The first item b. The second item c. The third list item') == ['a. The first item', 'b. The second item', 'c. The third list item']
+        assert sis(self, "Hello. Two sentences") == ["Hello.", "Two sentences"]
+        assert sis(self, "He went to meet the adviser from Scott, Waltman & Co. next morning.") == [
+            "He went to meet the adviser from Scott, Waltman & Co. next morning."
+        ]
+        assert sis(self, "Let's run it past Sarah and co. They'll want to see this.") == [
+            "Let's run it past Sarah and co.",
+            "They'll want to see this.",
+        ]
+        assert sis(self, "Where is Bobby Jr.'s rabbit?") == ["Where is Bobby Jr.'s rabbit?"]
+        assert sis(self, "Please inform the U.K. authorities right away.") == [
+            "Please inform the U.K. authorities right away."
+        ]
+        assert sis(self, "Were David and co. at the event?") == ["Were David and co. at the event?"]
+        assert sis(self, "paging dr. green, please come to theatre four immediately.") == [
+            "paging dr. green, please come to theatre four immediately."
+        ]
+        assert sis(self, "The email format is Firstname.Lastname@example.com. I think you reversed them.") == [
+            "The email format is Firstname.Lastname@example.com.",
+            "I think you reversed them.",
+        ]
+        assert sis(
+            self,
+            "The demo site is: https://top100.example.com/subsection/latestnews.html. Please send us your feedback.",
+        ) == [
+            "The demo site is: https://top100.example.com/subsection/latestnews.html.",
+            "Please send us your feedback.",
+        ]
+        assert sis(self, "Scowling at him, 'You are not done yet!' she yelled.") == [
+            "Scowling at him, 'You are not done yet!' she yelled."
+        ]  # with the final lowercase "she" we see it's all one sentence
+        assert sis(self, "Hey!! So good to see you.") == ["Hey!!", "So good to see you."]
+        assert sis(self, "He went to Yahoo! but I don't know the division.") == [
+            "He went to Yahoo! but I don't know the division."
+        ]
+        assert sis(self, "If you can't remember a quote, “at least make up a memorable one that's plausible...\"") == [
+            "If you can't remember a quote, “at least make up a memorable one that's plausible...\""
+        ]
+        assert sis(self, "The address is not google.com.") == ["The address is not google.com."]
+        assert sis(self, "1.) The first item 2.) The second item") == ["1.) The first item", "2.) The second item"]
+        assert sis(self, "1) The first item 2) The second item") == ["1) The first item", "2) The second item"]
+        assert sis(self, "a. The first item b. The second item c. The third list item") == [
+            "a. The first item",
+            "b. The second item",
+            "c. The third list item",
+        ]
diff --git a/tests/test_tacotron2_model.py b/tests/test_tacotron2_model.py
index fb811eaa..0e35605f 100644
--- a/tests/test_tacotron2_model.py
+++ b/tests/test_tacotron2_model.py
@@ -11,13 +11,13 @@ from TTS.tts.models.tacotron2 import Tacotron2
 from TTS.utils.audio import AudioProcessor
 from TTS.utils.io import load_config
 
-#pylint: disable=unused-variable
+# pylint: disable=unused-variable
 
 torch.manual_seed(1)
 use_cuda = torch.cuda.is_available()
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
-c = load_config(os.path.join(get_tests_input_path(), 'test_config.json'))
+c = load_config(os.path.join(get_tests_input_path(), "test_config.json"))
 
 ap = AudioProcessor(**c.audio)
 WAV_FILE = os.path.join(get_tests_input_path(), "example_1.wav")
@@ -26,20 +26,19 @@ WAV_FILE = os.path.join(get_tests_input_path(), "example_1.wav")
 class TacotronTrainTest(unittest.TestCase):
     def test_train_step(self):  # pylint: disable=no-self-use
         input_dummy = torch.randint(0, 24, (8, 128)).long().to(device)
-        input_lengths = torch.randint(100, 128, (8, )).long().to(device)
+        input_lengths = torch.randint(100, 128, (8,)).long().to(device)
         input_lengths = torch.sort(input_lengths, descending=True)[0]
-        mel_spec = torch.rand(8, 30, c.audio['num_mels']).to(device)
-        mel_postnet_spec = torch.rand(8, 30, c.audio['num_mels']).to(device)
-        mel_lengths = torch.randint(20, 30, (8, )).long().to(device)
+        mel_spec = torch.rand(8, 30, c.audio["num_mels"]).to(device)
+        mel_postnet_spec = torch.rand(8, 30, c.audio["num_mels"]).to(device)
+        mel_lengths = torch.randint(20, 30, (8,)).long().to(device)
         mel_lengths[0] = 30
         stop_targets = torch.zeros(8, 30, 1).float().to(device)
-        speaker_ids = torch.randint(0, 5, (8, )).long().to(device)
+        speaker_ids = torch.randint(0, 5, (8,)).long().to(device)
 
         for idx in mel_lengths:
-            stop_targets[:, int(idx.item()):, 0] = 1.0
+            stop_targets[:, int(idx.item()) :, 0] = 1.0
 
-        stop_targets = stop_targets.view(input_dummy.shape[0],
-                                         stop_targets.size(1) // c.r, -1)
+        stop_targets = stop_targets.view(input_dummy.shape[0], stop_targets.size(1) // c.r, -1)
         stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze()
 
         criterion = MSELossMasked(seq_len_norm=False).to(device)
@@ -48,14 +47,14 @@ class TacotronTrainTest(unittest.TestCase):
         model.train()
         model_ref = copy.deepcopy(model)
         count = 0
-        for param, param_ref in zip(model.parameters(),
-                                    model_ref.parameters()):
+        for param, param_ref in zip(model.parameters(), model_ref.parameters()):
             assert (param - param_ref).sum() == 0, param
             count += 1
         optimizer = optim.Adam(model.parameters(), lr=c.lr)
         for i in range(5):
             mel_out, mel_postnet_out, align, stop_tokens = model.forward(
-                input_dummy, input_lengths, mel_spec, mel_lengths, speaker_ids)
+                input_dummy, input_lengths, mel_spec, mel_lengths, speaker_ids
+            )
             assert torch.sigmoid(stop_tokens).data.max() <= 1.0
             assert torch.sigmoid(stop_tokens).data.min() >= 0.0
             optimizer.zero_grad()
@@ -66,13 +65,12 @@ class TacotronTrainTest(unittest.TestCase):
             optimizer.step()
         # check parameter changes
         count = 0
-        for param, param_ref in zip(model.parameters(),
-                                    model_ref.parameters()):
+        for param, param_ref in zip(model.parameters(), model_ref.parameters()):
             # ignore pre-highway layer since it works conditional
             # if count not in [145, 59]:
-            assert (param != param_ref).any(
-            ), "param {} with shape {} not updated!! \n{}\n{}".format(
-                count, param.shape, param, param_ref)
+            assert (param != param_ref).any(), "param {} with shape {} not updated!! \n{}\n{}".format(
+                count, param.shape, param, param_ref
+            )
             count += 1
 
 
@@ -80,20 +78,19 @@ class MultiSpeakeTacotronTrainTest(unittest.TestCase):
     @staticmethod
     def test_train_step():
         input_dummy = torch.randint(0, 24, (8, 128)).long().to(device)
-        input_lengths = torch.randint(100, 128, (8, )).long().to(device)
+        input_lengths = torch.randint(100, 128, (8,)).long().to(device)
         input_lengths = torch.sort(input_lengths, descending=True)[0]
-        mel_spec = torch.rand(8, 30, c.audio['num_mels']).to(device)
-        mel_postnet_spec = torch.rand(8, 30, c.audio['num_mels']).to(device)
-        mel_lengths = torch.randint(20, 30, (8, )).long().to(device)
+        mel_spec = torch.rand(8, 30, c.audio["num_mels"]).to(device)
+        mel_postnet_spec = torch.rand(8, 30, c.audio["num_mels"]).to(device)
+        mel_lengths = torch.randint(20, 30, (8,)).long().to(device)
         mel_lengths[0] = 30
         stop_targets = torch.zeros(8, 30, 1).float().to(device)
         speaker_embeddings = torch.rand(8, 55).to(device)
 
         for idx in mel_lengths:
-            stop_targets[:, int(idx.item()):, 0] = 1.0
+            stop_targets[:, int(idx.item()) :, 0] = 1.0
 
-        stop_targets = stop_targets.view(input_dummy.shape[0],
-                                         stop_targets.size(1) // c.r, -1)
+        stop_targets = stop_targets.view(input_dummy.shape[0], stop_targets.size(1) // c.r, -1)
         stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze()
 
         criterion = MSELossMasked(seq_len_norm=False).to(device)
@@ -102,14 +99,14 @@ class MultiSpeakeTacotronTrainTest(unittest.TestCase):
         model.train()
         model_ref = copy.deepcopy(model)
         count = 0
-        for param, param_ref in zip(model.parameters(),
-                                    model_ref.parameters()):
+        for param, param_ref in zip(model.parameters(), model_ref.parameters()):
             assert (param - param_ref).sum() == 0, param
             count += 1
         optimizer = optim.Adam(model.parameters(), lr=c.lr)
         for i in range(5):
             mel_out, mel_postnet_out, align, stop_tokens = model.forward(
-                input_dummy, input_lengths, mel_spec, mel_lengths, speaker_embeddings=speaker_embeddings)
+                input_dummy, input_lengths, mel_spec, mel_lengths, speaker_embeddings=speaker_embeddings
+            )
             assert torch.sigmoid(stop_tokens).data.max() <= 1.0
             assert torch.sigmoid(stop_tokens).data.min() >= 0.0
             optimizer.zero_grad()
@@ -120,39 +117,46 @@ class MultiSpeakeTacotronTrainTest(unittest.TestCase):
             optimizer.step()
         # check parameter changes
         count = 0
-        for param, param_ref in zip(model.parameters(),
-                                    model_ref.parameters()):
+        for param, param_ref in zip(model.parameters(), model_ref.parameters()):
             # ignore pre-highway layer since it works conditional
             # if count not in [145, 59]:
-            assert (param != param_ref).any(
-            ), "param {} with shape {} not updated!! \n{}\n{}".format(
-                count, param.shape, param, param_ref)
+            assert (param != param_ref).any(), "param {} with shape {} not updated!! \n{}\n{}".format(
+                count, param.shape, param, param_ref
+            )
             count += 1
 
+
 class TacotronGSTTrainTest(unittest.TestCase):
-    #pylint: disable=no-self-use
+    # pylint: disable=no-self-use
     def test_train_step(self):
         # with random gst mel style
         input_dummy = torch.randint(0, 24, (8, 128)).long().to(device)
-        input_lengths = torch.randint(100, 128, (8, )).long().to(device)
+        input_lengths = torch.randint(100, 128, (8,)).long().to(device)
         input_lengths = torch.sort(input_lengths, descending=True)[0]
-        mel_spec = torch.rand(8, 30, c.audio['num_mels']).to(device)
-        mel_postnet_spec = torch.rand(8, 30, c.audio['num_mels']).to(device)
-        mel_lengths = torch.randint(20, 30, (8, )).long().to(device)
+        mel_spec = torch.rand(8, 30, c.audio["num_mels"]).to(device)
+        mel_postnet_spec = torch.rand(8, 30, c.audio["num_mels"]).to(device)
+        mel_lengths = torch.randint(20, 30, (8,)).long().to(device)
         mel_lengths[0] = 30
         stop_targets = torch.zeros(8, 30, 1).float().to(device)
-        speaker_ids = torch.randint(0, 5, (8, )).long().to(device)
+        speaker_ids = torch.randint(0, 5, (8,)).long().to(device)
 
         for idx in mel_lengths:
-            stop_targets[:, int(idx.item()):, 0] = 1.0
+            stop_targets[:, int(idx.item()) :, 0] = 1.0
 
-        stop_targets = stop_targets.view(input_dummy.shape[0],
-                                         stop_targets.size(1) // c.r, -1)
+        stop_targets = stop_targets.view(input_dummy.shape[0], stop_targets.size(1) // c.r, -1)
         stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze()
 
         criterion = MSELossMasked(seq_len_norm=False).to(device)
         criterion_st = nn.BCEWithLogitsLoss().to(device)
-        model = Tacotron2(num_chars=24, r=c.r, num_speakers=5, gst=True, gst_embedding_dim=c.gst['gst_embedding_dim'], gst_num_heads=c.gst['gst_num_heads'], gst_style_tokens=c.gst['gst_style_tokens']).to(device)
+        model = Tacotron2(
+            num_chars=24,
+            r=c.r,
+            num_speakers=5,
+            gst=True,
+            gst_embedding_dim=c.gst["gst_embedding_dim"],
+            gst_num_heads=c.gst["gst_num_heads"],
+            gst_style_tokens=c.gst["gst_style_tokens"],
+        ).to(device)
         model.train()
         model_ref = copy.deepcopy(model)
         count = 0
@@ -162,7 +166,8 @@
         optimizer = optim.Adam(model.parameters(), lr=c.lr)
         for i in range(10):
             mel_out, mel_postnet_out, align, stop_tokens = model.forward(
-                input_dummy, input_lengths, mel_spec, mel_lengths, speaker_ids)
+                input_dummy, input_lengths, mel_spec, mel_lengths, speaker_ids
+            )
             assert torch.sigmoid(stop_tokens).data.max() <= 1.0
             assert torch.sigmoid(stop_tokens).data.min() >= 0.0
             optimizer.zero_grad()
@@ -177,36 +182,45 @@
             # ignore pre-highway layer since it works conditional
             # if count not in [145, 59]:
             name, param = name_param
-            if name == 'gst_layer.encoder.recurrence.weight_hh_l0':
-                #print(param.grad)
+            if name == "gst_layer.encoder.recurrence.weight_hh_l0":
+                # print(param.grad)
                 continue
-            assert (param != param_ref).any(
-            ), "param {} {} with shape {} not updated!! \n{}\n{}".format(
-                name, count, param.shape, param, param_ref)
+            assert (param != param_ref).any(), "param {} {} with shape {} not updated!! \n{}\n{}".format(
+                name, count, param.shape, param, param_ref
+            )
             count += 1
 
         # with file gst style
-        mel_spec = torch.FloatTensor(ap.melspectrogram(ap.load_wav(WAV_FILE)))[:, :30].unsqueeze(0).transpose(1, 2).to(device)
+        mel_spec = (
+            torch.FloatTensor(ap.melspectrogram(ap.load_wav(WAV_FILE)))[:, :30].unsqueeze(0).transpose(1, 2).to(device)
+        )
         mel_spec = mel_spec.repeat(8, 1, 1)
 
         input_dummy = torch.randint(0, 24, (8, 128)).long().to(device)
-        input_lengths = torch.randint(100, 128, (8, )).long().to(device)
+        input_lengths = torch.randint(100, 128, (8,)).long().to(device)
         input_lengths = torch.sort(input_lengths, descending=True)[0]
-        mel_postnet_spec = torch.rand(8, 30, c.audio['num_mels']).to(device)
-        mel_lengths = torch.randint(20, 30, (8, )).long().to(device)
+        mel_postnet_spec = torch.rand(8, 30, c.audio["num_mels"]).to(device)
+        mel_lengths = torch.randint(20, 30, (8,)).long().to(device)
         mel_lengths[0] = 30
         stop_targets = torch.zeros(8, 30, 1).float().to(device)
-        speaker_ids = torch.randint(0, 5, (8, )).long().to(device)
+        speaker_ids = torch.randint(0, 5, (8,)).long().to(device)
 
         for idx in mel_lengths:
-            stop_targets[:, int(idx.item()):, 0] = 1.0
+            stop_targets[:, int(idx.item()) :, 0] = 1.0
 
-        stop_targets = stop_targets.view(input_dummy.shape[0],
-                                         stop_targets.size(1) // c.r, -1)
+        stop_targets = stop_targets.view(input_dummy.shape[0], stop_targets.size(1) // c.r, -1)
         stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze()
 
         criterion = MSELossMasked(seq_len_norm=False).to(device)
         criterion_st = nn.BCEWithLogitsLoss().to(device)
-        model = Tacotron2(num_chars=24, r=c.r, num_speakers=5, gst=True, gst_embedding_dim=c.gst['gst_embedding_dim'], gst_num_heads=c.gst['gst_num_heads'], gst_style_tokens=c.gst['gst_style_tokens']).to(device)
+        model = Tacotron2(
+            num_chars=24,
+            r=c.r,
+            num_speakers=5,
+            gst=True,
+            gst_embedding_dim=c.gst["gst_embedding_dim"],
+            gst_num_heads=c.gst["gst_num_heads"],
+            gst_style_tokens=c.gst["gst_style_tokens"],
+        ).to(device)
         model.train()
         model_ref = copy.deepcopy(model)
         count = 0
@@ -216,7 +230,8 @@
         optimizer = optim.Adam(model.parameters(), lr=c.lr)
         for i in range(10):
             mel_out, mel_postnet_out, align, stop_tokens = model.forward(
-                input_dummy, input_lengths, mel_spec, mel_lengths, speaker_ids)
+                input_dummy, input_lengths, mel_spec, mel_lengths, speaker_ids
+            )
             assert torch.sigmoid(stop_tokens).data.max() <= 1.0
             assert torch.sigmoid(stop_tokens).data.min() >= 0.0
             optimizer.zero_grad()
@@ -231,47 +246,57 @@
             # ignore pre-highway layer since it works conditional
             # if count not in [145, 59]:
             name, param = name_param
-            if name == 'gst_layer.encoder.recurrence.weight_hh_l0':
-                #print(param.grad)
+            if name == "gst_layer.encoder.recurrence.weight_hh_l0":
+                # print(param.grad)
                 continue
-            assert (param != param_ref).any(
-            ), "param {} {} with shape {} not updated!! \n{}\n{}".format(
-                name, count, param.shape, param, param_ref)
+            assert (param != param_ref).any(), "param {} {} with shape {} not updated!! \n{}\n{}".format(
+                name, count, param.shape, param, param_ref
+            )
             count += 1
 
+
 class SCGSTMultiSpeakeTacotronTrainTest(unittest.TestCase):
     @staticmethod
     def test_train_step():
         input_dummy = torch.randint(0, 24, (8, 128)).long().to(device)
-        input_lengths = torch.randint(100, 128, (8, )).long().to(device)
+        input_lengths = torch.randint(100, 128, (8,)).long().to(device)
         input_lengths = torch.sort(input_lengths, descending=True)[0]
-        mel_spec = torch.rand(8, 30, c.audio['num_mels']).to(device)
-        mel_postnet_spec = torch.rand(8, 30, c.audio['num_mels']).to(device)
-        mel_lengths = torch.randint(20, 30, (8, )).long().to(device)
+        mel_spec = torch.rand(8, 30, c.audio["num_mels"]).to(device)
+        mel_postnet_spec = torch.rand(8, 30, c.audio["num_mels"]).to(device)
+        mel_lengths = torch.randint(20, 30, (8,)).long().to(device)
         mel_lengths[0] = 30
         stop_targets = torch.zeros(8, 30, 1).float().to(device)
         speaker_embeddings = torch.rand(8, 55).to(device)
 
         for idx in mel_lengths:
-            stop_targets[:, int(idx.item()):, 0] = 1.0
+            stop_targets[:, int(idx.item()) :, 0] = 1.0
 
-        stop_targets = stop_targets.view(input_dummy.shape[0],
-                                         stop_targets.size(1) // c.r, -1)
+        stop_targets = stop_targets.view(input_dummy.shape[0], stop_targets.size(1) // c.r, -1)
         stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze()
 
         criterion = MSELossMasked(seq_len_norm=False).to(device)
         criterion_st = nn.BCEWithLogitsLoss().to(device)
-        model = Tacotron2(num_chars=24, r=c.r, num_speakers=5, speaker_embedding_dim=55, gst=True, gst_embedding_dim=c.gst['gst_embedding_dim'], gst_num_heads=c.gst['gst_num_heads'], gst_style_tokens=c.gst['gst_style_tokens'], gst_use_speaker_embedding=c.gst['gst_use_speaker_embedding']).to(device)
+        model = Tacotron2(
+            num_chars=24,
+            r=c.r,
+            num_speakers=5,
+            speaker_embedding_dim=55,
+            gst=True,
+            gst_embedding_dim=c.gst["gst_embedding_dim"],
+            gst_num_heads=c.gst["gst_num_heads"],
+            gst_style_tokens=c.gst["gst_style_tokens"],
+            gst_use_speaker_embedding=c.gst["gst_use_speaker_embedding"],
+        ).to(device)
         model.train()
         model_ref = copy.deepcopy(model)
         count = 0
-        for param, param_ref in zip(model.parameters(),
-                                    model_ref.parameters()):
+        for param, param_ref in zip(model.parameters(), model_ref.parameters()):
             assert (param - param_ref).sum() == 0, param
             count += 1
         optimizer = optim.Adam(model.parameters(), lr=c.lr)
         for i in range(5):
             mel_out, mel_postnet_out, align, stop_tokens = model.forward(
-                input_dummy, input_lengths, mel_spec, mel_lengths, speaker_embeddings=speaker_embeddings)
+                input_dummy, input_lengths, mel_spec, mel_lengths, speaker_embeddings=speaker_embeddings
+            )
            assert torch.sigmoid(stop_tokens).data.max() <= 1.0
             assert torch.sigmoid(stop_tokens).data.min() >= 0.0
             optimizer.zero_grad()
@@ -282,14 +307,13 @@ class SCGSTMultiSpeakeTacotronTrainTest(unittest.TestCase):
             optimizer.step()
         # check parameter changes
         count = 0
-        for name_param, param_ref in zip(model.named_parameters(),
-                                         model_ref.parameters()):
+        for name_param, param_ref in zip(model.named_parameters(), model_ref.parameters()):
             # ignore pre-highway layer since it works conditional
             # if count not in [145, 59]:
             name, param = name_param
-            if name == 'gst_layer.encoder.recurrence.weight_hh_l0':
+            if name == "gst_layer.encoder.recurrence.weight_hh_l0":
                 continue
-            assert (param != param_ref).any(
-            ), "param {} with shape {} not updated!! \n{}\n{}".format(
-                count, param.shape, param, param_ref)
+            assert (param != param_ref).any(), "param {} with shape {} not updated!! \n{}\n{}".format(
+                count, param.shape, param, param_ref
+            )
             count += 1
diff --git a/tests/test_tacotron2_tf_model.py b/tests/test_tacotron2_tf_model.py
index 084b972d..767e5ffc 100644
--- a/tests/test_tacotron2_tf_model.py
+++ b/tests/test_tacotron2_tf_model.py
@@ -10,48 +10,51 @@ from TTS.tts.tf.models.tacotron2 import Tacotron2
 from TTS.tts.tf.utils.tflite import convert_tacotron2_to_tflite, load_tflite_model
 from TTS.utils.io import load_config
 
-tf.get_logger().setLevel('INFO')
+tf.get_logger().setLevel("INFO")
 
-
-#pylint: disable=unused-variable
+# pylint: disable=unused-variable
 
 torch.manual_seed(1)
 use_cuda = torch.cuda.is_available()
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
-c = load_config(os.path.join(get_tests_input_path(), 'test_config.json'))
+c = load_config(os.path.join(get_tests_input_path(), "test_config.json"))
 
 
 class TacotronTFTrainTest(unittest.TestCase):
-
     @staticmethod
     def generate_dummy_inputs():
         chars_seq = torch.randint(0, 24, (8, 128)).long().to(device)
-        chars_seq_lengths = torch.randint(100, 128, (8, )).long().to(device)
+        chars_seq_lengths = torch.randint(100, 128, (8,)).long().to(device)
         chars_seq_lengths = torch.sort(chars_seq_lengths, descending=True)[0]
-        mel_spec = torch.rand(8, 30, c.audio['num_mels']).to(device)
-        mel_postnet_spec = torch.rand(8, 30, c.audio['num_mels']).to(device)
-        mel_lengths = torch.randint(20, 30, (8, )).long().to(device)
+        mel_spec = torch.rand(8, 30, c.audio["num_mels"]).to(device)
+        mel_postnet_spec = torch.rand(8, 30, c.audio["num_mels"]).to(device)
+        mel_lengths = torch.randint(20, 30, (8,)).long().to(device)
         stop_targets = torch.zeros(8, 30, 1).float().to(device)
-        speaker_ids = torch.randint(0, 5, (8, )).long().to(device)
+        speaker_ids = torch.randint(0, 5, (8,)).long().to(device)
 
         chars_seq = tf.convert_to_tensor(chars_seq.cpu().numpy())
         chars_seq_lengths = tf.convert_to_tensor(chars_seq_lengths.cpu().numpy())
         mel_spec = tf.convert_to_tensor(mel_spec.cpu().numpy())
-        return chars_seq, chars_seq_lengths, mel_spec, mel_postnet_spec, mel_lengths,\
-            stop_targets, speaker_ids
+        return chars_seq, chars_seq_lengths, mel_spec, mel_postnet_spec, mel_lengths, stop_targets, speaker_ids
 
     def test_train_step(self):
-        ''' test forward pass '''
-        chars_seq, chars_seq_lengths, mel_spec, mel_postnet_spec, mel_lengths,\
-            stop_targets, speaker_ids = self.generate_dummy_inputs()
+        """ test forward pass """
+        (
+            chars_seq,
+            chars_seq_lengths,
+            mel_spec,
+            mel_postnet_spec,
+            mel_lengths,
+            stop_targets,
+            speaker_ids,
+        ) = self.generate_dummy_inputs()
 
         for idx in mel_lengths:
-            stop_targets[:, int(idx.item()):, 0] = 1.0
+            stop_targets[:, int(idx.item()) :, 0] = 1.0
 
-        stop_targets = stop_targets.view(chars_seq.shape[0],
-                                         stop_targets.size(1) // c.r, -1)
+        stop_targets = stop_targets.view(chars_seq.shape[0], stop_targets.size(1) // c.r, -1)
         stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze()
 
         model = Tacotron2(num_chars=24, r=c.r, num_speakers=5)
@@ -68,15 +71,23 @@ class TacotronTFTrainTest(unittest.TestCase):
         # inference pass
         output = model(chars_seq, training=False)
 
-    def test_forward_attention(self,):
-        chars_seq, chars_seq_lengths, mel_spec, mel_postnet_spec, mel_lengths,\
-            stop_targets, speaker_ids = self.generate_dummy_inputs()
+    def test_forward_attention(
+        self,
+    ):
+        (
+            chars_seq,
+            chars_seq_lengths,
+            mel_spec,
+            mel_postnet_spec,
+            mel_lengths,
+            stop_targets,
+            speaker_ids,
+        ) = self.generate_dummy_inputs()
 
         for idx in mel_lengths:
-            stop_targets[:, int(idx.item()):, 0] = 1.0
+ stop_targets[:, int(idx.item()) :, 0] = 1.0 - stop_targets = stop_targets.view(chars_seq.shape[0], - stop_targets.size(1) // c.r, -1) + stop_targets = stop_targets.view(chars_seq.shape[0], stop_targets.size(1) // c.r, -1) stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze() model = Tacotron2(num_chars=24, r=c.r, num_speakers=5, forward_attn=True) @@ -93,45 +104,51 @@ class TacotronTFTrainTest(unittest.TestCase): # inference pass output = model(chars_seq, training=False) - def test_tflite_conversion(self, ): #pylint:disable=no-self-use - model = Tacotron2(num_chars=24, - num_speakers=0, - r=3, - postnet_output_dim=80, - decoder_output_dim=80, - attn_type='original', - attn_win=False, - attn_norm='sigmoid', - prenet_type='original', - prenet_dropout=True, - forward_attn=False, - trans_agent=False, - forward_attn_mask=False, - location_attn=True, - attn_K=0, - separate_stopnet=True, - bidirectional_decoder=False, - enable_tflite=True) + def test_tflite_conversion( + self, + ): # pylint:disable=no-self-use + model = Tacotron2( + num_chars=24, + num_speakers=0, + r=3, + postnet_output_dim=80, + decoder_output_dim=80, + attn_type="original", + attn_win=False, + attn_norm="sigmoid", + prenet_type="original", + prenet_dropout=True, + forward_attn=False, + trans_agent=False, + forward_attn_mask=False, + location_attn=True, + attn_K=0, + separate_stopnet=True, + bidirectional_decoder=False, + enable_tflite=True, + ) model.build_inference() - convert_tacotron2_to_tflite(model, output_path='test_tacotron2.tflite', experimental_converter=True) + convert_tacotron2_to_tflite(model, output_path="test_tacotron2.tflite", experimental_converter=True) # init tflite model - tflite_model = load_tflite_model('test_tacotron2.tflite') + tflite_model = load_tflite_model("test_tacotron2.tflite") # fake input - inputs = tf.random.uniform([1, 4], maxval=10, dtype=tf.int32) #pylint:disable=unexpected-keyword-arg + inputs = tf.random.uniform([1, 4], maxval=10, dtype=tf.int32) # pylint:disable=unexpected-keyword-arg # run inference # get input and output details input_details = tflite_model.get_input_details() output_details = tflite_model.get_output_details() # reshape input tensor for the new input shape - tflite_model.resize_tensor_input(input_details[0]['index'], inputs.shape) #pylint:disable=unexpected-keyword-arg + tflite_model.resize_tensor_input( + input_details[0]["index"], inputs.shape + ) # pylint:disable=unexpected-keyword-arg tflite_model.allocate_tensors() detail = input_details[0] - input_shape = detail['shape'] - tflite_model.set_tensor(detail['index'], inputs) + input_shape = detail["shape"] + tflite_model.set_tensor(detail["index"], inputs) # run the tflite_model tflite_model.invoke() # collect outputs - decoder_output = tflite_model.get_tensor(output_details[0]['index']) - postnet_output = tflite_model.get_tensor(output_details[1]['index']) + decoder_output = tflite_model.get_tensor(output_details[0]["index"]) + postnet_output = tflite_model.get_tensor(output_details[1]["index"]) # remove tflite binary - os.remove('test_tacotron2.tflite') + os.remove("test_tacotron2.tflite") diff --git a/tests/test_tacotron_model.py b/tests/test_tacotron_model.py index 0af8dab4..e3ed8ae2 100644 --- a/tests/test_tacotron_model.py +++ b/tests/test_tacotron_model.py @@ -11,13 +11,13 @@ from TTS.tts.models.tacotron import Tacotron from TTS.utils.audio import AudioProcessor from TTS.utils.io import load_config -#pylint: disable=unused-variable +# pylint: disable=unused-variable torch.manual_seed(1) 
use_cuda = torch.cuda.is_available() device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") -c = load_config(os.path.join(get_tests_input_path(), 'test_config.json')) +c = load_config(os.path.join(get_tests_input_path(), "test_config.json")) ap = AudioProcessor(**c.audio) WAV_FILE = os.path.join(get_tests_input_path(), "example_1.wav") @@ -32,147 +32,140 @@ class TacotronTrainTest(unittest.TestCase): @staticmethod def test_train_step(): input_dummy = torch.randint(0, 24, (8, 128)).long().to(device) - input_lengths = torch.randint(100, 129, (8, )).long().to(device) + input_lengths = torch.randint(100, 129, (8,)).long().to(device) input_lengths[-1] = 128 - mel_spec = torch.rand(8, 30, c.audio['num_mels']).to(device) - linear_spec = torch.rand(8, 30, c.audio['fft_size']).to(device) - mel_lengths = torch.randint(20, 30, (8, )).long().to(device) + mel_spec = torch.rand(8, 30, c.audio["num_mels"]).to(device) + linear_spec = torch.rand(8, 30, c.audio["fft_size"]).to(device) + mel_lengths = torch.randint(20, 30, (8,)).long().to(device) stop_targets = torch.zeros(8, 30, 1).float().to(device) - speaker_ids = torch.randint(0, 5, (8, )).long().to(device) + speaker_ids = torch.randint(0, 5, (8,)).long().to(device) for idx in mel_lengths: - stop_targets[:, int(idx.item()):, 0] = 1.0 + stop_targets[:, int(idx.item()) :, 0] = 1.0 - stop_targets = stop_targets.view(input_dummy.shape[0], - stop_targets.size(1) // c.r, -1) - stop_targets = (stop_targets.sum(2) > - 0.0).unsqueeze(2).float().squeeze() + stop_targets = stop_targets.view(input_dummy.shape[0], stop_targets.size(1) // c.r, -1) + stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze() criterion = L1LossMasked(seq_len_norm=False).to(device) criterion_st = nn.BCEWithLogitsLoss().to(device) model = Tacotron( num_chars=32, num_speakers=5, - postnet_output_dim=c.audio['fft_size'], - decoder_output_dim=c.audio['num_mels'], + postnet_output_dim=c.audio["fft_size"], + decoder_output_dim=c.audio["num_mels"], r=c.r, - memory_size=c.memory_size - ).to(device) #FIXME: missing num_speakers parameter to Tacotron ctor + memory_size=c.memory_size, + ).to( + device + ) # FIXME: missing num_speakers parameter to Tacotron ctor model.train() - print(" > Num parameters for Tacotron model:%s" % - (count_parameters(model))) + print(" > Num parameters for Tacotron model:%s" % (count_parameters(model))) model_ref = copy.deepcopy(model) count = 0 - for param, param_ref in zip(model.parameters(), - model_ref.parameters()): + for param, param_ref in zip(model.parameters(), model_ref.parameters()): assert (param - param_ref).sum() == 0, param count += 1 optimizer = optim.Adam(model.parameters(), lr=c.lr) for _ in range(5): mel_out, linear_out, align, stop_tokens = model.forward( - input_dummy, input_lengths, mel_spec, mel_lengths, speaker_ids) + input_dummy, input_lengths, mel_spec, mel_lengths, speaker_ids + ) optimizer.zero_grad() loss = criterion(mel_out, mel_spec, mel_lengths) stop_loss = criterion_st(stop_tokens, stop_targets) - loss = loss + criterion(linear_out, linear_spec, - mel_lengths) + stop_loss + loss = loss + criterion(linear_out, linear_spec, mel_lengths) + stop_loss loss.backward() optimizer.step() # check parameter changes count = 0 - for param, param_ref in zip(model.parameters(), - model_ref.parameters()): + for param, param_ref in zip(model.parameters(), model_ref.parameters()): # ignore pre-higway layer since it works conditional # if count not in [145, 59]: - assert (param != param_ref).any( - ), "param {} with shape {} 
not updated!! \n{}\n{}".format( - count, param.shape, param, param_ref) + assert (param != param_ref).any(), "param {} with shape {} not updated!! \n{}\n{}".format( + count, param.shape, param, param_ref + ) count += 1 + class MultiSpeakeTacotronTrainTest(unittest.TestCase): @staticmethod def test_train_step(): input_dummy = torch.randint(0, 24, (8, 128)).long().to(device) - input_lengths = torch.randint(100, 129, (8, )).long().to(device) + input_lengths = torch.randint(100, 129, (8,)).long().to(device) input_lengths[-1] = 128 - mel_spec = torch.rand(8, 30, c.audio['num_mels']).to(device) - linear_spec = torch.rand(8, 30, c.audio['fft_size']).to(device) - mel_lengths = torch.randint(20, 30, (8, )).long().to(device) + mel_spec = torch.rand(8, 30, c.audio["num_mels"]).to(device) + linear_spec = torch.rand(8, 30, c.audio["fft_size"]).to(device) + mel_lengths = torch.randint(20, 30, (8,)).long().to(device) stop_targets = torch.zeros(8, 30, 1).float().to(device) speaker_embeddings = torch.rand(8, 55).to(device) for idx in mel_lengths: - stop_targets[:, int(idx.item()):, 0] = 1.0 + stop_targets[:, int(idx.item()) :, 0] = 1.0 - stop_targets = stop_targets.view(input_dummy.shape[0], - stop_targets.size(1) // c.r, -1) - stop_targets = (stop_targets.sum(2) > - 0.0).unsqueeze(2).float().squeeze() + stop_targets = stop_targets.view(input_dummy.shape[0], stop_targets.size(1) // c.r, -1) + stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze() criterion = L1LossMasked(seq_len_norm=False).to(device) criterion_st = nn.BCEWithLogitsLoss().to(device) model = Tacotron( num_chars=32, num_speakers=5, - postnet_output_dim=c.audio['fft_size'], - decoder_output_dim=c.audio['num_mels'], + postnet_output_dim=c.audio["fft_size"], + decoder_output_dim=c.audio["num_mels"], r=c.r, memory_size=c.memory_size, speaker_embedding_dim=55, - ).to(device) #FIXME: missing num_speakers parameter to Tacotron ctor + ).to( + device + ) # FIXME: missing num_speakers parameter to Tacotron ctor model.train() - print(" > Num parameters for Tacotron model:%s" % - (count_parameters(model))) + print(" > Num parameters for Tacotron model:%s" % (count_parameters(model))) model_ref = copy.deepcopy(model) count = 0 - for param, param_ref in zip(model.parameters(), - model_ref.parameters()): + for param, param_ref in zip(model.parameters(), model_ref.parameters()): assert (param - param_ref).sum() == 0, param count += 1 optimizer = optim.Adam(model.parameters(), lr=c.lr) for _ in range(5): mel_out, linear_out, align, stop_tokens = model.forward( - input_dummy, input_lengths, mel_spec, mel_lengths, - speaker_embeddings=speaker_embeddings) + input_dummy, input_lengths, mel_spec, mel_lengths, speaker_embeddings=speaker_embeddings + ) optimizer.zero_grad() loss = criterion(mel_out, mel_spec, mel_lengths) stop_loss = criterion_st(stop_tokens, stop_targets) - loss = loss + criterion(linear_out, linear_spec, - mel_lengths) + stop_loss + loss = loss + criterion(linear_out, linear_spec, mel_lengths) + stop_loss loss.backward() optimizer.step() # check parameter changes count = 0 - for param, param_ref in zip(model.parameters(), - model_ref.parameters()): + for param, param_ref in zip(model.parameters(), model_ref.parameters()): # ignore pre-higway layer since it works conditional # if count not in [145, 59]: - assert (param != param_ref).any( - ), "param {} with shape {} not updated!! \n{}\n{}".format( - count, param.shape, param, param_ref) + assert (param != param_ref).any(), "param {} with shape {} not updated!! 
\n{}\n{}".format( + count, param.shape, param, param_ref + ) count += 1 + class TacotronGSTTrainTest(unittest.TestCase): @staticmethod def test_train_step(): # with random gst mel style input_dummy = torch.randint(0, 24, (8, 128)).long().to(device) - input_lengths = torch.randint(100, 129, (8, )).long().to(device) + input_lengths = torch.randint(100, 129, (8,)).long().to(device) input_lengths[-1] = 128 - mel_spec = torch.rand(8, 120, c.audio['num_mels']).to(device) - linear_spec = torch.rand(8, 120, c.audio['fft_size']).to(device) - mel_lengths = torch.randint(20, 120, (8, )).long().to(device) + mel_spec = torch.rand(8, 120, c.audio["num_mels"]).to(device) + linear_spec = torch.rand(8, 120, c.audio["fft_size"]).to(device) + mel_lengths = torch.randint(20, 120, (8,)).long().to(device) mel_lengths[-1] = 120 stop_targets = torch.zeros(8, 120, 1).float().to(device) - speaker_ids = torch.randint(0, 5, (8, )).long().to(device) + speaker_ids = torch.randint(0, 5, (8,)).long().to(device) for idx in mel_lengths: - stop_targets[:, int(idx.item()):, 0] = 1.0 + stop_targets[:, int(idx.item()) :, 0] = 1.0 - stop_targets = stop_targets.view(input_dummy.shape[0], - stop_targets.size(1) // c.r, -1) - stop_targets = (stop_targets.sum(2) > - 0.0).unsqueeze(2).float().squeeze() + stop_targets = stop_targets.view(input_dummy.shape[0], stop_targets.size(1) // c.r, -1) + stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze() criterion = L1LossMasked(seq_len_norm=False).to(device) criterion_st = nn.BCEWithLogitsLoss().to(device) @@ -180,65 +173,64 @@ class TacotronGSTTrainTest(unittest.TestCase): num_chars=32, num_speakers=5, gst=True, - gst_embedding_dim=c.gst['gst_embedding_dim'], - gst_num_heads=c.gst['gst_num_heads'], - gst_style_tokens=c.gst['gst_style_tokens'], - postnet_output_dim=c.audio['fft_size'], - decoder_output_dim=c.audio['num_mels'], + gst_embedding_dim=c.gst["gst_embedding_dim"], + gst_num_heads=c.gst["gst_num_heads"], + gst_style_tokens=c.gst["gst_style_tokens"], + postnet_output_dim=c.audio["fft_size"], + decoder_output_dim=c.audio["num_mels"], r=c.r, - memory_size=c.memory_size - ).to(device) #FIXME: missing num_speakers parameter to Tacotron ctor + memory_size=c.memory_size, + ).to( + device + ) # FIXME: missing num_speakers parameter to Tacotron ctor model.train() # print(model) - print(" > Num parameters for Tacotron GST model:%s" % - (count_parameters(model))) + print(" > Num parameters for Tacotron GST model:%s" % (count_parameters(model))) model_ref = copy.deepcopy(model) count = 0 - for param, param_ref in zip(model.parameters(), - model_ref.parameters()): + for param, param_ref in zip(model.parameters(), model_ref.parameters()): assert (param - param_ref).sum() == 0, param count += 1 optimizer = optim.Adam(model.parameters(), lr=c.lr) for _ in range(10): mel_out, linear_out, align, stop_tokens = model.forward( - input_dummy, input_lengths, mel_spec, mel_lengths, speaker_ids) + input_dummy, input_lengths, mel_spec, mel_lengths, speaker_ids + ) optimizer.zero_grad() loss = criterion(mel_out, mel_spec, mel_lengths) stop_loss = criterion_st(stop_tokens, stop_targets) - loss = loss + criterion(linear_out, linear_spec, - mel_lengths) + stop_loss + loss = loss + criterion(linear_out, linear_spec, mel_lengths) + stop_loss loss.backward() optimizer.step() # check parameter changes count = 0 - for param, param_ref in zip(model.parameters(), - model_ref.parameters()): + for param, param_ref in zip(model.parameters(), model_ref.parameters()): # ignore pre-higway layer since it 
works conditional - assert (param != param_ref).any( - ), "param {} with shape {} not updated!! \n{}\n{}".format( - count, param.shape, param, param_ref) + assert (param != param_ref).any(), "param {} with shape {} not updated!! \n{}\n{}".format( + count, param.shape, param, param_ref + ) count += 1 # with file gst style - mel_spec = torch.FloatTensor(ap.melspectrogram(ap.load_wav(WAV_FILE)))[:, :120].unsqueeze(0).transpose(1, 2).to(device) + mel_spec = ( + torch.FloatTensor(ap.melspectrogram(ap.load_wav(WAV_FILE)))[:, :120].unsqueeze(0).transpose(1, 2).to(device) + ) mel_spec = mel_spec.repeat(8, 1, 1) input_dummy = torch.randint(0, 24, (8, 128)).long().to(device) - input_lengths = torch.randint(100, 129, (8, )).long().to(device) + input_lengths = torch.randint(100, 129, (8,)).long().to(device) input_lengths[-1] = 128 - linear_spec = torch.rand(8, mel_spec.size(1), c.audio['fft_size']).to(device) - mel_lengths = torch.randint(20, mel_spec.size(1), (8, )).long().to(device) + linear_spec = torch.rand(8, mel_spec.size(1), c.audio["fft_size"]).to(device) + mel_lengths = torch.randint(20, mel_spec.size(1), (8,)).long().to(device) mel_lengths[-1] = mel_spec.size(1) stop_targets = torch.zeros(8, mel_spec.size(1), 1).float().to(device) - speaker_ids = torch.randint(0, 5, (8, )).long().to(device) + speaker_ids = torch.randint(0, 5, (8,)).long().to(device) for idx in mel_lengths: - stop_targets[:, int(idx.item()):, 0] = 1.0 + stop_targets[:, int(idx.item()) :, 0] = 1.0 - stop_targets = stop_targets.view(input_dummy.shape[0], - stop_targets.size(1) // c.r, -1) - stop_targets = (stop_targets.sum(2) > - 0.0).unsqueeze(2).float().squeeze() + stop_targets = stop_targets.view(input_dummy.shape[0], stop_targets.size(1) // c.r, -1) + stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze() criterion = L1LossMasked(seq_len_norm=False).to(device) criterion_st = nn.BCEWithLogitsLoss().to(device) @@ -246,113 +238,109 @@ class TacotronGSTTrainTest(unittest.TestCase): num_chars=32, num_speakers=5, gst=True, - gst_embedding_dim=c.gst['gst_embedding_dim'], - gst_num_heads=c.gst['gst_num_heads'], - gst_style_tokens=c.gst['gst_style_tokens'], - postnet_output_dim=c.audio['fft_size'], - decoder_output_dim=c.audio['num_mels'], + gst_embedding_dim=c.gst["gst_embedding_dim"], + gst_num_heads=c.gst["gst_num_heads"], + gst_style_tokens=c.gst["gst_style_tokens"], + postnet_output_dim=c.audio["fft_size"], + decoder_output_dim=c.audio["num_mels"], r=c.r, - memory_size=c.memory_size - ).to(device) #FIXME: missing num_speakers parameter to Tacotron ctor + memory_size=c.memory_size, + ).to( + device + ) # FIXME: missing num_speakers parameter to Tacotron ctor model.train() # print(model) - print(" > Num parameters for Tacotron GST model:%s" % - (count_parameters(model))) + print(" > Num parameters for Tacotron GST model:%s" % (count_parameters(model))) model_ref = copy.deepcopy(model) count = 0 - for param, param_ref in zip(model.parameters(), - model_ref.parameters()): + for param, param_ref in zip(model.parameters(), model_ref.parameters()): assert (param - param_ref).sum() == 0, param count += 1 optimizer = optim.Adam(model.parameters(), lr=c.lr) for _ in range(10): mel_out, linear_out, align, stop_tokens = model.forward( - input_dummy, input_lengths, mel_spec, mel_lengths, speaker_ids) + input_dummy, input_lengths, mel_spec, mel_lengths, speaker_ids + ) optimizer.zero_grad() loss = criterion(mel_out, mel_spec, mel_lengths) stop_loss = criterion_st(stop_tokens, stop_targets) - loss = loss + 
criterion(linear_out, linear_spec, - mel_lengths) + stop_loss + loss = loss + criterion(linear_out, linear_spec, mel_lengths) + stop_loss loss.backward() optimizer.step() # check parameter changes count = 0 - for param, param_ref in zip(model.parameters(), - model_ref.parameters()): + for param, param_ref in zip(model.parameters(), model_ref.parameters()): # ignore pre-higway layer since it works conditional - assert (param != param_ref).any( - ), "param {} with shape {} not updated!! \n{}\n{}".format( - count, param.shape, param, param_ref) + assert (param != param_ref).any(), "param {} with shape {} not updated!! \n{}\n{}".format( + count, param.shape, param, param_ref + ) count += 1 + class SCGSTMultiSpeakeTacotronTrainTest(unittest.TestCase): @staticmethod def test_train_step(): input_dummy = torch.randint(0, 24, (8, 128)).long().to(device) - input_lengths = torch.randint(100, 129, (8, )).long().to(device) + input_lengths = torch.randint(100, 129, (8,)).long().to(device) input_lengths[-1] = 128 - mel_spec = torch.rand(8, 30, c.audio['num_mels']).to(device) - linear_spec = torch.rand(8, 30, c.audio['fft_size']).to(device) - mel_lengths = torch.randint(20, 30, (8, )).long().to(device) + mel_spec = torch.rand(8, 30, c.audio["num_mels"]).to(device) + linear_spec = torch.rand(8, 30, c.audio["fft_size"]).to(device) + mel_lengths = torch.randint(20, 30, (8,)).long().to(device) mel_lengths[-1] = mel_spec.size(1) stop_targets = torch.zeros(8, 30, 1).float().to(device) speaker_embeddings = torch.rand(8, 55).to(device) for idx in mel_lengths: - stop_targets[:, int(idx.item()):, 0] = 1.0 + stop_targets[:, int(idx.item()) :, 0] = 1.0 - stop_targets = stop_targets.view(input_dummy.shape[0], - stop_targets.size(1) // c.r, -1) - stop_targets = (stop_targets.sum(2) > - 0.0).unsqueeze(2).float().squeeze() + stop_targets = stop_targets.view(input_dummy.shape[0], stop_targets.size(1) // c.r, -1) + stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze() criterion = L1LossMasked(seq_len_norm=False).to(device) criterion_st = nn.BCEWithLogitsLoss().to(device) model = Tacotron( num_chars=32, num_speakers=5, - postnet_output_dim=c.audio['fft_size'], - decoder_output_dim=c.audio['num_mels'], + postnet_output_dim=c.audio["fft_size"], + decoder_output_dim=c.audio["num_mels"], gst=True, - gst_embedding_dim=c.gst['gst_embedding_dim'], - gst_num_heads=c.gst['gst_num_heads'], - gst_style_tokens=c.gst['gst_style_tokens'], - gst_use_speaker_embedding=c.gst['gst_use_speaker_embedding'], + gst_embedding_dim=c.gst["gst_embedding_dim"], + gst_num_heads=c.gst["gst_num_heads"], + gst_style_tokens=c.gst["gst_style_tokens"], + gst_use_speaker_embedding=c.gst["gst_use_speaker_embedding"], r=c.r, memory_size=c.memory_size, speaker_embedding_dim=55, - ).to(device) #FIXME: missing num_speakers parameter to Tacotron ctor + ).to( + device + ) # FIXME: missing num_speakers parameter to Tacotron ctor model.train() - print(" > Num parameters for Tacotron model:%s" % - (count_parameters(model))) + print(" > Num parameters for Tacotron model:%s" % (count_parameters(model))) model_ref = copy.deepcopy(model) count = 0 - for param, param_ref in zip(model.parameters(), - model_ref.parameters()): + for param, param_ref in zip(model.parameters(), model_ref.parameters()): assert (param - param_ref).sum() == 0, param count += 1 optimizer = optim.Adam(model.parameters(), lr=c.lr) for _ in range(5): mel_out, linear_out, align, stop_tokens = model.forward( - input_dummy, input_lengths, mel_spec, mel_lengths, - 
speaker_embeddings=speaker_embeddings) + input_dummy, input_lengths, mel_spec, mel_lengths, speaker_embeddings=speaker_embeddings + ) optimizer.zero_grad() loss = criterion(mel_out, mel_spec, mel_lengths) stop_loss = criterion_st(stop_tokens, stop_targets) - loss = loss + criterion(linear_out, linear_spec, - mel_lengths) + stop_loss + loss = loss + criterion(linear_out, linear_spec, mel_lengths) + stop_loss loss.backward() optimizer.step() # check parameter changes count = 0 - for name_param, param_ref in zip(model.named_parameters(), - model_ref.parameters()): + for name_param, param_ref in zip(model.named_parameters(), model_ref.parameters()): # ignore pre-higway layer since it works conditional # if count not in [145, 59]: name, param = name_param - if name == 'gst_layer.encoder.recurrence.weight_hh_l0': + if name == "gst_layer.encoder.recurrence.weight_hh_l0": continue - assert (param != param_ref).any( - ), "param {} with shape {} not updated!! \n{}\n{}".format( - count, param.shape, param, param_ref) + assert (param != param_ref).any(), "param {} with shape {} not updated!! \n{}\n{}".format( + count, param.shape, param, param_ref + ) count += 1 diff --git a/tests/test_text_cleaners.py b/tests/test_text_cleaners.py index b301fb5a..fcfa71e7 100644 --- a/tests/test_text_cleaners.py +++ b/tests/test_text_cleaners.py @@ -17,5 +17,5 @@ def test_currency() -> None: def test_expand_numbers() -> None: - assert phoneme_cleaners("-1") == 'minus one' - assert phoneme_cleaners("1") == 'one' + assert phoneme_cleaners("-1") == "minus one" + assert phoneme_cleaners("1") == "one" diff --git a/tests/test_text_processing.py b/tests/test_text_processing.py index b8b74e28..f70056b1 100644 --- a/tests/test_text_processing.py +++ b/tests/test_text_processing.py @@ -7,7 +7,8 @@ from tests import get_tests_input_path, get_tests_path from TTS.tts.utils.text import * from TTS.utils.io import load_config -conf = load_config(os.path.join(get_tests_input_path(), 'test_config.json')) +conf = load_config(os.path.join(get_tests_input_path(), "test_config.json")) + def test_phoneme_to_sequence(): @@ -18,7 +19,7 @@ def test_phoneme_to_sequence(): text_hat = sequence_to_phoneme(sequence) _ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters) text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters) - gt = 'ɹiːsənt ɹᵻsɜːtʃ æt hɑːɹvɚd hɐz ʃoʊn mɛdᵻteɪɾɪŋ fɔːɹ æz lɪɾəl æz eɪt wiːks kæn æktʃuːəli ɪŋkɹiːs, ðə ɡɹeɪ mæɾɚɹ ɪnðə pɑːɹts ʌvðə bɹeɪn ɹᵻspɑːnsᵻbəl fɔːɹ ɪmoʊʃənəl ɹɛɡjʊleɪʃən ænd lɜːnɪŋ!' + gt = "ɹiːsənt ɹᵻsɜːtʃ æt hɑːɹvɚd hɐz ʃoʊn mɛdᵻteɪɾɪŋ fɔːɹ æz lɪɾəl æz eɪt wiːks kæn æktʃuːəli ɪŋkɹiːs, ðə ɡɹeɪ mæɾɚɹ ɪnðə pɑːɹts ʌvðə bɹeɪn ɹᵻspɑːnsᵻbəl fɔːɹ ɪmoʊʃənəl ɹɛɡjʊleɪʃən ænd lɜːnɪŋ!" assert text_hat == text_hat_with_params == gt # multiple punctuations @@ -87,6 +88,7 @@ def test_phoneme_to_sequence(): print(len(sequence)) assert text_hat == text_hat_with_params == gt + def test_phoneme_to_sequence_with_blank_token(): text = "Recent research at Harvard has shown meditating for as little as 8 weeks can actually increase, the grey matter in the parts of the brain responsible for emotional regulation and learning!" @@ -105,7 +107,7 @@ def test_phoneme_to_sequence_with_blank_token(): text_hat = sequence_to_phoneme(sequence) _ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters, add_blank=True) text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters, add_blank=True) - gt = 'biː ɐ vɔɪs, nɑːt æn! ɛkoʊ?' + gt = "biː ɐ vɔɪs, nɑːt æn! ɛkoʊ?" 
print(text_hat) print(len(sequence)) assert text_hat == text_hat_with_params == gt @@ -116,7 +118,7 @@ def test_phoneme_to_sequence_with_blank_token(): text_hat = sequence_to_phoneme(sequence) _ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters, add_blank=True) text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters, add_blank=True) - gt = 'biː ɐ vɔɪs, nɑːt æn! ɛkoʊ' + gt = "biː ɐ vɔɪs, nɑːt æn! ɛkoʊ" print(text_hat) print(len(sequence)) assert text_hat == text_hat_with_params == gt @@ -127,7 +129,7 @@ def test_phoneme_to_sequence_with_blank_token(): text_hat = sequence_to_phoneme(sequence) _ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters, add_blank=True) text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters, add_blank=True) - gt = 'biː ɐ vɔɪs, nɑːt ɐn ɛkoʊ!' + gt = "biː ɐ vɔɪs, nɑːt ɐn ɛkoʊ!" print(text_hat) print(len(sequence)) assert text_hat == text_hat_with_params == gt @@ -138,7 +140,7 @@ def test_phoneme_to_sequence_with_blank_token(): text_hat = sequence_to_phoneme(sequence) _ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters, add_blank=True) text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters, add_blank=True) - gt = 'biː ɐ vɔɪs, nɑːt æn! ɛkoʊ.' + gt = "biː ɐ vɔɪs, nɑːt æn! ɛkoʊ." print(text_hat) print(len(sequence)) assert text_hat == text_hat_with_params == gt @@ -165,9 +167,10 @@ def test_phoneme_to_sequence_with_blank_token(): print(len(sequence)) assert text_hat == text_hat_with_params == gt + def test_text2phone(): text = "Recent research at Harvard has shown meditating for as little as 8 weeks can actually increase, the grey matter in the parts of the brain responsible for emotional regulation and learning!" - gt = 'ɹ|iː|s|ə|n|t| |ɹ|ᵻ|s|ɜː|tʃ| |æ|t| |h|ɑːɹ|v|ɚ|d| |h|ɐ|z| |ʃ|oʊ|n| |m|ɛ|d|ᵻ|t|eɪ|ɾ|ɪ|ŋ| |f|ɔː|ɹ| |æ|z| |l|ɪ|ɾ|əl| |æ|z| |eɪ|t| |w|iː|k|s| |k|æ|n| |æ|k|tʃ|uː|əl|i| |ɪ|ŋ|k|ɹ|iː|s|,| |ð|ə| |ɡ|ɹ|eɪ| |m|æ|ɾ|ɚ|ɹ| |ɪ|n|ð|ə| |p|ɑːɹ|t|s| |ʌ|v|ð|ə| |b|ɹ|eɪ|n| |ɹ|ᵻ|s|p|ɑː|n|s|ᵻ|b|əl| |f|ɔː|ɹ| |ɪ|m|oʊ|ʃ|ə|n|əl| |ɹ|ɛ|ɡ|j|ʊ|l|eɪ|ʃ|ə|n| |æ|n|d| |l|ɜː|n|ɪ|ŋ|!' + gt = "ɹ|iː|s|ə|n|t| |ɹ|ᵻ|s|ɜː|tʃ| |æ|t| |h|ɑːɹ|v|ɚ|d| |h|ɐ|z| |ʃ|oʊ|n| |m|ɛ|d|ᵻ|t|eɪ|ɾ|ɪ|ŋ| |f|ɔː|ɹ| |æ|z| |l|ɪ|ɾ|əl| |æ|z| |eɪ|t| |w|iː|k|s| |k|æ|n| |æ|k|tʃ|uː|əl|i| |ɪ|ŋ|k|ɹ|iː|s|,| |ð|ə| |ɡ|ɹ|eɪ| |m|æ|ɾ|ɚ|ɹ| |ɪ|n|ð|ə| |p|ɑːɹ|t|s| |ʌ|v|ð|ə| |b|ɹ|eɪ|n| |ɹ|ᵻ|s|p|ɑː|n|s|ᵻ|b|əl| |f|ɔː|ɹ| |ɪ|m|oʊ|ʃ|ə|n|əl| |ɹ|ɛ|ɡ|j|ʊ|l|eɪ|ʃ|ə|n| |æ|n|d| |l|ɜː|n|ɪ|ŋ|!" 
lang = "en-us" ph = text2phone(text, lang) assert gt == ph diff --git a/tests/test_vocoder_gan_datasets.py b/tests/test_vocoder_gan_datasets.py index 13b1b9d2..84ddcd93 100644 --- a/tests/test_vocoder_gan_datasets.py +++ b/tests/test_vocoder_gan_datasets.py @@ -13,31 +13,32 @@ file_path = os.path.dirname(os.path.realpath(__file__)) OUTPATH = os.path.join(get_tests_output_path(), "loader_tests/") os.makedirs(OUTPATH, exist_ok=True) -C = load_config(os.path.join(get_tests_input_path(), 'test_config.json')) +C = load_config(os.path.join(get_tests_input_path(), "test_config.json")) test_data_path = os.path.join(get_tests_path(), "data/ljspeech/") ok_ljspeech = os.path.exists(test_data_path) -def gan_dataset_case(batch_size, seq_len, hop_len, conv_pad, return_segments, use_noise_augment, use_cache, num_workers): - ''' run dataloader with given parameters and check conditions ''' +def gan_dataset_case( + batch_size, seq_len, hop_len, conv_pad, return_segments, use_noise_augment, use_cache, num_workers +): + """ run dataloader with given parameters and check conditions """ ap = AudioProcessor(**C.audio) _, train_items = load_wav_data(test_data_path, 10) - dataset = GANDataset(ap, - train_items, - seq_len=seq_len, - hop_len=hop_len, - pad_short=2000, - conv_pad=conv_pad, - return_segments=return_segments, - use_noise_augment=use_noise_augment, - use_cache=use_cache) - loader = DataLoader(dataset=dataset, - batch_size=batch_size, - shuffle=True, - num_workers=num_workers, - pin_memory=True, - drop_last=True) + dataset = GANDataset( + ap, + train_items, + seq_len=seq_len, + hop_len=hop_len, + pad_short=2000, + conv_pad=conv_pad, + return_segments=return_segments, + use_noise_augment=use_noise_augment, + use_cache=use_cache, + ) + loader = DataLoader( + dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=True, drop_last=True + ) max_iter = 10 count_iter = 0 @@ -61,8 +62,8 @@ def gan_dataset_case(batch_size, seq_len, hop_len, conv_pad, return_segments, us mel = ap.melspectrogram(audio) # the first 2 and the last 2 frames are skipped due to the padding # differences in stft - max_diff = abs((feat - mel[:, :feat1.shape[-1]])[:, 2:-2]).max() - assert max_diff <= 0, f' [!] {max_diff}' + max_diff = abs((feat - mel[:, : feat1.shape[-1]])[:, 2:-2]).max() + assert max_diff <= 0, f" [!] 
{max_diff}" count_iter += 1 # if count_iter == max_iter: @@ -79,17 +80,17 @@ def gan_dataset_case(batch_size, seq_len, hop_len, conv_pad, return_segments, us def test_parametrized_gan_dataset(): - ''' test dataloader with different parameters ''' + """ test dataloader with different parameters """ params = [ - [32, C.audio['hop_length'] * 10, C.audio['hop_length'], 0, True, False, True, 0], - [32, C.audio['hop_length'] * 10, C.audio['hop_length'], 0, True, False, True, 4], - [1, C.audio['hop_length'] * 10, C.audio['hop_length'], 0, True, True, True, 0], - [1, C.audio['hop_length'], C.audio['hop_length'], 0, True, True, True, 0], - [1, C.audio['hop_length'] * 10, C.audio['hop_length'], 2, True, True, True, 0], - [1, C.audio['hop_length'] * 10, C.audio['hop_length'], 0, False, True, True, 0], - [1, C.audio['hop_length'] * 10, C.audio['hop_length'], 0, True, False, True, 0], - [1, C.audio['hop_length'] * 10, C.audio['hop_length'], 0, True, True, False, 0], - [1, C.audio['hop_length'] * 10, C.audio['hop_length'], 0, False, False, False, 0], + [32, C.audio["hop_length"] * 10, C.audio["hop_length"], 0, True, False, True, 0], + [32, C.audio["hop_length"] * 10, C.audio["hop_length"], 0, True, False, True, 4], + [1, C.audio["hop_length"] * 10, C.audio["hop_length"], 0, True, True, True, 0], + [1, C.audio["hop_length"], C.audio["hop_length"], 0, True, True, True, 0], + [1, C.audio["hop_length"] * 10, C.audio["hop_length"], 2, True, True, True, 0], + [1, C.audio["hop_length"] * 10, C.audio["hop_length"], 0, False, True, True, 0], + [1, C.audio["hop_length"] * 10, C.audio["hop_length"], 0, True, False, True, 0], + [1, C.audio["hop_length"] * 10, C.audio["hop_length"], 0, True, True, False, 0], + [1, C.audio["hop_length"] * 10, C.audio["hop_length"], 0, False, False, False, 0], ] for param in params: print(param) diff --git a/tests/test_vocoder_losses.py b/tests/test_vocoder_losses.py index 7b3c7017..87151a05 100644 --- a/tests/test_vocoder_losses.py +++ b/tests/test_vocoder_losses.py @@ -14,7 +14,7 @@ os.makedirs(OUT_PATH, exist_ok=True) WAV_FILE = os.path.join(get_tests_input_path(), "example_1.wav") -C = load_config(os.path.join(get_tests_input_path(), 'test_config.json')) +C = load_config(os.path.join(get_tests_input_path(), "test_config.json")) ap = AudioProcessor(**C.audio) @@ -45,7 +45,8 @@ def test_multiscale_stft_loss(): stft_loss = MultiScaleSTFTLoss( [ap.fft_size // 2, ap.fft_size, ap.fft_size * 2], [ap.hop_length // 2, ap.hop_length, ap.hop_length * 2], - [ap.win_length // 2, ap.win_length, ap.win_length * 2]) + [ap.win_length // 2, ap.win_length, ap.win_length * 2], + ) wav = ap.load_wav(WAV_FILE) wav = torch.from_numpy(wav[None, :]).float() loss_m, loss_sc = stft_loss(wav, wav) diff --git a/tests/test_vocoder_parallel_wavegan_discriminator.py b/tests/test_vocoder_parallel_wavegan_discriminator.py index 6496d562..d4eca0d1 100644 --- a/tests/test_vocoder_parallel_wavegan_discriminator.py +++ b/tests/test_vocoder_parallel_wavegan_discriminator.py @@ -1,8 +1,10 @@ import numpy as np import torch -from TTS.vocoder.models.parallel_wavegan_discriminator import (ParallelWaveganDiscriminator, - ResidualParallelWaveganDiscriminator) +from TTS.vocoder.models.parallel_wavegan_discriminator import ( + ParallelWaveganDiscriminator, + ResidualParallelWaveganDiscriminator, +) def test_pwgan_disciminator(): @@ -15,7 +17,8 @@ def test_pwgan_disciminator(): dilation_factor=1, nonlinear_activation="LeakyReLU", nonlinear_activation_params={"negative_slope": 0.2}, - bias=True) + bias=True, + ) dummy_x = 
torch.rand((4, 1, 64 * 256)) output = model(dummy_x) assert np.all(output.shape == (4, 1, 64 * 256)) @@ -35,7 +38,8 @@ def test_redisual_pwgan_disciminator(): dropout=0.0, bias=True, nonlinear_activation="LeakyReLU", - nonlinear_activation_params={"negative_slope": 0.2}) + nonlinear_activation_params={"negative_slope": 0.2}, + ) dummy_x = torch.rand((4, 1, 64 * 256)) output = model(dummy_x) assert np.all(output.shape == (4, 1, 64 * 256)) diff --git a/tests/test_vocoder_parallel_wavegan_generator.py b/tests/test_vocoder_parallel_wavegan_generator.py index 9eed0eee..21f6f08f 100644 --- a/tests/test_vocoder_parallel_wavegan_generator.py +++ b/tests/test_vocoder_parallel_wavegan_generator.py @@ -18,7 +18,8 @@ def test_pwgan_generator(): dropout=0.0, bias=True, use_weight_norm=True, - upsample_factors=[4, 4, 4, 4]) + upsample_factors=[4, 4, 4, 4], + ) dummy_c = torch.rand((2, 80, 5)) output = model(dummy_c) assert np.all(output.shape == (2, 1, 5 * 256)), output.shape diff --git a/tests/test_vocoder_pqmf.py b/tests/test_vocoder_pqmf.py index 3112df5a..afe8d1dc 100644 --- a/tests/test_vocoder_pqmf.py +++ b/tests/test_vocoder_pqmf.py @@ -23,5 +23,4 @@ def test_pqmf(): print(w2_.max()) print(w2_.min()) print(w2_.mean()) - sf.write(os.path.join(get_tests_output_path(), 'pqmf_output.wav'), - w2_.flatten().detach(), sr) + sf.write(os.path.join(get_tests_output_path(), "pqmf_output.wav"), w2_.flatten().detach(), sr) diff --git a/tests/test_vocoder_rwd.py b/tests/test_vocoder_rwd.py index 82525e1b..371ad9e4 100644 --- a/tests/test_vocoder_rwd.py +++ b/tests/test_vocoder_rwd.py @@ -5,14 +5,12 @@ from TTS.vocoder.models.random_window_discriminator import RandomWindowDiscrimin def test_rwd(): - layer = RandomWindowDiscriminator(cond_channels=80, - window_sizes=(512, 1024, 2048, 4096, - 8192), - cond_disc_downsample_factors=[ - (8, 4, 2, 2, 2), (8, 4, 2, 2), - (8, 4, 2), (8, 4), (4, 2, 2) - ], - hop_length=256) + layer = RandomWindowDiscriminator( + cond_channels=80, + window_sizes=(512, 1024, 2048, 4096, 8192), + cond_disc_downsample_factors=[(8, 4, 2, 2, 2), (8, 4, 2, 2), (8, 4, 2), (8, 4), (4, 2, 2)], + hop_length=256, + ) x = torch.rand([4, 1, 22050]) c = torch.rand([4, 80, 22050 // 256]) diff --git a/tests/test_vocoder_tf_pqmf.py b/tests/test_vocoder_tf_pqmf.py index 28aebe5b..f1c3666b 100644 --- a/tests/test_vocoder_tf_pqmf.py +++ b/tests/test_vocoder_tf_pqmf.py @@ -24,5 +24,4 @@ def test_pqmf(): print(w2_.max()) print(w2_.min()) print(w2_.mean()) - sf.write(os.path.join(get_tests_output_path(), 'tf_pqmf_output.wav'), - w2_.flatten(), sr) + sf.write(os.path.join(get_tests_output_path(), "tf_pqmf_output.wav"), w2_.flatten(), sr) diff --git a/tests/test_vocoder_wavernn_datasets.py b/tests/test_vocoder_wavernn_datasets.py index 6a2a3339..7bd4380b 100644 --- a/tests/test_vocoder_wavernn_datasets.py +++ b/tests/test_vocoder_wavernn_datasets.py @@ -14,8 +14,7 @@ file_path = os.path.dirname(os.path.realpath(__file__)) OUTPATH = os.path.join(get_tests_output_path(), "loader_tests/") os.makedirs(OUTPATH, exist_ok=True) -C = load_config(os.path.join(get_tests_input_path(), - "test_vocoder_wavernn_config.json")) +C = load_config(os.path.join(get_tests_input_path(), "test_vocoder_wavernn_config.json")) test_data_path = os.path.join(get_tests_path(), "data/ljspeech/") test_mel_feat_path = os.path.join(test_data_path, "mel") @@ -33,25 +32,20 @@ def wavernn_dataset_case(batch_size, seq_len, hop_len, pad, mode, mulaw, num_wor C.data_path = test_data_path preprocess_wav_files(test_data_path, C, ap) - _, train_items = 
load_wav_feat_data( - test_data_path, test_mel_feat_path, 5) + _, train_items = load_wav_feat_data(test_data_path, test_mel_feat_path, 5) - dataset = WaveRNNDataset(ap=ap, - items=train_items, - seq_len=seq_len, - hop_len=hop_len, - pad=pad, - mode=mode, - mulaw=mulaw - ) + dataset = WaveRNNDataset( + ap=ap, items=train_items, seq_len=seq_len, hop_len=hop_len, pad=pad, mode=mode, mulaw=mulaw + ) # sampler = DistributedSampler(dataset) if num_gpus > 1 else None - loader = DataLoader(dataset, - shuffle=True, - collate_fn=dataset.collate, - batch_size=batch_size, - num_workers=num_workers, - pin_memory=True, - ) + loader = DataLoader( + dataset, + shuffle=True, + collate_fn=dataset.collate, + batch_size=batch_size, + num_workers=num_workers, + pin_memory=True, + ) max_iter = 10 count_iter = 0 @@ -59,10 +53,8 @@ def wavernn_dataset_case(batch_size, seq_len, hop_len, pad, mode, mulaw, num_wor try: for data in loader: x_input, mels, _ = data - expected_feat_shape = (ap.num_mels, - (x_input.shape[-1] // hop_len) + (pad * 2)) - assert np.all( - mels.shape[1:] == expected_feat_shape), f" [!] {mels.shape} vs {expected_feat_shape}" + expected_feat_shape = (ap.num_mels, (x_input.shape[-1] // hop_len) + (pad * 2)) + assert np.all(mels.shape[1:] == expected_feat_shape), f" [!] {mels.shape} vs {expected_feat_shape}" assert (mels.shape[2] - pad * 2) * hop_len == x_input.shape[1] count_iter += 1 @@ -77,15 +69,15 @@ def wavernn_dataset_case(batch_size, seq_len, hop_len, pad, mode, mulaw, num_wor def test_parametrized_wavernn_dataset(): - ''' test dataloader with different parameters ''' + """ test dataloader with different parameters """ params = [ - [16, C.audio['hop_length'] * 10, C.audio['hop_length'], 2, 10, True, 0], - [16, C.audio['hop_length'] * 10, C.audio['hop_length'], 2, "mold", False, 4], - [1, C.audio['hop_length'] * 10, C.audio['hop_length'], 2, 9, False, 0], - [1, C.audio['hop_length'], C.audio['hop_length'], 2, 10, True, 0], - [1, C.audio['hop_length'], C.audio['hop_length'], 2, "mold", False, 0], - [1, C.audio['hop_length'] * 5, C.audio['hop_length'], 4, 10, False, 2], - [1, C.audio['hop_length'] * 5, C.audio['hop_length'], 2, "mold", False, 0], + [16, C.audio["hop_length"] * 10, C.audio["hop_length"], 2, 10, True, 0], + [16, C.audio["hop_length"] * 10, C.audio["hop_length"], 2, "mold", False, 4], + [1, C.audio["hop_length"] * 10, C.audio["hop_length"], 2, 9, False, 0], + [1, C.audio["hop_length"], C.audio["hop_length"], 2, 10, True, 0], + [1, C.audio["hop_length"], C.audio["hop_length"], 2, "mold", False, 0], + [1, C.audio["hop_length"] * 5, C.audio["hop_length"], 4, 10, False, 2], + [1, C.audio["hop_length"] * 5, C.audio["hop_length"], 2, "mold", False, 0], ] for param in params: print(param) diff --git a/tests/test_wavegrad_layers.py b/tests/test_wavegrad_layers.py index 6052e922..0180eb0a 100644 --- a/tests/test_wavegrad_layers.py +++ b/tests/test_wavegrad_layers.py @@ -75,12 +75,12 @@ def test_wavegrad_forward(): c = torch.rand(32, 80, 20) noise_scale = torch.rand(32) - model = Wavegrad(in_channels=80, - out_channels=1, - upsample_factors=[5, 5, 3, 2, 2], - upsample_dilations=[[1, 2, 1, 2], [1, 2, 1, 2], - [1, 2, 4, 8], [1, 2, 4, 8], - [1, 2, 4, 8]]) + model = Wavegrad( + in_channels=80, + out_channels=1, + upsample_factors=[5, 5, 3, 2, 2], + upsample_dilations=[[1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 4, 8], [1, 2, 4, 8], [1, 2, 4, 8]], + ) o = model.forward(x, c, noise_scale) assert o.shape[0] == 32 diff --git a/tests/test_wavegrad_train.py b/tests/test_wavegrad_train.py index 
6c950c5a..a28409e5 100644 --- a/tests/test_wavegrad_train.py +++ b/tests/test_wavegrad_train.py @@ -6,7 +6,7 @@ from torch import optim from TTS.vocoder.models.wavegrad import Wavegrad -#pylint: disable=unused-variable +# pylint: disable=unused-variable torch.manual_seed(1) use_cuda = torch.cuda.is_available() @@ -20,19 +20,19 @@ class WavegradTrainTest(unittest.TestCase): mel_spec = torch.rand(8, 80, 20).to(device) criterion = torch.nn.L1Loss().to(device) - model = Wavegrad(in_channels=80, - out_channels=1, - upsample_factors=[5, 5, 3, 2, 2], - upsample_dilations=[[1, 2, 1, 2], [1, 2, 1, 2], - [1, 2, 4, 8], [1, 2, 4, 8], - [1, 2, 4, 8]]) + model = Wavegrad( + in_channels=80, + out_channels=1, + upsample_factors=[5, 5, 3, 2, 2], + upsample_dilations=[[1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 4, 8], [1, 2, 4, 8], [1, 2, 4, 8]], + ) - model_ref = Wavegrad(in_channels=80, - out_channels=1, - upsample_factors=[5, 5, 3, 2, 2], - upsample_dilations=[[1, 2, 1, 2], [1, 2, 1, 2], - [1, 2, 4, 8], [1, 2, 4, 8], - [1, 2, 4, 8]]) + model_ref = Wavegrad( + in_channels=80, + out_channels=1, + upsample_factors=[5, 5, 3, 2, 2], + upsample_dilations=[[1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 4, 8], [1, 2, 4, 8], [1, 2, 4, 8]], + ) model.train() model.to(device) betas = np.linspace(1e-6, 1e-2, 1000) @@ -40,8 +40,7 @@ class WavegradTrainTest(unittest.TestCase): model_ref.load_state_dict(model.state_dict()) model_ref.to(device) count = 0 - for param, param_ref in zip(model.parameters(), - model_ref.parameters()): + for param, param_ref in zip(model.parameters(), model_ref.parameters()): assert (param - param_ref).sum() == 0, param count += 1 optimizer = optim.Adam(model.parameters(), lr=0.001) @@ -53,11 +52,10 @@ class WavegradTrainTest(unittest.TestCase): optimizer.step() # check parameter changes count = 0 - for param, param_ref in zip(model.parameters(), - model_ref.parameters()): + for param, param_ref in zip(model.parameters(), model_ref.parameters()): # ignore pre-higway layer since it works conditional # if count not in [145, 59]: - assert (param != param_ref).any( - ), "param {} with shape {} not updated!! \n{}\n{}".format( - count, param.shape, param, param_ref) + assert (param != param_ref).any(), "param {} with shape {} not updated!! \n{}\n{}".format( + count, param.shape, param, param_ref + ) count += 1
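
Editor's note: every test file touched above repeats the same training smoke-test pattern that this patch merely reformats — snapshot a freshly built model with copy.deepcopy, run a few optimizer steps on dummy data, then assert that every parameter has moved away from the snapshot. The sketch below distills that pattern in isolation so the reformatted hunks are easier to follow; the toy linear model, tensor shapes, and learning rate are illustrative stand-ins, not code from this patch.

import copy

import torch
from torch import nn, optim

torch.manual_seed(1)

# Toy stand-in for the Tacotron/Wavegrad models; any trainable module exercises the pattern.
model = nn.Sequential(nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 1))
model.train()
model_ref = copy.deepcopy(model)  # frozen snapshot of the initial weights

# Sanity check: the snapshot starts out identical to the live model.
for param, param_ref in zip(model.parameters(), model_ref.parameters()):
    assert (param - param_ref).sum() == 0, param

criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
dummy_input = torch.rand(8, 16)
dummy_target = torch.rand(8, 1)

for _ in range(5):
    optimizer.zero_grad()
    loss = criterion(model(dummy_input), dummy_target)
    loss.backward()
    optimizer.step()

# After a few steps every parameter should differ from the snapshot; a parameter
# that never changes usually means it is disconnected from the loss graph, which
# is exactly the regression these tests guard against.
for count, (param, param_ref) in enumerate(zip(model.parameters(), model_ref.parameters())):
    assert (param != param_ref).any(), "param {} with shape {} not updated!!".format(count, param.shape)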