2020-08-02 18:13:54 +02:00
{
"cells": [
{
"cell_type": "markdown",
2020-08-03 12:03:12 +02:00
"metadata": {
"Collapsed": "false"
},
2020-08-02 18:13:54 +02:00
"source": [
"# Jupyter Notebook for phoneme coverage analysis\n",
"\n",
"This Jupyter notebook checks the dataset configured in config.json for phoneme coverage.\n",
"As mentioned here https://github.com/mozilla/TTS/wiki/Dataset#what-makes-a-good-dataset good phoneme coverage is recommended.\n",
"\n",
"Most parameters are taken from the config.json file in the Mozilla TTS repo, so please ensure it is configured correctly for your dataset.\n",
"This notebook uses lots of existing code from the TTS repo to ensure future compatibility.\n",
"\n",
"Many thanks to Neil Stoker for supporting me on this topic :-).\n",
"\n",
"I provide this notebook without any warranty, but it's hopefully useful for your dataset analysis.\n",
"\n",
"Happy TTS'ing :-)\n",
"\n",
"Thorsten Müller\n",
"\n",
"* https://github.com/thorstenMueller/deep-learning-german-tts\n",
"* https://discourse.mozilla.org/t/contributing-my-german-voice-for-tts/"
]
},
{
"cell_type": "code",
2020-08-03 16:55:38 +02:00
"execution_count": 1,
2020-08-03 12:03:12 +02:00
"metadata": {
"Collapsed": "false"
},
2020-08-02 18:13:54 +02:00
"outputs": [],
"source": [
"# set some vars\n",
2020-08-03 12:03:12 +02:00
"# TTS_PATH = \"/home/thorsten/___dev/tts/mozilla/TTS\"\n",
2020-08-03 16:55:38 +02:00
"CONFIG_FILE = \"/path/to/config/config.json\"\n",
2020-08-02 18:13:54 +02:00
"CHARS_TO_REMOVE = \".,:!?'\""
]
},
{
"cell_type": "code",
2020-08-03 16:55:38 +02:00
"execution_count": 2,
2020-08-03 12:03:12 +02:00
"metadata": {
"Collapsed": "false"
},
2020-08-02 18:13:54 +02:00
"outputs": [],
"source": [
"# import stuff\n",
"from TTS.utils.io import load_config\n",
2020-08-03 16:55:38 +02:00
"from TTS.datasets.preprocess import load_meta_data\n",
"from TTS.utils.text import phoneme_to_sequence, sequence_to_phoneme\n",
2020-08-02 18:13:54 +02:00
"from tqdm import tqdm\n",
"from matplotlib import pylab as plt\n",
2020-08-03 16:55:38 +02:00
"from multiprocessing import Pool, cpu_count\n",
2020-08-02 18:13:54 +02:00
"\n",
"# standard-library modules (no entry in requirements.txt needed)\n",
"import collections\n",
"import operator\n",
"\n"
]
},
{
"cell_type": "code",
2020-08-03 16:55:38 +02:00
"execution_count": 3,
2020-08-03 12:03:12 +02:00
"metadata": {
2020-08-03 16:55:38 +02:00
"Collapsed": "false",
"tags": []
2020-08-03 12:03:12 +02:00
},
2020-08-03 16:55:38 +02:00
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "| Trainset (#): 34277\n | Evalset (#): 684\n"
}
],
2020-08-02 18:13:54 +02:00
"source": [
"# Load config.json properties\n",
"CONFIG = load_config(CONFIG_FILE)\n",
"\n",
"# Load some properties from config.json\n",
2020-08-03 16:55:38 +02:00
"CONFIG_METADATA = sorted(load_meta_data(CONFIG.datasets)[0])\n",
2020-08-02 18:13:54 +02:00
"CONFIG_DATASET = CONFIG.datasets[0]\n",
"CONFIG_PHONEME_LANGUAGE = CONFIG.phoneme_language\n",
"CONFIG_TEXT_CLEANER = CONFIG.text_cleaner\n",
"CONFIG_ENABLE_EOS_BOS_CHARS = CONFIG.enable_eos_bos_chars\n",
"\n",
"# Will be printed on generated output graph\n",
"CONFIG_RUN_NAME = CONFIG.run_name\n",
"CONFIG_RUN_DESC = CONFIG.run_description"
]
},
{
"cell_type": "code",
2020-08-03 16:55:38 +02:00
"execution_count": 4,
2020-08-03 12:03:12 +02:00
"metadata": {
2020-08-03 16:55:38 +02:00
"Collapsed": "false",
"tags": []
2020-08-03 12:03:12 +02:00
},
2020-08-03 16:55:38 +02:00
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "> Run name: gothic_tts (tacotron2 with ddc and batch-normalization, multispeaker(31 speakers) with gst)\n > Dataset files: 1000\n > Phoneme language: de\n > Used text cleaner: basic_german_cleaners\n > Enable eos bos chars: False\n"
}
],
2020-08-02 18:13:54 +02:00
"source": [
"# print some debug information on loaded config values\n",
"print(\" > Run name: \" + CONFIG_RUN_NAME + \" (\" + CONFIG_RUN_DESC + \")\")\n",
"print(\" > Dataset files: \" + str(len(CONFIG_METADATA)))\n",
"print(\" > Phoneme language: \" + CONFIG_PHONEME_LANGUAGE)\n",
"print(\" > Used text cleaner: \" + CONFIG_TEXT_CLEANER)\n",
"print(\" > Enable eos bos chars: \" + str(CONFIG_ENABLE_EOS_BOS_CHARS))"
]
},
{
"cell_type": "code",
2020-08-03 16:55:38 +02:00
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def get_phoneme_from_sequence(text):\n",
"    \"\"\"Turn the transcript of one metadata item into its phoneme string.\n",
"\n",
"    The transcript is read from ``text[0]``. Trailing newlines and every\n",
"    character listed in CHARS_TO_REMOVE are dropped before phonemization.\n",
"    Returns a list holding the phoneme string (spaces removed), or an empty\n",
"    list when the transcript is empty.\n",
"    \"\"\"\n",
"    transcript = text[0]\n",
"    if len(transcript) == 0:\n",
"        return []\n",
"\n",
"    cleaned = transcript.rstrip('\\n')\n",
"    for unwanted_char in CHARS_TO_REMOVE:\n",
"        cleaned = cleaned.replace(unwanted_char, \"\")\n",
"\n",
"    sequence = phoneme_to_sequence(cleaned, [CONFIG_TEXT_CLEANER], CONFIG_PHONEME_LANGUAGE, CONFIG_ENABLE_EOS_BOS_CHARS)\n",
"    return [sequence_to_phoneme(sequence).replace(\" \", \"\")]"
]
},
{
"cell_type": "code",
"execution_count": 6,
2020-08-03 12:03:12 +02:00
"metadata": {
2020-08-03 16:55:38 +02:00
"Collapsed": "false",
"tags": []
2020-08-03 12:03:12 +02:00
},
2020-08-03 16:55:38 +02:00
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": "31%|███ | 306/1000 [00:02<00:05, 124.09it/s][WARNING] fount 1 utterances containing language switches on lines 1\n[WARNING] extra phones may appear in the \"de\" phoneset\n[WARNING] language switch flags have been kept (applying \"keep-flags\" policy)\n 50%|█████ | 505/1000 [00:04<00:04, 113.65it/s][WARNING] fount 1 utterances containing language switches on lines 1\n[WARNING] extra phones may appear in the \"de\" phoneset\n[WARNING] language switch flags have been kept (applying \"keep-flags\" policy)\n100%|██████████| 1000/1000 [00:08<00:00, 115.59it/s]\n"
}
],
2020-08-02 18:13:54 +02:00
"source": [
"# Get phonemes from metadata\n",
"phonemes = []\n",
"\n",
2020-08-03 16:55:38 +02:00
"# Leave one core free, but never ask for fewer than one worker:\n",
"# Pool(0) raises ValueError on single-core machines.\n",
"with Pool(max(1, cpu_count() - 1)) as p:\n",
"    # imap keeps the input order; each call returns a list with zero or one phoneme string\n",
"    phonemes = list(tqdm(p.imap(get_phoneme_from_sequence, CONFIG_METADATA), total=len(CONFIG_METADATA)))\n",
"    # flatten the per-item lists into one list of phoneme strings\n",
"    phonemes = [i for sub in phonemes for i in sub]"
2020-08-02 18:13:54 +02:00
]
},
{
"cell_type": "code",
2020-08-03 16:55:38 +02:00
"execution_count": 7,
2020-08-03 12:03:12 +02:00
"metadata": {
2020-08-03 16:55:38 +02:00
"Collapsed": "false",
"tags": []
2020-08-03 12:03:12 +02:00
},
2020-08-03 16:55:38 +02:00
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "Dataset contains 43 different ipa phonemes.\nDataset consists of 47638 phonemes\n"
}
],
2020-08-02 18:13:54 +02:00
"source": [
"# Concatenate all phoneme strings and count how often each character occurs.\n",
"phonemeString = \"\".join(phonemes)\n",
"\n",
"# collections.Counter is the public API for what the private helper\n",
"# collections._count_elements did; most_common() orders by count, descending.\n",
"d = collections.Counter(phonemeString)\n",
"sorted_d = dict(d.most_common())\n",
"\n",
"# remove useless keys\n",
"sorted_d.pop(' ', None)\n",
"# NOTE(review): keys are single characters, so this two-character key never matches;\n",
"# confirm whether the stress mark alone was meant to be dropped.\n",
"sorted_d.pop('ˈ ', None)\n",
"\n",
"# total number of counted symbols, ignoring spaces\n",
"phonemesSum = len(phonemeString.replace(\" \",\"\"))\n",
"\n",
"print(\"Dataset contains \" + str(len(sorted_d)) + \" different ipa phonemes.\")\n",
"print(\"Dataset consists of \" + str(phonemesSum) + \" phonemes\")"
]
},
{
"cell_type": "code",
2020-08-03 16:55:38 +02:00
"execution_count": 8,
2020-08-03 12:03:12 +02:00
"metadata": {
2020-08-03 16:55:38 +02:00
"Collapsed": "false",
"tags": []
2020-08-03 12:03:12 +02:00
},
2020-08-03 16:55:38 +02:00
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "5 rarest phonemes\nʒ --> 2 occurrences\nɒ --> 2 occurrences\n( --> 4 occurrences\n) --> 4 occurrences\nɐ --> 30 occurrences\n"
}
],
2020-08-02 18:13:54 +02:00
"source": [
"print(\"5 rarest phonemes\")\n",
"\n",
"# ascending sort by count, so the first five entries are the least frequent symbols\n",
"ascending_counts = sorted(sorted_d.items(), key=operator.itemgetter(1))\n",
"rareList = dict(ascending_counts[:5])\n",
"for phoneme, count in rareList.items():\n",
"    print(phoneme + \" --> \" + str(count) + \" occurrences\")"
]
},
{
"cell_type": "code",
2020-08-03 16:55:38 +02:00
"execution_count": 9,
2020-08-03 12:03:12 +02:00
"metadata": {
"Collapsed": "false"
},
2020-08-03 16:55:38 +02:00
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": "<Figure size 3600x3600 with 1 Axes>",
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Created with matplotlib (https://matplotlib.org/) -->\n<svg height=\"2889.9625pt\" version=\"1.1\" viewBox=\"0 0 2967.364844 2889.9625\" width=\"2967.364844pt\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n <defs>\n <style type=\"text/css\">\n*{stroke-linecap:butt;stroke-linejoin:round;}\n </style>\n </defs>\n <g id=\"figure_1\">\n <g id=\"patch_1\">\n <path d=\"M 0 2889.9625 \nL 2967.364844 2889.9625 \nL 2967.364844 0 \nL 0 0 \nz\n\" style=\"fill:none;\"/>\n </g>\n <g id=\"axes_1\">\n <g id=\"patch_2\">\n <path d=\"M 117.739062 2774.98125 \nL 2907.739062 2774.98125 \nL 2907.739062 56.98125 \nL 117.739062 56.98125 \nz\n\" style=\"fill:#ffffff;\"/>\n </g>\n <g id=\"patch_3\">\n <path clip-path=\"url(#p19ebdbb6a6)\" d=\"M 117.739062 180.526705 \nL 2774.88192 180.526705 \nL 2774.88192 226.711921 \nL 117.739062 226.711921 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_4\">\n <path clip-path=\"url(#p19ebdbb6a6)\" d=\"M 117.739062 238.258225 \nL 2400.818119 238.258225 \nL 2400.818119 284.443442 \nL 117.739062 284.443442 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_5\">\n <path clip-path=\"url(#p19ebdbb6a6)\" d=\"M 117.739062 295.989746 \nL 2095.938533 295.989746 \nL 2095.938533 342.174963 \nL 117.739062 342.174963 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_6\">\n <path clip-path=\"url(#p19ebdbb6a6)\" d=\"M 117.739062 353.721267 \nL 1901.87095 353.721267 \nL 1901.87095 399.906484 \nL 117.739062 399.906484 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_7\">\n <path clip-path=\"url(#p19ebdbb6a6)\" d=\"M 117.739062 411.452788 \nL 1842.067647 411.452788 \nL 1842.067647 457.638004 \nL 117.739062 457.638004 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_8\">\n <path clip-path=\"url(#p19ebdbb6a6)\" 
d=\"M 117.739062 469.184309 \nL 1730.669336 469.184309 \nL 1730.669336 515.369525 \nL 117.739062 515.369525 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_9\">\n <path clip-path=\"url(#p19ebdbb6a6)\" d=\"M 117.739062 526.915829 \nL 1454.518788 526.915829 \nL 1454.518788 573.101046 \nL 117.739062 573.101046 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_10\">\n <path clip-path=\"url(#p19ebdbb6a6)\" d=\"M 117.739062 584.64735 \nL 1357.191843 584.64735 \nL 1357.191843 630.832567 \nL 117.739062 630.832567 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_11\">\n <path clip-path=\"url(#p19ebdbb6a6)\" d=\"M 117.739062 642.378871 \nL 1109.184025 642.378871 \nL 1109.184025 688.564088 \nL 117.739062 688.564088 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_12\">\n <path clip-path=\"url(#p19ebdbb6a6)\" d=\"M 117.739062 700.110392 \nL 1027.10106 700.110392 \nL 1027.10106 746.295609 \nL 117.739062 746.295609 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_13\">\n <path clip-path=\"url(#p19ebdbb6a6)\" d=\"M 117.739062 757.841913 \nL 917.46167 757.841913 \nL 917.46167 804.027129 \nL 117.739062 804.027129 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_14\">\n <path clip-path=\"url(#p19ebdbb6a6)\" d=\"M 117.739062 815.573434 \nL 901.045077 815.573434 \nL 901.045077 861.75865 \nL 117.739062 861.75865 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_15\">\n <path clip-path=\"url(#p19ebdbb6a6)\" d=\"M 117.739062 873.304954 \nL 851.208991 873.304954 \nL 851.208991 919.490171 \nL 117.739062 919.490171 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_16\">\n <path clip-path=\"url(#p19ebdbb6a6)\" d=\"M 117.739062 931.036475 \nL 804.890746 931.036475 \nL 804.890746 977.221692 \nL 117.739062 977.221692 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_17\">\n <path clip-path=\"url(#p19ebdbb6a6)\" d=\"M 117.739062 988.767996 \nL 756.81358 988.767996 \nL 756.81358 1034.953213 \nL 117.73
"image/png": "iVBORw0KGgoAAAANSUhEUgAAC5gAAAtJCAYAAADghjwVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nOydd7glRdG439pdYMlpgZW4gCQByUgSLgoKiBkxy2L+mbPI9ymrnxEVs2BkTShJERUVQVZAJKkICJIvIErOYWFD/f6oOezcnj7nTM8JM2dvvc8zz+6ZOx2mp0N1dXW1qCqO4ziO4ziO4ziO4ziO4ziO4ziO4ziO4ziO4ziO4ziO4ziOM6XuDDiO4ziO4ziO4ziO4ziO4ziO4ziO4ziO4ziO4ziO4ziO4zjNwA3MHcdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMANzB3HcRzHcRzHcRzHcRzHcRzHcRzHcRzHcRzHcRzHcRzHcZwMNzB3HMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxADcwdxzHcRzHcRzHcRzHcRzHcRzHcRzHcRzHcRzHcRzHcRzHcTLcwNxxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMcB3MC8MYjIbBHR4JpVd74cx3EcZ9QRkVVF5B0icrKIXCci94rIomDMvazufDpLN3XLeiIyJ0x/WGk7kwcRWUtE7svVs5tFZHrd+XIcx+kVEZkbjKPjNeRhVkSWmD3sfNRNA2Sq2uuC02xEZCxSR8eGkO54kObcQafp9EZddcVxlkaq9oHedw4H7++cyUYT+hafPzohXif6j4hMF5Hrc+X5oIisXXe+HGcUcX1bsxGRbQPbinPrzlPTacq6fGTsn1NHPpzhICLriMhDue99vYgs14+4p/UjEsdxHMdxnCYiIm8FjgZWrjsvjuM4k4CjgdVyv/9HVefXlRnHmeyIyDrANsBGWNucDjwI3AdcD/xdVZ+oL4eO4ziO4ziO4ziO4ziOM5J8GNg09/toVb0zNRIRmQZsAmwIbACsCqwALAIewPR4/wSuVtVFvWbacRwnFVW9QkR+DLwuu/VMEXmNqv64znw5jjMRVb1DRI4BPpbd2hT4IPDJXuN2A/OAzOvRTQlB5mOC3QPAtcBfgfOAc1R1cb/z5ziO4zhOOUTko8An6s6H4zjOZEBEdgcOy926DPhJTdlxnEmJiGwEHAA8C9gHWKdLkMdF5C/At4FT3djccRzHcRzHcRzHcRzHcTojIhsDR+Ru/Rc4pmTYzYG9gD2BHYGtgDLeRR8TkTOB44Ff98vYXETWAHbOXTthxu4TUFXpR3qO44wsHwVezpL+6mgR+aWqPlRjnhzHKfIF4K1A61SVj4jID1X1ll4indJztpzp2KLt5sDBwFHAWcANIvL+bMeh4ziO4zhDRER2AuZE/qTALcDlwD9y1zVDy5wzMvixUUsf/T4KVERmR+Kb1bcMjxZfAPJK5k+oatcj3/yo7NFDROYF32te3Xma7IjI+0TkQmAcOA44lO7G5WDK4DHgBOB6EXnOoPI4WfH24jiOM1hcHnccx+k//dadOI5TDtdFO85I8SnMTqjF51X10ZJhzwW+B7we2J5yxuUAywMvBE4D/iYie5QMNwER2UZEPiwiJ4nIjcA9wO+xd3oxEeNyx3GczDh1bu7WU4D315ObpRcRmRvIg+N158kZLCIyJ5wH9BJftunjS7lbK9AHp5xuYD44ZmFGFheKyGY158VxHMdxJhvvoyjnfAaYoaobqep2qrp97np5DXl0HMdZKhCRg4G8QvsqTNHtOM5w+CLwjB7j2AD4vYh8tg/5cRzHcRzHcRzHcRzHcZylDhHZFnhF7tY92OmAw+TpwHki8sYKYd8IfBZ4GbBxX3PlOM7SzueB/OkJ7xORNevKjOM4bfkm8EDu92tEZKteIn
Tv2uV4BLi+zd9WAGYAq7f5+07AWSKyp6r+exCZcxzHcRxnCSIyBXh+cPtkVT2yjvw4juMszYiIAJ8Mbn+ujPdyx3EGzi3AecA/gbswhdJqwNbAAcAWkTAfFpFFqvo/Q8ul4ziO4ziO4ziO4ziO44wGn2TiSZ5fVdVHeojvNuBvmD3SjcCDmH1Syw5pG+C5wHpBuCnAt0XkUVU9oYf0HcdxSqGqN4jIKUDLcd/KwIeBD9WXK8dxQlT1QRE5DmufAFOBj2OnH1fCDczLcamqjnV6QEQ2xXYqvgcT9PJsCJwM7D6Q3DmO4ziOk2crbEKT58Q6MuI4TUFV5wBzas6Gs3RyALBd7vc9eJ/rOHVyM/AD4Eeq2m6jfGtzyIuBY4G1gz8fKSJ/UNV5A8vliKGqs4HZNWfDcRzHcRzH6SOqOqvuPEwGsnmFdHvOcZz+oarjeLtznL4jIk9jooOrhcB3EqO5HTgT+C1wrqreViJdAV4CfAk7hfDJPwFfF5GzVfWOxHyEPAFcCVyKGaCt1mN8juMsnRzLEgNzgLeKyCdV9cG6MtREfF3eaQDfwjZ/tOYELxWRTVX1hiqRTelbtiY5qnqDqn4K2Ba4OPLIbiLysiFny3Ecx3EmIxtG7l0z9Fw4juNMDt4X/P6Bqj5eS04cZ3LzN+AFwMaqelQn43IANX6Onbp2S+SRrw0gj47jOI7jOI7jOI7jOI4zqryXiZs3TlfV/6ZEoKrbq+rrVPWnZYzLszCqqqcCu2FezvOsDrw1JQ+YYfzlwPeBtwG7Aiur6k6q+hbsFETHcZwCqvon4NrcrZWBN9WUHcdx2qCqNwFn5W5NwZxmV8INzPuMqt4OHIztPAx5y5Cz4ziO4ziTkVUj93o5ns5xHMeJICLbAPsFt79XR14cZ5LzomwB6FeqqikBVfXfwCFAGG4bEdmhbzl0HMdxHMdxHMdxHMdxnBFFRNYEXhPcHqouXFX/A7w98qcUR5efA1ZR1e1U9Q2qeqyqXqKqT/Qnl47jTAK+H/x+p4i4/anjNI9QTjlcRFapEpE38AGgqncBR0f+tJeIrDDs/DiO4zjOJGN65F6SsZXjOI5TisOD31ep6lW15MRxJjGq+ssew18C/Dryp4N7iddxHMdxHMdxHMdxHMdxlhJexcT1x/uBP9SQjzOBu4J7W5Q17lTV/6rqY/3PluM4k4iTg98bAfvWkRHHcTryayA/5q9I2qa0J5nWl+w4MX4OHBPcWw7YGrikaqQisgx2RM3TgBnAAkyAvBa4WFUXVY27Q5qrZWnOBNYCpgJ3ZtclmUH9wMk8JO6Q5WMqcDdwK3C+qg7EM2327rsA62DvvlyWbuvdk448qpiHDYGdsUF5ReAh4HrgAlW9LyGerbDyewqwLPYON2Dlt3AA+RZgW2BTrOzWxDwI3wWMY+XX93T7gYgsi5X5+li+VwceBx7Eyuyq7LSCfqS1dpbW2tm1CPs2dwAXquqD/UinSYjIOsCOWL1YG9vsdBdL3vneIeRhQ6w9bIQd27MoS/9EVX20RPi1gS2x+r0aS9rmvcBtWH/88IDyvgywe5b3p2S37wb+CVw6iHGgTT62ALbAvuEMrI3chfXLF6nq/GHkow6yzWLPANbF6vF07N3vBP6eeQIdRj42A7bD+qqVgCeA21X1h0NKfwo2Rm6K1cVlsDbwL6wOPD6EPDRCRukndch6vSAiawE7YeU/A1gB6w/vBq4Bri7TrzpxMnlqE+DpWD1fBZvDPQY8DPwbk6uuHbZcJSJTMaV6nlOGmYeyZHndBBu718PKcSpwX3b9C7hCVRcPOB+bAVthbWUGJgM9BPwny8O1/WjjmSy7KzY+rI3JKfdgfeM/VfW6XtMYNllb2BrYBht/VwDmA9er6mkl42jS+L0TVheXw77Nf7B5Wen53QA4A3h+cG+TOjLi9E4d+pOqZH30bsDGWPsEaxet+c1AxzcRWQnYE+sz18HmNf/F+oWrB5l2Lg+tPm4zTAcxA9sg+y
D23a4Gbkw9oaDPeVwWeCo2lrZkEjD5/15sHL1miPkZWh3Pvs9O2PdZl4nznotHbe4rIk8DtmeifvDfwJ/7rccQkdW
},
"metadata": {
"needs_background": "light"
}
}
],
2020-08-02 18:13:54 +02:00
"source": [
"# create plot from analysis result\n",
"\n",
"# bar labels (phonemes) and bar lengths (occurrence counts), in sorted_d order\n",
"x = list(sorted_d.keys())\n",
"y = list(sorted_d.values())\n",
"\n",
"plt.figure(figsize=(50,50))\n",
"plt.title(\"Phoneme coverage for \" + CONFIG_RUN_NAME + \" (\" + CONFIG_RUN_DESC + \")\", fontsize=50)\n",
"plt.xticks(fontsize=50)\n",
"plt.yticks(fontsize=50)\n",
"plt.barh(x,y, align='center', alpha=1.0)\n",
"# flip the axis so the first entry is drawn at the top\n",
"plt.gca().invert_yaxis()\n",
"plt.ylabel('phoneme', fontsize=50)\n",
"plt.xlabel('occurrences', fontsize=50)\n",
"\n",
"# annotate every bar with its absolute count and its share of all counted phonemes\n",
"for i, v in enumerate(y):\n",
"    share = round(100/phonemesSum * v,2)\n",
"    plt.text(v + 2, i - .2, str(v), fontsize=20)\n",
"    plt.text(v + 2, i + .2, \"(\" + str(share) + \"%)\", fontsize=20)\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
2020-08-03 12:03:12 +02:00
"metadata": {
"Collapsed": "false"
},
2020-08-02 18:13:54 +02:00
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
2020-08-03 16:55:38 +02:00
"version": "3.6.9-final"
2020-08-02 18:13:54 +02:00
}
},
"nbformat": 4,
"nbformat_minor": 4
2020-08-03 16:55:38 +02:00
}