2021-03-10 15:56:40 +00:00
|
|
|
import logging
|
|
|
|
|
from threading import Thread
|
|
|
|
|
import os
|
|
|
|
|
from datetime import datetime
|
2021-03-22 19:34:45 +00:00
|
|
|
import requests
|
2021-03-24 20:30:25 +00:00
|
|
|
import traceback
|
2021-04-04 18:52:36 +01:00
|
|
|
import configparser
|
|
|
|
|
import shutil
|
2021-05-02 17:09:05 +01:00
|
|
|
import zipfile
|
|
|
|
|
import librosa
|
|
|
|
|
import time
|
2021-05-02 17:44:18 +01:00
|
|
|
from unidecode import unidecode
|
2021-03-10 15:56:40 +00:00
|
|
|
|
|
|
|
|
from main import socketio
|
2021-04-01 19:12:38 +01:00
|
|
|
from dataset.audio_processing import convert_audio
|
2021-04-20 12:23:11 +01:00
|
|
|
from dataset.clip_generator import clip_generator
|
2021-04-07 20:26:51 +01:00
|
|
|
from dataset.analysis import save_dataset_info
|
2021-03-10 15:56:40 +00:00
|
|
|
|
|
|
|
|
|
2021-03-22 19:34:45 +00:00
|
|
|
LOGGING_URL = "https://voice-cloning-app-logging.herokuapp.com/"
|
2021-04-20 12:23:11 +01:00
|
|
|
CONFIG_FILE = "config.ini"
|
2021-03-22 19:34:45 +00:00
|
|
|
|
|
|
|
|
|
2021-03-10 15:56:40 +00:00
|
|
|
class SocketIOHandler(logging.Handler):
    """Logging handler that streams log records to the frontend over socketio.

    Records whose message starts with "Progress" or "Status" are routed to
    dedicated events on the "/voice" namespace; anything else is forwarded
    verbatim as a raw log line.
    """

    def emit(self, record):
        """Route a single log record to the matching socketio event."""
        message = record.getMessage()
        if message.startswith("Progress"):
            # Expected message shape: "Progress - current/total"
            progress_part = message.split("-")[1]
            current, total = progress_part.split("/")
            socketio.emit("progress", {"number": current, "total": total}, namespace="/voice")
        elif message.startswith("Status"):
            socketio.emit("status", {"text": message.replace("Status -", "")}, namespace="/voice")
        else:
            socketio.emit("logs", {"text": message}, namespace="/voice")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Data
# Root logging configuration; the dedicated "voice" logger additionally
# mirrors every record to the frontend via the SocketIOHandler above.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("voice")
logger.addHandler(SocketIOHandler())
# Handle to the most recently started background task thread
# (assigned in start_progress_thread; None until a task is started).
thread = None
|
|
|
|
|
|
|
|
|
|
|
2021-04-04 18:52:36 +01:00
|
|
|
def update_config(data):
    """Writes data to a config file

    Parameters
    ----------
    data : dict
        Dictionary data to write to config file
    """
    parser = configparser.ConfigParser()
    # Store everything under DEFAULT so get_config can read it back directly
    parser["DEFAULT"] = data
    with open(CONFIG_FILE, "w") as f:
        parser.write(f)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_config():
    """Gets data from config file

    Returns
    -------
    dict
        Data returned from config file
    """
    parser = configparser.ConfigParser()
    # read() silently ignores a missing file, leaving DEFAULT empty
    parser.read(CONFIG_FILE)
    return parser["DEFAULT"]
|
2021-04-04 18:52:36 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def can_send_logs():
    """Checks whether logging is allowed.

    Uses config file. If config is not found, defaults to True

    Returns
    -------
    bool
        Whether logging is allowed
    """
    config = get_config()
    # Logging is disabled only by an explicit "False"; a missing or empty
    # value (including no config file at all) leaves it enabled.
    return config.get("send_logs") != "False"
|
|
|
|
|
|
|
|
|
|
|
2021-03-22 19:34:45 +00:00
|
|
|
def send_error_log(error):
    """Sends error log to server if allowed.

    Best-effort: any failure to reach the logging server is printed and
    otherwise ignored, so error reporting can never crash the caller.

    Parameters
    ----------
    error : dict
        Error object to send (contains type, text & stacktrace)
    """
    if can_send_logs():
        try:
            # Timeout so an unreachable/slow logging server cannot hang the app
            response = requests.post(LOGGING_URL, data=error, timeout=10)
            if response.status_code != 201:
                print("error logging received invalid response")
        except Exception:
            # Narrowed from a bare "except:" so SystemExit/KeyboardInterrupt
            # still propagate; everything else stays best-effort.
            print("error logging failed")
|
2021-03-22 19:34:45 +00:00
|
|
|
|
|
|
|
|
|
2021-03-10 15:56:40 +00:00
|
|
|
def background_task(func, **kwargs):
    """Runs a background task.

    If function errors out it will send an error log to the error logging server and page.
    Sends 'done' message to frontend when complete.

    Parameters
    ----------
    func : function
        Function to run in background
    kwargs : kwargs
        Kwargs to pass to function
    """
    try:
        # Short delay gives the frontend time to attach its socket listeners
        # before any progress/status events are emitted.
        socketio.sleep(5)
        func(logging=logger, **kwargs)
    except Exception as e:
        error = {"type": e.__class__.__name__, "text": str(e), "stacktrace": traceback.format_exc()}
        send_error_log(error)
        socketio.emit("error", error, namespace="/voice")
        # Bare raise preserves the original traceback (unlike "raise e").
        raise

    socketio.emit("done", {"text": None}, namespace="/voice")
|
2021-03-10 15:56:40 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def start_progress_thread(func, **kwargs):
    """Starts a background task using socketio.

    Stores the task handle in the module-level ``thread`` variable.

    Parameters
    ----------
    func : function
        Function to run in background
    kwargs : kwargs
        Kwargs to pass to function
    """
    global thread
    print("Starting Thread")
    # Delegate to socketio so the task cooperates with its event loop;
    # background_task wraps func with error reporting and the "done" event.
    thread = socketio.start_background_task(background_task, func=func, **kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_next_url(urls, path):
    """Returns the URL of the next step in the voice cloning process.

    Parameters
    ----------
    urls : dict
        Frontend url paths and names
    path : str
        Current URL

    Returns
    -------
    str
        URL of next step or '' if not found
    """
    ordered_urls = list(urls.keys())
    try:
        next_url_index = ordered_urls.index(path) + 1
    except ValueError:
        # path is not a known step; the docstring promises '' when not found
        # (the original raised ValueError here instead).
        return ""
    return ordered_urls[next_url_index] if next_url_index < len(ordered_urls) else ""
|
|
|
|
|
|
|
|
|
|
|
2021-04-01 19:12:38 +01:00
|
|
|
def get_suffix():
    """Generates a filename suffix using the current datetime.

    Returns
    -------
    str
        String suffix
    """
    now = datetime.now()
    # Filesystem-safe timestamp, e.g. "02-05-2021_17-44-18"
    return now.strftime("%d-%m-%Y_%H-%M-%S")
|
2021-04-04 18:52:36 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def delete_folder(path):
    """Deletes a folder.

    Parameters
    ----------
    path : str
        Path to folder

    Raises
    -------
    AssertionError
        If folder is not found
    """
    folder_exists = os.path.isdir(path)
    assert folder_exists, f"{path} does not exist"
    shutil.rmtree(path)
|
2021-05-02 17:09:05 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def import_dataset(dataset, dataset_directory, audio_folder, logging):
    """Imports a zipped dataset (metadata.csv + wavs/ folder) into dataset_directory.

    Validates the zip layout, extracts metadata and audio, converts each clip
    with convert_audio, and writes an info.json summary. The source zip is
    deleted on both success and failure.

    Parameters
    ----------
    dataset : str
        Path to the dataset zip file (removed when this function returns or raises)
    dataset_directory : str
        Destination directory to create (must not already exist)
    audio_folder : str
        Audio directory to create (must not already exist)
        # NOTE(review): wavs are written to dataset_directory/"wavs", so this
        # presumably equals that path — confirm against the caller.
    logging : logging.Logger
        Logger used for status/progress messages picked up by SocketIOHandler

    Raises
    ------
    AssertionError
        If the zip is missing metadata.csv or the wavs folder, contains no
        wavs, or the wav count does not match the metadata row count
    """
    try:
        with zipfile.ZipFile(dataset, mode="r") as z:
            files_list = z.namelist()
            # metadata.csv must sit at the root of the archive
            assert (
                "metadata.csv" in files_list
            ), "Dataset missing metadata.csv. Make sure this file is in the root of the zip file"

            # Top-level folder names inside the archive
            folders = [x.split("/")[0] for x in files_list if "/" in x]
            assert (
                "wavs" in folders
            ), "Dataset missing wavs folder. Make sure this folder is in the root of the zip file"

            wavs = [x for x in files_list if x.startswith("wavs/") and x.endswith(".wav")]
            assert wavs, "No wavs found in wavs folder"

            # Decode leniently and normalise Windows line endings so the row
            # count below is accurate regardless of how the csv was authored.
            metadata = z.read("metadata.csv").decode("utf-8", "ignore").replace("\r\n", "\n")
            num_metadata_rows = len([row for row in metadata.split("\n") if row])
            # Every wav needs a label and vice versa
            assert (
                len(wavs) == num_metadata_rows
            ), f"Number of wavs and labels do not match. metadata: {num_metadata_rows}, wavs: {len(wavs)}"

            logging.info("Creating directory")
            # exist_ok=False: refuse to overwrite an existing dataset
            os.makedirs(dataset_directory, exist_ok=False)
            os.makedirs(audio_folder, exist_ok=False)

            # Save metadata
            logging.info("Saving files")
            with open(os.path.join(dataset_directory, "metadata.csv"), "w", encoding="utf-8") as f:
                f.write(metadata)

            # Extract each wav, convert it, and record its duration.
            # filenames maps original extracted path -> converted file path.
            total_wavs = len(wavs)
            clip_lengths = []
            filenames = {}
            for i in range(total_wavs):
                wav = wavs[i]
                data = z.read(wav)
                # Flatten "wavs/<name>.wav" into the new wavs directory
                path = os.path.join(dataset_directory, "wavs", wav.split("/")[1])
                with open(path, "wb") as f:
                    f.write(data)
                new_path = convert_audio(path)
                clip_lengths.append(librosa.get_duration(filename=new_path))
                filenames[path] = new_path
                # "Progress - i/total" is parsed by SocketIOHandler for the frontend
                logging.info(f"Progress - {i+1}/{total_wavs}")

            # Get around "file in use" by using delay: replace each original
            # extracted file with its converted counterpart only after the
            # whole conversion loop has finished.
            logging.info("Deleting temp files")
            for old_path, new_path in filenames.items():
                os.remove(old_path)
                os.rename(new_path, old_path)

            # Create info file summarising the imported dataset
            logging.info("Creating info file")
            save_dataset_info(
                os.path.join(dataset_directory, "metadata.csv"),
                os.path.join(dataset_directory, "wavs"),
                os.path.join(dataset_directory, "info.json"),
                clip_lengths=clip_lengths,
            )
    except Exception as e:
        # Clean up the uploaded zip before propagating the failure
        os.remove(dataset)
        raise e

    # Success path: the zip has been fully imported, remove it
    os.remove(dataset)
|