Files
Voice-Cloning-App/application/utils.py

251 lines
7.0 KiB
Python
Raw Normal View History

2021-03-10 15:56:40 +00:00
import logging
from threading import Thread
import os
from datetime import datetime
2021-03-22 19:34:45 +00:00
import requests
2021-03-24 20:30:25 +00:00
import traceback
2021-04-04 18:52:36 +01:00
import configparser
import shutil
2021-05-02 17:09:05 +01:00
import zipfile
import librosa
import time
2021-03-10 15:56:40 +00:00
from main import socketio
2021-04-01 19:12:38 +01:00
from dataset.audio_processing import convert_audio
2021-04-20 12:23:11 +01:00
from dataset.clip_generator import clip_generator
2021-04-07 20:26:51 +01:00
from dataset.analysis import save_dataset_info
2021-03-10 15:56:40 +00:00
2021-03-22 19:34:45 +00:00
# Endpoint that send_error_log POSTs error reports to
LOGGING_URL = "https://voice-cloning-app-logging.herokuapp.com/"
2021-04-20 12:23:11 +01:00
# Path of the INI file written by update_config and read by get_config
CONFIG_FILE = "config.ini"
2021-03-22 19:34:45 +00:00
2021-03-10 15:56:40 +00:00
class SocketIOHandler(logging.Handler):
    """Logging handler that forwards log records to the frontend over socketio.

    Every record is emitted on the "/voice" namespace. The event is chosen
    from the start of the message:

    * "Progress..." -> "progress" event; the text after the first "-" is split
      on "/" into ``number`` and ``total`` (both sent as strings, unstripped)
    * "Status..." -> "status" event with the "Status -" marker removed
    * anything else -> "logs" event carrying the whole message
    """

    def emit(self, record):
        """Send a single log record to the frontend.

        Parameters
        ----------
        record : logging.LogRecord
            Record whose formatted message is forwarded
        """
        message = record.getMessage()

        if message.startswith("Progress"):
            # Expected shape is "Progress - <current>/<total>"
            fraction = message.split("-")[1]
            done, out_of = fraction.split("/")
            event = "progress"
            payload = {"number": done, "total": out_of}
        elif message.startswith("Status"):
            event = "status"
            payload = {"text": message.replace("Status -", "")}
        else:
            event = "logs"
            payload = {"text": message}

        socketio.emit(event, payload, namespace="/voice")
# Module-level logging setup: records sent to the "voice" logger are passed to
# SocketIOHandler, which relays them to the frontend.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("voice")
logger.addHandler(SocketIOHandler())
# Handle of the most recently started background task (set by start_progress_thread)
thread = None
2021-04-04 18:52:36 +01:00
def update_config(data):
    """Save settings to the config file, overwriting whatever it held before.

    Parameters
    ----------
    data : dict
        Settings to store; they become the ``DEFAULT`` section of the file
    """
    parser = configparser.ConfigParser()
    parser.read_dict({"DEFAULT": data})

    with open(CONFIG_FILE, "w") as config_file:
        parser.write(config_file)
def get_config():
    """Load the settings stored in the config file.

    Returns
    -------
    configparser.SectionProxy
        Dict-like view of the file's ``DEFAULT`` section
    """
    parser = configparser.ConfigParser()
    parser.read(CONFIG_FILE)
    return parser[configparser.DEFAULTSECT]
2021-04-04 18:52:36 +01:00
def can_send_logs():
    """Report whether error logs may be sent to the logging server.

    Sending is allowed unless the config file explicitly sets ``send_logs``
    to the string "False"; a missing or empty setting counts as allowed.

    Returns
    -------
    bool
        True if logs may be sent, False otherwise
    """
    return get_config().get("send_logs") != "False"
2021-03-22 19:34:45 +00:00
def send_error_log(error):
    """Sends error log to server if allowed.

    This is best-effort: any failure to deliver the log is printed and
    otherwise ignored, so it never masks the error being reported.

    Parameters
    ----------
    error : dict
        Error object to send (contains type, text & stacktrace)
    """
    if not can_send_logs():
        return

    try:
        # Without a timeout a stalled logging server would block the caller
        # forever; the limit is generous to tolerate a slow-starting server.
        response = requests.post(LOGGING_URL, data=error, timeout=30)
        if response.status_code != 201:
            print("error logging recieved invalid response")
    except Exception:
        # Deliberately broad (delivery is optional), but unlike a bare
        # ``except`` this lets KeyboardInterrupt / SystemExit propagate.
        print("error logging failed")
2021-03-22 19:34:45 +00:00
2021-03-10 15:56:40 +00:00
def background_task(func, **kwargs):
    """Runs a background task.

    If the function raises, an error report is sent to the error logging
    server (if allowed) and to the frontend as an "error" event, and the
    exception is re-raised. On success a "done" event is sent to the frontend.

    Parameters
    ----------
    func : function
        Function to run in background. It is called with ``logging=logger``
        in addition to ``kwargs``
    kwargs : kwargs
        Kwargs to pass to function

    Raises
    ------
    Exception
        Whatever ``func`` raised, after it has been reported
    """
    try:
        # NOTE(review): presumably gives the frontend time to connect before
        # the first messages are emitted - confirm
        socketio.sleep(5)
        func(logging=logger, **kwargs)
    except Exception as e:
        error = {"type": e.__class__.__name__, "text": str(e), "stacktrace": traceback.format_exc()}
        send_error_log(error)
        socketio.emit("error", error, namespace="/voice")
        # Bare raise re-raises the active exception with its traceback intact
        raise

    socketio.emit("done", {"text": None}, namespace="/voice")
2021-03-10 15:56:40 +00:00
def start_progress_thread(func, **kwargs):
    """Launch ``func`` as a socketio background task.

    The function is run through ``background_task`` and the resulting task
    handle is stored in the module-level ``thread`` variable.

    Parameters
    ----------
    func : function
        Function to run in background
    kwargs : kwargs
        Kwargs to pass to function
    """
    global thread

    print("Starting Thread")
    task_kwargs = {"func": func, **kwargs}
    thread = socketio.start_background_task(background_task, **task_kwargs)
def get_next_url(urls, path):
    """Returns the URL of the next step in the voice cloning process.

    Steps are ordered by the insertion order of ``urls``.

    Parameters
    ----------
    urls : dict
        Frontend url paths and names
    path : str
        Current URL

    Returns
    -------
    str
        URL of next step, or '' if ``path`` is the last step or is not
        one of the known URLs
    """
    ordered_urls = list(urls)
    try:
        position = ordered_urls.index(path)
    except ValueError:
        # Unknown path: honour the documented '' result instead of crashing
        return ""

    following = position + 1
    return ordered_urls[following] if following < len(ordered_urls) else ""
2021-04-01 19:12:38 +01:00
def get_suffix():
    """Build a filename suffix from the current local date and time.

    Returns
    -------
    str
        Suffix in the form ``DD-MM-YYYY_HH-MM-SS``
    """
    return f"{datetime.now():%d-%m-%Y_%H-%M-%S}"
2021-04-04 18:52:36 +01:00
def delete_folder(path):
    """Deletes a folder and everything inside it.

    Parameters
    ----------
    path : str
        Path to folder

    Raises
    -------
    AssertionError
        If folder is not found
    """
    # Raised explicitly rather than via ``assert`` so the check is not
    # stripped when Python runs with -O.
    if not os.path.isdir(path):
        raise AssertionError(f"{path} does not exist")
    shutil.rmtree(path)
2021-05-02 17:09:05 +01:00
def import_dataset(dataset, dataset_directory, audio_folder, logging):
    """Imports a zipped dataset into a new dataset directory.

    The archive must hold ``metadata.csv`` and a ``wavs`` folder at its root,
    with one non-empty metadata row per ``.wav`` file. Each clip is extracted,
    passed through ``convert_audio``, and the converted file then replaces the
    extracted one. Finally a dataset info file is generated.
    The archive itself is deleted whether or not the import succeeds.

    Parameters
    ----------
    dataset : str
        Path to the dataset zip file (removed before this function returns)
    dataset_directory : str
        Directory to create for the dataset (must not already exist)
    audio_folder : str
        Audio directory to create (must not already exist).
        NOTE(review): clips are written to ``<dataset_directory>/wavs``, so
        this is presumably that same path - confirm against callers
    logging : logging.Logger
        Logger (or any object with ``info``) used for progress messages

    Raises
    -------
    AssertionError
        If the archive is missing metadata.csv, the wavs folder or wav files,
        or if the number of wavs and metadata rows differ
    FileExistsError
        If ``dataset_directory`` or ``audio_folder`` already exists
    """
    try:
        with zipfile.ZipFile(dataset, mode="r") as z:
            files_list = z.namelist()

            # Validation raises AssertionError explicitly (not via ``assert``)
            # so the checks still run when Python is started with -O.
            if "metadata.csv" not in files_list:
                raise AssertionError(
                    "Dataset missing metadata.csv. Make sure this file is in the root of the zip file"
                )

            folders = {x.split("/")[0] for x in files_list if "/" in x}
            if "wavs" not in folders:
                raise AssertionError(
                    "Dataset missing wavs folder. Make sure this folder is in the root of the zip file"
                )

            wavs = [x for x in files_list if x.startswith("wavs/") and x.endswith(".wav")]
            if not wavs:
                raise AssertionError("No wavs found in wavs folder")

            metadata = z.read("metadata.csv")
            num_metadata_rows = sum(1 for row in metadata.decode("utf-8").split("\n") if row)
            if len(wavs) != num_metadata_rows:
                raise AssertionError(
                    f"Number of wavs and labels do not match. metadata: {num_metadata_rows}, wavs: {len(wavs)}"
                )

            logging.info("Creating directory")
            os.makedirs(dataset_directory, exist_ok=False)
            os.makedirs(audio_folder, exist_ok=False)

            # Save metadata
            logging.info("Saving files")
            with open(os.path.join(dataset_directory, "metadata.csv"), "wb") as f:
                f.write(metadata)

            # Save wavs
            total_wavs = len(wavs)
            clip_lengths = []
            filenames = {}
            for count, wav in enumerate(wavs, start=1):
                data = z.read(wav)
                path = os.path.join(dataset_directory, "wavs", wav.split("/")[1])
                with open(path, "wb") as f:
                    f.write(data)
                new_path = convert_audio(path)
                clip_lengths.append(librosa.get_duration(filename=new_path))
                filenames[path] = new_path
                logging.info(f"Progress - {count}/{total_wavs}")

            # Swap each converted file into the place of the clip that was
            # extracted; done only after every clip has been processed.
            logging.info("Deleting temp files")
            for old_path, new_path in filenames.items():
                os.remove(old_path)
                os.rename(new_path, old_path)

            # Create info file
            logging.info("Creating info file")
            save_dataset_info(
                os.path.join(dataset_directory, "metadata.csv"),
                os.path.join(dataset_directory, "wavs"),
                os.path.join(dataset_directory, "info.json"),
                clip_lengths=clip_lengths,
            )
    finally:
        # The archive is closed by now; remove it on success and failure alike
        os.remove(dataset)