# Mangio-RVC-Fork/my_utils.py

import csv
import os
import random
import subprocess
import sys
from shlex import quote as RQuote

import ffmpeg
import numpy as np

# Name of the stftpitchshift executable, keyed by the value of sys.platform.
platform_stft_mapping = dict(
    linux='stftpitchshift',
    darwin='stftpitchshift',
    win32='stftpitchshift.exe',
)

# Executable for the platform we are running on; None when sys.platform is
# not one of the keys above.
stft = platform_stft_mapping.get(sys.platform)

def CSVutil(file, rw, type, *args):
    """Read or write a single-row CSV state file.

    Args:
        file: Path of the CSV file.
        rw: ``'r'`` reads the row back (``'formanting'`` only). For every
            write, the value is passed to ``open`` as the file mode
            (e.g. ``'w+'``).
        type: ``'formanting'`` (three columns) or ``'stop'`` (one column).
            Any other value makes the call a no-op.
        *args: Values to write. For ``'formanting'``: ``doformnt``
            (default ``False``), ``qfr`` (default ``1.0``) and ``tmb``
            (default ``1.0``). For ``'stop'``: the stop flag (default
            ``False``).

    Returns:
        For a ``'formanting'`` read, a 3-tuple with the first three fields of
        the first row, as strings. ``None`` in every other case.

    Raises:
        ValueError: On a ``'formanting'`` read when the file has no first row
            or the first row has fewer than three fields.
    """
    if type == 'formanting':
        if rw == 'r':
            # newline='' lets the csv module do its own newline handling.
            with open(file, newline='') as fileCSVread:
                # Only the first row is ever used, so don't load the rest.
                first_row = next(csv.reader(fileCSVread), None)
            if not first_row or len(first_row) < 3:
                raise ValueError("No data")
            return first_row[0], first_row[1], first_row[2]

        doformnt = args[0] if args else False
        qfr = args[1] if len(args) > 1 else 1.0
        tmb = args[2] if len(args) > 2 else 1.0
        with open(file, rw, newline='') as fileCSVwrite:
            csv_writer = csv.writer(fileCSVwrite, delimiter=',')
            csv_writer.writerow([doformnt, qfr, tmb])
    elif type == 'stop':
        stop = args[0] if args else False
        with open(file, rw, newline='') as fileCSVwrite:
            csv_writer = csv.writer(fileCSVwrite, delimiter=',')
            csv_writer.writerow([stop])

def load_audio(file, sr, DoFormant, Quefrency, Timbre):
    """Decode ``file`` with ffmpeg into a flat mono float32 array at rate ``sr``.

    When formant shifting is enabled, the input is first converted to WAV if
    it is not one already, run through the external ``stftpitchshift``
    executable, and the shifted result is decoded instead of the original.
    Temporary files created by this call are removed before it returns,
    whether it succeeds or fails.

    Args:
        file: Path of the audio file. Surrounding spaces, double quotes and
            newlines are stripped before use.
        sr: Target sample rate, passed to ffmpeg as ``ar``.
        DoFormant: Ignored; see note below.
        Quefrency: Ignored; see note below.
        Timbre: Ignored; see note below.

    Note:
        The three formant arguments are kept for interface compatibility.
        Their effective values are always re-read from
        ``csvdb/formanting.csv``; errors from that read propagate unchanged.

    Returns:
        One-dimensional ``numpy.float32`` array of decoded samples.

    Raises:
        RuntimeError: If conversion, formant shifting or decoding fails.
    """
    DoFormant, Quefrency, Timbre = CSVutil('csvdb/formanting.csv', 'r', 'formanting')

    # Temp files this call is responsible for deleting (None = not created).
    converted_path = None
    formanted_path = None
    try:
        file = file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        source = file

        if DoFormant.lower() == 'true':
            if stft is None:
                raise RuntimeError(
                    f"no stftpitchshift executable known for platform {sys.platform!r}"
                )

            numerator = round(random.uniform(1, 4), 4)

            file_formanted = file
            if not file.endswith(".wav"):
                file_formanted = f"{file}.wav"
                # Reuse an existing "<file>.wav" if present; otherwise create
                # it and remember to delete it afterwards.
                if not os.path.isfile(file_formanted):
                    converted_path = file_formanted
                    # ffmpeg-python passes arguments without a shell, so the
                    # path must NOT be shell-quoted here.
                    (
                        ffmpeg.input(file, threads=0)
                        .output(file_formanted)
                        .run(
                            cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True
                        )
                    )

            print(f" · Formanting {file_formanted}...\n")

            formanted_path = f"{file_formanted}FORMANTED_{numerator}.wav"
            # Argument list + no shell: paths with spaces or quotes are passed
            # through verbatim on every platform, and a non-zero exit status
            # raises instead of being silently ignored.
            subprocess.run(
                [
                    stft,
                    "-i", file_formanted,
                    "-q", str(Quefrency),
                    "-t", str(Timbre),
                    "-o", formanted_path,
                ],
                check=True,
            )

            print(f" · Formanted {file_formanted}!\n")
            source = formanted_path

        out, _ = (
            ffmpeg.input(source, threads=0)
            .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
            .run(
                cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True
            )
        )
    except Exception as e:
        raise RuntimeError(f"Failed to load audio: {e}") from e
    finally:
        # Best-effort cleanup; a failed delete is reported but never fatal.
        cleanup = (
            (formanted_path, "couldn't remove formanted type of file"),
            (converted_path, "Couldn't remove converted type of file"),
        )
        for path, label in cleanup:
            if path is None:
                continue
            try:
                os.remove(path)
            except FileNotFoundError:
                # The step that should have produced the file failed first.
                pass
            except OSError as e:
                print(f"{label} due to {e}")

    return np.frombuffer(out, np.float32).flatten()


def check_audio_duration(file):
    """Report whether an audio file is long enough to be used.

    Args:
        file: Path of the audio file. Surrounding spaces, double quotes and
            newlines are stripped before probing.

    Returns:
        ``False`` (after printing a warning) when the probed duration is
        below 0.76 seconds, ``True`` otherwise.

    Raises:
        RuntimeError: If the file cannot be probed or no duration is found.
    """
    try:
        # Strip whitespace and quote characters from the file name
        file = file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")

        # Probe the audio file for information
        probe = ffmpeg.probe(file)

        # Prefer the first stream's duration; some files only report a
        # duration at container level, so fall back to the format section.
        streams = probe.get('streams') or [{}]
        raw_duration = streams[0].get('duration')
        if raw_duration is None:
            raw_duration = probe['format']['duration']
        duration = float(raw_duration)

        # Files shorter than 0.76 seconds are rejected with a warning
        if duration < 0.76:
            print(
                f"\n------------\n"
                f"Audio file, {os.path.basename(file)}, under ~0.76s detected - file is too short. Target at least 1-2s for best results."
                f"\n------------\n\n"
            )
            return False

        return True
    except Exception as e:
        raise RuntimeError(f"Failed to check audio duration: {e}") from e