# InfiniteMusic/data.py

import librosa
import numpy as np
from pathlib import Path
from multiprocessing import Pool, cpu_count
import mlflow

SAMPLE_RATE = 22050

#@mlflow.trace
def process_file(file_path):
    """
    Cut one audio file into consecutive, non-overlapping 10 second chunks.

    The file is decoded with librosa as mono audio at SAMPLE_RATE. Only
    complete chunks are kept: a trailing remainder shorter than 10 seconds
    is dropped, so a file shorter than 10 seconds yields an empty list.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        A list of waveform slices, each int(SAMPLE_RATE * 10) samples long,
        in playback order. The list is empty if the rate reported by the
        loader differs from SAMPLE_RATE.
    """
    waveform, loaded_rate = librosa.load(file_path, mono=True, sr=SAMPLE_RATE)
    # Defensive guard: skip the file if it did not come back at the
    # sample rate that was requested.
    if loaded_rate != SAMPLE_RATE:
        return []

    chunk_len = int(SAMPLE_RATE * 10)
    # Integer division counts only the chunks that fit completely.
    full_chunks = len(waveform) // chunk_len

    # Chunks are kept as raw waveform samples; no spectrogram conversion
    # is applied at this stage.
    return [
        waveform[idx * chunk_len:(idx + 1) * chunk_len]
        for idx in range(full_chunks)
    ]

#@mlflow.trace
def load(data_dir="./data/", pattern="*.mp3"):
    """
    Load 10 second chunks from every matching audio file in a directory.

    Each file is handed to process_file in a worker process and the
    per-file chunk lists are concatenated into one flat list.

    Args:
        data_dir: Directory to search (non-recursively). Defaults to
            "./data/", which is resolved against the current working
            directory.
        pattern: Glob pattern selecting the files to load. Defaults to
            "*.mp3".

    Returns:
        A flat list of chunks as produced by process_file, grouped by file
        in sorted path order. Empty if no file matches.
    """
    # Path.glob yields entries in arbitrary order; sorting makes the order
    # of the returned chunks reproducible between runs and machines.
    files = sorted(Path(data_dir).glob(pattern))
    if not files:
        # Nothing to do, so avoid spawning worker processes at all.
        return []

    # Never start more workers than there are files to process.
    with Pool(min(cpu_count(), len(files))) as pool:
        per_file_chunks = pool.map(process_file, files)

    return [chunk for chunks in per_file_chunks for chunk in chunks]


##DEP
def audio_split(audio):
    """
    Turn a 10 second clip into a pair of scaled 5 second mel spectrograms.

    The first int(SAMPLE_RATE * 5) samples form the first segment and the
    following int(SAMPLE_RATE * 5) samples form the second. Each segment is
    converted to a mel spectrogram, expressed in dB relative to its own
    maximum, and rescaled with (dB + 80) / 80.

    Args:
        audio: Waveform samples; expected to hold at least 10 seconds at
            SAMPLE_RATE (shorter input gives a truncated second segment).

    Returns:
        A tuple (x, y) with the scaled mel spectrogram of the first and
        second 5 second segment respectively.
    """
    half = int(SAMPLE_RATE * 5)

    def to_scaled_mel(segment):
        # Mel spectrogram -> dB relative to this segment's own peak ->
        # shift/scale by 80 (maps [-80, 0] dB onto [0, 1] given librosa's
        # default 80 dB dynamic-range clipping).
        # NOTE(review): melspectrogram returns a power spectrogram by
        # default, while amplitude_to_db expects amplitudes; power_to_db
        # may have been intended. Left as-is to keep outputs unchanged.
        mel = librosa.feature.melspectrogram(y=segment, sr=SAMPLE_RATE)
        return (librosa.amplitude_to_db(mel, ref=np.max) + 80) / 80

    first_half = to_scaled_mel(audio[:half])
    second_half = to_scaled_mel(audio[half:half * 2])
    return first_half, second_half

def dataset(chunks):
    """
    Build paired input/target lists from 10 second chunks.

    Every chunk is passed to audio_split in a pool of worker processes.
    The resulting (first half, second half) pairs are separated into two
    parallel lists.

    Args:
        chunks: Iterable of 10 second waveform chunks.

    Returns:
        A tuple (x, y) of lists where x[i] and y[i] are the two outputs of
        audio_split for the i-th chunk.
    """
    with Pool(cpu_count()) as pool:
        pairs = pool.map(audio_split, chunks)

    # Two comprehensions rather than zip(*pairs) so that an empty input
    # still returns two empty lists.
    first_halves = [pair[0] for pair in pairs]
    second_halves = [pair[1] for pair in pairs]
    return first_halves, second_halves