2025-11-12 12:11:57 -05:00

76 lines
1.8 KiB
Python

import librosa
import numpy as np
from pathlib import Path
from multiprocessing import Pool, cpu_count
import mlflow
# Sample rate handed to librosa as ``sr`` when loading audio and computing
# spectrograms below; chunk and clip lengths are also derived from it.
SAMPLE_RATE = 22050
def spec_to_audio(spec):
    """Reconstruct an audio signal from a normalized mel-spectrogram.

    The input is first rescaled with ``spec * 80 - 80`` (the inverse of the
    ``(dB + 80) / 80`` scaling used in ``audio_split``), converted from dB
    to amplitude, multiplied by a fixed gain of 80, and finally inverted
    with ``librosa.feature.inverse.mel_to_audio`` at ``SAMPLE_RATE``.

    Returns:
        The audio produced by ``mel_to_audio``.
    """
    decibels = (spec * 80) - 80
    # NOTE(review): the factor of 80 is a fixed output gain; it is not the
    # reference level discarded by ``ref=np.max`` during normalization.
    amplitude = librosa.db_to_amplitude(decibels) * 80
    return librosa.feature.inverse.mel_to_audio(amplitude, sr=SAMPLE_RATE)
def process_file(file_path):
    """Load one audio file and cut it into consecutive 10-second chunks.

    The file is loaded as mono at ``SAMPLE_RATE``. Only whole chunks are
    kept: a trailing remainder shorter than 10 seconds is dropped.

    Returns:
        A list of sample slices, each ``int(SAMPLE_RATE * 10)`` samples
        long. The list is empty when the file is shorter than one chunk or
        when the reported sample rate differs from ``SAMPLE_RATE``.
    """
    samples, rate = librosa.load(file_path, mono=True, sr=SAMPLE_RATE)
    if rate != SAMPLE_RATE:
        return []

    chunk_len = int(SAMPLE_RATE * 10)
    total = len(samples)
    # Walk the signal in chunk-sized steps and keep only offsets that still
    # leave room for a complete chunk.
    return [
        samples[offset:offset + chunk_len]
        for offset in range(0, total, chunk_len)
        if offset + chunk_len <= total
    ]
def load(data_dir="./data/"):
    """Load every MP3 in *data_dir* as one flat list of audio chunks.

    Args:
        data_dir: Directory searched (non-recursively) for ``*.mp3`` files.
            Defaults to ``./data/``, the location that used to be
            hard-coded, so existing ``load()`` calls behave as before.

    Returns:
        A list containing the chunks returned by ``process_file`` for each
        file, concatenated in sorted file-path order. Empty when no MP3
        files are found.
    """
    # glob() yields directory entries in arbitrary order; sorting makes the
    # resulting chunk order reproducible across runs and machines.
    files = sorted(Path(data_dir).glob("*.mp3"))
    if not files:
        # Nothing to decode, so skip starting worker processes.
        return []

    with Pool(cpu_count()) as pool:
        per_file_chunks = pool.map(process_file, files)

    return [chunk for file_chunks in per_file_chunks for chunk in file_chunks]
def audio_split(audio):
    """Split a chunk into two 5-second clips and return their spectrograms.

    The first ``int(SAMPLE_RATE * 5)`` samples form the first clip and the
    next ``int(SAMPLE_RATE * 5)`` samples form the second. Each clip is
    turned into a mel-spectrogram, converted to dB relative to its own
    maximum (``ref=np.max``), then shifted by 80 and divided by 80.

    Returns:
        A ``(first, second)`` tuple of normalized spectrograms.
    """
    half = int(SAMPLE_RATE * 5)
    clips = (audio[:half], audio[half:half * 2])

    normalized = []
    for clip in clips:
        mel = librosa.feature.melspectrogram(y=clip, sr=SAMPLE_RATE)
        decibels = librosa.amplitude_to_db(mel, ref=np.max)
        # Presumably maps librosa's default 80 dB range onto [0, 1];
        # confirm against the amplitude_to_db ``top_db`` default.
        normalized.append((decibels + 80) / 80)

    first, second = normalized
    return first, second
def dataset(chunks):
    """Convert 10-second chunks into two parallel lists of spectrograms.

    Every chunk is passed to ``audio_split`` in a worker pool. The first
    element of each returned pair goes into the first list and the second
    element into the second list, preserving the order of *chunks*.

    Returns:
        A ``(first_halves, second_halves)`` tuple of lists.
    """
    with Pool(cpu_count()) as pool:
        pairs = pool.map(audio_split, chunks)

    first_halves = [first for first, _ in pairs]
    second_halves = [second for _, second in pairs]
    return first_halves, second_halves