cleanup/rewrite
This commit is contained in:
65
data.py
Normal file
65
data.py
Normal file
@@ -0,0 +1,65 @@
|
||||
import librosa
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
from multiprocessing import Pool, cpu_count
|
||||
|
||||
# Sample rate handed to librosa when loading audio and computing mel
# spectrograms; chunk lengths below are derived as SAMPLE_RATE * seconds.
SAMPLE_RATE = 22050
|
||||
|
||||
def process_file(file_path):
    """
    Cut one audio file into consecutive, non-overlapping 10 second chunks.

    The file is decoded with ``librosa.load`` (``mono=True``) at
    ``SAMPLE_RATE``.  Only complete chunks are kept: a trailing remainder
    shorter than 10 seconds is discarded, so a file shorter than 10 seconds
    yields an empty list.

    Returns a list of slices of the loaded signal, each
    ``SAMPLE_RATE * 10`` samples long.
    """
    signal, _rate = librosa.load(file_path, mono=True, sr=SAMPLE_RATE)
    chunk_len = int(SAMPLE_RATE * 10)

    # Last offset that still leaves room for a whole chunk; negative when
    # the signal is shorter than one chunk, which makes the range empty.
    last_start = len(signal) - chunk_len
    return [
        signal[offset:offset + chunk_len]
        for offset in range(0, last_start + 1, chunk_len)
    ]
|
||||
|
||||
def load():
    """
    Gather the 10 second chunks of every ``*.mp3`` file found in ``./data/``.

    The files are handed to ``process_file`` through a process pool with
    one worker per CPU reported by ``cpu_count()``; the per-file chunk
    lists are then concatenated into a single flat list.

    Returns a flat list of chunks (empty when no file matches).
    """
    mp3_paths = list(Path("./data/").glob("*.mp3"))
    with Pool(cpu_count()) as workers:
        per_file = workers.map(process_file, mp3_paths)
    # Flatten [[chunk, ...], [chunk, ...], ...] into one list of chunks.
    return [chunk for file_chunks in per_file for chunk in file_chunks]
|
||||
|
||||
def _min_max_normalize(spec):
    """Rescale ``spec`` linearly to [0, 1]; a constant input maps to zeros."""
    lowest, highest = spec.min(), spec.max()
    span = highest - lowest
    if span == 0:
        # A constant spectrogram (e.g. from digital silence) would make the
        # plain formula compute 0 / 0 and fill the result with NaN.
        return np.zeros_like(spec)
    return (spec - lowest) / span


def audio_split(audio):
    """
    Turn a 10 second chunk into two normalized mel spectrograms.

    The first ``SAMPLE_RATE * 5`` samples and the following
    ``SAMPLE_RATE * 5`` samples of ``audio`` are each converted with
    ``librosa.feature.melspectrogram`` and then min-max scaled
    independently to the range [0, 1].  A half whose spectrogram is
    constant is returned as all zeros instead of NaN.

    Returns a tuple ``(x, y)``: the normalized spectrogram of the first
    half and of the second half, in that order.
    """
    half = int(SAMPLE_RATE * 5)
    first_clip = audio[:half]
    second_clip = audio[half:half * 2]

    x = librosa.feature.melspectrogram(y=first_clip, sr=SAMPLE_RATE)
    y = librosa.feature.melspectrogram(y=second_clip, sr=SAMPLE_RATE)

    return _min_max_normalize(x), _min_max_normalize(y)
|
||||
|
||||
def detaset(chunks):
    """
    Convert 10 second chunks into two parallel lists via ``audio_split``.

    Each chunk is processed by ``audio_split`` in a process pool with one
    worker per CPU reported by ``cpu_count()``.  The pairs it returns are
    separated into two lists that keep the order of ``chunks``.

    Returns a tuple ``(x, y)`` of lists, where ``x[i]`` and ``y[i]`` are
    the first and second element of the pair produced for ``chunks[i]``.

    Note: the function name is spelled ``detaset``; it is left unchanged
    here so existing callers keep working.
    """
    with Pool(cpu_count()) as workers:
        pairs = workers.map(audio_split, chunks)
    x = [first for first, _ in pairs]
    y = [second for _, second in pairs]
    return x, y
|
||||
Reference in New Issue
Block a user