Review note: line breaks, indentation (4 spaces assumed) and blank context
lines were reconstructed from a whitespace-collapsed copy of this patch -
verify the context lines against the target files before applying. The
"index" blob hashes are kept from the original and predate the review changes.

diff --git a/data.py b/data.py
index 4c4f3c8..6c83a51 100644
--- a/data.py
+++ b/data.py
@@ -6,7 +6,27 @@ import mlflow
 
 SAMPLE_RATE = 22050
 
-#@mlflow.trace
+def spec_to_audio(spec):
+    """
+    Convert a normalized mel-spectrogram back to audio.
+
+    The input is rescaled with ``spec * 80 - 80`` (so values in [0, 1] map
+    to [-80, 0]), passed through ``librosa.db_to_amplitude``, multiplied
+    by 80 and inverted with ``librosa.feature.inverse.mel_to_audio`` at
+    ``SAMPLE_RATE``.
+
+    NOTE(review): presumably this mirrors the normalization applied in
+    ``process_file``; confirm the 80 dB range and the extra ``* 80`` gain
+    against that code.
+    """
+    spec = (spec * 80) - 80
+    spec = librosa.db_to_amplitude(spec) * 80
+    # NOTE(review): mel_to_audio defaults to power=2.0 (power spectrogram
+    # input) while the line above yields amplitudes - confirm the intended
+    # convention.
+    audio = librosa.feature.inverse.mel_to_audio(spec, sr=SAMPLE_RATE)
+    return audio
+
 def process_file(file_path):
     """
     Load 10 second chunks single song.
diff --git a/run.py b/run.py
new file mode 100644
index 0000000..eef1d5c
--- /dev/null
+++ b/run.py
@@ -0,0 +1,56 @@
+import numpy as np
+import random
+import time
+from tinygrad import Tensor, nn
+from tinygrad.nn.state import safe_load, load_state_dict
+import librosa
+import sounddevice as sd
+from model import gen
+from data import spec_to_audio
+
+SAMPLE_RATE = 22050
+
+def load_model(filepath="model.safetensors"):
+    """Build the model with ``gen()`` and load its weights from *filepath*."""
+    model = gen()
+    state_dict = safe_load(filepath)
+    load_state_dict(model, state_dict)
+    return model
+
+def load_data(filepath="data.npz"):
+    """Load the pre-processed spectrogram array stored as ``arr_0`` in *filepath*."""
+    print(f"Loading data from {filepath}...")
+    data = np.load(filepath)
+    x = data["arr_0"]
+    return x
+
+def play_spec(spec,i):
+    """
+    Convert a spectrogram numpy array to audio and start playing it.
+
+    The conversion runs before ``sd.wait()`` so it overlaps with the
+    previous chunk's playback. ``sd.play`` returns immediately; the caller
+    must call ``sd.wait()`` after the final chunk.
+    """
+    audio = spec_to_audio(spec)
+    sd.wait()
+    print(f"chunk:{i}")
+    sd.play(audio, samplerate=SAMPLE_RATE)
+
+def run_prediction_loop(model, data_x):
+    """
+    Play the first spectrogram in *data_x*, then repeatedly feed the
+    model's output back in as the next input, playing ten chunks in total.
+    """
+    current_spect = data_x[0:1]
+    for i in range(10):
+        play_spec(current_spect[0][0],i)
+        current_spect = model(Tensor(current_spect)).numpy()
+    # sd.play() is non-blocking: without this final wait the script would
+    # exit while the last chunk is still playing and cut it off.
+    sd.wait()
+
+if __name__ == "__main__":
+    model = load_model()
+    data_x = load_data()
+    run_prediction_loop(model, data_x)
diff --git a/train.py b/train.py
index e28c5ef..e912da5 100644
--- a/train.py
+++ b/train.py
@@ -1,6 +1,7 @@
 import mlflow
 import numpy as np
 from tinygrad import Device,Tensor,nn,TinyJit
+from tinygrad.nn.state import safe_save, get_state_dict
 import matplotlib.pyplot as plt
 import time
 import show
@@ -43,8 +44,7 @@ mlflow.log_params({"batch_size": BATCH_SIZE, "epochs": EPOCHS, "lr": LEARNING_RA
 show.logSpec(Tensor(x[0:1]).numpy()[0][0],"default")
 
 print("training")
-pl = 0
-eshape = (BATCH_SIZE, 1, 128, 431)
+eshape = (BATCH_SIZE, 1, 128, 216)
 for epoch in range(0,EPOCHS):
     print(f"\n--- Starting Epoch {epoch} ---\n")
     loss=0
@@ -65,3 +65,9 @@ for epoch in range(0,EPOCHS):
 
     mlflow.log_metric("loss", loss, step=epoch)
     print(f"loss of {loss}")
+
+# Log the model's output for the first sample once training has finished.
+show.logSpec(mdl(Tensor(x[0:1])).numpy()[0][0],EPOCHS)
+# Save the weights to the file that run.py's load_model() reads by default.
+state_dict = get_state_dict(mdl)
+safe_save(state_dict, "model.safetensors")