diff --git a/data.py b/data.py
index 137ecef..b77f312 100644
--- a/data.py
+++ b/data.py
@@ -22,9 +22,6 @@ def process_file(file_path):
         end = start_pos + size
         if end <= sample_len:
             chunk = y[start_pos:end]
-            chunk = librosa.feature.melspectrogram(y=chunk, sr=SAMPLE_RATE)
-            chunk = ((librosa.amplitude_to_db(chunk,ref=np.max)+80)/80)
-            #chunk = librosa.feature.melspectrogram(y=chunk,sr=SAMPLE_RATE)
             #chunk = ((librosa.amplitude_to_db(chunk,ref=np.max)+40)/40)
             file_chunks.append(chunk)
diff --git a/dataInit.py b/dataInit.py
index 8eb3a6e..6622179 100644
--- a/dataInit.py
+++ b/dataInit.py
@@ -1,14 +1,13 @@
 import data
 import numpy as np
 
-x = data.load()
+x,y = data.dataset(data.load())
 size=len(x)
-print(size)
 
 x_np = np.stack(x)
 x_np = np.expand_dims(x_np, axis=1)
-#y_np = np.stack(y)
-#y_np = np.expand_dims(y_np, axis=1)
+y_np = np.stack(y)
+y_np = np.expand_dims(y_np, axis=1)
 
-np.savez_compressed("data",x_np)
+np.savez_compressed("data",x_np,y_np)
diff --git a/model.py b/model.py
index 93add80..08b9683 100644
--- a/model.py
+++ b/model.py
@@ -1,73 +1,34 @@
 from tinygrad import Tensor, nn
 
-class gen:
-    def __init__(self, input_channels=1, height=128, width=431, latent_dim=64):
-        self.height = height
-        self.width = width
-        self.latent_dim = latent_dim
-
+class Gen:
+    def __init__(self, height=128, width=216, latent_dim=128):
         self.w = width // 4
         self.h = height // 4
-        self.h = 32 # Output height after 2 strides
-        self.w = 108 # Output width after 2 strides
-        self.flattened_size = 128 * self.h * self.w
+        self.flat = 128 * self.h * self.w
+        self.ld = latent_dim
+        self.d1 = nn.Linear(latent_dim, self.flat)
+        self.d2 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1)
+        self.d3 = nn.ConvTranspose2d(64, 1, kernel_size=3, stride=2, padding=1, output_padding=1)
+
+    def __call__(self, noise: Tensor) -> Tensor:
+        x = self.d1(noise).relu()
+        x = x.reshape(noise.shape[0], 128, self.h, self.w)
+        x = self.d2(x).relu()
+        x = self.d3(x)
+        return x.tanh()
 
-        self.e1 = nn.Conv2d(input_channels, 64, kernel_size=3, stride=2, padding=1)
+class Check:
+    def __init__(self, height=128, width=216):
+        self.w = width // 4
+        self.h = height // 4
+        self.flat = 128 * self.h * self.w
+        self.e1 = nn.Conv2d(1, 64, kernel_size=3, stride=2, padding=1)
         self.e2 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1)
-
-        self.el = nn.Linear(self.flattened_size, self.latent_dim)
-
-        self.q = nn.Linear(self.latent_dim,self.latent_dim)
-        self.k = nn.Linear(self.latent_dim,self.latent_dim)
-        self.v = nn.Linear(self.latent_dim,self.latent_dim)
-
-        self.dl = nn.Linear(self.latent_dim, self.flattened_size)
-
-        self.d1 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1)
-        self.d2 = nn.ConvTranspose2d(64, input_channels, kernel_size=3, stride=2, padding=1, output_padding=1)
+        self.out = nn.Linear(self.flat, 1)
 
     def __call__(self, x: Tensor) -> Tensor:
-        y, shape = self.encode(x)
-        z = self.atten(y)
-        return self.decode(z, shape)
-
-    def encode(self, x: Tensor):
-        x = self.e1(x).leakyrelu()
-        x = self.e2(x).leakyrelu()
-        b, c, h, w = x.shape
-
-        flattened_size = c * h * w
-
-        x = x.reshape(shape=(b, flattened_size))
-        z = self.el(x)
-        return z, (c, h, w)
-
-    def atten(self, x: Tensor):
-        q = self.q(x).relu()
-        k = self.k(x).relu()
-        v = self.v(x).relu()
-        return q.scaled_dot_product_attention(k,v)
-
-    def decode(self, z: Tensor, shape):
-        x = self.dl(z).leakyrelu()
-        x = x.reshape(shape=(-1, 128, self.h, self.w))
-        x = self.d1(x).leakyrelu()
-        x = self.d2(x).sigmoid()
-
-        # Crop or pad to match input size
-        out_h, out_w = x.shape[2], x.shape[3]
-        if out_h > self.height:
-            x = x[:, :, :self.height, :]
-        elif out_h < self.height:
-            pad_h = self.height - out_h
-            x = x.pad2d((0, 0, 0, pad_h))
-
-        if out_w > self.width:
-            x = x[:, :, :, :self.width]
-        elif out_w < self.width:
-            pad_w = self.width - out_w
-            x = x.pad2d((0, pad_w, 0, 0))
-
-        return x
+        x = self.e1(x).relu()
+        x = self.e2(x).relu()
+        x = x.reshape(x.shape[0], -1)
+        return self.out(x)  # raw logits; train.py pairs this with binary_crossentropy_logits
diff --git a/train.py b/train.py
index 14f6de3..de503f4 100644
--- a/train.py
+++ b/train.py
@@ -1,71 +1,106 @@
+#!/usr/bin/env python
+# coding: utf-8
+import data
+import model
+import show
 import mlflow
 import numpy as np
-from tinygrad import Device,Tensor,nn,TinyJit
-import matplotlib.pyplot as plt
-import time
-import show
-from model import gen
+from tinygrad import nn,TinyJit,Tensor
+from tinygrad.nn.state import safe_save, get_state_dict
 
-BATCH_SIZE = 16
-EPOCHS = 100
-LEARNING_RATE = 1e-5
-print(Device.DEFAULT)
-mdl = gen()
-opt = nn.optim.AdamW(nn.state.get_parameters(mdl), lr=LEARNING_RATE)
-volume = 0.1
+mlflow.set_tracking_uri("http://127.0.0.1:5000")
+mlflow.start_run(experiment_id="804883409598823668")
+
+#hyper
+BATCH_SIZE=32
+glr=2e-4
+dlr=1e-5
+epochs=100
 
-def spec_loss(pred, target, eps=1e-6):
-    # spectral convergence
-    sc = ((target - pred).square().sum()) ** 0.5 / ((target.square().sum()) ** 0.5 + eps)
-    # log magnitude difference
-    log_mag = ((target.abs() + eps).log() - (pred.abs() + eps).log()).abs().mean()
-    return sc + log_mag
+#dataset: shuffle, then hold out the first 30 chunks for testing
+x = data.load()
+size=len(x)
+x_np = np.stack(x)
+x_np = np.expand_dims(x_np, axis=1)
+permutation = np.random.permutation(size)
+x_np = x_np[permutation]
+
+train = x_np[30:]
+test = x_np[0:30]
+
+print("Train:"+str(len(train)))
+print("Test:"+str(len(test)))
+
+#model
+gen = model.Gen()
+dif = model.Check()
+genOpt = nn.optim.AdamW(nn.state.get_parameters(gen), lr=glr)
+difOpt = nn.optim.AdamW(nn.state.get_parameters(dif), lr=dlr)
+
+#train
 @TinyJit
-def step_gen(x):
+def step_dis(x:Tensor):
+    # discriminator step: real batch vs. a detached batch of generator samples
     Tensor.training = True
-    noise = Tensor.rand_like(x).tanh()
-    y = x+(noise*volume)
-    y = y.clamp(0,1)
-    loss = spec_loss(mdl(y),x)
-    opt.zero_grad()
+    real = Tensor.ones((BATCH_SIZE,1))
+    fake = Tensor.zeros((BATCH_SIZE,1))
+    noise = Tensor.randn(BATCH_SIZE, gen.ld)
+    fake_data = gen(noise).detach()  # detach: no gradient into the generator here
+    fake_loss = dif(fake_data).binary_crossentropy_logits(fake)
+    real_loss = dif(x).binary_crossentropy_logits(real)
+    loss = (fake_loss + real_loss)/2
+    difOpt.zero_grad()
     loss.backward()
-    opt.step()
+    difOpt.step()
    return loss.numpy()
 
-print("loading")
-x = np.load("data.npz")["arr_0"]
-#x= x[0:64]
-run_name = f"tinygrad_autoencoder_{int(time.time())}"
-mlflow.set_tracking_uri("http://127.0.0.1:5000")
-mlflow.start_run()
-mlflow.log_params({"batch_size": BATCH_SIZE, "epochs": EPOCHS, "lr": LEARNING_RATE, "data size":len(x)})
+@TinyJit
+def step_gen():
+    # generator step: no detach, so gradients flow back through gen
+    Tensor.training = True
+    real = Tensor.ones((BATCH_SIZE,1))
+    noise = Tensor.randn(BATCH_SIZE, gen.ld)
+    fake_data = gen(noise)
+    loss = dif(fake_data).binary_crossentropy_logits(real)
+    genOpt.zero_grad()
+    loss.backward()
+    genOpt.step()
+    return loss.numpy()
 
-show.logSpec(Tensor(x[0:1]).numpy()[0][0],"default")
-print("training")
-pl = 0
-eshape = (BATCH_SIZE, 1, 128, 431)
-for epoch in range(0,EPOCHS):
-    print(f"\n--- Starting Epoch {epoch} ---\n")
-    loss=0
-    for i in range(0,len(x),BATCH_SIZE):
-        tx=Tensor(x[i:i+BATCH_SIZE])
+eshape = (BATCH_SIZE, 1, 128, 216)
+
+mlflow.log_param("generator_learning_rate", glr)
+mlflow.log_param("discriminator_learning_rate", dlr)
+mlflow.log_param("epochs", epochs) +mlflow.log_param("train size", len(train)) +mlflow.log_param("test size", len(test)) +for e in range(0,epochs): + print(f"\n--- Starting Epoch {e} ---\n") + dl=0 + gl=0 + + for i in range(0,size,BACH_SIZE): + tx=Tensor(train[i:i+BACH_SIZE]) if(tx.shape != eshape): continue - loss += step_gen(tx) + #steps + dl+=step_dis(tx) + gl+=step_gen() - loss /= (len(x)/BATCH_SIZE) - if epoch%5==0: - noise = Tensor.rand_like(Tensor(x[0:1])).tanh() - y = Tensor(x[0:1]) + (noise*volume) - show.logSpec(mdl(y).numpy()[0][0],epoch) - if(pl - loss < 0.03 and epoch > 25): - show.logSpec(y.numpy()[0][0],f"volume_{volume}") - volume *= 2 - pl = loss + dl /= (size/BACH_SIZE) + gl /= (size/BACH_SIZE) + if e%5==0: + noise = Tensor.randn(BACH_SIZE, gen.ld) + show.logSpec(gen(noise).numpy()[0][0],e) + #todo test on test data + mlflow.log_metric("gen_loss", gl, step=e) + mlflow.log_metric("dis_loss", dl, step=e) + print(f"loss of gen:{gl} dis:{dl}") - mlflow.log_metric("volume", volume, step=epoch) - mlflow.log_metric("loss", loss, step=epoch) - print(f"loss of {loss}") + +#save +noise = Tensor.randn(BACH_SIZE, gen.ld) +show.logSpec(gen(noise).numpy()[0][0],epochs) +from tinygrad.nn.state import safe_save, get_state_dict +safe_save(get_state_dict(gen),"music.safetensors") +mlflow.log_artifact("music.safetensors")