Compare commits


No commits in common. "df4cdc8e25a027628fd9ff158d34a68eb26d79bc" and "1a328d313fa6ee5b076bffa150ffc7cec16b8bed" have entirely different histories.

4 changed files with 118 additions and 126 deletions


@@ -22,9 +22,6 @@ def process_file(file_path):
         end = start_pos + size
         if end <= sample_len:
             chunk = y[start_pos:end]
-            chunk = librosa.feature.melspectrogram(y=chunk, sr=SAMPLE_RATE)
-            chunk = ((librosa.amplitude_to_db(chunk,ref=np.max)+80)/80)
             #chunk = librosa.feature.melspectrogram(y=chunk,sr=SAMPLE_RATE)
             #chunk = ((librosa.amplitude_to_db(chunk,ref=np.max)+40)/40)
             file_chunks.append(chunk)
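Note on the removed lines: they converted each raw-audio chunk to a mel spectrogram and rescaled librosa's dB output (roughly [-80, 0] with ref=np.max) into [0, 1], so after this change process_file appends raw chunks and that conversion presumably happens elsewhere (for example in data.dataset, which the next file starts using). A minimal sketch of what the dropped normalization computed, assuming librosa and numpy are available and a SAMPLE_RATE constant as in the original file:

import numpy as np
import librosa

SAMPLE_RATE = 22050  # assumption for illustration; the real file defines its own constant

def to_normalized_mel(chunk):
    # mel spectrogram of the raw audio chunk
    mel = librosa.feature.melspectrogram(y=chunk, sr=SAMPLE_RATE)
    # amplitude_to_db with ref=np.max gives values in about [-80, 0] dB (default top_db=80);
    # adding 80 and dividing by 80 rescales that range to [0, 1]
    return (librosa.amplitude_to_db(mel, ref=np.max) + 80) / 80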


@@ -1,14 +1,13 @@
 import data
 import numpy as np
-x = data.load()
+x,y = data.dataset(data.load())
 size=len(x)
-print(size)
 x_np = np.stack(x)
 x_np = np.expand_dims(x_np, axis=1)
-#y_np = np.stack(y)
-#y_np = np.expand_dims(y_np, axis=1)
-np.savez_compressed("data",x_np)
+y_np = np.stack(y)
+y_np = np.expand_dims(y_np, axis=1)
+np.savez_compressed("data",x_np,y_np)
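Because the arrays are passed to np.savez_compressed positionally, NumPy stores them under the default keys arr_0 and arr_1 (the old train.py already read arr_0). A minimal loading sketch under that assumption:

import numpy as np

archive = np.load("data.npz")
x_np = archive["arr_0"]  # stacked spectrogram chunks, shape (N, 1, H, W) after expand_dims
y_np = archive["arr_1"]  # the second, positionally saved array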


@@ -1,73 +1,34 @@
 from tinygrad import Tensor, nn
-class gen:
-    def __init__(self, input_channels=1, height=128, width=431, latent_dim=64):
-        self.height = height
-        self.width = width
-        self.latent_dim = latent_dim
+class Gen:
+    def __init__(self, height=128, width=216, latent_dim=128):
         self.w = width // 4
         self.h = height // 4
-        self.h = 32 # Output height after 2 strides
-        self.w = 108 # Output width after 2 strides
-        self.flattened_size = 128 * self.h * self.w
-        self.e1 = nn.Conv2d(input_channels, 64, kernel_size=3, stride=2, padding=1)
+        self.flat = 128 * self.h * self.w
+        self.ld = latent_dim
+        self.d1 = nn.Linear(latent_dim, self.flat)
+        self.d2 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1)
+        self.d3 = nn.ConvTranspose2d(64, 1, kernel_size=3, stride=2, padding=1, output_padding=1)
+    def __call__(self, noise: Tensor) -> Tensor:
+        x = self.d1(noise).relu()
+        x = x.reshape(noise.shape[0], 128, self.h, self.w)
+        x = self.d2(x).relu()
+        x = self.d3(x)
+        return x.tanh()
+class Check:
+    def __init__(self, height=128, width=216):
+        self.w = width // 4
+        self.h = height // 4
+        self.flat = 128 * self.h * self.w
+        self.e1 = nn.Conv2d(1, 64, kernel_size=3, stride=2, padding=1)
         self.e2 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1)
-        self.el = nn.Linear(self.flattened_size, self.latent_dim)
-        self.q = nn.Linear(self.latent_dim,self.latent_dim)
-        self.k = nn.Linear(self.latent_dim,self.latent_dim)
-        self.v = nn.Linear(self.latent_dim,self.latent_dim)
-        self.dl = nn.Linear(self.latent_dim, self.flattened_size)
-        self.d1 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1)
-        self.d2 = nn.ConvTranspose2d(64, input_channels, kernel_size=3, stride=2, padding=1, output_padding=1)
+        self.out = nn.Linear(self.flat, 1)
     def __call__(self, x: Tensor) -> Tensor:
-        y, shape = self.encode(x)
-        z = self.atten(y)
-        return self.decode(z, shape)
-    def encode(self, x: Tensor):
-        x = self.e1(x).leakyrelu()
-        x = self.e2(x).leakyrelu()
-        b, c, h, w = x.shape
-        flattened_size = c * h * w
-        x = x.reshape(shape=(b, flattened_size))
-        z = self.el(x)
-        return z, (c, h, w)
-    def atten(self, x: Tensor):
-        q = self.q(x).relu()
-        k = self.k(x).relu()
-        v = self.v(x).relu()
-        return q.scaled_dot_product_attention(k,v)
-    def decode(self, z: Tensor, shape):
-        x = self.dl(z).leakyrelu()
-        x = x.reshape(shape=(-1, 128, self.h, self.w))
-        x = self.d1(x).leakyrelu()
-        x = self.d2(x).sigmoid()
-        # Crop or pad to match input size
-        out_h, out_w = x.shape[2], x.shape[3]
-        if out_h > self.height:
-            x = x[:, :, :self.height, :]
-        elif out_h < self.height:
-            pad_h = self.height - out_h
-            x = x.pad2d((0, 0, 0, pad_h))
-        if out_w > self.width:
-            x = x[:, :, :, :self.width]
-        elif out_w < self.width:
-            pad_w = self.width - out_w
-            x = x.pad2d((0, pad_w, 0, 0))
-        return x
+        x = self.e1(x).relu()
+        x = self.e2(x).relu()
+        x = x.reshape(x.shape[0], -1)
+        return self.out(x)#.sigmoid()
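For orientation, the new model.py replaces the attention autoencoder with a small GAN pair. With the defaults (height=128, width=216, latent_dim=128), Gen projects a (B, 128) noise batch to (B, 128, 32, 54) and upsamples it with two stride-2 transposed convolutions to a (B, 1, 128, 216) tanh output, while Check downsamples an input of that shape to one logit per sample. A minimal shape check, assuming this module is importable as model and tinygrad is installed:

from tinygrad import Tensor
import model

gen = model.Gen()    # defaults: height=128, width=216, latent_dim=128
dis = model.Check()

noise = Tensor.randn(4, gen.ld)   # (4, 128) latent vectors
fake = gen(noise)                 # (4, 1, 128, 216), values in [-1, 1] from tanh
logits = dis(fake)                # (4, 1) raw logits; the sigmoid is commented out in Check
print(fake.shape, logits.shape)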

train.py

@@ -1,71 +1,106 @@
-#!/usr/bin/env python
-# coding: utf-8
+import data
+import model as model
+import show
 import mlflow
 import numpy as np
-from tinygrad import Device,Tensor,nn,TinyJit
-import matplotlib.pyplot as plt
-import time
-import show
-from model import gen
-BATCH_SIZE = 16
-EPOCHS = 100
-LEARNING_RATE = 1e-5
-print(Device.DEFAULT)
-mdl = gen()
-opt = nn.optim.AdamW(nn.state.get_parameters(mdl), lr=LEARNING_RATE)
-volume = 0.1
-def spec_loss(pred, target, eps=1e-6):
-    # spectral convergence
-    sc = ((target - pred).square().sum()) ** 0.5 / ((target.square().sum()) ** 0.5 + eps)
-    # log magnitude difference
-    log_mag = ((target.abs() + eps).log() - (pred.abs() + eps).log()).abs().mean()
-    return sc + log_mag
+from tinygrad import nn,TinyJit,Tensor
+mlflow.set_tracking_uri("http://127.0.0.1:5000")
+mlflow.start_run(experiment_id=804883409598823668)
+#hyper
+BACH_SIZE=32
+BATCH_SIZE=BACH_SIZE
+glr=2e-4
+dlr=1e-5
+epochs=100
+#dataset
+x = data.load()
+size=len(x)
+x_np = np.stack(x)
+x_np = np.expand_dims(x_np, axis=1)
+permutation = np.random.permutation(size)
+x_np = x_np[permutation]
+train = x_np[30:]
+test = x_np[0:30]
+print("Train:"+str(len(train)))
+print("Test:"+str(len(test)))
+#model
+gen = model.Gen()
+dif = model.Check()
+genOpt = nn.optim.AdamW(nn.state.get_parameters(gen), lr=glr)
+difOpt = nn.optim.AdamW(nn.state.get_parameters(dif), lr=dlr)
+#train
 @TinyJit
-def step_gen(x):
+def step_dis(x:Tensor):
     Tensor.training = True
-    noise = Tensor.rand_like(x).tanh()
-    y = x+(noise*volume)
-    y = y.clamp(0,1)
-    loss = spec_loss(mdl(y),x)
-    opt.zero_grad()
+    real = Tensor.ones((BATCH_SIZE,1))
+    fake = Tensor.zeros((BACH_SIZE,1))
+    noise = Tensor.randn(BACH_SIZE, gen.ld)
+    fake_data = gen(noise).detach()
+    fake_loss = dif(fake_data).binary_crossentropy_logits(fake)
+    real_loss = dif(x).binary_crossentropy_logits(real)
+    loss = (fake_loss + real_loss)/2
     loss.backward()
-    opt.step()
+    difOpt.step()
     return loss.numpy()
-print("loading")
-x = np.load("data.npz")["arr_0"]
-#x= x[0:64]
-run_name = f"tinygrad_autoencoder_{int(time.time())}"
-mlflow.set_tracking_uri("http://127.0.0.1:5000")
-mlflow.start_run()
-mlflow.log_params({"batch_size": BATCH_SIZE, "epochs": EPOCHS, "lr": LEARNING_RATE, "data size":len(x)})
-show.logSpec(Tensor(x[0:1]).numpy()[0][0],"default")
-print("training")
-pl = 0
-eshape = (BATCH_SIZE, 1, 128, 431)
-for epoch in range(0,EPOCHS):
-    print(f"\n--- Starting Epoch {epoch} ---\n")
-    loss=0
-    for i in range(0,len(x),BATCH_SIZE):
-        tx=Tensor(x[i:i+BATCH_SIZE])
+@TinyJit
+def step_gen():
+    Tensor.training = True
+    real = Tensor.ones((BATCH_SIZE,1))
+    noise = Tensor.randn(BACH_SIZE, gen.ld)
+    fake_data = gen(noise).detach()
+    loss = dif(fake_data).binary_crossentropy_logits(real)
+    loss.backward()
+    genOpt.step()
+    return loss.numpy()
+eshape = (BACH_SIZE, 1, 128, 216)
+mlflow.log_param("generator_learning_rate", glr)
+mlflow.log_param("discim_learning_rate", dlr)
+mlflow.log_param("epochs", epochs)
+mlflow.log_param("train size", len(train))
+mlflow.log_param("test size", len(test))
+for e in range(0,epochs):
+    print(f"\n--- Starting Epoch {e} ---\n")
+    dl=0
+    gl=0
+    for i in range(0,size,BACH_SIZE):
+        tx=Tensor(train[i:i+BACH_SIZE])
         if(tx.shape != eshape):
             continue
-        loss += step_gen(tx)
-    loss /= (len(x)/BATCH_SIZE)
-    if epoch%5==0:
-        noise = Tensor.rand_like(Tensor(x[0:1])).tanh()
-        y = Tensor(x[0:1]) + (noise*volume)
-        show.logSpec(mdl(y).numpy()[0][0],epoch)
-    if(pl - loss < 0.03 and epoch > 25):
-        show.logSpec(y.numpy()[0][0],f"volume_{volume}")
-        volume *= 2
-    pl = loss
-    mlflow.log_metric("volume", volume, step=epoch)
-    mlflow.log_metric("loss", loss, step=epoch)
-    print(f"loss of {loss}")
+        #steps
+        dl+=step_dis(tx)
+        gl+=step_gen()
+    dl /= (size/BACH_SIZE)
+    gl /= (size/BACH_SIZE)
+    if e%5==0:
+        noise = Tensor.randn(BACH_SIZE, gen.ld)
+        show.logSpec(gen(noise).numpy()[0][0],e)
+        #todo test on test data
+    mlflow.log_metric("gen_loss", gl, step=e)
+    mlflow.log_metric("dis_loss", dl, step=e)
+    print(f"loss of gen:{gl} dis:{dl}")
+#save
+noise = Tensor.randn(BACH_SIZE, gen.ld)
+show.logSpec(gen(noise).numpy()[0][0],epochs)
+from tinygrad.nn.state import safe_save, get_state_dict
+safe_save(get_state_dict(gen),"music.safetensors")
+mlflow.log_artifact("music.safetensors")
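To use the checkpoint written at the end of training, the generator weights can be restored with tinygrad's state helpers. A hedged sketch, assuming the Gen defaults above and the music.safetensors file produced by this script:

from tinygrad import Tensor
from tinygrad.nn.state import safe_load, load_state_dict
import model

gen = model.Gen()
load_state_dict(gen, safe_load("music.safetensors"))  # restore the trained generator weights
sample = gen(Tensor.randn(1, gen.ld))                 # one generated spectrogram, (1, 1, 128, 216)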