Compare commits


No commits in common. "df4cdc8e25a027628fd9ff158d34a68eb26d79bc" and "1a328d313fa6ee5b076bffa150ffc7cec16b8bed" have entirely different histories.

4 changed files with 118 additions and 126 deletions

View File

@@ -22,9 +22,6 @@ def process_file(file_path):
end = start_pos + size
if end <= sample_len:
chunk = y[start_pos:end]
chunk = librosa.feature.melspectrogram(y=chunk, sr=SAMPLE_RATE)
chunk = ((librosa.amplitude_to_db(chunk,ref=np.max)+80)/80)
#chunk = librosa.feature.melspectrogram(y=chunk,sr=SAMPLE_RATE)
#chunk = ((librosa.amplitude_to_db(chunk,ref=np.max)+40)/40)
file_chunks.append(chunk)
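The normalization above maps the dB-scaled mel spectrogram from roughly [-80, 0] dB into [0, 1]: librosa.amplitude_to_db with ref=np.max and the default top_db=80 clips at -80 dB. A minimal sketch of the transform and its inverse, assuming a SAMPLE_RATE constant like the one this module already uses:

import librosa
import numpy as np

SAMPLE_RATE = 22050  # assumption; the module defines its own constant

def normalize(chunk):
    # mel spectrogram -> dB relative to its own max (clipped to [-80, 0]) -> [0, 1]
    mel = librosa.feature.melspectrogram(y=chunk, sr=SAMPLE_RATE)
    db = librosa.amplitude_to_db(mel, ref=np.max)
    return (db + 80) / 80

def denormalize(norm):
    # back to dB; the absolute reference level is not recoverable
    return norm * 80 - 80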

View File

@@ -1,14 +1,13 @@
import data
import numpy as np
x = data.load()
x,y = data.dataset(data.load())
size=len(x)
print(size)
x_np = np.stack(x)
x_np = np.expand_dims(x_np, axis=1)
#y_np = np.stack(y)
#y_np = np.expand_dims(y_np, axis=1)
y_np = np.stack(y)
y_np = np.expand_dims(y_np, axis=1)
np.savez_compressed("data",x_np)
np.savez_compressed("data",x_np,y_np)

View File

@@ -1,73 +1,34 @@
from tinygrad import Tensor, nn
class gen:
def __init__(self, input_channels=1, height=128, width=431, latent_dim=64):
self.height = height
self.width = width
self.latent_dim = latent_dim
class Gen:
def __init__(self, height=128, width=216, latent_dim=128):
self.w = width // 4
self.h = height // 4
self.h = 32 # Output height after 2 strides
self.w = 108 # Output width after 2 strides
self.flattened_size = 128 * self.h * self.w
self.flat = 128 * self.h * self.w
self.ld = latent_dim
self.d1 = nn.Linear(latent_dim, self.flat)
self.d2 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1)
self.d3 = nn.ConvTranspose2d(64, 1, kernel_size=3, stride=2, padding=1, output_padding=1)
def __call__(self, noise: Tensor) -> Tensor:
x = self.d1(noise).relu()
x = x.reshape(noise.shape[0], 128, self.h, self.w)
x = self.d2(x).relu()
x = self.d3(x)
return x.tanh()
self.e1 = nn.Conv2d(input_channels, 64, kernel_size=3, stride=2, padding=1)
class Check:
def __init__(self, height=128, width=216):
self.w = width // 4
self.h = height // 4
self.flat = 128 * self.h * self.w
self.e1 = nn.Conv2d(1, 64, kernel_size=3, stride=2, padding=1)
self.e2 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1)
self.el = nn.Linear(self.flattened_size, self.latent_dim)
self.q = nn.Linear(self.latent_dim,self.latent_dim)
self.k = nn.Linear(self.latent_dim,self.latent_dim)
self.v = nn.Linear(self.latent_dim,self.latent_dim)
self.dl = nn.Linear(self.latent_dim, self.flattened_size)
self.d1 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1)
self.d2 = nn.ConvTranspose2d(64, input_channels, kernel_size=3, stride=2, padding=1, output_padding=1)
self.out = nn.Linear(self.flat, 1)
def __call__(self, x: Tensor) -> Tensor:
y, shape = self.encode(x)
z = self.atten(y)
return self.decode(z, shape)
def encode(self, x: Tensor):
x = self.e1(x).leakyrelu()
x = self.e2(x).leakyrelu()
b, c, h, w = x.shape
flattened_size = c * h * w
x = x.reshape(shape=(b, flattened_size))
z = self.el(x)
return z, (c, h, w)
def atten(self, x: Tensor):
q = self.q(x).relu()
k = self.k(x).relu()
v = self.v(x).relu()
return q.scaled_dot_product_attention(k,v)
def decode(self, z: Tensor, shape):
x = self.dl(z).leakyrelu()
x = x.reshape(shape=(-1, 128, self.h, self.w))
x = self.d1(x).leakyrelu()
x = self.d2(x).sigmoid()
# Crop or pad to match input size
out_h, out_w = x.shape[2], x.shape[3]
if out_h > self.height:
x = x[:, :, :self.height, :]
elif out_h < self.height:
pad_h = self.height - out_h
x = x.pad2d((0, 0, 0, pad_h))
if out_w > self.width:
x = x[:, :, :, :self.width]
elif out_w < self.width:
pad_w = self.width - out_w
x = x.pad2d((0, pad_w, 0, 0))
return x
x = self.e1(x).relu()
x = self.e2(x).relu()
x = x.reshape(x.shape[0], -1)
return self.out(x)#.sigmoid()
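To make the shape bookkeeping of the new Gen/Check pair concrete: Gen projects the latent vector to 128 x 32 x 54, upsamples twice by stride 2 back to 1 x 128 x 216, and Check mirrors that path down to a single logit. A minimal sanity-check sketch (BATCH is an arbitrary test value, not from the source):

from tinygrad import Tensor
import model

BATCH = 4
gen = model.Gen()    # latent_dim=128 -> (BATCH, 1, 128, 216)
dif = model.Check()  # (BATCH, 1, 128, 216) -> (BATCH, 1) logits

noise = Tensor.randn(BATCH, gen.ld)
fake = gen(noise)
print(fake.shape)       # expected (4, 1, 128, 216)
print(dif(fake).shape)  # expected (4, 1)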

train.py
View File

@@ -1,71 +1,106 @@
#!/usr/bin/env python
# coding: utf-8
import data
import model as model
import show
import mlflow
import numpy as np
from tinygrad import Device,Tensor,nn,TinyJit
import matplotlib.pyplot as plt
import time
import show
from model import gen
from tinygrad import nn,TinyJit,Tensor
BATCH_SIZE = 16
EPOCHS = 100
LEARNING_RATE = 1e-5
print(Device.DEFAULT)
mdl = gen()
opt = nn.optim.AdamW(nn.state.get_parameters(mdl), lr=LEARNING_RATE)
volume = 0.1
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.start_run(experiment_id=804883409598823668)
#hyper
BACH_SIZE=32
BATCH_SIZE=BACH_SIZE
glr=2e-4
dlr=1e-5
epochs=100
def spec_loss(pred, target, eps=1e-6):
# spectral convergence
sc = ((target - pred).square().sum()) ** 0.5 / ((target.square().sum()) ** 0.5 + eps)
# log magnitude difference
log_mag = ((target.abs() + eps).log() - (pred.abs() + eps).log()).abs().mean()
return sc + log_mag
#dataset
x = data.load()
size=len(x)
x_np = np.stack(x)
x_np = np.expand_dims(x_np, axis=1)
permutation = np.random.permutation(size)
x_np = x_np[permutation]
train = x_np[30:]
test = x_np[0:30]
print("Train:"+str(len(train)))
print("Test:"+str(len(test)))
#model
gen = model.Gen()
dif = model.Check()
genOpt = nn.optim.AdamW(nn.state.get_parameters(gen), lr=glr)
difOpt = nn.optim.AdamW(nn.state.get_parameters(dif), lr=dlr)
#train
@TinyJit
def step_gen(x):
def step_dis(x:Tensor):
Tensor.training = True
noise = Tensor.rand_like(x).tanh()
y = x+(noise*volume)
y = y.clamp(0,1)
loss = spec_loss(mdl(y),x)
opt.zero_grad()
real = Tensor.ones((BATCH_SIZE,1))
fake = Tensor.zeros((BACH_SIZE,1))
noise = Tensor.randn(BACH_SIZE, gen.ld)
fake_data = gen(noise).detach()
fake_loss = dif(fake_data).binary_crossentropy_logits(fake)
real_loss = dif(x).binary_crossentropy_logits(real)
loss = (fake_loss + real_loss)/2
loss.backward()
opt.step()
difOpt.step()
return loss.numpy()
print("loading")
x = np.load("data.npz")["arr_0"]
#x= x[0:64]
run_name = f"tinygrad_autoencoder_{int(time.time())}"
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.start_run()
mlflow.log_params({"batch_size": BATCH_SIZE, "epochs": EPOCHS, "lr": LEARNING_RATE, "data size":len(x)})
@TinyJit
def step_gen():
Tensor.training = True
real = Tensor.ones((BATCH_SIZE,1))
noise = Tensor.randn(BACH_SIZE, gen.ld)
fake_data = gen(noise).detach()
loss = dif(fake_data).binary_crossentropy_logits(real)
loss.backward()
genOpt.step()
return loss.numpy()
show.logSpec(Tensor(x[0:1]).numpy()[0][0],"default")
print("training")
pl = 0
eshape = (BATCH_SIZE, 1, 128, 431)
for epoch in range(0,EPOCHS):
print(f"\n--- Starting Epoch {epoch} ---\n")
loss=0
for i in range(0,len(x),BATCH_SIZE):
tx=Tensor(x[i:i+BATCH_SIZE])
eshape = (BACH_SIZE, 1, 128, 216)
mlflow.log_param("generator_learning_rate", glr)
mlflow.log_param("discim_learning_rate", dlr)
mlflow.log_param("epochs", epochs)
mlflow.log_param("train size", len(train))
mlflow.log_param("test size", len(test))
for e in range(0,epochs):
print(f"\n--- Starting Epoch {e} ---\n")
dl=0
gl=0
for i in range(0,size,BACH_SIZE):
tx=Tensor(train[i:i+BACH_SIZE])
if(tx.shape != eshape):
continue
loss += step_gen(tx)
#steps
dl+=step_dis(tx)
gl+=step_gen()
loss /= (len(x)/BATCH_SIZE)
if epoch%5==0:
noise = Tensor.rand_like(Tensor(x[0:1])).tanh()
y = Tensor(x[0:1]) + (noise*volume)
show.logSpec(mdl(y).numpy()[0][0],epoch)
if(pl - loss < 0.03 and epoch > 25):
show.logSpec(y.numpy()[0][0],f"volume_{volume}")
volume *= 2
pl = loss
dl /= (size/BACH_SIZE)
gl /= (size/BACH_SIZE)
if e%5==0:
noise = Tensor.randn(BACH_SIZE, gen.ld)
show.logSpec(gen(noise).numpy()[0][0],e)
#todo test on test data
mlflow.log_metric("gen_loss", gl, step=e)
mlflow.log_metric("dis_loss", dl, step=e)
print(f"loss of gen:{gl} dis:{dl}")
mlflow.log_metric("volume", volume, step=epoch)
mlflow.log_metric("loss", loss, step=epoch)
print(f"loss of {loss}")
#save
noise = Tensor.randn(BACH_SIZE, gen.ld)
show.logSpec(gen(noise).numpy()[0][0],epochs)
from tinygrad.nn.state import safe_save, get_state_dict
safe_save(get_state_dict(gen),"music.safetensors")
mlflow.log_artifact("music.safetensors")
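After the run, the saved generator can be reloaded for sampling along these lines (a sketch using tinygrad's safe_load/load_state_dict counterparts of the save calls above):

from tinygrad import Tensor
from tinygrad.nn.state import safe_load, load_state_dict
import model

gen = model.Gen()
load_state_dict(gen, safe_load("music.safetensors"))
noise = Tensor.randn(1, gen.ld)
spec = gen(noise).numpy()[0][0]  # one generated (128, 216) spectrogram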