Compare commits


2 Commits

Author  SHA1        Message                  Date
k       df4cdc8e25  playing with denoiseing  2025-11-10 22:34:17 -05:00
k       c84c100cb8  updated data             2025-11-08 00:10:50 -05:00
4 changed files with 133 additions and 125 deletions


@@ -22,6 +22,9 @@ def process_file(file_path):
        end = start_pos + size
        if end <= sample_len:
            chunk = y[start_pos:end]
            chunk = librosa.feature.melspectrogram(y=chunk, sr=SAMPLE_RATE)
            chunk = ((librosa.amplitude_to_db(chunk,ref=np.max)+80)/80)
            #chunk = librosa.feature.melspectrogram(y=chunk,sr=SAMPLE_RATE)
            #chunk = ((librosa.amplitude_to_db(chunk,ref=np.max)+40)/40)
            file_chunks.append(chunk)
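
Note on the change above: with librosa's default top_db=80, amplitude_to_db(..., ref=np.max) yields values in [-80, 0] dB, so the new (chunk+80)/80 maps each mel chunk into [0, 1]; the commented-out variant kept only a 40 dB window. A minimal sketch of the inverse mapping, assuming SAMPLE_RATE matches the script's constant (the denormalize/to_audio helpers are hypothetical, and the absolute level is lost because of ref=np.max):

import librosa
import numpy as np

SAMPLE_RATE = 22050  # assumption: the repo's constant

def denormalize(chunk):
    # undo (db + 80) / 80 back to a linear-amplitude mel spectrogram
    db = chunk * 80.0 - 80.0
    return librosa.db_to_amplitude(db)

def to_audio(chunk):
    # Griffin-Lim reconstruction from a normalized mel chunk
    return librosa.feature.inverse.mel_to_audio(denormalize(chunk), sr=SAMPLE_RATE)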


@@ -1,13 +1,14 @@
import data
import numpy as np
x,y = data.dataset(data.load())
x = data.load()
size=len(x)
print(size)
x_np = np.stack(x)
x_np = np.expand_dims(x_np, axis=1)
y_np = np.stack(y)
y_np = np.expand_dims(y_np, axis=1)
#y_np = np.stack(y)
#y_np = np.expand_dims(y_np, axis=1)
np.savez_compressed("data",x_np,y_np)
np.savez_compressed("data",x_np)
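
Note: positional arrays passed to np.savez_compressed are stored under the keys arr_0, arr_1, ..., which is why train.py below reads np.load("data.npz")["arr_0"]. A keyword argument makes that pairing explicit; a small sketch (the key name "x" is hypothetical):

import numpy as np

np.savez_compressed("data", x=x_np)  # stored under the explicit key "x"
x_np = np.load("data.npz")["x"]      # load back by name instead of arr_0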


@@ -1,34 +1,73 @@
from tinygrad import Tensor, nn
class Gen:
    def __init__(self, height=128, width=216, latent_dim=128):
class gen:
    def __init__(self, input_channels=1, height=128, width=431, latent_dim=64):
        self.height = height
        self.width = width
        self.latent_dim = latent_dim
        self.w = width // 4
        self.h = height // 4
        self.flat = 128 * self.h * self.w
        self.ld = latent_dim
        self.d1 = nn.Linear(latent_dim, self.flat)
        self.d2 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.d3 = nn.ConvTranspose2d(64, 1, kernel_size=3, stride=2, padding=1, output_padding=1)
    def __call__(self, noise: Tensor) -> Tensor:
        x = self.d1(noise).relu()
        x = x.reshape(noise.shape[0], 128, self.h, self.w)
        x = self.d2(x).relu()
        x = self.d3(x)
        return x.tanh()
        self.h = 32  # Output height after 2 strides
        self.w = 108  # Output width after 2 strides
        self.flattened_size = 128 * self.h * self.w
class Check:
    def __init__(self, height=128, width=216):
        self.w = width // 4
        self.h = height // 4
        self.flat = 128 * self.h * self.w
        self.e1 = nn.Conv2d(1, 64, kernel_size=3, stride=2, padding=1)
        self.e1 = nn.Conv2d(input_channels, 64, kernel_size=3, stride=2, padding=1)
        self.e2 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1)
        self.out = nn.Linear(self.flat, 1)
        self.el = nn.Linear(self.flattened_size, self.latent_dim)
        self.q = nn.Linear(self.latent_dim,self.latent_dim)
        self.k = nn.Linear(self.latent_dim,self.latent_dim)
        self.v = nn.Linear(self.latent_dim,self.latent_dim)
        self.dl = nn.Linear(self.latent_dim, self.flattened_size)
        self.d1 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.d2 = nn.ConvTranspose2d(64, input_channels, kernel_size=3, stride=2, padding=1, output_padding=1)
    def __call__(self, x: Tensor) -> Tensor:
        x = self.e1(x).relu()
        x = self.e2(x).relu()
        x = x.reshape(x.shape[0], -1)
        return self.out(x)#.sigmoid()
        y, shape = self.encode(x)
        z = self.atten(y)
        return self.decode(z, shape)
    def encode(self, x: Tensor):
        x = self.e1(x).leakyrelu()
        x = self.e2(x).leakyrelu()
        b, c, h, w = x.shape
        flattened_size = c * h * w
        x = x.reshape(shape=(b, flattened_size))
        z = self.el(x)
        return z, (c, h, w)
    def atten(self, x: Tensor):
        q = self.q(x).relu()
        k = self.k(x).relu()
        v = self.v(x).relu()
        return q.scaled_dot_product_attention(k,v)
    def decode(self, z: Tensor, shape):
        x = self.dl(z).leakyrelu()
        x = x.reshape(shape=(-1, 128, self.h, self.w))
        x = self.d1(x).leakyrelu()
        x = self.d2(x).sigmoid()
        # Crop or pad to match input size
        out_h, out_w = x.shape[2], x.shape[3]
        if out_h > self.height:
            x = x[:, :, :self.height, :]
        elif out_h < self.height:
            pad_h = self.height - out_h
            x = x.pad2d((0, 0, 0, pad_h))
        if out_w > self.width:
            x = x[:, :, :, :self.width]
        elif out_w < self.width:
            pad_w = self.width - out_w
            x = x.pad2d((0, pad_w, 0, 0))
        return x
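
With the new defaults (height=128, width=431), each stride-2 convolution roughly halves the spatial dimensions (128 -> 64 -> 32 and 431 -> 216 -> 108, matching the hard-coded self.h = 32 and self.w = 108), while the two transposed convolutions double them back to 128 x 432; the crop/pad block at the end of decode exists because the odd width 431 cannot be hit exactly. A quick shape check, assuming this file is importable as model.py:

from tinygrad import Tensor
from model import gen  # assumption: the file above is saved as model.py

mdl = gen()
x = Tensor.rand(2, 1, 128, 431)  # (batch, channels, mel bins, frames)
print(mdl(x).shape)              # expected (2, 1, 128, 431) after the crop
z, shape = mdl.encode(x)
print(z.shape)                   # expected (2, 64): one latent vector per sample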

train.py

@@ -1,106 +1,71 @@
#!/usr/bin/env python
# coding: utf-8
import data
import model as model
import show
import mlflow
import numpy as np
from tinygrad import nn,TinyJit,Tensor
from tinygrad import Device,Tensor,nn,TinyJit
import matplotlib.pyplot as plt
import time
import show
from model import gen
BATCH_SIZE = 16
EPOCHS = 100
LEARNING_RATE = 1e-5
print(Device.DEFAULT)
mdl = gen()
opt = nn.optim.AdamW(nn.state.get_parameters(mdl), lr=LEARNING_RATE)
volume = 0.1
def spec_loss(pred, target, eps=1e-6):
    # spectral convergence
    sc = ((target - pred).square().sum()) ** 0.5 / ((target.square().sum()) ** 0.5 + eps)
    # log magnitude difference
    log_mag = ((target.abs() + eps).log() - (pred.abs() + eps).log()).abs().mean()
    return sc + log_mag
@TinyJit
def step_gen(x):
    Tensor.training = True
    noise = Tensor.rand_like(x).tanh()
    y = x+(noise*volume)
    y = y.clamp(0,1)
    loss = spec_loss(mdl(y),x)
    opt.zero_grad()
    loss.backward()
    opt.step()
    return loss.numpy()
print("loading")
x = np.load("data.npz")["arr_0"]
#x= x[0:64]
run_name = f"tinygrad_autoencoder_{int(time.time())}"
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.start_run(experiment_id=804883409598823668)
#hyper
BACH_SIZE=32
BATCH_SIZE=BACH_SIZE
glr=2e-4
dlr=1e-5
epochs=100
mlflow.start_run()
mlflow.log_params({"batch_size": BATCH_SIZE, "epochs": EPOCHS, "lr": LEARNING_RATE, "data size":len(x)})
show.logSpec(Tensor(x[0:1]).numpy()[0][0],"default")
#dataset
x = data.load()
size=len(x)
x_np = np.stack(x)
x_np = np.expand_dims(x_np, axis=1)
permutation = np.random.permutation(size)
x_np = x_np[permutation]
train = x_np[30:]
test = x_np[0:30]
print("Train:"+str(len(train)))
print("Test:"+str(len(test)))
#model
gen = model.Gen()
dif = model.Check()
genOpt = nn.optim.AdamW(nn.state.get_parameters(gen), lr=glr)
difOpt = nn.optim.AdamW(nn.state.get_parameters(dif), lr=dlr)
#train
@TinyJit
def step_dis(x:Tensor):
    Tensor.training = True
    real = Tensor.ones((BATCH_SIZE,1))
    fake = Tensor.zeros((BACH_SIZE,1))
    noise = Tensor.randn(BACH_SIZE, gen.ld)
    fake_data = gen(noise).detach()
    fake_loss = dif(fake_data).binary_crossentropy_logits(fake)
    real_loss = dif(x).binary_crossentropy_logits(real)
    loss = (fake_loss + real_loss)/2
    loss.backward()
    difOpt.step()
    return loss.numpy()
@TinyJit
def step_gen():
    Tensor.training = True
    real = Tensor.ones((BATCH_SIZE,1))
    noise = Tensor.randn(BACH_SIZE, gen.ld)
    fake_data = gen(noise).detach()
    loss = dif(fake_data).binary_crossentropy_logits(real)
    loss.backward()
    genOpt.step()
    return loss.numpy()
eshape = (BACH_SIZE, 1, 128, 216)
mlflow.log_param("generator_learning_rate", glr)
mlflow.log_param("discim_learning_rate", dlr)
mlflow.log_param("epochs", epochs)
mlflow.log_param("train size", len(train))
mlflow.log_param("test size", len(test))
for e in range(0,epochs):
    print(f"\n--- Starting Epoch {e} ---\n")
    dl=0
    gl=0
    for i in range(0,size,BACH_SIZE):
        tx=Tensor(train[i:i+BACH_SIZE])
print("training")
pl = 0
eshape = (BATCH_SIZE, 1, 128, 431)
for epoch in range(0,EPOCHS):
    print(f"\n--- Starting Epoch {epoch} ---\n")
    loss=0
    for i in range(0,len(x),BATCH_SIZE):
        tx=Tensor(x[i:i+BATCH_SIZE])
        if(tx.shape != eshape):
            continue
        #steps
        dl+=step_dis(tx)
        gl+=step_gen()
        loss += step_gen(tx)
    dl /= (size/BACH_SIZE)
    gl /= (size/BACH_SIZE)
    if e%5==0:
        noise = Tensor.randn(BACH_SIZE, gen.ld)
        show.logSpec(gen(noise).numpy()[0][0],e)
    #todo test on test data
    mlflow.log_metric("gen_loss", gl, step=e)
    mlflow.log_metric("dis_loss", dl, step=e)
    print(f"loss of gen:{gl} dis:{dl}")
    loss /= (len(x)/BATCH_SIZE)
    if epoch%5==0:
        noise = Tensor.rand_like(Tensor(x[0:1])).tanh()
        y = Tensor(x[0:1]) + (noise*volume)
        show.logSpec(mdl(y).numpy()[0][0],epoch)
    if(pl - loss < 0.03 and epoch > 25):
        show.logSpec(y.numpy()[0][0],f"volume_{volume}")
        volume *= 2
    pl = loss
#save
noise = Tensor.randn(BACH_SIZE, gen.ld)
show.logSpec(gen(noise).numpy()[0][0],epochs)
from tinygrad.nn.state import safe_save, get_state_dict
safe_save(get_state_dict(gen),"music.safetensors")
mlflow.log_artifact("music.safetensors")
    mlflow.log_metric("volume", volume, step=epoch)
    mlflow.log_metric("loss", loss, step=epoch)
    print(f"loss of {loss}")