Compare commits

...

2 Commits

Author  SHA1        Message                  Date
k       df4cdc8e25  playing with denoiseing  2025-11-10 22:34:17 -05:00
k       c84c100cb8  updated data             2025-11-08 00:10:50 -05:00
4 changed files with 133 additions and 125 deletions

View File

@@ -22,6 +22,9 @@ def process_file(file_path):
         end = start_pos + size
         if end <= sample_len:
             chunk = y[start_pos:end]
+            chunk = librosa.feature.melspectrogram(y=chunk, sr=SAMPLE_RATE)
+            chunk = ((librosa.amplitude_to_db(chunk,ref=np.max)+80)/80)
+
             #chunk = librosa.feature.melspectrogram(y=chunk,sr=SAMPLE_RATE)
             #chunk = ((librosa.amplitude_to_db(chunk,ref=np.max)+40)/40)
             file_chunks.append(chunk)
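
With ref=np.max, librosa's amplitude_to_db clips its output to [-80, 0] dB (the default top_db=80), so the new (db + 80) / 80 maps each chunk into [0, 1]; the commented-out variant divided by 40 instead. A minimal stand-alone sketch of just that normalization, with SAMPLE_RATE = 22050 assumed here for illustration only:

import librosa
import numpy as np

SAMPLE_RATE = 22050  # assumed value for this sketch; the real constant lives in this file

chunk = np.random.randn(2 * SAMPLE_RATE)                     # 2 s of noise as a stand-in chunk
S = librosa.feature.melspectrogram(y=chunk, sr=SAMPLE_RATE)  # (128, frames) mel power spectrogram
db = librosa.amplitude_to_db(S, ref=np.max)                  # clipped to [-80, 0] dB
norm = (db + 80) / 80                                        # rescaled into [0, 1]
print(norm.min(), norm.max())                                # max is exactly 1.0 at the ref bin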

View File

@@ -1,13 +1,14 @@
 import data
 import numpy as np
-x,y = data.dataset(data.load())
+x = data.load()
 size=len(x)
+print(size)
 x_np = np.stack(x)
 x_np = np.expand_dims(x_np, axis=1)
-y_np = np.stack(y)
-y_np = np.expand_dims(y_np, axis=1)
-np.savez_compressed("data",x_np,y_np)
+#y_np = np.stack(y)
+#y_np = np.expand_dims(y_np, axis=1)
+np.savez_compressed("data",x_np)
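
Because np.savez_compressed now receives a single positional array, NumPy stores it under the default key arr_0, which is the key train.py (below) uses to load it back. A quick sanity check, assuming data.npz was written by the script above:

import numpy as np

x = np.load("data.npz")["arr_0"]  # positional arrays are keyed arr_0, arr_1, ...
print(x.shape)                    # expected (N, 1, 128, 431) given the chunk size used elsewhere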

View File

@@ -1,34 +1,73 @@
 from tinygrad import Tensor, nn
-class Gen:
-    def __init__(self, height=128, width=216, latent_dim=128):
+class gen:
+    def __init__(self, input_channels=1, height=128, width=431, latent_dim=64):
+        self.height = height
+        self.width = width
+        self.latent_dim = latent_dim
         self.w = width // 4
         self.h = height // 4
-        self.flat = 128 * self.h * self.w
-        self.ld = latent_dim
-        self.d1 = nn.Linear(latent_dim, self.flat)
-        self.d2 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1)
-        self.d3 = nn.ConvTranspose2d(64, 1, kernel_size=3, stride=2, padding=1, output_padding=1)
-    def __call__(self, noise: Tensor) -> Tensor:
-        x = self.d1(noise).relu()
-        x = x.reshape(noise.shape[0], 128, self.h, self.w)
-        x = self.d2(x).relu()
-        x = self.d3(x)
-        return x.tanh()
-class Check:
-    def __init__(self, height=128, width=216):
-        self.w = width // 4
-        self.h = height // 4
-        self.flat = 128 * self.h * self.w
-        self.e1 = nn.Conv2d(1, 64, kernel_size=3, stride=2, padding=1)
+        self.h = 32  # Output height after 2 strides
+        self.w = 108  # Output width after 2 strides
+        self.flattened_size = 128 * self.h * self.w
+        self.e1 = nn.Conv2d(input_channels, 64, kernel_size=3, stride=2, padding=1)
         self.e2 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1)
-        self.out = nn.Linear(self.flat, 1)
+        self.el = nn.Linear(self.flattened_size, self.latent_dim)
+        self.q = nn.Linear(self.latent_dim,self.latent_dim)
+        self.k = nn.Linear(self.latent_dim,self.latent_dim)
+        self.v = nn.Linear(self.latent_dim,self.latent_dim)
+        self.dl = nn.Linear(self.latent_dim, self.flattened_size)
+        self.d1 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1)
+        self.d2 = nn.ConvTranspose2d(64, input_channels, kernel_size=3, stride=2, padding=1, output_padding=1)
     def __call__(self, x: Tensor) -> Tensor:
-        x = self.e1(x).relu()
-        x = self.e2(x).relu()
-        x = x.reshape(x.shape[0], -1)
-        return self.out(x)#.sigmoid()
+        y, shape = self.encode(x)
+        z = self.atten(y)
+        return self.decode(z, shape)
+    def encode(self, x: Tensor):
+        x = self.e1(x).leakyrelu()
+        x = self.e2(x).leakyrelu()
+        b, c, h, w = x.shape
+        flattened_size = c * h * w
+        x = x.reshape(shape=(b, flattened_size))
+        z = self.el(x)
+        return z, (c, h, w)
+    def atten(self, x: Tensor):
+        q = self.q(x).relu()
+        k = self.k(x).relu()
+        v = self.v(x).relu()
+        return q.scaled_dot_product_attention(k,v)
+    def decode(self, z: Tensor, shape):
+        x = self.dl(z).leakyrelu()
+        x = x.reshape(shape=(-1, 128, self.h, self.w))
+        x = self.d1(x).leakyrelu()
+        x = self.d2(x).sigmoid()
+        # Crop or pad to match input size
+        out_h, out_w = x.shape[2], x.shape[3]
+        if out_h > self.height:
+            x = x[:, :, :self.height, :]
+        elif out_h < self.height:
+            pad_h = self.height - out_h
+            x = x.pad2d((0, 0, 0, pad_h))
+        if out_w > self.width:
+            x = x[:, :, :, :self.width]
+        elif out_w < self.width:
+            pad_w = self.width - out_w
+            x = x.pad2d((0, pad_w, 0, 0))
+        return x
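
This rewrite replaces the GAN pair (Gen generator plus Check discriminator) with a single autoencoder: two strided convolutions encode the spectrogram, a linear q/k/v attention layer transforms the 64-dim latent, and two transposed convolutions decode it, with a final crop-or-pad back to the exact 128x431 input size. A minimal smoke test of the new class; a sketch assuming model.py is importable from the working directory:

from tinygrad import Tensor
from model import gen

mdl = gen()                      # defaults: 1 channel, 128x431 inputs, latent_dim=64
x = Tensor.rand(2, 1, 128, 431)  # two normalized spectrogram chunks
out = mdl(x)                     # encode -> atten -> decode
print(out.shape)                 # (2, 1, 128, 431); decode crops the 432-wide output to 431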

train.py (157 lines changed)
View File

@@ -1,106 +1,71 @@
-#!/usr/bin/env python
-# coding: utf-8
-import data
-import model as model
-import show
 import mlflow
 import numpy as np
-from tinygrad import nn,TinyJit,Tensor
-import matplotlib.pyplot as plt
-import time
+from tinygrad import Device,Tensor,nn,TinyJit
+import show
+from model import gen
+BATCH_SIZE = 16
+EPOCHS = 100
+LEARNING_RATE = 1e-5
+print(Device.DEFAULT)
+mdl = gen()
+opt = nn.optim.AdamW(nn.state.get_parameters(mdl), lr=LEARNING_RATE)
+volume = 0.1
+def spec_loss(pred, target, eps=1e-6):
+    # spectral convergence
+    sc = ((target - pred).square().sum()) ** 0.5 / ((target.square().sum()) ** 0.5 + eps)
+    # log magnitude difference
+    log_mag = ((target.abs() + eps).log() - (pred.abs() + eps).log()).abs().mean()
+    return sc + log_mag
+@TinyJit
+def step_gen(x):
+    Tensor.training = True
+    noise = Tensor.rand_like(x).tanh()
+    y = x+(noise*volume)
+    y = y.clamp(0,1)
+    loss = spec_loss(mdl(y),x)
+    opt.zero_grad()
+    loss.backward()
+    opt.step()
+    return loss.numpy()
+print("loading")
+x = np.load("data.npz")["arr_0"]
+#x= x[0:64]
+run_name = f"tinygrad_autoencoder_{int(time.time())}"
 mlflow.set_tracking_uri("http://127.0.0.1:5000")
-mlflow.start_run(experiment_id=804883409598823668)
-#hyper
-BACH_SIZE=32
-BATCH_SIZE=BACH_SIZE
-glr=2e-4
-dlr=1e-5
-epochs=100
-#dataset
-x = data.load()
-size=len(x)
-x_np = np.stack(x)
-x_np = np.expand_dims(x_np, axis=1)
-permutation = np.random.permutation(size)
-x_np = x_np[permutation]
-train = x_np[30:]
-test = x_np[0:30]
-print("Train:"+str(len(train)))
-print("Test:"+str(len(test)))
-#model
-gen = model.Gen()
-dif = model.Check()
-genOpt = nn.optim.AdamW(nn.state.get_parameters(gen), lr=glr)
-difOpt = nn.optim.AdamW(nn.state.get_parameters(dif), lr=dlr)
-#train
-@TinyJit
-def step_dis(x:Tensor):
-    Tensor.training = True
-    real = Tensor.ones((BATCH_SIZE,1))
-    fake = Tensor.zeros((BACH_SIZE,1))
-    noise = Tensor.randn(BACH_SIZE, gen.ld)
-    fake_data = gen(noise).detach()
-    fake_loss = dif(fake_data).binary_crossentropy_logits(fake)
-    real_loss = dif(x).binary_crossentropy_logits(real)
-    loss = (fake_loss + real_loss)/2
-    loss.backward()
-    difOpt.step()
-    return loss.numpy()
-@TinyJit
-def step_gen():
-    Tensor.training = True
-    real = Tensor.ones((BATCH_SIZE,1))
-    noise = Tensor.randn(BACH_SIZE, gen.ld)
-    fake_data = gen(noise).detach()
-    loss = dif(fake_data).binary_crossentropy_logits(real)
-    loss.backward()
-    genOpt.step()
-    return loss.numpy()
-eshape = (BACH_SIZE, 1, 128, 216)
-mlflow.log_param("generator_learning_rate", glr)
-mlflow.log_param("discim_learning_rate", dlr)
-mlflow.log_param("epochs", epochs)
-mlflow.log_param("train size", len(train))
-mlflow.log_param("test size", len(test))
-for e in range(0,epochs):
-    print(f"\n--- Starting Epoch {e} ---\n")
-    dl=0
-    gl=0
-    for i in range(0,size,BACH_SIZE):
-        tx=Tensor(train[i:i+BACH_SIZE])
+mlflow.start_run()
+mlflow.log_params({"batch_size": BATCH_SIZE, "epochs": EPOCHS, "lr": LEARNING_RATE, "data size":len(x)})
+show.logSpec(Tensor(x[0:1]).numpy()[0][0],"default")
+print("training")
+pl = 0
+eshape = (BATCH_SIZE, 1, 128, 431)
+for epoch in range(0,EPOCHS):
+    print(f"\n--- Starting Epoch {epoch} ---\n")
+    loss=0
+    for i in range(0,len(x),BATCH_SIZE):
+        tx=Tensor(x[i:i+BATCH_SIZE])
         if(tx.shape != eshape):
             continue
-        #steps
-        dl+=step_dis(tx)
-        gl+=step_gen()
-    dl /= (size/BACH_SIZE)
-    gl /= (size/BACH_SIZE)
-    if e%5==0:
-        noise = Tensor.randn(BACH_SIZE, gen.ld)
-        show.logSpec(gen(noise).numpy()[0][0],e)
-    #todo test on test data
-    mlflow.log_metric("gen_loss", gl, step=e)
-    mlflow.log_metric("dis_loss", dl, step=e)
-    print(f"loss of gen:{gl} dis:{dl}")
-#save
-noise = Tensor.randn(BACH_SIZE, gen.ld)
-show.logSpec(gen(noise).numpy()[0][0],epochs)
-from tinygrad.nn.state import safe_save, get_state_dict
-safe_save(get_state_dict(gen),"music.safetensors")
-mlflow.log_artifact("music.safetensors")
+        loss += step_gen(tx)
+    loss /= (len(x)/BATCH_SIZE)
+    if epoch%5==0:
+        noise = Tensor.rand_like(Tensor(x[0:1])).tanh()
+        y = Tensor(x[0:1]) + (noise*volume)
+        show.logSpec(mdl(y).numpy()[0][0],epoch)
+    if(pl - loss < 0.03 and epoch > 25):
+        show.logSpec(y.numpy()[0][0],f"volume_{volume}")
+        volume *= 2
+    pl = loss
+    mlflow.log_metric("volume", volume, step=epoch)
+    mlflow.log_metric("loss", loss, step=epoch)
+    print(f"loss of {loss}")
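
train.py is rewritten from a GAN loop into denoising-autoencoder training: each clean batch is corrupted with bounded noise scaled by volume, the model reconstructs the clean input, and spec_loss scores the result as spectral convergence plus a mean log-magnitude difference. Whenever the epoch loss improves by less than 0.03 after epoch 25, volume doubles, so the denoising task gradually hardens. A stand-alone sketch of just the corruption step, under the same defaults:

from tinygrad import Tensor

volume = 0.1                          # starting noise level; doubled when the loss plateaus
x = Tensor.rand(1, 1, 128, 431)       # stand-in for one clean [0, 1] spectrogram batch
noise = Tensor.rand_like(x).tanh()    # non-negative bounded noise, since rand() is in [0, 1)
y = (x + noise * volume).clamp(0, 1)  # corrupted input; the target stays the clean x
# the training step then computes spec_loss(mdl(y), x) and backpropagates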