From df4cdc8e25a027628fd9ff158d34a68eb26d79bc Mon Sep 17 00:00:00 2001
From: k
Date: Mon, 10 Nov 2025 22:34:17 -0500
Subject: [PATCH] playing with denoising

---
 model.py |  89 ++++++++++++++++++++++---------
 train.py | 157 +++++++++++++++++++++----------------------------------
 2 files changed, 125 insertions(+), 121 deletions(-)

diff --git a/model.py b/model.py
index 08b9683..93add80 100644
--- a/model.py
+++ b/model.py
@@ -1,34 +1,73 @@
 from tinygrad import Tensor, nn
-class Gen:
-    def __init__(self, height=128, width=216, latent_dim=128):
+class gen:
+    def __init__(self, input_channels=1, height=128, width=431, latent_dim=64):
+        self.height = height
+        self.width = width
+        self.latent_dim = latent_dim
+
+        self.w = width // 4
         self.h = height // 4
-        self.flat = 128 * self.h * self.w
-        self.ld = latent_dim
-        self.d1 = nn.Linear(latent_dim, self.flat)
-        self.d2 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1)
-        self.d3 = nn.ConvTranspose2d(64, 1, kernel_size=3, stride=2, padding=1, output_padding=1)
-
-    def __call__(self, noise: Tensor) -> Tensor:
-        x = self.d1(noise).relu()
-        x = x.reshape(noise.shape[0], 128, self.h, self.w)
-        x = self.d2(x).relu()
-        x = self.d3(x)
-        return x.tanh()
+        self.h = 32 # Output height after 2 strides
+        self.w = 108 # Output width after 2 strides
+        self.flattened_size = 128 * self.h * self.w
 
-class Check:
-    def __init__(self, height=128, width=216):
-        self.w = width // 4
-        self.h = height // 4
-        self.flat = 128 * self.h * self.w
-        self.e1 = nn.Conv2d(1, 64, kernel_size=3, stride=2, padding=1)
+        self.e1 = nn.Conv2d(input_channels, 64, kernel_size=3, stride=2, padding=1)
         self.e2 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1)
-        self.out = nn.Linear(self.flat, 1)
+
+        self.el = nn.Linear(self.flattened_size, self.latent_dim)
+
+        self.q = nn.Linear(self.latent_dim,self.latent_dim)
+        self.k = nn.Linear(self.latent_dim,self.latent_dim)
+        self.v = nn.Linear(self.latent_dim,self.latent_dim)
+
+        self.dl = nn.Linear(self.latent_dim, self.flattened_size)
+
+        self.d1 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1)
+        self.d2 = nn.ConvTranspose2d(64, input_channels, kernel_size=3, stride=2, padding=1, output_padding=1)
 
     def __call__(self, x: Tensor) -> Tensor:
-        x = self.e1(x).relu()
-        x = self.e2(x).relu()
-        x = x.reshape(x.shape[0], -1)
-        return self.out(x)#.sigmoid()
+        y, shape = self.encode(x)
+        z = self.atten(y)
+        return self.decode(z, shape)
+
+    def encode(self, x: Tensor):
+        x = self.e1(x).leakyrelu()
+        x = self.e2(x).leakyrelu()
+        b, c, h, w = x.shape
+
+        flattened_size = c * h * w
+
+
+        x = x.reshape(shape=(b, flattened_size))
+        z = self.el(x)
+        return z, (c, h, w)
+
+    def atten(self, x: Tensor):
+        q = self.q(x).relu()
+        k = self.k(x).relu()
+        v = self.v(x).relu()
+        return q.scaled_dot_product_attention(k,v)
+
+    def decode(self, z: Tensor, shape):
+        x = self.dl(z).leakyrelu()
+        x = x.reshape(shape=(-1, 128, self.h, self.w))
+        x = self.d1(x).leakyrelu()
+        x = self.d2(x).sigmoid()
+
+        # Crop or pad to match input size
+        out_h, out_w = x.shape[2], x.shape[3]
+        if out_h > self.height:
+            x = x[:, :, :self.height, :]
+        elif out_h < self.height:
+            pad_h = self.height - out_h
+            x = x.pad2d((0, 0, 0, pad_h))
+
+        if out_w > self.width:
+            x = x[:, :, :, :self.width]
+        elif out_w < self.width:
+            pad_w = self.width - out_w
+            x = x.pad2d((0, pad_w, 0, 0))
+
+        return x
diff --git a/train.py b/train.py
index de503f4..14f6de3 100644
--- a/train.py
+++ b/train.py
@@ -1,106 +1,71 @@
-#!/usr/bin/env python
-# coding: utf-8
-import data
-import model as model
-import show
 import mlflow
 import numpy as np
-from tinygrad import nn,TinyJit,Tensor
+from tinygrad import Device,Tensor,nn,TinyJit
+import matplotlib.pyplot as plt
+import time
+import show
+from model import gen
+BATCH_SIZE = 16
+EPOCHS = 100
+LEARNING_RATE = 1e-5
+print(Device.DEFAULT)
+mdl = gen()
+opt = nn.optim.AdamW(nn.state.get_parameters(mdl), lr=LEARNING_RATE)
+volume = 0.1
+
+def spec_loss(pred, target, eps=1e-6):
+    # spectral convergence
+    sc = ((target - pred).square().sum()) ** 0.5 / ((target.square().sum()) ** 0.5 + eps)
+    # log magnitude difference
+    log_mag = ((target.abs() + eps).log() - (pred.abs() + eps).log()).abs().mean()
+    return sc + log_mag
+
+
+@TinyJit
+def step_gen(x):
+    Tensor.training = True
+    noise = Tensor.rand_like(x).tanh()
+    y = x+(noise*volume)
+    y = y.clamp(0,1)
+    loss = spec_loss(mdl(y),x)
+    opt.zero_grad()
+    loss.backward()
+    opt.step()
+    return loss.numpy()
+
+print("loading")
+x = np.load("data.npz")["arr_0"]
+#x= x[0:64]
+run_name = f"tinygrad_autoencoder_{int(time.time())}"
 mlflow.set_tracking_uri("http://127.0.0.1:5000")
-mlflow.start_run(experiment_id=804883409598823668)
-#hyper
-BACH_SIZE=32
-BATCH_SIZE=BACH_SIZE
-glr=2e-4
-dlr=1e-5
-epochs=100
+mlflow.start_run()
+mlflow.log_params({"batch_size": BATCH_SIZE, "epochs": EPOCHS, "lr": LEARNING_RATE, "data size":len(x)})
+show.logSpec(Tensor(x[0:1]).numpy()[0][0],"default")
 
-#dataset
-x = data.load()
-size=len(x)
-x_np = np.stack(x)
-x_np = np.expand_dims(x_np, axis=1)
-permutation = np.random.permutation(size)
-x_np = x_np[permutation]
-
-train = x_np[30:]
-test = x_np[0:30]
-
-print("Train:"+str(len(train)))
-print("Test:"+str(len(test)))
-
-
-#model
-gen = model.Gen()
-dif = model.Check()
-genOpt = nn.optim.AdamW(nn.state.get_parameters(gen), lr=glr)
-difOpt = nn.optim.AdamW(nn.state.get_parameters(dif), lr=dlr)
-
-
-#train
-
-@TinyJit
-def step_dis(x:Tensor):
-    Tensor.training = True
-    real = Tensor.ones((BATCH_SIZE,1))
-    fake = Tensor.zeros((BACH_SIZE,1))
-    noise = Tensor.randn(BACH_SIZE, gen.ld)
-    fake_data = gen(noise).detach()
-    fake_loss = dif(fake_data).binary_crossentropy_logits(fake)
-    real_loss = dif(x).binary_crossentropy_logits(real)
-    loss = (fake_loss + real_loss)/2
-    loss.backward()
-    difOpt.step()
-    return loss.numpy()
-
-@TinyJit
-def step_gen():
-    Tensor.training = True
-    real = Tensor.ones((BATCH_SIZE,1))
-    noise = Tensor.randn(BACH_SIZE, gen.ld)
-    fake_data = gen(noise).detach()
-    loss = dif(fake_data).binary_crossentropy_logits(real)
-    loss.backward()
-    genOpt.step()
-    return loss.numpy()
-
-
-eshape = (BACH_SIZE, 1, 128, 216)
-
-mlflow.log_param("generator_learning_rate", glr)
-mlflow.log_param("discim_learning_rate", dlr)
-mlflow.log_param("epochs", epochs)
-mlflow.log_param("train size", len(train))
-mlflow.log_param("test size", len(test))
-for e in range(0,epochs):
-    print(f"\n--- Starting Epoch {e} ---\n")
-    dl=0
-    gl=0
-
-    for i in range(0,size,BACH_SIZE):
-        tx=Tensor(train[i:i+BACH_SIZE])
+print("training")
+pl = 0
+eshape = (BATCH_SIZE, 1, 128, 431)
+for epoch in range(0,EPOCHS):
+    print(f"\n--- Starting Epoch {epoch} ---\n")
+    loss=0
+    for i in range(0,len(x),BATCH_SIZE):
+        tx=Tensor(x[i:i+BATCH_SIZE])
         if(tx.shape != eshape):
             continue
-        #steps
-        dl+=step_dis(tx)
-        gl+=step_gen()
+        loss += step_gen(tx)
 
-    dl /= (size/BACH_SIZE)
-    gl /= (size/BACH_SIZE)
-    if e%5==0:
-        noise = Tensor.randn(BACH_SIZE, gen.ld)
-        show.logSpec(gen(noise).numpy()[0][0],e)
-    #todo test on test data
-    mlflow.log_metric("gen_loss", gl, step=e)
-    mlflow.log_metric("dis_loss", dl, step=e)
-    print(f"loss of gen:{gl} dis:{dl}")
+    loss /= (len(x)/BATCH_SIZE)
+    if epoch%5==0:
+        noise = Tensor.rand_like(Tensor(x[0:1])).tanh()
+        y = Tensor(x[0:1]) + (noise*volume)
+        show.logSpec(mdl(y).numpy()[0][0],epoch)
+        if(pl - loss < 0.03 and epoch > 25):
+            show.logSpec(y.numpy()[0][0],f"volume_{volume}")
+            volume *= 2
+    pl = loss
 
-
-#save
-noise = Tensor.randn(BACH_SIZE, gen.ld)
-show.logSpec(gen(noise).numpy()[0][0],epochs)
-from tinygrad.nn.state import safe_save, get_state_dict
-safe_save(get_state_dict(gen),"music.safetensors")
-mlflow.log_artifact("music.safetensors")
+    mlflow.log_metric("volume", volume, step=epoch)
+    mlflow.log_metric("loss", loss, step=epoch)
+    print(f"loss of {loss}")
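
A quick sanity check for the new gen autoencoder in model.py (not part of the patch; a minimal sketch assuming the file above is importable and tinygrad is installed):

    from tinygrad import Tensor
    from model import gen

    mdl = gen()                       # defaults: 1 channel, 128x431 spectrograms, latent_dim=64
    x = Tensor.rand(2, 1, 128, 431)   # dummy batch of two spectrograms in [0, 1]
    z, shape = mdl.encode(x)          # latent codes plus the pre-flatten conv shape
    out = mdl(x)                      # full encode -> atten -> decode round trip
    print(z.shape, shape, out.shape)  # expect (2, 64), (128, 32, 108), (2, 1, 128, 431)

The hardcoded 32x108 matches two stride-2, padding-1, kernel-3 convolutions applied to a 128x431 input, and decode crops the 128x432 transposed-conv output back to 431 columns.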
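
The new training objective is spectral convergence (relative L2 error of the magnitudes) plus a mean log-magnitude difference. A hypothetical NumPy re-implementation, useful only for cross-checking the tinygrad spec_loss on small arrays:

    import numpy as np

    def spec_loss_np(pred, target, eps=1e-6):
        # spectral convergence: relative L2 error of the magnitudes
        sc = np.sqrt(((target - pred) ** 2).sum()) / (np.sqrt((target ** 2).sum()) + eps)
        # mean absolute difference of log magnitudes
        log_mag = np.abs(np.log(np.abs(target) + eps) - np.log(np.abs(pred) + eps)).mean()
        return sc + log_mag

    rng = np.random.default_rng(0)
    target = rng.random((1, 1, 128, 431)).astype(np.float32)
    pred = np.clip(target + 0.05 * rng.standard_normal(target.shape).astype(np.float32), 0, 1)
    print(spec_loss_np(pred, target))  # grows as pred drifts away from target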
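
The rewritten train.py also drops the safe_save call the old script ended with, so nothing is persisted after training. If the weights should still be logged, a sketch along the lines of the old code (the filename is a placeholder) could be appended after the epoch loop:

    from tinygrad.nn.state import safe_save, safe_load, get_state_dict, load_state_dict

    safe_save(get_state_dict(mdl), "denoiser.safetensors")  # placeholder filename
    mlflow.log_artifact("denoiser.safetensors")

    # later, to reload the trained denoiser:
    restored = gen()
    load_state_dict(restored, safe_load("denoiser.safetensors"))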