Compare commits


No commits in common. "df4cdc8e25a027628fd9ff158d34a68eb26d79bc" and "1a328d313fa6ee5b076bffa150ffc7cec16b8bed" have entirely different histories.

4 changed files with 118 additions and 126 deletions

View File

@@ -22,9 +22,6 @@ def process_file(file_path):
end = start_pos + size
if end <= sample_len:
chunk = y[start_pos:end]
chunk = librosa.feature.melspectrogram(y=chunk, sr=SAMPLE_RATE)
chunk = ((librosa.amplitude_to_db(chunk,ref=np.max)+80)/80)
#chunk = librosa.feature.melspectrogram(y=chunk,sr=SAMPLE_RATE)
#chunk = ((librosa.amplitude_to_db(chunk,ref=np.max)+40)/40)
file_chunks.append(chunk)
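The normalization above maps the dB-scaled mel spectrogram from roughly [-80, 0] dB into [0, 1]: librosa.amplitude_to_db with ref=np.max and the default top_db=80 clips at -80 dB. A minimal sketch of the transform and its inverse, assuming a SAMPLE_RATE constant like the one this module already uses:

import librosa
import numpy as np

SAMPLE_RATE = 22050  # assumption; the module defines its own constant

def normalize(chunk):
    # mel spectrogram -> dB relative to its own max (clipped to [-80, 0]) -> [0, 1]
    mel = librosa.feature.melspectrogram(y=chunk, sr=SAMPLE_RATE)
    db = librosa.amplitude_to_db(mel, ref=np.max)
    return (db + 80) / 80

def denormalize(norm):
    # back to dB; the absolute reference level is not recoverable
    return norm * 80 - 80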

View File

@@ -1,14 +1,13 @@
import data
import numpy as np
x = data.load()
x,y = data.dataset(data.load())
size=len(x)
print(size)
x_np = np.stack(x)
x_np = np.expand_dims(x_np, axis=1)
#y_np = np.stack(y)
#y_np = np.expand_dims(y_np, axis=1)
y_np = np.stack(y)
y_np = np.expand_dims(y_np, axis=1)
np.savez_compressed("data",x_np)
np.savez_compressed("data",x_np,y_np)

View File

@@ -1,73 +1,34 @@
from tinygrad import Tensor, nn
class gen:
def __init__(self, input_channels=1, height=128, width=431, latent_dim=64):
self.height = height
self.width = width
self.latent_dim = latent_dim
class Gen:
def __init__(self, height=128, width=216, latent_dim=128):
self.w = width // 4
self.h = height // 4
self.h = 32 # Output height after 2 strides
self.w = 108 # Output width after 2 strides
self.flattened_size = 128 * self.h * self.w
self.flat = 128 * self.h * self.w
self.ld = latent_dim
self.d1 = nn.Linear(latent_dim, self.flat)
self.d2 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1)
self.d3 = nn.ConvTranspose2d(64, 1, kernel_size=3, stride=2, padding=1, output_padding=1)
def __call__(self, noise: Tensor) -> Tensor:
x = self.d1(noise).relu()
x = x.reshape(noise.shape[0], 128, self.h, self.w)
x = self.d2(x).relu()
x = self.d3(x)
return x.tanh()
self.e1 = nn.Conv2d(input_channels, 64, kernel_size=3, stride=2, padding=1)
class Check:
def __init__(self, height=128, width=216):
self.w = width // 4
self.h = height // 4
self.flat = 128 * self.h * self.w
self.e1 = nn.Conv2d(1, 64, kernel_size=3, stride=2, padding=1)
self.e2 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1)
self.el = nn.Linear(self.flattened_size, self.latent_dim)
self.q = nn.Linear(self.latent_dim,self.latent_dim)
self.k = nn.Linear(self.latent_dim,self.latent_dim)
self.v = nn.Linear(self.latent_dim,self.latent_dim)
self.dl = nn.Linear(self.latent_dim, self.flattened_size)
self.d1 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1)
self.d2 = nn.ConvTranspose2d(64, input_channels, kernel_size=3, stride=2, padding=1, output_padding=1)
self.out = nn.Linear(self.flat, 1)
def __call__(self, x: Tensor) -> Tensor:
y, shape = self.encode(x)
z = self.atten(y)
return self.decode(z, shape)
def encode(self, x: Tensor):
x = self.e1(x).leakyrelu()
x = self.e2(x).leakyrelu()
b, c, h, w = x.shape
flattened_size = c * h * w
x = x.reshape(shape=(b, flattened_size))
z = self.el(x)
return z, (c, h, w)
def atten(self, x: Tensor):
q = self.q(x).relu()
k = self.k(x).relu()
v = self.v(x).relu()
return q.scaled_dot_product_attention(k,v)
def decode(self, z: Tensor, shape):
x = self.dl(z).leakyrelu()
x = x.reshape(shape=(-1, 128, self.h, self.w))
x = self.d1(x).leakyrelu()
x = self.d2(x).sigmoid()
# Crop or pad to match input size
out_h, out_w = x.shape[2], x.shape[3]
if out_h > self.height:
x = x[:, :, :self.height, :]
elif out_h < self.height:
pad_h = self.height - out_h
x = x.pad2d((0, 0, 0, pad_h))
if out_w > self.width:
x = x[:, :, :, :self.width]
elif out_w < self.width:
pad_w = self.width - out_w
x = x.pad2d((0, pad_w, 0, 0))
return x
x = self.e1(x).relu()
x = self.e2(x).relu()
x = x.reshape(x.shape[0], -1)
return self.out(x)#.sigmoid()
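To make the shape bookkeeping of the new Gen/Check pair concrete: Gen projects the latent vector to 128 x 32 x 54, upsamples twice by stride 2 back to 1 x 128 x 216, and Check mirrors that path down to a single logit. A minimal sanity-check sketch (BATCH is an arbitrary test value, not from the source):

from tinygrad import Tensor
import model

BATCH = 4
gen = model.Gen()    # latent_dim=128 -> (BATCH, 1, 128, 216)
dif = model.Check()  # (BATCH, 1, 128, 216) -> (BATCH, 1) logits

noise = Tensor.randn(BATCH, gen.ld)
fake = gen(noise)
print(fake.shape)       # expected (4, 1, 128, 216)
print(dif(fake).shape)  # expected (4, 1)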

train.py
View File

@@ -1,71 +1,106 @@
#!/usr/bin/env python
# coding: utf-8
import data
import model as model
import show
import mlflow
import numpy as np
from tinygrad import Device,Tensor,nn,TinyJit
import matplotlib.pyplot as plt
import time
import show
from model import gen
from tinygrad import nn,TinyJit,Tensor
BATCH_SIZE = 16
EPOCHS = 100
LEARNING_RATE = 1e-5
print(Device.DEFAULT)
mdl = gen()
opt = nn.optim.AdamW(nn.state.get_parameters(mdl), lr=LEARNING_RATE)
volume = 0.1
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.start_run(experiment_id=804883409598823668)
#hyper
BACH_SIZE=32
BATCH_SIZE=BACH_SIZE
glr=2e-4
dlr=1e-5
epochs=100
def spec_loss(pred, target, eps=1e-6):
# spectral convergence
sc = ((target - pred).square().sum()) ** 0.5 / ((target.square().sum()) ** 0.5 + eps)
# log magnitude difference
log_mag = ((target.abs() + eps).log() - (pred.abs() + eps).log()).abs().mean()
return sc + log_mag
#dataset
x = data.load()
size=len(x)
x_np = np.stack(x)
x_np = np.expand_dims(x_np, axis=1)
permutation = np.random.permutation(size)
x_np = x_np[permutation]
train = x_np[30:]
test = x_np[0:30]
print("Train:"+str(len(train)))
print("Test:"+str(len(test)))
#model
gen = model.Gen()
dif = model.Check()
genOpt = nn.optim.AdamW(nn.state.get_parameters(gen), lr=glr)
difOpt = nn.optim.AdamW(nn.state.get_parameters(dif), lr=dlr)
#train
@TinyJit
def step_gen(x):
def step_dis(x:Tensor):
Tensor.training = True
noise = Tensor.rand_like(x).tanh()
y = x+(noise*volume)
y = y.clamp(0,1)
loss = spec_loss(mdl(y),x)
opt.zero_grad()
real = Tensor.ones((BATCH_SIZE,1))
fake = Tensor.zeros((BACH_SIZE,1))
noise = Tensor.randn(BACH_SIZE, gen.ld)
fake_data = gen(noise).detach()
fake_loss = dif(fake_data).binary_crossentropy_logits(fake)
real_loss = dif(x).binary_crossentropy_logits(real)
loss = (fake_loss + real_loss)/2
loss.backward()
opt.step()
difOpt.step()
return loss.numpy()
print("loading")
x = np.load("data.npz")["arr_0"]
#x= x[0:64]
run_name = f"tinygrad_autoencoder_{int(time.time())}"
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.start_run()
mlflow.log_params({"batch_size": BATCH_SIZE, "epochs": EPOCHS, "lr": LEARNING_RATE, "data size":len(x)})
@TinyJit
def step_gen():
Tensor.training = True
real = Tensor.ones((BATCH_SIZE,1))
noise = Tensor.randn(BACH_SIZE, gen.ld)
fake_data = gen(noise).detach()
loss = dif(fake_data).binary_crossentropy_logits(real)
loss.backward()
genOpt.step()
return loss.numpy()
show.logSpec(Tensor(x[0:1]).numpy()[0][0],"default")
print("training")
pl = 0
eshape = (BATCH_SIZE, 1, 128, 431)
for epoch in range(0,EPOCHS):
print(f"\n--- Starting Epoch {epoch} ---\n")
loss=0
for i in range(0,len(x),BATCH_SIZE):
tx=Tensor(x[i:i+BATCH_SIZE])
eshape = (BACH_SIZE, 1, 128, 216)
mlflow.log_param("generator_learning_rate", glr)
mlflow.log_param("discim_learning_rate", dlr)
mlflow.log_param("epochs", epochs)
mlflow.log_param("train size", len(train))
mlflow.log_param("test size", len(test))
for e in range(0,epochs):
print(f"\n--- Starting Epoch {e} ---\n")
dl=0
gl=0
for i in range(0,size,BACH_SIZE):
tx=Tensor(train[i:i+BACH_SIZE])
if(tx.shape != eshape):
continue
loss += step_gen(tx)
#steps
dl+=step_dis(tx)
gl+=step_gen()
loss /= (len(x)/BATCH_SIZE)
if epoch%5==0:
noise = Tensor.rand_like(Tensor(x[0:1])).tanh()
y = Tensor(x[0:1]) + (noise*volume)
show.logSpec(mdl(y).numpy()[0][0],epoch)
if(pl - loss < 0.03 and epoch > 25):
show.logSpec(y.numpy()[0][0],f"volume_{volume}")
volume *= 2
pl = loss
dl /= (size/BACH_SIZE)
gl /= (size/BACH_SIZE)
if e%5==0:
noise = Tensor.randn(BACH_SIZE, gen.ld)
show.logSpec(gen(noise).numpy()[0][0],e)
#todo test on test data
mlflow.log_metric("gen_loss", gl, step=e)
mlflow.log_metric("dis_loss", dl, step=e)
print(f"loss of gen:{gl} dis:{dl}")
mlflow.log_metric("volume", volume, step=epoch)
mlflow.log_metric("loss", loss, step=epoch)
print(f"loss of {loss}")
#save
noise = Tensor.randn(BACH_SIZE, gen.ld)
show.logSpec(gen(noise).numpy()[0][0],epochs)
from tinygrad.nn.state import safe_save, get_state_dict
safe_save(get_state_dict(gen),"music.safetensors")
mlflow.log_artifact("music.safetensors")
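After the run, the saved generator can be reloaded for sampling along these lines (a sketch using tinygrad's safe_load/load_state_dict counterparts of the save calls above):

from tinygrad import Tensor
from tinygrad.nn.state import safe_load, load_state_dict
import model

gen = model.Gen()
load_state_dict(gen, safe_load("music.safetensors"))
noise = Tensor.randn(1, gen.ld)
spec = gen(noise).numpy()[0][0]  # one generated (128, 216) spectrogram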