cleanup/rewrite
This commit is contained in:
parent
f1abc67462
commit
689e4df4aa
65
data.py
Normal file
65
data.py
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
import librosa
|
||||||
|
import numpy as np
|
||||||
|
from pathlib import Path
|
||||||
|
from multiprocessing import Pool, cpu_count
|
||||||
|
|
||||||
|
SAMPLE_RATE = 22050
|
||||||
|
|
||||||
|
def process_file(file_path):
    """
    Cut one audio file into consecutive, non-overlapping 10 second chunks.

    The file is decoded by librosa as mono audio resampled to SAMPLE_RATE.
    Trailing samples that do not fill a whole chunk are dropped.

    Returns a list of slices of the decoded signal, each exactly
    SAMPLE_RATE * 10 samples long (empty if the file is shorter than that).
    """
    signal, _ = librosa.load(file_path, mono=True, sr=SAMPLE_RATE)
    chunk_len = int(SAMPLE_RATE * 10)

    # Stopping at the last offset that still leaves room for a whole chunk
    # is what discards the incomplete tail.
    last_stop = len(signal) - chunk_len + 1
    return [
        signal[begin:begin + chunk_len]
        for begin in range(0, last_stop, chunk_len)
    ]
|
||||||
|
|
||||||
|
def load():
    """
    Collect 10 second chunks from every *.mp3 file directly under ./data/.

    Files are handed to process_file through a multiprocessing pool sized
    by cpu_count().

    Returns one flat list holding the chunks of all files, in the order
    pool.map returns the per-file results.
    """
    mp3_paths = list(Path("./data/").glob("*.mp3"))
    with Pool(cpu_count()) as workers:
        per_file = workers.map(process_file, mp3_paths)
    # Flatten the list of per-file chunk lists into a single list.
    return [chunk for chunks in per_file for chunk in chunks]
|
||||||
|
|
||||||
|
def _min_max_scale(spec):
    """Rescale spec linearly so its minimum maps to 0 and its maximum to 1."""
    lowest = spec.min()
    spread = spec.max() - lowest
    shifted = spec - lowest
    # A constant spectrogram (e.g. a silent clip) has zero spread; dividing
    # would fill the result with NaN, so return the all-zero shift instead.
    if spread == 0:
        return shifted
    return shifted / spread


def audio_split(audio):
    """
    Split 10 seconds of audio into two normalized 5 second mel spectrograms.

    audio: sequence of samples; only the first SAMPLE_RATE * 10 samples
        are used.

    Returns (x, y): mel spectrograms of the first and second 5 second
    halves, each independently min-max scaled to [0, 1]. A half whose
    spectrogram is constant comes back as all zeros rather than NaN.
    """
    half = int(SAMPLE_RATE * 5)
    first, second = audio[:half], audio[half:half * 2]

    x = librosa.feature.melspectrogram(y=first, sr=SAMPLE_RATE)
    y = librosa.feature.melspectrogram(y=second, sr=SAMPLE_RATE)

    return _min_max_scale(x), _min_max_scale(y)
|
||||||
|
|
||||||
|
def detaset(chunks):
    """
    Turn 10 second chunks into paired lists of audio_split results.

    Every chunk is passed to audio_split through a multiprocessing pool
    sized by cpu_count().

    Returns (x, y): two lists of equal length, where x[i] and y[i] are the
    first and second element audio_split produced for chunks[i].
    """
    with Pool(cpu_count()) as workers:
        pairs = workers.map(audio_split, chunks)
    firsts = [head for head, _ in pairs]
    seconds = [tail for _, tail in pairs]
    return firsts, seconds
|
||||||
25
model.py
25
model.py
@ -1,11 +1,11 @@
|
|||||||
from tinygrad import Tensor, nn
|
from tinygrad import Tensor, nn
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
class Model:
|
class Gen:
|
||||||
def __init__(self, input_channels=1, height=128, width=216, latent_dim=32):
|
def __init__(self, input_channels=1, height=128, width=216, latent_dim=32):
|
||||||
self.w = width // 8
|
self.w = width // 8
|
||||||
self.h = height // 8
|
self.h = height // 8
|
||||||
self.flattened_size = 128 * self.h * self.w
|
self.flattened_size = 256 * self.h * self.w
|
||||||
|
|
||||||
# Encoder
|
# Encoder
|
||||||
self.e1 = nn.Conv2d(input_channels, 64, kernel_size=3, stride=2, padding=1)
|
self.e1 = nn.Conv2d(input_channels, 64, kernel_size=3, stride=2, padding=1)
|
||||||
@ -61,5 +61,24 @@ class Model:
|
|||||||
x = x.reshape(shape=(-1, 256, self.h, self.w))
|
x = x.reshape(shape=(-1, 256, self.h, self.w))
|
||||||
x = self.d1(x).relu()
|
x = self.d1(x).relu()
|
||||||
x = self.d2(x).relu()
|
x = self.d2(x).relu()
|
||||||
x = self.d3(x)
|
x = self.d3(x).sigmoid()
|
||||||
return x
|
return x
|
||||||
|
|
||||||
|
class Check:
    """
    Convolutional network that reduces each input sample to a single value.

    Three stride-2 convolutions (64, 128, 256 output channels), each
    followed by a leaky ReLU with slope 0.2, feed one linear layer with a
    single output.

    NOTE(review): flattened_size assumes the three convolutions shrink
    height and width to exactly height // 8 and width // 8 - confirm this
    for sizes other than the defaults.
    """

    def __init__(self, input_channels=1, height=128, width=216):
        self.w = width // 8
        self.h = height // 8
        self.flattened_size = 256 * self.h * self.w

        # All three convolutions share the same kernel geometry.
        conv_kwargs = dict(kernel_size=3, stride=2, padding=1)
        self.d1 = nn.Conv2d(input_channels, 64, **conv_kwargs)
        self.d2 = nn.Conv2d(64, 128, **conv_kwargs)
        self.d3 = nn.Conv2d(128, 256, **conv_kwargs)
        self.fc = nn.Linear(self.flattened_size, 1)

    def __call__(self, x: Tensor) -> Tensor:
        """Run x through the convolution stack and the final linear layer."""
        for conv in (self.d1, self.d2, self.d3):
            x = conv(x).leakyrelu(0.2)
        flat = x.reshape(shape=(-1, self.flattened_size))
        return self.fc(flat)
|
||||||
478
music.ipynb
478
music.ipynb
File diff suppressed because one or more lines are too long
26
show.py
Normal file
26
show.py
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import IPython.display as ipd
|
||||||
|
import librosa
|
||||||
|
|
||||||
|
|
||||||
|
SAMPLE_RATE = 22050
|
||||||
|
|
||||||
|
def showSpec(spec):
    """
    Plot a mel spectrogram in a new 10x4 matplotlib figure and show it.

    spec: 2-D spectrogram data passed straight to librosa.display.specshow,
        drawn with a time x-axis and a mel y-axis at SAMPLE_RATE.

    NOTE(review): the colorbar is labelled in dB, but the values are plotted
    exactly as given - confirm callers pass dB-scaled data.
    """
    # librosa.display is a submodule; import it explicitly so the call below
    # does not rely on it having been loaded implicitly by `import librosa`.
    import librosa.display

    plt.figure(figsize=(10, 4))
    librosa.display.specshow(
        spec,
        sr=SAMPLE_RATE,
        x_axis='time',
        y_axis='mel',
        cmap='viridis',
    )
    plt.colorbar(format='%+2.0f dB')
    plt.title('Mel spectrogram')
    plt.show()
|
||||||
|
|
||||||
|
def playSpec(spec):
    """
    Reconstruct audio from a mel spectrogram, plot it, and embed a player.

    spec: mel spectrogram passed to librosa.feature.inverse.mel_to_stft at
        SAMPLE_RATE; the result is inverted to a waveform with Griffin-Lim
        (25 iterations, momentum 0.99).

    The waveform is drawn in a 12x4 matplotlib figure and then displayed as
    an IPython audio widget at SAMPLE_RATE.
    """
    stft_magnitude = librosa.feature.inverse.mel_to_stft(spec, sr=SAMPLE_RATE)
    audio = librosa.griffinlim(stft_magnitude, n_iter=25, momentum=0.99)

    plt.figure(figsize=(12, 4))
    plt.plot(audio)
    plt.title('waveform')
    plt.show()

    # Use the display function from the imported IPython.display module: the
    # bare name `display` only exists as an injected builtin inside IPython
    # sessions and raises NameError anywhere else.
    ipd.display(ipd.Audio(audio, rate=SAMPLE_RATE))
|
||||||
163
train.ipynb
Normal file
163
train.ipynb
Normal file
@ -0,0 +1,163 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "0",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%load_ext autoreload\n",
|
||||||
|
"%autoreload 2\n",
|
||||||
|
"\n",
|
||||||
|
"import data\n",
|
||||||
|
"import show\n",
|
||||||
|
"import model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "1",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"x,y = data.detaset(data.load())\n",
|
||||||
|
"len(x)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "2",
|
||||||
|
"metadata": {
|
||||||
|
"editable": true,
|
||||||
|
"slideshow": {
|
||||||
|
"slide_type": ""
|
||||||
|
},
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"sample=x[420]\n",
|
||||||
|
"show.showSpec(sample)\n",
|
||||||
|
"show.playSpec(sample)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "3",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from tinygrad import nn\n",
|
||||||
|
"gen = model.Gen()\n",
|
||||||
|
"optimizer = nn.optim.AdamW(nn.state.get_parameters(gen), lr=1e-4)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "4",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"@TinyJit\n",
|
||||||
|
"def jit_step(X: Tensor, Y: Tensor,epoch) -> Tensor:\n",
|
||||||
|
" Tensor.training = True\n",
|
||||||
|
" optimizer.zero_grad()\n",
|
||||||
|
" _, loss = gen.__Lcall__(X,Y,epoch)\n",
|
||||||
|
" loss.backward()\n",
|
||||||
|
" optimizer.step()\n",
|
||||||
|
" return loss.realize()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "5",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import numpy as np\n",
|
||||||
|
"x_np, y_np = np.stack(x), np.stack(y)\n",
|
||||||
|
"x_np = np.expand_dims(x_np, axis=1)\n",
|
||||||
|
"y_np = np.expand_dims(y_np, axis=1)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "6",
|
||||||
|
"metadata": {
|
||||||
|
"editable": true,
|
||||||
|
"slideshow": {
|
||||||
|
"slide_type": ""
|
||||||
|
},
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"size=len(x)\n",
|
||||||
|
"BACH_SIZE=32\n",
|
||||||
|
"eshape = (BACH_SIZE, 1, 128, 216)\n",
|
||||||
|
"\n",
|
||||||
|
"for e in range(0,12):\n",
|
||||||
|
" print(f\"\\n--- Starting Epoch {e} ---\\n\")\n",
|
||||||
|
" l=0\n",
|
||||||
|
" \n",
|
||||||
|
" permutation = np.random.permutation(size)\n",
|
||||||
|
" x_np = x_np[permutation]\n",
|
||||||
|
" y_np = y_np[permutation]\n",
|
||||||
|
" \n",
|
||||||
|
" for i in range(0,size,BACH_SIZE):\n",
|
||||||
|
" tx,ty=Tensor(x_np[i:i+BACH_SIZE]),Tensor(y_np[i:i+BACH_SIZE])\n",
|
||||||
|
" if(tx.shape != eshape or ty.shape != eshape):\n",
|
||||||
|
" continue\n",
|
||||||
|
" l+=jit_step(tx,ty,e).numpy()\n",
|
||||||
|
" \n",
|
||||||
|
" l /= (size/BACH_SIZE)\n",
|
||||||
|
" print(f\"loss of {l}\")\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "7",
|
||||||
|
"metadata": {
|
||||||
|
"editable": true,
|
||||||
|
"slideshow": {
|
||||||
|
"slide_type": ""
|
||||||
|
},
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from tinygrad.nn.state import safe_save, get_state_dict\n",
|
||||||
|
"safe_save(get_state_dict(gen),\"music.safetensors\")"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.12.11"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user