mnist.py
# %%
import pandas as pd
from tinygrad import nn, Tensor
from tinygrad.nn.optim import SGD
import numpy as np
# %%
# Load the pre-split MNIST train/test CSVs (label in column 0, pixels after)
train_df = pd.read_csv("datasets/mnist-in-csv/mnist_train.csv")
test_df = pd.read_csv("datasets/mnist-in-csv/mnist_test.csv")
x_train = train_df.iloc[:, 1:].values # pixels
y_train = train_df.iloc[:, 0].values # labels
x_test = test_df.iloc[:, 1:].values # pixels
y_test = test_df.iloc[:, 0].values # labels
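# %%
# Optional preprocessing (my suggestion, not in the original script): scaling
# the raw [0, 255] pixel intensities down to [0, 1] typically makes SGD
# training more stable. Uncomment to enable:
# x_train = x_train.astype(np.float32) / 255.0
# x_test = x_test.astype(np.float32) / 255.0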
# %%
def activation(x: Tensor) -> Tensor:
    return x.leakyrelu()

def sparse_categorical_crossentropy(logits: Tensor, Y: Tensor, ignore_index=-1) -> Tensor:
    # Entries whose label equals ignore_index contribute nothing to the loss.
    loss_mask = Y != ignore_index
    # (num_samples, num_classes) grid of class indices to compare against the labels.
    y_counter = Tensor.arange(logits.shape[-1], requires_grad=False, device=logits.device).unsqueeze(0).expand(Y.numel(), logits.shape[-1])
    # -1 at each sample's true class, 0 elsewhere; multiplying by log-softmax picks out -log p(true class).
    y = ((y_counter == Y.flatten().reshape(-1, 1)).where(-1.0, 0) * loss_mask.reshape(-1, 1)).reshape(*Y.shape, logits.shape[-1])
    return logits.log_softmax().mul(y).sum() / loss_mask.sum()
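# %%
# Quick sanity check of the loss above (illustrative; not in the original
# script): confident, correct logits should give a loss near 0, while uniform
# logits over the two classes would give about log(2) ~= 0.693.
_logits = Tensor([[10.0, -10.0], [-10.0, 10.0]])
_labels = Tensor([0, 1])
print(sparse_categorical_crossentropy(_logits, _labels).numpy())  # ~0.0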
class Network:
    def __init__(self):
        # Two-layer MLP: 784 input pixels -> 128 hidden units -> 10 class logits.
        self.layers = [nn.Linear(784, 128, bias=False), nn.Linear(128, 10, bias=False)]
        self.opt = SGD([self.layers[0].weight, self.layers[1].weight], lr=3e-4)

    def forward(self, x: Tensor) -> Tensor:
        x = self.layers[0](x)
        x = activation(x)
        x = self.layers[1](x)
        return x

    def train(self, epochs: int):
        with Tensor.train():
            for i in range(epochs):
                # Sample a random minibatch of 64 examples.
                sample = np.random.randint(0, x_train.shape[0], size=64)
                batch = Tensor(x_train[sample], requires_grad=False)
                labels = Tensor(y_train[sample])
                pred = self.forward(batch)
                loss = sparse_categorical_crossentropy(pred, labels)
                self.opt.zero_grad()
                loss.backward()
                self.opt.step()
                # Minibatch accuracy for progress reporting.
                Y = pred.argmax(axis=-1)
                acc = (Y == labels).mean()
                if (i + 1) % 100 == 0:
                    print(f"Step {i+1} | Loss: {loss.numpy()} | Accuracy: {acc.numpy()}")
net = Network()
net.train(1000)
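# %%
# Evaluate on the held-out test set, which the training loop never touches.
# A minimal sketch (my addition): a single full-batch forward pass; for larger
# models you would evaluate in minibatches instead.
test_preds = net.forward(Tensor(x_test, requires_grad=False)).argmax(axis=-1).numpy()
print(f"Test accuracy: {(test_preds == y_test).mean():.4f}")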