-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtest_layer_mnist_reg.cc
124 lines (108 loc) · 3.84 KB
/
test_layer_mnist_reg.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#include <iostream>
#include "leicht.hpp"
using namespace std;
// Number of samples copied into the input blobs per step; also the batch
// dimension of every Blob created in main().
unsigned int batchsize = 64;
// Value handed to each layer's update() call in main().
double lr = 1e-1; // reference lr=1e-3
// Upper bound (exclusive) of the training loop counter in main().
int maxiter = 1000;
// Integer quotient of the 37800 training rows by the batch size. main() uses
// (iteration % iepoch) as the batch index, so batch selection wraps around
// after this many iterations.
int iepoch = 37800/batchsize;
// Not referenced anywhere in the visible part of this file.
int overfit = 10; // (DEBUG) let it overfit on how many batches
// A validation pass runs whenever (iteration+1) is a multiple of this value.
int testevery = 100;
// One mean validation loss is appended per validation pass; the whole history
// is printed at the end of main().
vector<double> validlosshist;
/**
 * Trains a three-layer fully connected network (fc1 -> relu -> fc2 -> relu ->
 * fc3) with an MSE loss on data read from "mnist.th.h5", treating the label
 * as a scalar target.
 *
 * Every `testevery` iterations the current network is evaluated on the
 * validation set and the mean batch loss is appended to `validlosshist`,
 * which is printed once training finishes.
 *
 * @return 0 on completion.
 */
int
main(void)
{
    // Number of whole batches that fit into the 4200-row validation set.
    // Deriving it from batchsize keeps every validation read inside
    // valImages/valLabels regardless of the configured batch size.
    const int nvalbatch = static_cast<int>(4200 / batchsize);

    cout << ">> Reading MNIST dataset" << endl;
    Tensor<double> trainImages (37800, 784);
    trainImages.setName("trainImages");
    leicht_hdf5_read("mnist.th.h5", "/train/images", 0, 0, 37800, 784, trainImages.data);
    Tensor<double> trainLabels (37800, 1);
    trainLabels.setName("trainLabels");
    leicht_hdf5_read("mnist.th.h5", "/train/labels", 0, 0, 37800, 1, trainLabels.data);

    cout << ">> Reading MNIST validation dataset" << endl;
    Tensor<double> valImages(4200, 784);
    valImages.setName("valImages");
    leicht_hdf5_read("mnist.th.h5", "/val/images", 0, 0, 4200, 784, valImages.data);
    Tensor<double> valLabels(4200, 1);
    valLabels.setName("valLabels");
    leicht_hdf5_read("mnist.th.h5", "/val/labels", 0, 0, 4200, 1, valLabels.data);

    cout << ">> Initialize Network" << endl;
    Blob<double> image (batchsize, 784, "", false);
    Blob<double> imageT (784, batchsize, "", false); imageT.setName("imageT");
    Blob<double> label (1, batchsize, "", false); label.setName("label");
    Blob<double> o1 (128, batchsize); o1.setName("o1");
    Blob<double> o2 (128, batchsize); o2.setName("o2");
    Blob<double> yhat (1, batchsize); yhat.setName("yhat");
    Blob<double> loss (1); loss.setName("loss");

    TransposeLayer<double> trans1;
    LinearLayer<double> fc1 (128, 784);
    ReluLayer<double> relu1;
    LinearLayer<double> fc2 (128, 128);
    ReluLayer<double> relu2;
    LinearLayer<double> fc3 (1, 128);
    MSELoss<double> loss1;

    // Forward pass shared by training and validation. It reads whatever is
    // currently stored in `image` / `label` and leaves the result in `loss`.
    auto forward = [&]() {
        trans1.forward(image, imageT);
        fc1.forward(imageT, o1);
        relu1.forward(o1, o1); // inplace relu
        fc2.forward(o1, o2);
        relu2.forward(o2, o2); // inplace relu
        fc3.forward(o2, yhat);
        loss1.forward(yhat, loss, label);
    };

    cout << ">> Start training" << endl;
    for (int iteration = 0; iteration < maxiter; iteration++) {
        cout << ">> Iteration " << iteration << "::" << endl;

        // -- get batch (batch index wraps around after iepoch iterations)
        image.value.copy(
            trainImages.data + (iteration%iepoch)*batchsize*784, batchsize*784);
        label.value.copy(
            trainLabels.data + (iteration%iepoch)*batchsize*1, batchsize*1);
        // presumably rescales 0..255 pixel values into [0,1] -- TODO confirm
        image.value.scal_(1./255.);

        // -- forward
        forward();

        // -- zerograd
        fc1.zeroGrad();
        fc2.zeroGrad();
        fc3.zeroGrad();
        o1.zeroGrad();
        o2.zeroGrad();
        yhat.zeroGrad();
        loss.zeroGrad();

        // -- backward (reverse order of the forward pass)
        loss1.backward(yhat, loss, label);
        fc3.backward(o2, yhat);
        relu2.backward(o2, o2); // inplace relu
        fc2.backward(o1, o2);
        relu1.backward(o1, o1); // inplace relu
        fc1.backward(imageT, o1);

        // -- report
        loss1.report();
        label.dump(true, false);
        yhat.dump(true, false);
        //fc1.dumpstat();

        // -- update: every layer that had its gradient computed above must be
        //    stepped, including the output layer fc3.
        fc1.update(lr);
        fc2.update(lr);
        fc3.update(lr);

        if ((iteration+1)%testevery==0) {
            cout << ">> Validate:" << endl;
            vector<double> batchlosses;
            batchlosses.reserve(nvalbatch);
            for (int t = 0; t < nvalbatch; t++) {
                // -- get batch
                image.value.copy(valImages.data + t*batchsize*784, batchsize*784);
                label.value.copy(valLabels.data + t*batchsize*1, batchsize*1);
                image.value.scal_(1./255.);
                // -- forward only; no gradients or updates during validation
                forward();
                batchlosses.push_back(*loss.value.at(0));
            }
            // Mean of the per-batch losses.
            double meanloss = 0;
            for (double v : batchlosses) meanloss += v;
            meanloss /= batchlosses.size();
            cout << " * Loss: " << meanloss << endl;
            validlosshist.push_back(meanloss);
        }
    }

    // show history
    for (auto i : validlosshist) {
        cout << i << " ";
    }
    cout << endl;
    return 0;
}