# coding: utf-8
# # Training a network on representing an image (pure python) and interpreting the role of neurons
# Example code for the lecture series "Machine Learning for Physicists" by Florian Marquardt
#
# Lecture 3, Code shown in the lecture
#
# See https://machine-learning-for-physicists.org and the current course website linked there!
# This notebook shows how to:
# - train a neural network trying to approximate an image
# - 'opening the box': visualize what happens when some neurons are switched off
#
# This uses the python backpropagation code shown in lecture 2
# Note on programming style: I wanted to keep the code as close as possible to the backpropagation code in the lecture, which uses global variables (e.g. for the weights) for simplicity. However, if you wanted to nicely encapsulate this for usage by other people in larger projects, you would either (1) always make sure to pass along these variables as arguments (and never make them global) or (2) use object-oriented programming and turn this into a network class, which keeps these variables internally.
# ### Imports: numpy, matplotlib, and imageio (plus IPython display helpers for the simple animation)
# In[129]:
from numpy import array, zeros, exp, random, dot, shape, transpose, reshape, meshgrid, linspace, sqrt
from scipy import ndimage # for image loading/processing
import imageio
import matplotlib.pyplot as plt # for plotting
import matplotlib
matplotlib.rcParams['figure.dpi']=300 # highres display
# for updating display
# (very simple animation)
from IPython.display import clear_output
from time import sleep
# # Backpropagation
# ## Implement backpropagation for a general (fully connected) network
# In[120]:
# A ReLU unit (rectified linear) often works better for training in this case;
# it is kept here, commented out, as an alternative to the sigmoid used below:
# def net_f_df(z): # calculate f(z) and f'(z)
#     val=z*(z>0)
#     return(val,z>0) # return both f and f'
def net_f_df(z): # calculate f(z) and f'(z)
    return([1/(1+exp(-z)),
            1/((1+exp(-z))*(1+exp(z))) ]) # sigmoid and its derivative
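# Optional quick sanity check (not part of the lecture code): the second entry
# returned by net_f_df should agree with a finite-difference estimate of the
# sigmoid's slope; the test point z=0.3 and the step 1e-6 are arbitrary choices.
fcheck,dfcheck=net_f_df(0.3)
fcheck_plus,_=net_f_df(0.3+1e-6)
print(dfcheck,(fcheck_plus-fcheck)/1e-6) # the two numbers should nearly agree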
def forward_step(y,w,b): # calculate values in next layer, from input y
    z=dot(y,w)+b # w=weights, b=bias vector for next layer
    return(net_f_df(z)) # apply nonlinearity and return result
def apply_net(y_in): # one forward pass through the network
    global Weights, Biases, NumLayers
    global y_layer, df_layer # for storing y-values and df/dz values
    y=y_in # start with input values
    y_layer[0]=y
    for j in range(NumLayers): # loop through all layers [not counting input]
        # j=0 corresponds to the first layer above the input
        y,df=forward_step(y,Weights[j],Biases[j]) # one step, into layer j
        df_layer[j]=df # store f'(z) [needed later in backprop]
        y_layer[j+1]=y # store f(z) [also needed in backprop]
    return(y)
def apply_net_simple(y_in): # one forward pass through the network
    # no storage for backprop (this is used for simple tests)
    y=y_in # start with input values
    y_layer[0]=y
    for j in range(NumLayers): # loop through all layers
        # j=0 corresponds to the first layer above the input
        y,df=forward_step(y,Weights[j],Biases[j]) # one step
    return(y)
def backward_step(delta,w,df):
    # delta at layer N, of batchsize x layersize(N)
    # w between N-1 and N [layersize(N-1) x layersize(N) matrix]
    # df = df/dz at layer N-1, of batchsize x layersize(N-1)
    return( dot(delta,transpose(w))*df )
def backprop(y_target): # one backward pass through the network
    # the result will be the 'dw_layer' matrices that contain
    # the derivatives of the cost function with respect to
    # the corresponding weight
    global y_layer, df_layer, Weights, Biases, NumLayers
    global dw_layer, db_layer # dCost/dw and dCost/db (w,b=weights,biases)
    global batchsize
    delta=(y_layer[-1]-y_target)*df_layer[-1]
    dw_layer[-1]=dot(transpose(y_layer[-2]),delta)/batchsize
    db_layer[-1]=delta.sum(0)/batchsize
    for j in range(NumLayers-1):
        delta=backward_step(delta,Weights[-1-j],df_layer[-2-j])
        dw_layer[-2-j]=dot(transpose(y_layer[-3-j]),delta)/batchsize
        db_layer[-2-j]=delta.sum(0)/batchsize
def gradient_step(eta): # update weights & biases (after backprop!)
    global dw_layer, db_layer, Weights, Biases
    for j in range(NumLayers):
        Weights[j]-=eta*dw_layer[j]
        Biases[j]-=eta*db_layer[j]
def train_net(y_in,y_target,eta): # one full training batch
    # y_in is an array of size batchsize x (input-layer-size)
    # y_target is an array of size batchsize x (output-layer-size)
    # eta is the stepsize for the gradient descent
    global y_out_result
    y_out_result=apply_net(y_in)
    backprop(y_target)
    gradient_step(eta)
    cost=0.5*((y_target-y_out_result)**2).sum()/batchsize
    return(cost)
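# Optional end-to-end sanity check of the machinery above, on a tiny toy network
# (layer sizes, batchsize, and the random data here are arbitrary choices made
# just for this check; everything is re-initialized by the real setup below):
NumLayers=2
LayerSizes=[2,5,1]
Weights=[random.randn(LayerSizes[j],LayerSizes[j+1]) for j in range(NumLayers)]
Biases=[zeros(LayerSizes[j+1]) for j in range(NumLayers)]
y_layer=[zeros(LayerSizes[j]) for j in range(NumLayers+1)]
df_layer=[zeros(LayerSizes[j+1]) for j in range(NumLayers)]
dw_layer=[zeros([LayerSizes[j],LayerSizes[j+1]]) for j in range(NumLayers)]
db_layer=[zeros(LayerSizes[j+1]) for j in range(NumLayers)]
batchsize=4
print(train_net(random.uniform(size=[batchsize,2]),
                random.uniform(size=[batchsize,1]),0.1)) # prints a small positive cost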
# ### Train net to reproduce a 2D function
# In[121]:
# load the pixel image!
face=imageio.imread('Smiley.png')
pixel_image=transpose(face[:,:,0]) # have to transpose...
pixel_image=pixel_image[:,::-1] # and flip... to get the right view!
pixel_image-=pixel_image.min()
pixel_image=(pixel_image.astype(dtype='float'))/pixel_image.max() # normalize between 0 and 1!
Npixels=shape(pixel_image)[0] # assuming a square image!
# In[122]:
# the function we want to have (desired outcome)
# this picks the pixels from the image
def myFunc(x0,x1):
    global pixel_image, Npixels
    # convert to integer pixel coordinates (assuming the input is in 0..1);
    # the factor 0.9999 keeps x=1 from mapping to the out-of-range index Npixels
    x0int=(x0*Npixels*0.9999).astype(dtype='int')
    x1int=(x1*Npixels*0.9999).astype(dtype='int')
    return(pixel_image[x0int,x1int]) # extract color values at these pixels
# In[123]:
# check that this works:
Npixels_Test=50 # do the test output on a low-res grid! (saves time)
xrange=linspace(0,1,Npixels_Test)
X0,X1=meshgrid(xrange,xrange)
plt.imshow(myFunc(X0,X1),interpolation='nearest',origin='lower')
plt.axis('off')
plt.colorbar()
plt.show()
# In[124]:
# pick 'batchsize' random positions in the 2D square
def make_batch():
    global batchsize
    inputs=random.uniform(low=0,high=1,size=[batchsize,2])
    targets=zeros([batchsize,1]) # must have the right dimensions
    targets[:,0]=myFunc(inputs[:,0],inputs[:,1])
    return(inputs,targets)
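# Optional shape check of make_batch (the batchsize of 5 here is just for this
# test; the real batchsize is set again before each training run below):
batchsize=5
check_in,check_tgt=make_batch()
print(shape(check_in),shape(check_tgt)) # expect (5, 2) and (5, 1)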
# In[156]:
# set up all the weights and biases
NumLayers=3 # does not count input-layer (but does count output)
LayerSizes=[2,150,100,1] # input-layer,hidden-1,hidden-2,...,output-layer
Weights=[5*(1./sqrt(LayerSizes[j]))*random.randn(LayerSizes[j],LayerSizes[j+1]) for j in range(NumLayers)]
Biases=[zeros(LayerSizes[j+1]) for j in range(NumLayers)]
# set up all the helper variables
y_layer=[zeros(LayerSizes[j]) for j in range(NumLayers+1)]
df_layer=[zeros(LayerSizes[j+1]) for j in range(NumLayers)]
dw_layer=[zeros([LayerSizes[j],LayerSizes[j+1]]) for j in range(NumLayers)]
db_layer=[zeros(LayerSizes[j+1]) for j in range(NumLayers)]
# a 'test' batch that includes all the points on the image grid
test_batchsize=shape(X0)[0]*shape(X0)[1]
testsample=zeros([test_batchsize,2])
testsample[:,0]=X0.flatten()
testsample[:,1]=X1.flatten()
# check the output of this net
testoutput=apply_net_simple(testsample)
# show this!
myim=plt.imshow(reshape(testoutput,shape(X0)),origin='lower',interpolation='nearest')
plt.axis('off')
plt.colorbar()
plt.show()
# In[83]:
# define the batchsize
batchsize=1000
# Now: the training! (and plot the cost function)
eta=.01
batches=2000
costs=zeros(batches)
for k in range(batches):
    y_in,y_target=make_batch()
    costs[k]=train_net(y_in,y_target,eta)
plt.plot(costs)
plt.show()
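# The raw cost is noisy, since every point comes from a different random batch.
# As an optional visualization aid (the window of 50 batches is an arbitrary
# choice), plot a running average alongside the raw cost:
window=50
costs_smooth=array([costs[max(0,k-window):k+1].mean() for k in range(batches)])
plt.plot(costs,alpha=0.3,label='raw cost')
plt.plot(costs_smooth,label='running average')
plt.legend()
plt.show()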
# In[84]:
# a 'test' batch that includes all the points on the image grid
test_batchsize=shape(X0)[0]*shape(X0)[1]
testsample=zeros([test_batchsize,2])
testsample[:,0]=X0.flatten()
testsample[:,1]=X1.flatten()
# check the output of this net
testoutput=apply_net_simple(testsample)
# show this!
myim=plt.imshow(reshape(testoutput,shape(X0)),origin='lower',interpolation='nearest',vmin=0.0,vmax=1.0)
plt.axis('off')
plt.colorbar()
plt.show()
# # Animate learning progress
#
# You need about a million samples to get something reasonable-looking,
# i.e. something at least resembling the shape of the image.
#
# EXERCISE: find better parameters (weight initialization, learning rate, batchsize, etc.) to reduce the number of samples needed! One possible starting point is sketched after the setup below.
#
#
# In[ ]:
# set up all the weights and biases
NumLayers=3 # does not count input-layer (but does count output)
LayerSizes=[2,150,100,1] # input-layer,hidden-1,hidden-2,...,output-layer
Weights=[5*(1./sqrt(LayerSizes[j]))*random.randn(LayerSizes[j],LayerSizes[j+1]) for j in range(NumLayers)]
Biases=[zeros(LayerSizes[j+1]) for j in range(NumLayers)]
# set up all the helper variables
y_layer=[zeros(LayerSizes[j]) for j in range(NumLayers+1)]
df_layer=[zeros(LayerSizes[j+1]) for j in range(NumLayers)]
dw_layer=[zeros([LayerSizes[j],LayerSizes[j+1]]) for j in range(NumLayers)]
db_layer=[zeros(LayerSizes[j+1]) for j in range(NumLayers)]
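# One possible starting point for the EXERCISE above (my own suggestion, not the
# lecture's solution): a Xavier/Glorot-style initialization with scale
# 1/sqrt(fan_in) instead of the factor 5/sqrt(fan_in) used in the setup. To try
# it, call xavier_init() right after the setup above and re-run the training.
def xavier_init():
    global Weights, Biases
    Weights=[random.randn(LayerSizes[j],LayerSizes[j+1])/sqrt(LayerSizes[j]) for j in range(NumLayers)]
    Biases=[zeros(LayerSizes[j+1]) for j in range(NumLayers)]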
# In[180]:
# test sample
Npixels_Test=30 # do the test output on a low-res grid! (saves time)
xrange=linspace(0,1,Npixels_Test)
X0,X1=meshgrid(xrange,xrange)
test_batchsize=shape(X0)[0]*shape(X0)[1]
testsample=zeros([test_batchsize,2])
testsample[:,0]=X0.flatten()
testsample[:,1]=X1.flatten()
# parameters
eta=1.0
nsteps=10000
nskip_steps=100
batchsize=200
samples_count=0
costs=zeros(nsteps)
for j in range(nsteps):
    y_in,y_target=make_batch()
    costs[j]=train_net(y_in,y_target,eta)
    samples_count+=batchsize
    testoutput=apply_net_simple(testsample)
    if j%nskip_steps==0: # time to plot again!
        clear_output(wait=True)
        fig,ax = plt.subplots(ncols=2,nrows=1,figsize=(10,5))
        img=ax[1].imshow(reshape(testoutput,shape(X0)),origin='lower',interpolation='nearest',vmin=0)
        fig.colorbar(img,ax=ax[1])
        ax[1].axis('off') # no axes
        ax[0].plot(costs)
        plt.show()
AllOldWeights=[w.copy() for w in Weights] # backup copies of all weights
OldWeights=Weights[-1] # especially of the last layer, which we will modify below!
# In[181]:
print(samples_count) # total number of training samples used so far
# In[25]:
# save the resulting network
#savez_compressed("ImageCompression_Network_Smiley.npz",LayerSizes,Weights,Biases)
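# (note: savez_compressed, and the load used below, are not among the imports at
# the top of this script; they would need e.g. "from numpy import savez_compressed, load")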
# In[182]:
# switch on only individual neurons of last hidden layer
# and plot the resulting pictures in a big 10x10 array!
#data=load("ImageCompression_Network_Smiley.npz")
#Weights=data['arr_1']
Nrow=10
BigImage=zeros([Nrow*Npixels_Test,Nrow*Npixels_Test])
for which in range(100):
    Weights[-1]=OldWeights.copy() # start from the trained last-layer weights
    Weights[-1][0:which,:]=0 # switch off all neurons below 'which'...
    Weights[-1][which+1:,:]=0 # ...and all neurons above it
    testoutput=apply_net_simple(testsample)
    row=int(which/Nrow)
    column=which%Nrow
    BigImage[Npixels_Test*row:Npixels_Test*(row+1),Npixels_Test*column:Npixels_Test*(column+1)]=reshape(testoutput,shape(X0))
    #print(row,column)
myim=plt.imshow(BigImage,origin='lower',interpolation='nearest',vmin=0.0,vmax=1.0)
plt.axis('off')
plt.show()
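# Optional cleanup: restore the trained last-layer weights after the scan above
# (the loop only ever modified copies, so OldWeights still holds the trained
# values), in case the network is used further in an interactive session:
Weights[-1]=OldWeights.copy()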