# coding: utf-8
# # Tutorial: Boltzmann Machines
#
# This shows how to train a Boltzmann machine to sample from an observed probability distribution.
# Example code for the lecture series "Machine Learning for Physicists" by Florian Marquardt
#
# Lecture 9, Tutorial (this is discussed in session 9)
#
# See https://machine-learning-for-physicists.org and the current course website linked there!
#
# This notebook is distributed under the Attribution-ShareAlike 4.0 International (CC BY-SA 4.0) license:
#
# https://creativecommons.org/licenses/by-sa/4.0/
# This notebook shows how to:
# - use a Boltzmann machine to sample from an observed high-dimensional probability distribution (e.g. produce images that look similar to observed training images)
#
# In[1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
matplotlib.rcParams['figure.dpi']=300 # highres display
from IPython.display import clear_output
from time import sleep
# In[2]:
def BoltzmannStep(v,b,w):
    """
    Perform a single step of the Markov chain,
    going from visible units v to hidden units h,
    according to biases b and weights w:

    z_j = b_j + sum_i v_i w_ij
    and P(h_j=1|v) = 1/(exp(-z_j)+1)

    Note: you can go from h to v by passing in
    h instead of v, the biases a instead of b,
    and the transpose of w instead of w.
    """
    batchsize=np.shape(v)[0]
    hidden_dim=np.shape(w)[1]
    z=b+np.dot(v,w)
    P=1/(np.exp(-z)+1)
    # now, the usual trick to obtain 0 or 1 according
    # to a given probability distribution:
    # just produce uniform (in [0,1]) random numbers and
    # check whether they are below the cutoff given by P
    p=np.random.uniform(size=[batchsize,hidden_dim])
    return(np.array(p<=P,dtype='int'))
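# Small empirical check of the sampling trick above (added illustration,
# not part of the original notebook; the *_check names are local to it):
# averaging many 0/1 samples for a fixed input should reproduce the
# sigmoid probabilities P(h_j=1|v).
v_check=np.zeros([10000,3])    # many copies of the same (all-zero) v
b_check=np.array([-1.0,0.0,1.0])
w_check=np.zeros([3,3])
print(np.average(BoltzmannStep(v_check,b_check,w_check),axis=0))
# -> approximately 1/(np.exp(-b_check)+1), i.e. roughly [0.27, 0.5, 0.73]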
def BoltzmannSequence(v,a,b,w,drop_h_prime=False):
    """
    Perform one sequence of steps v -> h -> v' -> h'
    of a Boltzmann machine, with the given
    weights w and biases a and b.
    All the arrays have shape [batchsize,num_neurons]
    (where num_neurons is num_visible for v and
    num_hidden for h).
    Set drop_h_prime to True if you want to use this
    routine to generate arbitrarily long sequences by
    calling it repeatedly (h' is then not needed and
    is returned as zeros).
    Returns: v,h,v',h'
    """
    h=BoltzmannStep(v,b,w)
    v_prime=BoltzmannStep(h,a,np.transpose(w))
    if not drop_h_prime:
        h_prime=BoltzmannStep(v_prime,b,w)
    else:
        h_prime=np.zeros(np.shape(h))
    return(v,h,v_prime,h_prime)
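# Brief shape check for BoltzmannSequence (added illustration, not part
# of the original notebook; the *_demo names are local to it): one full
# pass v -> h -> v' -> h' for a small random batch.
a_demo=np.zeros(20)
b_demo=np.zeros(10)
w_demo=0.1*np.random.randn(20,10)
v_demo=np.random.randint(2,size=[4,20])
v0,h0,v1,h1=BoltzmannSequence(v_demo,a_demo,b_demo,w_demo)
print(np.shape(v0),np.shape(h0),np.shape(v1),np.shape(h1))
# -> (4, 20) (4, 10) (4, 20) (4, 10)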
def trainStep(v,a,b,w):
    """
    Given a set of randomly selected training samples
    v (of shape [batchsize,num_neurons_visible]),
    and given biases a,b and weights w: compute
    the contrastive-divergence updates
    delta w_ij = eta ( <v_i h_j> - <v'_i h'_j> )
    delta a_i  = eta ( <v_i> - <v'_i> )
    delta b_j  = eta ( <h_j> - <h'_j> )
    Returns delta_a, delta_b, delta_w, but without
    the eta factor! It is up to you to update a,b,w.
    """
    v,h,v_prime,h_prime=BoltzmannSequence(v,a,b,w)
    return( np.average(v,axis=0)-np.average(v_prime,axis=0) ,
        np.average(h,axis=0)-np.average(h_prime,axis=0) ,
        np.average(v[:,:,None]*h[:,None,:],axis=0)-
        np.average(v_prime[:,:,None]*h_prime[:,None,:],axis=0) )
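# Side note (added, not in the original notebook): the outer-product
# averages above can equivalently be written with np.einsum, which some
# readers may find more explicit; a quick check with throwaway arrays:
v_cd=np.random.randint(2,size=[5,4]).astype('float')
h_cd=np.random.randint(2,size=[5,2]).astype('float')
assert np.allclose(np.einsum('bi,bj->ij',v_cd,h_cd)/5,
    np.average(v_cd[:,:,None]*h_cd[:,None,:],axis=0))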
# In[3]:
def produce_samples_random_segment(batchsize,num_visible,max_distance=3):
    """
    Produce 'batchsize' samples, each of length num_visible.
    Returns array v of shape [batchsize,num_visible].
    This version produces randomly placed segments of
    length 2*max_distance.
    """
    random_pos=num_visible*np.random.uniform(size=batchsize)
    j=np.arange(0,num_visible)
    return( np.array( np.abs(j[None,:]-random_pos[:,None])<=max_distance, dtype='int' ) )
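# Quick look at the training distribution (added illustration): each row
# is a binary vector containing one contiguous segment at a random position.
print(produce_samples_random_segment(3,20))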
# In[4]:
# Now: the training

num_visible=20
num_hidden=10
batchsize=50
eta=0.1
nsteps=500
skipsteps=10

a=np.random.randn(num_visible)
b=np.random.randn(num_hidden)
w=np.random.randn(num_visible,num_hidden)

test_samples=np.zeros([num_visible,nsteps])

for j in range(nsteps):
    v=produce_samples_random_segment(batchsize,num_visible)
    da,db,dw=trainStep(v,a,b,w)
    a+=eta*da
    b+=eta*db
    w+=eta*dw
    test_samples[:,j]=v[0,:] # store one training sample per step, for visualization
    if j%skipsteps==0 or j==nsteps-1:
        clear_output(wait=True)
        plt.figure(figsize=(10,3))
        plt.imshow(test_samples,origin='lower',aspect='auto',interpolation='none')
        plt.axis('off')
        plt.show()
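# Optional monitoring sketch (added, not in the original notebook): one
# crude check after training is to compare the average visible activation
# of fresh data samples with that of their one-step reconstructions v'.
v_data=produce_samples_random_segment(1000,num_visible)
_,_,v_recon,_=BoltzmannSequence(v_data,a,b,w,drop_h_prime=True)
print("data mean:",np.average(v_data),"reconstruction mean:",np.average(v_recon))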
# In[5]:
# Now: visualize the typical samples generated (from some starting point)
# run several times to continue this. It basically is a random walk
# through the space of all possible configurations, hopefully according
# to the probability distribution that has been trained!
nsteps=100
test_samples=np.zeros([num_visible,nsteps])

# note: v is re-used from the training cell above
# (shape [batchsize,num_visible]); the arrays below are just placeholders
v_prime=np.zeros(num_visible)
h=np.zeros(num_hidden)
h_prime=np.zeros(num_hidden)

for j in range(nsteps):
    v,h,v_prime,h_prime=BoltzmannSequence(v,a,b,w,drop_h_prime=True) # step from v via h to v_prime!
    test_samples[:,j]=v[0,:]
    v=np.copy(v_prime) # use the new v as the starting point for the next step!
    if j%skipsteps==0 or j==nsteps-1:
        clear_output(wait=True)
        plt.imshow(test_samples,origin='lower',interpolation='none')
        plt.show()
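# If you want to run the sampling cell standalone (added note, not in the
# original notebook), start the chain from a random visible configuration
# instead of reusing v from the training cell, e.g.:
#
# v=np.random.randint(2,size=[batchsize,num_visible])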
# In[6]:
# Now show the weight matrix
plt.figure(figsize=(0.2*num_visible,0.2*num_hidden))
plt.imshow(w,origin='lower',aspect='auto')
plt.show()
# # Tutorial: train on differently shaped samples (e.g. a bar with both random position and random width)
#
# ...or two bars, always separated by the same fixed distance! (See the sketch below.)
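# A possible sample generator for this tutorial exercise (a sketch under
# the exercise's assumptions, not part of the original notebook): a single
# bar with both random position and random width.
def produce_samples_random_bar(batchsize,num_visible,max_halfwidth=5):
    random_pos=num_visible*np.random.uniform(size=batchsize)
    random_halfwidth=1+(max_halfwidth-1)*np.random.uniform(size=batchsize)
    j=np.arange(0,num_visible)
    return( np.array( np.abs(j[None,:]-random_pos[:,None])<=random_halfwidth[:,None], dtype='int' ) )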
# # Homework: train on MNIST images!
#
# Here the visible units must be a flattened version of the images, something like
#
# v=np.reshape(image,[batchsize,width*height])
#
# ...if image is of shape [batchsize,width,height], meaning an array holding the pixel values for all images in a batch!
#
# ...to plot the results, you would use
#
# np.reshape(v,[batchsize,width,height])
#
# to get back to the image format!
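# A minimal sketch for the MNIST homework (added; assumes the images are
# available via tensorflow.keras.datasets, so adapt the loading to your
# own setup). Pixel values are binarized so the visible units are 0/1:
#
# from tensorflow.keras.datasets import mnist
# (x_train,_),(_,_)=mnist.load_data()            # x_train: [60000,28,28], uint8
# width,height=28,28
# images=np.array(x_train>127,dtype='int')       # binarize pixel values to 0/1
# v=np.reshape(images[0:batchsize],[batchsize,width*height])
# ...train as above, with num_visible=width*height...
# plt.imshow(np.reshape(v,[batchsize,width,height])[0]) # back to image format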