import time
import inspect
import os
import numpy as np
import skimage
import skimage.io
import skimage.transform
import tensorflow as tf
from neural_style import custom_vgg19
import stylenet_core


def get_filename(path):
    """Return the base file name without directory or extension."""
    return os.path.splitext(os.path.basename(path))[0]

def render(content_file, style_file,
content_region_file=None, style_region_file=None,
random_init=False, load_saved_mapping=True, load_trained_image=False, blur_mapping=True,
height=None, width=None,
content_ratio=0., style3_ratio=3., style4_ratio=1., gram_ratio=0.001, diff_ratio=0.,
epochs=300, output_file="./train/output%d.jpg"):
"""
Render the synthesis with single generation.
- Best used if style has high similarity with the content
- If any ratio is set to 0, the corresponding Tensor will not be generated
- Pure Gram Matrix synthesis is best for painting abstract style. (gram_ratio = 1 and all others 0)
:param content_file: String file path of content image
:param style_file: String file path of style image
:param content_region_file: String file path of region mapping of content
:param style_region_file: String file path of region mapping of image
:param random_init: True to init the image with random
:param load_saved_mapping: True to use saved mapping file
:param load_trained_image: True to use saved training
:param blur_mapping: True to blur the mapping before calculate the max argument
:param height: int of height of result image
:param width: int of width of result image. Leaving None with height will scaled
according aspect ratio
:param content_ratio: float32 of weight of content cost
:param style3_ratio: float32 of weight of patch cost of conv3 layer
:param style4_ratio: float32 of weight of patch cost of conv4 layer
:param gram_ratio: float32 of weight of gram matrix cost
:param diff_ratio: float32 of weight of local different cost
:param epochs: int of number of epochs to train
:param output_file: String file name of output file. %d will be replaced running number
"""
print "render started:"
# print info:
frame = inspect.currentframe()
args, _, _, values = inspect.getargvalues(frame)
for i in args:
print " %s = %s" % (i, values[i])
content_np = stylenet_core.load_image(content_file, height, width)
style_np = stylenet_core.load_image(style_file, content_np.shape[0], content_np.shape[1])
content_batch = np.expand_dims(content_np, 0)
style_batch = np.expand_dims(style_np, 0)
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
with tf.Session(config=tf.ConfigProto(gpu_options=(gpu_options), log_device_placement=False)) as sess:
start_time = time.time()
contents = tf.constant(content_batch, dtype=tf.float32, shape=content_batch.shape)
styles = tf.constant(style_batch, dtype=tf.float32, shape=style_batch.shape)
if random_init:
var_image = tf.Variable(tf.truncated_normal(content_batch.shape, 0.5, 0.1))
else:
var_image = tf.Variable(contents)
vgg_content = custom_vgg19.Vgg19()
with tf.name_scope("content_vgg"):
vgg_content.build(contents)
vgg_style = custom_vgg19.Vgg19()
with tf.name_scope("style_vgg"):
vgg_style.build(styles)
vgg_var = custom_vgg19.Vgg19()
with tf.name_scope("variable_vgg"):
vgg_var.build(var_image)
with tf.name_scope("cost"):
            # style:
            # The nearest-neighbour patch mappings are expensive to compute, so they
            # are cached to disk and reused when load_saved_mapping is True.
            # TODO change file name based on out file name
            style3file = "./train/%s-style_map_3" % (
                get_filename(content_file) + "-" + get_filename(style_file))
            style4file = "./train/%s-style_map_4" % (
                get_filename(content_file) + "-" + get_filename(style_file))
if content_region_file is None or style_region_file is None:
                if style3_ratio == 0:
style_cost_3 = tf.constant(0.0)
else:
style_cost_3 = stylenet_core.get_style_cost_patch2(sess, vgg_var.conv3_1,
vgg_content.conv3_1,
vgg_style.conv3_1,
style3file,
load_saved_mapping=load_saved_mapping)
                if style4_ratio == 0:
style_cost_4 = tf.constant(0.0)
else:
style_cost_4 = stylenet_core.get_style_cost_patch2(sess, vgg_var.conv4_1,
vgg_content.conv4_1,
vgg_style.conv4_1,
style4file,
load_saved_mapping=load_saved_mapping)
else:
content_regions_np = stylenet_core.load_image(content_region_file, content_np.shape[0],
content_np.shape[1])
style_regions_np = stylenet_core.load_image(style_region_file, content_np.shape[0],
content_np.shape[1])
content_regions_batch = np.expand_dims(content_regions_np, 0)
style_regions_batch = np.expand_dims(style_regions_np, 0)
content_regions = tf.constant(content_regions_batch, dtype=tf.float32,
shape=content_regions_batch.shape)
style_regions = tf.constant(style_regions_batch, dtype=tf.float32,
shape=style_regions_batch.shape)
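                # VGG19's conv3_1 sits after two pooling layers, so the full-resolution
                # region masks are average-pooled twice below to match its spatial
                # resolution before patch matching.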
content_regions = vgg_var.avg_pool(content_regions, None)
content_regions = vgg_var.avg_pool(content_regions, None)
style_regions = vgg_var.avg_pool(style_regions, None)
style_regions = vgg_var.avg_pool(style_regions, None)
                if style3_ratio == 0:
style_cost_3 = tf.constant(0.0)
else:
style_cost_3 = stylenet_core.get_style_cost_patch2(sess,
vgg_var.conv3_1,
vgg_content.conv3_1,
vgg_style.conv3_1,
style3file,
content_regions,
style_regions,
load_saved_mapping,
blur_mapping=blur_mapping)
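                # One more pooling step brings the region masks down to conv4_1's
                # resolution (three pools in total).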
content_regions = vgg_var.avg_pool(content_regions, None)
style_regions = vgg_var.avg_pool(style_regions, None)
                if style4_ratio == 0:
style_cost_4 = tf.constant(0.0)
else:
style_cost_4 = stylenet_core.get_style_cost_patch2(sess,
vgg_var.conv4_1,
vgg_content.conv4_1,
vgg_style.conv4_1,
style4file,
content_regions,
style_regions,
load_saved_mapping,
blur_mapping=blur_mapping)
            if gram_ratio == 0:
style_cost_gram = tf.constant(0.0)
else:
style_cost_gram = stylenet_core.get_style_cost_gram(sess, vgg_style, vgg_var)
# content:
            if content_ratio == 0:
content_cost = tf.constant(.0)
else:
fixed_content = stylenet_core.get_constant(sess, vgg_content.conv4_2)
content_cost = stylenet_core.l2_norm_cost(vgg_var.conv4_2 - fixed_content)
            # smoothness:
            if diff_ratio == 0:
diff_cost = tf.constant(.0)
else:
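                # Each kernel below is a 3x3 forward-difference filter (horizontal or
                # vertical) replicated over the three input channels, so the convolution
                # sums neighbouring-pixel differences across RGB; penalising its L2 norm
                # acts as a total-variation-style smoothness cost.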
diff_filter_h = tf.constant([0, 0, 0, 0, -1, 1, 0, 0, 0], tf.float32, [3, 3, 1, 1])
diff_filter_h = tf.concat(2, [diff_filter_h, diff_filter_h, diff_filter_h])
diff_filter_v = tf.constant([0, 0, 0, 0, -1, 0, 0, 1, 0], tf.float32, [3, 3, 1, 1])
diff_filter_v = tf.concat(2, [diff_filter_v, diff_filter_v, diff_filter_v])
diff_filter = tf.concat(3, [diff_filter_h, diff_filter_v])
filtered_input = tf.nn.conv2d(var_image, diff_filter, [1, 1, 1, 1], "VALID")
diff_cost = stylenet_core.l2_norm_cost(filtered_input) * 1e7
content_cost = content_cost * content_ratio
style_cost_3 = style_cost_3 * style3_ratio
style_cost_4 = style_cost_4 * style4_ratio
style_cost_gram = style_cost_gram * gram_ratio
diff_cost = diff_cost * diff_ratio
cost = content_cost + style_cost_3 + style_cost_4 + style_cost_gram + diff_cost
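        # var_image is the only trainable variable built here (the VGG weights are
        # loaded as constants by custom_vgg19, and global_step is non-trainable),
        # so Adam below optimises the image pixels directly.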
with tf.name_scope("train"):
global_step = tf.Variable(0, name='global_step', trainable=False)
optimizer = tf.train.AdamOptimizer(learning_rate=0.02)
gvs = optimizer.compute_gradients(cost)
training = optimizer.apply_gradients(gvs, global_step=global_step)
print "Net generated:", (time.time() - start_time)
start_time = time.time()
with tf.name_scope("image_out"):
image_out = tf.clip_by_value(tf.squeeze(var_image, [0]), 0, 1)
saver = tf.train.Saver()
checkpoint = tf.train.get_checkpoint_state("./train")
if checkpoint and checkpoint.model_checkpoint_path and load_trained_image:
saver.restore(sess, checkpoint.model_checkpoint_path)
print "save restored:", checkpoint.model_checkpoint_path
else:
tf.initialize_all_variables().run()
print "all variables init"
print "Var init: %d" % (time.time() - start_time)
step_out = 0
start_time = time.time()
for i in xrange(epochs):
if i % 5 == 0:
img = sess.run(image_out)
img_out_path = output_file % step_out
skimage.io.imsave(img_out_path, img)
print "img saved: ", img_out_path
            step_out, content_out, style_patch3_out, style_patch4_out, style_gram_out, \
                diff_cost_out, cost_out, _ = sess.run(
                    [global_step, content_cost, style_cost_3, style_cost_4, style_cost_gram,
                     diff_cost, cost, training])
duration = time.time() - start_time
print "Step %d: cost:%.10f\t(%.1f sec)" % (step_out, cost_out, duration), \
"\t content:%.5f, style_3:%.5f, style_4:%.5f, gram:%.5f, diff_cost_out:%.5f" \
% (content_out, style_patch3_out, style_patch4_out, style_gram_out, diff_cost_out)
if (i + 1) % 10 == 0:
saved_path = saver.save(sess, "./train/saves-" + get_filename(content_file),
global_step=global_step)
print "net saved: ", saved_path
img = sess.run(image_out)
img_out_path = output_file % step_out
skimage.io.imsave(img_out_path, img)
print "img saved: ", img_out_path
def render_gen(content_file, style_file,
content_region_file=None, style_region_file=None,
random_init=False, load_saved_mapping=True, load_trained_image=False, blur_mapping=True,
height=None, width=None,
content_ratio=0, style3_ratio=3., style4_ratio=1., gram_ratio=0.001, diff_ratio=0.,
gen_epochs=80, max_gen=3, pyramid=True, max_reduction_ratio=.8, final_epochs=200):
"""
Render the image by generation method.
- Best used if the style has low similarity with the content.
- max_reduction_ratio can be set to lower, e.g. 0.4, to improve synthesis effect, but less content will be
preserved
- content_ratio, gram_ratio will be set to 0 in final generation becuase of low effectiveness
- blur_mapping will be switched off except the last generation to prevent severe content destruction
:param content_file: String file path of content image
:param style_file: String file path of style image
:param content_region_file: String file path of region mapping of content
:param style_region_file: String file path of region mapping of image
:param random_init: True to init the image with random
:param load_saved_mapping: True to use saved mapping file
:param load_trained_image: True to use saved training
:param blur_mapping: True to blur the mapping before calculate the max argument. Only applied
to last generation
:param height: int of height of result image
:param width: int of width of result image. Leaving None with height will scaled
according aspect ratio
:param content_ratio: float32 of weight of content cost, will be 0 for last generation
:param style3_ratio: float32 of weight of patch cost of conv3 layer
:param style4_ratio: float32 of weight of patch cost of conv4 layer
:param gram_ratio: float32 of weight of gram matrix cost, will be 0 for last generation
:param diff_ratio: float32 of weight of local different cost
:param gen_epochs: int of epochs of each generations, except the last generation
:param max_gen: int of number of generations
:param pyramid: True to pre-scale the image based on reduction ration
:param max_reduction_ratio: float32 of 0.0 to 1.0 of percentage of first reduction ratio in pyramid
:param final_epochs: int of epoch of training last generation
"""
for gen in xrange(max_gen):
        if gen == 0:
gen_content_file = content_file
height = stylenet_core.load_image(content_file, height, width).shape[0]
else:
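            # Later generations refine the previous generation's final output,
            # feeding it back in as the new content image.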
gen_content_file = ("./train/output-g" + str(gen - 1) + "-%d.jpg") % gen_epochs
output_file = "./train/output-g" + str(gen) + "-%d.jpg"
output_file_final = output_file % gen_epochs
if os.path.isfile(output_file_final):
            print output_file_final, "exists, moving to the next generation"
continue
tf.reset_default_graph()
ot = time.time()
print "----------- %d generation started -----------" % gen
if pyramid and gen == max_gen - 1:
h = height
epochs = final_epochs
cr = 0
gr = 0
bm = blur_mapping
else:
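            # Pyramid schedule: generation 0 renders at max_reduction_ratio * height and
            # later generations ramp linearly toward full size. For example, with
            # height=500, max_gen=3, max_reduction_ratio=0.8 this gives 400 px (gen 0)
            # and 433 px (gen 1); the final generation runs at the full 500 px.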
h = int(height * (gen * (1.0 - max_reduction_ratio) / max_gen + max_reduction_ratio))
epochs = gen_epochs
cr = content_ratio
gr = gram_ratio
bm = False
render(
content_file=gen_content_file,
style_file=style_file,
content_region_file=content_region_file,
style_region_file=style_region_file,
random_init=random_init,
load_saved_mapping=load_saved_mapping,
load_trained_image=load_trained_image,
blur_mapping=bm,
height=h,
width=width,
content_ratio=cr,
style3_ratio=style3_ratio,
style4_ratio=style4_ratio,
gram_ratio=gr,
diff_ratio=diff_ratio,
epochs=epochs,
output_file=output_file)
print "----------- %d generation finished in %d sec -----------\n" % (gen, time.time() - ot)
if __name__ == "__main__":
# for testing:
# no generation
# render("./test_data/cat_h.jpg", "./test_data/cat-water-colour.jpg", height=500)
# with generation
render_gen("./images/husky_paint.jpg", "./test_data/husky_real.jpg",
"./images/husky_paint_region.jpg", "./test_data/husky_real_region.jpg", height=500)