-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathstyle_transfer.py
253 lines (183 loc) · 7.12 KB
/
style_transfer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
"""
Tensorflow Implementation of Gradient Mask based Style Transfer
Academic Paper:
Author: Ying Chan
GitHub: https://github.com/yc5915/Tensorflow-Implementations/
"""
import skimage.io
import skimage.transform
import tensorflow as tf
import numpy as np
"""
Model Creation Functions
"""
def VGG19(img, vgg_path='vgg19.npy'):
    """
    Create a pre-trained VGG19 model without the Fully Connected layers.

    Dropping the FC layers allows the input image to have a variable size.
    Link to download vgg19.npy @ https://github.com/machrisaa/tensorflow-vgg

    Args:
        img: TF Tensor with shape [N,H,W,C]. Values should be in range 0.0-1.0
        vgg_path: path to vgg19.npy

    Returns:
        vgg: dictionary mapping layer name -> output tensor of that layer
    """
    # Scale img to 0.0-255.0 and split into R, G, B channels
    red, green, blue = tf.split(img * 255.0, num_or_size_splits=3, axis=3)
    # Normalize the input image by subtracting the per-channel training mean.
    # Gather the channels in B, G, R order, as the Caffe-trained weights expect.
    vgg_mean = [103.939, 116.779, 123.68]
    bgr = tf.concat(axis=3, values=[
        blue - vgg_mean[0],
        green - vgg_mean[1],
        red - vgg_mean[2],
    ])
    # Load pre-trained VGG19 parameters.
    # allow_pickle=True is required on NumPy >= 1.16.3: vgg19.npy stores a
    # pickled dict of {layer_name: [kernel, bias]} and np.load now refuses
    # pickled data by default.
    vgg_params = np.load(vgg_path, encoding='latin1', allow_pickle=True).item()
    # Note that we do not include FC layers
    vgg_layers = ["conv1_1", "conv1_2", "pool1",
                  "conv2_1", "conv2_2", "pool2",
                  "conv3_1", "conv3_2", "conv3_3", "conv3_4", "pool3",
                  "conv4_1", "conv4_2", "conv4_3", "conv4_4", "pool4",
                  "conv5_1", "conv5_2", "conv5_3", "conv5_4", "pool5"]
    # Dictionary to store the layers
    vgg = {}
    # Create VGG19 model and load pre-trained parameters
    curr = bgr
    for layer in vgg_layers:
        if layer[:4] == "conv":
            # Pre-trained kernel + bias, then ReLU activation
            curr = tf.nn.conv2d(curr, filter=vgg_params[layer][0],
                                strides=[1, 1, 1, 1], padding='SAME')
            curr += vgg_params[layer][1]
            curr = tf.nn.relu(curr)
        elif layer[:4] == "pool":
            # 2x2 max pooling halves the spatial resolution
            curr = tf.nn.max_pool(curr, ksize=[1, 2, 2, 1],
                                  strides=[1, 2, 2, 1], padding='SAME')
        vgg[layer] = curr
    return vgg
def FeatureLayer(layer):
    """
    Flatten the spatial dimensions of a layer into column vectors.

    Args:
        layer: TF Tensor with shape [N,H,W,C] (expecting N=1 image)

    Returns:
        Flattened tensor with shape [D,C] where:
            D = N*H*W, the number of features per column vector
            C, the number of column vectors (channels)
    """
    channels = layer.get_shape().as_list()[3]
    return tf.reshape(layer, shape=[-1, channels])
def StyleLayer(layer):
    """
    Compute the Gram matrix from an array of column vectors
    (uses all features in each column vector).

    Args:
        layer: TF Tensor with shape [D,C] (expecting FeatureLayer output)

    Returns:
        Gram matrix with shape [C,C]
    """
    # layer^T @ layer gives the [C,C] matrix of channel-wise inner products
    return tf.matmul(tf.transpose(layer), layer)
"""
Helper Functions
"""
def NHWC(img):
    """
    Transform an image with shape [H,W,C] to [N,H,W,C] where N = 1,
    converting to float32.
    """
    # Cast first, then prepend the singleton batch axis
    return np.asarray(img, dtype=np.float32)[np.newaxis, ...]
"""
Parameters
"""
# Define layers of VGG19 model to use for Content and Style
content_layers = ["conv4_2"]
style_layers = ["conv2_1","conv3_1"]
# Load images & scale values from 0.0-255.0 to 0.0-1.0
content_img = skimage.io.imread("images\\lion.jpg") / 255.0
style_img = skimage.io.imread("images\\wave.jpg") / 255.0
"""
Evaluate Content and Style Image
"""
print("Building Model to Evaluate Content & Style Image")
tf.reset_default_graph()
# Build a Model with variable sized image
img = tf.placeholder(tf.float32, shape=[1,None,None,3])
vgg = VGG19(tf.Variable(img, dtype=tf.float32, validate_shape=False))
# for every layer in style and content layers, create a feature layer
for layer in set([*content_layers, *style_layers]):
vgg["feat_" + layer] = FeatureLayer(vgg[layer])
# for every layer in style layers, create a style layer
for layer in style_layers:
pool = int(layer[4])
vgg["style_" + layer] = StyleLayer(vgg["feat_" + layer])
# Dictionaries to store Content and Style Layer outputs
content = {}
style = {}
with tf.Session() as sess:
print("Evaluating Content Image")
# initialize img as Content Image
sess.run(tf.global_variables_initializer(), feed_dict={img: NHWC(content_img)})
for layer in content_layers:
name = "feat_" + layer
content[name] = vgg[name].eval()
print("Evaluating Style Image")
# initialize img as Style Image
sess.run(tf.global_variables_initializer(), feed_dict={img: NHWC(style_img)})
for layer in style_layers:
name = "style_" + layer
style[name] = vgg[name].eval()
"""
Style Transfer Model
"""
print("Building Model to Transfer Style")
tf.reset_default_graph()
# Build a Model with Content Image (alternatively can use random noise)
# We use Variable instead of placeholder as we wish to make use of Tensorflow's optimizers
img = tf.Variable(NHWC(content_img))
vgg = VGG19(img)
# for every layer in style and content layers, create a feature layer
for layer in set([*content_layers, *style_layers]):
vgg["feat_" + layer] = FeatureLayer(vgg[layer])
# for every layer in style layers, create a style layer
for layer in style_layers:
pool = int(layer[4])
vgg["style_" + layer] = StyleLayer(vgg["feat_" + layer])
"""
Loss Function
"""
print("Creating Loss Function")
content_loss = 0
for layer in content_layers:
name = "feat_" + layer
content_loss += tf.reduce_mean(tf.square(content[name] - vgg[name]))
content_loss /= len(content_layers) # each content layer has equal weight
style_loss = 0
for layer in style_layers:
name = "style_" + layer
style_loss += tf.reduce_mean(tf.square(style[name] - vgg[name]))
style_loss /= len(style_layers) # each style layer has equal weight
# Total variation loss (encourages adjacent pixels to be similar color)
tv_loss = tf.reduce_mean(((img[:,1:,:-1,:] - img[:,:-1,:-1,:])**2 + (img[:,:-1,1:,:] - img[:,:-1,:-1,:])**2)**1.25)
loss = 5*content_loss + 25*style_loss + 1*tv_loss
"""
Backprop
"""
print("Stylising")
# Backpropagate error to img using scipy's L-BFGS optimizer
train_step =tf.contrib.opt.ScipyOptimizerInterface(loss,
var_list=[img],
options={"maxiter":200, "disp" : True})
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
train_step.minimize(sess,
fetches=[loss],
loss_callback=lambda x : print("loss: %f"%x))
# img is TF Tensor with shape [N,H,W,C]. Discard the first dimension
stylised_img = img.eval()[0]
# Clip the values to 0.0-1.0
stylised_img = np.clip(stylised_img, 0, 1)
# Show img
skimage.io.imshow(stylised_img)