Skip to content

Commit 2e5a141

Browse files
committed
refactored model to work on whole image, refactored dataset loading
1 parent b2084b7 commit 2e5a141

8 files changed

+155
-90
lines changed

eval.py

+11-6
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,14 @@ def test_model(sess, model, images, labels, patch_size, output_dir=None, categor
2626
image = image_to_np_array(image_f, float_cols=True)
2727
labels = labels_to_np_array(label_f)
2828
h, w, _ = image.shape
29+
image = image[:h//2, :w//2, :]
30+
h, w, _ = image.shape
31+
labels = labels[:h, :w]
2932
predicted_labels = np.zeros([h, w], dtype=np.uint8)
3033
pixels_correct = 0
3134
error_for_image = 0
3235
i = 0
36+
3337
for y in range(patch_size, h - patch_size):
3438
# # for debug, only do first 10K
3539
# if i > 1e4:
@@ -39,12 +43,12 @@ def test_model(sess, model, images, labels, patch_size, output_dir=None, categor
3943
i += 1
4044
input_image = get_patch(image, (y, x), patch_size)
4145
input_image = np.append(input_image,
42-
np.zeros(shape=[patch_size, patch_size, 1], dtype=np.float32),
46+
np.zeros(shape=[patch_size, patch_size, model.num_classes], dtype=np.float32),
4347
axis=2)
4448
input_label = labels[y, x]
45-
feed_dict = {model.inpt: [input_image], model.output: [[input_label]]}
49+
feed_dict = {model.inpt: [input_image], model.output: input_label}
4650

47-
error, logits = sess.run([model.error, model.logits], feed_dict=feed_dict)
51+
error, logits = sess.run([model.errors[1], model.logits[1]], feed_dict=feed_dict)
4852
error_for_image += error
4953
output_label = np.argmax(logits)
5054
if output_label == input_label:
@@ -54,8 +58,8 @@ def test_model(sess, model, images, labels, patch_size, output_dir=None, categor
5458
if i % 1000 == 0:
5559
print "%d/%d pixels done..." % (i, (h - 2 * patch_size) * (w - 2 * patch_size))
5660

57-
print "Tested on image %s: Accuracy is %.2f%%, error per pixel is %f." % (
58-
image_f, (100.0 * pixels_correct) / i, error_for_image / i)
61+
# print "Tested on image %s: Accuracy is %.2f%%, error per pixel is %f." % (
62+
# image_f, (100.0 * pixels_correct) / i, error_for_image / i)
5963
if output_dir is not None:
6064
if category_colors is None:
6165
raise ValueError("Color index not provided, can't output images.")
@@ -74,6 +78,7 @@ def main():
7478
parser.add_argument('--labels', type=str, nargs='+', help='Filename of test labels')
7579
parser.add_argument('--output_dir', type=str, default=None,
7680
help='Directory to store model output. By default no output is generated.')
81+
parser.add_argument('--patch_size', type=int, default=67, help='Size of input patches')
7782
args = parser.parse_args()
7883

7984
# load class labels
@@ -85,7 +90,7 @@ def main():
8590
sess = tf.Session()
8691
restore_model(sess, args.model)
8792

88-
test_model(sess, model, args.images, args.labels, patch_size=23, output_dir=args.output_dir,
93+
test_model(sess, model, args.images, args.labels, patch_size=args.patch_size, output_dir=args.output_dir,
8994
category_colors=category_colors)
9095

9196

model.py

+22-32
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,12 @@ def __init__(self, hidden_size_1, hidden_size_2, batch_size, num_classes, learni
1616
self.num_layers = num_layers
1717

1818
# Set up placeholders for input and output
19-
print "params:", batch_size, hidden_size_1, hidden_size_2, self.num_classes
20-
self.inpt = tf.placeholder(dtype=tf.float32, shape=[batch_size, None, None, 3+self.num_classes])
21-
print "**** input", self.inpt.get_shape()
22-
self.output = tf.placeholder(tf.int32, [1, 1])
19+
self.inpt = tf.placeholder(dtype=tf.float32, shape=[batch_size, None, None, 3 + self.num_classes])
20+
self.output = tf.placeholder(tf.int32, [batch_size, None, None])
2321

2422
# Set up variable weights for model. These are shared across recurrent layers
2523

26-
W_conv1 = tf.Variable(tf.truncated_normal([8, 8, 3+self.num_classes, self.hidden_size_1], stddev=0.1))
24+
self.W_conv1 = tf.Variable(tf.truncated_normal([8, 8, 3 + self.num_classes, self.hidden_size_1], stddev=0.1))
2725
b_conv1 = tf.Variable(tf.constant(0.1, shape=[self.hidden_size_1]))
2826

2927
W_conv2 = tf.Variable(tf.truncated_normal([8, 8, self.hidden_size_1, self.hidden_size_2], stddev=0.1))
@@ -35,45 +33,37 @@ def __init__(self, hidden_size_1, hidden_size_2, batch_size, num_classes, learni
3533
self.logits = []
3634
self.errors = []
3735
current_input = self.inpt
36+
current_output = self.output
3837
for i in range(self.num_layers):
39-
h_conv1 = tf.nn.conv2d(current_input, W_conv1, strides=[1, 1, 1, 1], padding='SAME') + b_conv1
40-
h_pool1 = tf.nn.max_pool(h_conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
38+
# scale output down by a stride of 2, to match convolution output
39+
current_output = tf.strided_slice(current_output, [0, 0, 0], [0, 0, 0], strides=[1, 2, 2], end_mask=7)
4140

41+
# convolution steps
42+
h_conv1 = tf.nn.conv2d(current_input, self.W_conv1, strides=[1, 1, 1, 1], padding='SAME') + b_conv1
43+
h_pool1 = tf.nn.max_pool(h_conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
4244
tanh = tf.tanh(h_pool1)
43-
print "**** tanh", tanh.get_shape()
44-
4545
h_conv2 = tf.nn.conv2d(tanh, W_conv2, strides=[1, 1, 1, 1], padding='SAME') + b_conv2
46-
print "&&&& h_conv2", h_conv2.get_shape()
47-
4846
h_conv3 = tf.nn.conv2d(h_conv2, W_conv3, strides=[1, 1, 1, 1], padding='SAME') + b_conv3
49-
print "&&&& h_conv3", h_conv3.get_shape()
50-
51-
# # figure out the frickin logits reshaping
52-
# # h_conv3 shape is [batch_size x width x height x num_categories]
53-
# conv3_shape = tf.shape(h_conv3)
54-
# conv3_height = conv3_shape[1]
55-
# conv3_width = conv3_shape[2]
56-
#
57-
# # TODO don't hardcode this slice
58-
# center_pixel = tf.slice(h_conv3, begin=[0, conv3_height / 2, conv3_width / 2, 0],
59-
# size=[1, 1, 1, self.num_classes])
60-
6147
current_logits = h_conv3
62-
logits_shape = tf.shape(current_logits)
63-
center_logit = tf.slice(current_logits, begin=[0, logits_shape[1] / 2, logits_shape[2] / 2, 0],
64-
size=[-1, 1, 1, -1])
65-
center_logit = tf.reshape(center_logit, shape=[1, 1, num_classes])
66-
current_error = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(center_logit, self.output))
48+
49+
# tensorflow 11 doesn't have multidimensional softmax, we need to get predictions manually :-(
50+
# (predictions are what's passed to the next iteration/layer of the CNN)
51+
exp_logits = tf.exp(current_logits)
52+
predictions = exp_logits / tf.reduce_sum(exp_logits, reduction_indices=[3], keep_dims=True)
53+
54+
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(current_logits, current_output)
55+
error_for_all_pixel = tf.reduce_mean(cross_entropy, reduction_indices=[0])
56+
error_for_image = tf.reduce_mean(error_for_all_pixel)
6757
self.logits.append(current_logits)
68-
self.errors.append(current_error)
58+
self.errors.append(error_for_image)
6959

7060
# extracts RGB channels from input image. Only keeps every other pixel, since convolution scales down the
7161
# output. The shape of this should have the same height and width as the logits.
7262
rgb = tf.strided_slice(current_input, [0, 0, 0, 0], [0, 0, 0, 3], strides=[1, 2, 2, 1], end_mask=7)
73-
current_input = tf.concat(concat_dim=3, values=[rgb, current_logits])
74-
print "Current Input Shape: ", current_input.get_shape()
63+
current_input = tf.concat(concat_dim=3, values=[rgb, predictions])
7564

76-
self.train_step = tf.train.AdamOptimizer(learning_rate).minimize(tf.add_n(self.errors))
65+
self.loss = tf.add_n(self.errors)
66+
self.train_step = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)
7767

7868

7969
def save_model(sess, path, saver=None):

preprocessing.py

+98-9
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66

77
import numpy as np
88
from PIL import Image
9+
import os
10+
11+
from os.path import isfile
912

1013

1114
def read_object_classes(classes_map_filename):
@@ -19,6 +22,7 @@ def read_object_classes(classes_map_filename):
1922
3. an array of ID -> category name
2023
2. a dictionary of category name -> ID
2124
"""
25+
# TODO handle different potential formats better
2226
format_description = "Each line should contain 5 elements: (float R, float G, float B, int ID, str Name)."
2327
ids = set()
2428
ids_to_cols = {}
@@ -30,9 +34,17 @@ def read_object_classes(classes_map_filename):
3034
vals = line.split()
3135
if len(vals) == 0:
3236
continue
33-
rgb = tuple([int(255 * float(s)) for s in vals[:3]])
34-
category_num = int(vals[3])
35-
category_name = vals[4]
37+
elif len(vals) == 2:
38+
has_cols = False
39+
category_num = int(vals[0])
40+
category_name = vals[1]
41+
elif len(vals) == 5:
42+
has_cols = True
43+
rgb = tuple([int(255 * float(s)) for s in vals[:3]])
44+
category_num = int(vals[3])
45+
category_name = vals[4]
46+
else:
47+
raise ValueError("Category map must have either 2 or 5 columns")
3648

3749
# check for duplicate categories
3850
if category_num in ids:
@@ -45,7 +57,8 @@ def read_object_classes(classes_map_filename):
4557
ids.add(category_num)
4658
ids_to_names[category_num] = category_name
4759
names_to_ids[category_name] = category_num
48-
ids_to_cols[category_num] = rgb
60+
if has_cols:
61+
ids_to_cols[category_num] = rgb
4962

5063
except (ValueError, IndexError) as e:
5164
sys.stderr.write("%s %s\n" % (format_description, e))
@@ -56,7 +69,8 @@ def read_object_classes(classes_map_filename):
5669
category_names = [None] * (max_id + 1)
5770
for cat_id in ids:
5871
category_names[cat_id] = ids_to_names[cat_id]
59-
category_colors[cat_id] = ids_to_cols[cat_id]
72+
if has_cols:
73+
category_colors[cat_id] = ids_to_cols[cat_id]
6074

6175
return category_colors, category_names, names_to_ids
6276

@@ -92,6 +106,13 @@ def labels_to_np_array(lab_filename):
92106
return data
93107

94108

109+
def text_labels_to_np_array(lab_filename):
    """
    Reads a text file of whitespace-separated integer labels into a numpy array.

    Every non-blank line of the file becomes one row of the result. Negative
    ("unknown") labels are clamped to 0.

    :param lab_filename: path of the text label file to read
    :return: a numpy array of dtype int8 with one row per non-blank line
    """
    # TODO right now we're just ignoring negative ("unknown") labels. Need a nicer way to do this in long term
    # The `with` block guarantees the file handle is closed, even if a value fails to parse.
    with open(lab_filename, 'r') as label_file:
        # Nested list comprehensions (rather than map) give real lists on both Python 2 and 3;
        # blank lines are skipped so a trailing newline cannot produce a ragged row.
        labels = [[max(0, int(n)) for n in line.split()]
                  for line in label_file if line.strip()]
    return np.array(labels, dtype=np.int8)
114+
115+
95116
def save_labels_array(labels, output_filename, colors):
96117
"""
97118
Saves a numpy array of labels to an paletted image.
@@ -120,9 +141,77 @@ def get_patch(array, center, patch_size):
120141
"""
121142
rounded_width = patch_size // 2
122143
return array[center[0] - rounded_width: center[0] + rounded_width + 1,
123-
center[1] - rounded_width: center[1] + rounded_width + 1]
124-
125-
if __name__ == '__main__':
144+
center[1] - rounded_width: center[1] + rounded_width + 1]
145+
146+
147+
def from_games_dataset(data_dir, train_fraction=None, num_train=None):
    """
    Generator over the (image, labels) pairs of a dataset laid out as
    `<data_dir>/images` and `<data_dir>/labels`.

    Files in both directories are sorted by path and paired positionally; hidden
    files (names starting with '.') and non-files are skipped. If the base names
    of a pair differ, a warning is printed but the pair is still yielded.

    :param data_dir: directory containing the 'labels' and 'images' subdirectories
    :param train_fraction: if given (and num_train is not), only the first
        int(number_of_pairs * train_fraction) pairs are used
    :param num_train: if given, only the first num_train pairs are used; takes
        precedence over train_fraction
    :return: yields (image, labels) tuples as produced by image_to_np_array and
        labels_to_np_array
    """
    labels_dir = os.path.join(data_dir, 'labels')
    images_dir = os.path.join(data_dir, 'images')

    # TODO get only image files
    label_files = sorted(os.path.join(labels_dir, f) for f in os.listdir(labels_dir)
                         if isfile(os.path.join(labels_dir, f)) and not f.startswith('.'))
    image_files = sorted(os.path.join(images_dir, f) for f in os.listdir(images_dir)
                         if isfile(os.path.join(images_dir, f)) and not f.startswith('.'))
    # Materialise the pairs: on Python 3 zip() is a lazy iterator that supports
    # neither len() nor slicing, both of which are needed below.
    train_files = list(zip(label_files, image_files))

    # if specified, only choose subset of training data
    if train_fraction is not None and num_train is None:
        num_train = int(len(train_files) * train_fraction)
    if num_train is not None:
        train_files = train_files[:num_train]

    for label_f, image_f in train_files:
        # Single-argument print(...) produces the same output on Python 2 and 3.
        print("Current image: %s" % os.path.basename(image_f))
        if os.path.basename(label_f) != os.path.basename(image_f):
            print("UNEQUAL IMAGE NAMES!")
        image = image_to_np_array(image_f)
        label_array = labels_to_np_array(label_f)
        yield image, label_array
173+
174+
175+
# TODO negative label nums could mess up paletted output
176+
def stanford_bgrounds_dataset(data_dir, train_fraction=None, num_train=None):
    """
    Generator over the (image, labels) pairs of a dataset laid out as
    `<data_dir>/images` and `<data_dir>/labels`, where labels are text files
    whose names end in '.regions.txt'.

    Files in both directories are sorted by path and paired positionally; hidden
    files (names starting with '.') and non-files are skipped. If the parts of
    the two base names before the first '.' differ, a warning is printed but the
    pair is still yielded.

    :param data_dir: directory containing the 'labels' and 'images' subdirectories
    :param train_fraction: if given (and num_train is not), only the first
        int(number_of_pairs * train_fraction) pairs are used
    :param num_train: if given, only the first num_train pairs are used; takes
        precedence over train_fraction
    :return: yields (image, labels) tuples as produced by image_to_np_array and
        text_labels_to_np_array
    """
    labels_dir = os.path.join(data_dir, 'labels')
    images_dir = os.path.join(data_dir, 'images')

    # TODO get only image files
    label_files = sorted(os.path.join(labels_dir, f) for f in os.listdir(labels_dir)
                         if isfile(os.path.join(labels_dir, f))
                         and not f.startswith('.') and f.endswith('.regions.txt'))
    image_files = sorted(os.path.join(images_dir, f) for f in os.listdir(images_dir)
                         if isfile(os.path.join(images_dir, f)) and not f.startswith('.'))
    # Materialise the pairs: on Python 3 zip() is a lazy iterator that supports
    # neither len() nor slicing, both of which are needed below.
    train_files = list(zip(label_files, image_files))

    # if specified, only choose subset of training data
    if train_fraction is not None and num_train is None:
        num_train = int(len(train_files) * train_fraction)
    if num_train is not None:
        train_files = train_files[:num_train]

    for label_f, image_f in train_files:
        # Compare only the stem before the first '.', since label and image extensions differ.
        if os.path.basename(label_f).split('.')[0] != os.path.basename(image_f).split('.')[0]:
            # Single-argument print(...) produces the same output on Python 2 and 3.
            print("UNEQUAL IMAGE NAMES! %s %s" % (label_f, image_f))
        image = image_to_np_array(image_f)
        label_array = text_labels_to_np_array(label_f)
        yield image, label_array
201+
202+
203+
# list of datasets for which we have iterators
204+
FROM_GAMES = 'from-games'
205+
SIFT_FLOW = 'sift-flow'
206+
STANFORD_BGROUND = 'stanford-bground'
207+
# Maps each dataset name to the generator function that iterates over it.
# SIFT_FLOW maps to None: no iterator function is registered for it here.
DATASETS = {FROM_GAMES: from_games_dataset, SIFT_FLOW: None, STANFORD_BGROUND: stanford_bgrounds_dataset}
208+
209+
210+
def main():
    """
    Command-line entry point: reads a label image and saves it as a paletted image.

    Usage: preprocessing.py <colors_map> <infile> <outfile>

    Exits with status 2 and a usage message on stderr when the number of
    arguments is wrong.
    """
    args = sys.argv[1:]
    if len(args) != 3:
        # Fail with a readable usage line rather than an opaque tuple-unpacking ValueError.
        sys.stderr.write("Usage: %s <colors_map> <infile> <outfile>\n" % sys.argv[0])
        sys.exit(2)
    colors_map, infile, outfile = args

    # NOTE(review): `colors` is presumably the per-category color table (first value
    # returned by read_object_classes), not the path of the map file -- confirm
    # against save_labels_array.
    category_colors, _, _ = read_object_classes(colors_map)
    labels = labels_to_np_array(infile)
    save_labels_array(labels, output_filename=outfile, colors=category_colors)


if __name__ == '__main__':
    main()

test/00001_test.png

2.18 KB
Loading
Loading

test/images/img_00001.png

1.76 MB
Loading

test/labels/lab_00001.png

78.8 KB
Loading

0 commit comments

Comments
 (0)