@@ -16,14 +16,12 @@ def __init__(self, hidden_size_1, hidden_size_2, batch_size, num_classes, learni
        self.num_layers = num_layers

        # Set up placeholders for input and output
-       print "params:", batch_size, hidden_size_1, hidden_size_2, self.num_classes
-       self.inpt = tf.placeholder(dtype=tf.float32, shape=[batch_size, None, None, 3 + self.num_classes])
-       print "**** input", self.inpt.get_shape()
-       self.output = tf.placeholder(tf.int32, [1, 1])
+       self.inpt = tf.placeholder(dtype=tf.float32, shape=[batch_size, None, None, 3 + self.num_classes])
+       self.output = tf.placeholder(tf.int32, [batch_size, None, None])

        # Set up variable weights for model. These are shared across recurrent layers

-       W_conv1 = tf.Variable(tf.truncated_normal([8, 8, 3 + self.num_classes, self.hidden_size_1], stddev=0.1))
+       self.W_conv1 = tf.Variable(tf.truncated_normal([8, 8, 3 + self.num_classes, self.hidden_size_1], stddev=0.1))
        b_conv1 = tf.Variable(tf.constant(0.1, shape=[self.hidden_size_1]))

        W_conv2 = tf.Variable(tf.truncated_normal([8, 8, self.hidden_size_1, self.hidden_size_2], stddev=0.1))
@@ -35,45 +33,37 @@ def __init__(self, hidden_size_1, hidden_size_2, batch_size, num_classes, learni
        self.logits = []
        self.errors = []
        current_input = self.inpt
+       current_output = self.output
        for i in range(self.num_layers):
-           h_conv1 = tf.nn.conv2d(current_input, W_conv1, strides=[1, 1, 1, 1], padding='SAME') + b_conv1
-           h_pool1 = tf.nn.max_pool(h_conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
+           # scale output down by a stride of 2, to match the convolution output
+           current_output = tf.strided_slice(current_output, [0, 0, 0], [0, 0, 0], strides=[1, 2, 2], end_mask=7)

+           # convolution steps
+           h_conv1 = tf.nn.conv2d(current_input, self.W_conv1, strides=[1, 1, 1, 1], padding='SAME') + b_conv1
+           h_pool1 = tf.nn.max_pool(h_conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

            tanh = tf.tanh(h_pool1)
-           print "**** tanh", tanh.get_shape()
-
            h_conv2 = tf.nn.conv2d(tanh, W_conv2, strides=[1, 1, 1, 1], padding='SAME') + b_conv2
-           print "&&&& h_conv2", h_conv2.get_shape()
-
            h_conv3 = tf.nn.conv2d(h_conv2, W_conv3, strides=[1, 1, 1, 1], padding='SAME') + b_conv3
-           print "&&&& h_conv3", h_conv3.get_shape()
-
-           # # figure out the frickin logits reshaping
-           # # h_conv3 shape is [batch_size x width x height x num_categories]
-           # conv3_shape = tf.shape(h_conv3)
-           # conv3_height = conv3_shape[1]
-           # conv3_width = conv3_shape[2]
-           #
-           # # TODO don't hardcode this slice
-           # center_pixel = tf.slice(h_conv3, begin=[0, conv3_height / 2, conv3_width / 2, 0],
-           #                         size=[1, 1, 1, self.num_classes])
-
            current_logits = h_conv3
-           logits_shape = tf.shape(current_logits)
-           center_logit = tf.slice(current_logits, begin=[0, logits_shape[1] / 2, logits_shape[2] / 2, 0],
-                                   size=[-1, 1, 1, -1])
-           center_logit = tf.reshape(center_logit, shape=[1, 1, num_classes])
-           current_error = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(center_logit, self.output))
+
+           # TensorFlow 0.11 doesn't have a multidimensional softmax, so we need to get predictions manually :-(
+           # (predictions are what's passed to the next iteration/layer of the CNN)
+           exp_logits = tf.exp(current_logits)
+           predictions = exp_logits / tf.reduce_sum(exp_logits, reduction_indices=[3], keep_dims=True)
+
+           cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(current_logits, current_output)
+           error_for_all_pixels = tf.reduce_mean(cross_entropy, reduction_indices=[0])
+           error_for_image = tf.reduce_mean(error_for_all_pixels)

            self.logits.append(current_logits)
-           self.errors.append(current_error)
+           self.errors.append(error_for_image)

            # extracts RGB channels from input image. Only keeps every other pixel, since convolution scales down the
            # output. The shape of this should have the same height and width as the logits.
            rgb = tf.strided_slice(current_input, [0, 0, 0, 0], [0, 0, 0, 3], strides=[1, 2, 2, 1], end_mask=7)
-           current_input = tf.concat(concat_dim=3, values=[rgb, current_logits])
-           print "Current Input Shape: ", current_input.get_shape()
+           current_input = tf.concat(concat_dim=3, values=[rgb, predictions])

-       self.train_step = tf.train.AdamOptimizer(learning_rate).minimize(tf.add_n(self.errors))
+       self.loss = tf.add_n(self.errors)
+       self.train_step = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)

def save_model(sess, path, saver=None):
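A note on the manual softmax in this loop: exp_logits / reduce_sum(exp_logits, ...) is the textbook softmax over the class axis, but raw tf.exp overflows float32 once a logit passes roughly 88, which turns predictions into inf/NaN. Subtracting each pixel's max logit before exponentiating is mathematically equivalent and stays in range. A minimal NumPy sketch of that stable variant, illustrative only and not the committed TensorFlow code (channel_softmax is a hypothetical helper name):

import numpy as np

def channel_softmax(logits):
    # softmax over the trailing class axis of a [batch, height, width, classes] array;
    # subtracting the per-pixel max leaves the result unchanged but keeps exp() finite
    shifted = logits - logits.max(axis=3, keepdims=True)
    exp_logits = np.exp(shifted)
    return exp_logits / exp_logits.sum(axis=3, keepdims=True)

logits = np.random.randn(1, 4, 4, 5).astype(np.float32)
probs = channel_softmax(logits)
assert np.allclose(probs.sum(axis=3), 1.0)  # each pixel's class distribution sums to 1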
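And a note on the two strided_slice calls, since end_mask=7 is easy to misread: 7 is binary 111, which tells TensorFlow to ignore all three end indices and run to the end of dimensions 0, 1, and 2. With strides of 2 on height and width, the label-map slice therefore just keeps every other pixel. A tiny NumPy equivalent, assuming a [batch, height, width] label map:

import numpy as np

labels = np.arange(2 * 6 * 6).reshape(2, 6, 6)  # [batch, height, width] label map
downsampled = labels[::1, ::2, ::2]             # every other row and column, all batches
print(downsampled.shape)                        # (2, 3, 3): half the height and width

The rgb slice works the same way on the 4-D input: end_mask=7 masks the batch, height, and width ends, so the explicit end of 3 only limits the channel axis, keeping just the RGB channels. Because RGB is downsampled with the same stride as the logits, the two stay aligned, and tf.concat can rebuild a (3 + num_classes)-channel input for the next recurrent layer; this matches W_conv1's input depth of 3 + self.num_classes.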