
Commit 9a895a1

Gal Leibovich (galnov) authored and committed

bug-fix for l2_regularization not in use (#230)

* bug-fix for l2_regularization not in use
* removing the unused TF REGULARIZATION_LOSSES collection

1 parent: 10220be

File tree

5 files changed: +21 −22 lines

rl_coach/architectures/tensorflow_components/architecture.py

Lines changed: 5 additions & 12 deletions

@@ -102,10 +102,7 @@ def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition,
         self.global_step = tf.train.get_or_create_global_step()
 
         # build the network
-        self.get_model()
-
-        # model weights
-        self.weights = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.full_name)
+        self.weights = self.get_model()
 
         # create the placeholder for the assigning gradients and some tensorboard summaries for the weights
         for idx, var in enumerate(self.weights):
@@ -125,12 +122,6 @@ def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition,
         # gradients ops
         self._create_gradient_ops()
 
-        # L2 regularization
-        if self.network_parameters.l2_regularization != 0:
-            self.l2_regularization = [tf.add_n([tf.nn.l2_loss(v) for v in self.weights])
-                                      * self.network_parameters.l2_regularization]
-            tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, self.l2_regularization)
-
         self.inc_step = self.global_step.assign_add(1)
 
         # reset LSTM hidden cells
@@ -150,11 +141,13 @@ def __init__(self, agent_parameters: AgentParameters, spaces: SpacesDefinition,
         # set the fetches for training
         self._set_initial_fetch_list()
 
-    def get_model(self) -> None:
+    def get_model(self) -> List:
         """
         Constructs the model using `network_parameters` and sets `input_embedders`, `middleware`,
         `output_heads`, `outputs`, `losses`, `total_loss`, `adaptive_learning_rate_scheme`,
-        `current_learning_rate`, and `optimizer`
+        `current_learning_rate`, and `optimizer`.
+
+        :return: A list of the model's weights
         """
         raise NotImplementedError
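
After this change, get_model() is responsible both for building the graph and for handing back the trainable variables it created, so the base Architecture class no longer re-collects them itself. A minimal TF 1.x sketch of the pattern, with illustrative layer and scope names rather than Coach's actual graph:

    import tensorflow as tf

    def get_model(scope_name):
        # hedged sketch: build a toy network under the given variable scope
        with tf.variable_scope(scope_name):
            observation = tf.placeholder(tf.float32, [None, 4], name='observation')
            hidden = tf.layers.dense(observation, 16, activation=tf.nn.relu, name='fc1')
            tf.layers.dense(hidden, 2, name='fc2')
        # collect the trainable variables created under this scope, once,
        # inside the same call that built them
        return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope_name)

    weights = get_model('online_network')  # the caller receives the weight list directly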

rl_coach/architectures/tensorflow_components/general_network.py

Lines changed: 13 additions & 2 deletions

@@ -222,7 +222,7 @@ def get_output_head(self, head_params: HeadParameters, head_idx: int):
             'agent_parameters': self.ap, 'spaces': self.spaces, 'network_name': self.network_wrapper_name,
             'head_idx': head_idx, 'is_local': self.network_is_local})
 
-    def get_model(self):
+    def get_model(self) -> List:
         # validate the configuration
         if len(self.network_parameters.input_embedders_parameters) == 0:
             raise ValueError("At least one input type should be defined")
@@ -338,9 +338,18 @@ def get_model(self):
 
             head_count += 1
 
+        # model weights
+        self.weights = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.full_name)
+
         # Losses
         self.losses = tf.losses.get_losses(self.full_name)
-        self.losses += tf.losses.get_regularization_losses(self.full_name)
+
+        # L2 regularization
+        if self.network_parameters.l2_regularization != 0:
+            self.l2_regularization = tf.add_n([tf.nn.l2_loss(v) for v in self.weights]) \
+                                     * self.network_parameters.l2_regularization
+            self.losses += self.l2_regularization
+
         self.total_loss = tf.reduce_sum(self.losses)
         # tf.summary.scalar('total_loss', self.total_loss)
@@ -386,6 +395,8 @@ def get_model(self):
         else:
             raise Exception("{} is not a valid optimizer type".format(self.network_parameters.optimizer_type))
 
+        return self.weights
+
     def __str__(self):
         result = []
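
This hunk is the core of the fix: the L2 term is now computed directly from the collected weights and folded into the losses that feed total_loss, instead of being parked in the REGULARIZATION_LOSSES collection, which nothing downstream read. A hedged TF 1.x sketch of the arithmetic, with made-up shapes and coefficient:

    import tensorflow as tf

    l2_coefficient = 1e-4  # illustrative; Coach reads network_parameters.l2_regularization

    with tf.variable_scope('net'):
        x = tf.placeholder(tf.float32, [None, 4])
        y = tf.placeholder(tf.float32, [None, 1])
        prediction = tf.layers.dense(x, 1)

    weights = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='net')

    # tf.nn.l2_loss(v) is sum(v ** 2) / 2; tf.add_n sums the per-variable penalties
    l2_penalty = tf.add_n([tf.nn.l2_loss(v) for v in weights]) * l2_coefficient

    # adding the penalty to the loss that is actually minimized is what makes
    # the regularizer take effect
    total_loss = tf.losses.mean_squared_error(y, prediction) + l2_penalty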

rl_coach/architectures/tensorflow_components/heads/acer_policy_head.py

Lines changed: 0 additions & 1 deletion

@@ -56,7 +56,6 @@ def _build_module(self, input_layer):
         if self.beta:
             self.entropy = tf.reduce_mean(self.policy_distribution.entropy())
             self.regularizations += [-tf.multiply(self.beta, self.entropy, name='entropy_regularization')]
-            tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, self.regularizations)
 
         # Truncated importance sampling with bias corrections
         importance_sampling_weight = tf.placeholder(tf.float32, [None, self.num_actions],

rl_coach/architectures/tensorflow_components/heads/policy_head.py

Lines changed: 0 additions & 2 deletions

@@ -78,8 +78,6 @@ def _build_module(self, input_layer):
             self.entropy = tf.add_n([tf.reduce_mean(dist.entropy()) for dist in self.policy_distributions])
             self.regularizations += [-tf.multiply(self.beta, self.entropy, name='entropy_regularization')]
 
-            tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, self.regularizations)
-
         # calculate loss
         self.action_log_probs_wrt_policy = \
             tf.add_n([dist.log_prob(action) for dist, action in zip(self.policy_distributions, self.actions)])
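
Both head diffs above delete add_to_collection calls outright rather than replacing them: entries in tf.GraphKeys.REGULARIZATION_LOSSES only affect training if some code reads the collection back into a loss, and the matching tf.losses.get_regularization_losses() call was removed from general_network.py in this same commit. A small sketch of why the registrations were dead code (tensor names are illustrative):

    import tensorflow as tf

    term = tf.constant(0.5, name='entropy_regularization_term')
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, term)

    # the collection is only a registry; its contents reach the optimizer solely
    # through an explicit read such as:
    collected = tf.losses.get_regularization_losses()
    # since no remaining Coach code adds `collected` to total_loss, registering
    # the term had no effect on training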

rl_coach/architectures/tensorflow_components/heads/ppo_head.py

Lines changed: 3 additions & 5 deletions

@@ -68,9 +68,8 @@ def _build_module(self, input_layer):
         if self.use_kl_regularization:
             # no clipping => use kl regularization
             self.weighted_kl_divergence = tf.multiply(self.kl_coefficient, self.kl_divergence)
-            self.regularizations = self.weighted_kl_divergence + self.high_kl_penalty_coefficient * \
-                                   tf.square(tf.maximum(0.0, self.kl_divergence - self.kl_cutoff))
-            tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, self.regularizations)
+            self.regularizations += [self.weighted_kl_divergence + self.high_kl_penalty_coefficient * \
+                                     tf.square(tf.maximum(0.0, self.kl_divergence - self.kl_cutoff))]
 
         # calculate surrogate loss
         self.advantages = tf.placeholder(tf.float32, [None], name="advantages")
@@ -93,8 +92,7 @@ def _build_module(self, input_layer):
         # add entropy regularization
         if self.beta:
             self.entropy = tf.reduce_mean(self.policy_distribution.entropy())
-            self.regularizations = -tf.multiply(self.beta, self.entropy, name='entropy_regularization')
-            tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, self.regularizations)
+            self.regularizations += [-tf.multiply(self.beta, self.entropy, name='entropy_regularization')]
 
         self.loss = self.surrogate_loss
         tf.losses.add_loss(self.loss)
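
Both hunks switch from assigning self.regularizations to appending to it. Under the old code, enabling KL regularization together with the entropy bonus meant the second assignment silently discarded the first, and the surviving term then went into the unused collection anyway. A hedged sketch of the corrected accumulation, with made-up coefficient values:

    import tensorflow as tf

    regularizations = []  # the head accumulates its penalty terms in this list

    kl_divergence = tf.constant(0.3)    # illustrative constants; the real head
    kl_coefficient = tf.constant(1.0)   # derives these from the policy outputs
    high_kl_penalty_coefficient = tf.constant(1000.0)
    kl_cutoff = tf.constant(0.5)
    beta, entropy = 0.01, tf.constant(1.2)

    # KL penalty, appended rather than assigned
    regularizations += [kl_coefficient * kl_divergence +
                        high_kl_penalty_coefficient *
                        tf.square(tf.maximum(0.0, kl_divergence - kl_cutoff))]

    # with +=, the entropy bonus no longer overwrites the KL penalty
    regularizations += [-tf.multiply(beta, entropy, name='entropy_regularization')]

    loss_with_regularization = tf.add_n(regularizations)  # both terms contribute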
