@@ -212,12 +212,10 @@ def sample_td3_params(trial: optuna.Trial) -> Dict[str, Any]:
212
212
episodic = trial .suggest_categorical ("episodic" , [True , False ])
213
213
214
214
if episodic :
215
- n_episodes_rollout = 1
216
- train_freq , gradient_steps = - 1 , - 1
215
+ train_freq , gradient_steps = (1 , "episode" ), - 1
217
216
else :
218
217
train_freq = trial .suggest_categorical ("train_freq" , [1 , 16 , 128 , 256 , 1000 , 2000 ])
219
218
gradient_steps = train_freq
220
- n_episodes_rollout = - 1
221
219
222
220
noise_type = trial .suggest_categorical ("noise_type" , ["ornstein-uhlenbeck" , "normal" , None ])
223
221
noise_std = trial .suggest_uniform ("noise_std" , 0 , 1 )
@@ -241,7 +239,6 @@ def sample_td3_params(trial: optuna.Trial) -> Dict[str, Any]:
241
239
"buffer_size" : buffer_size ,
242
240
"train_freq" : train_freq ,
243
241
"gradient_steps" : gradient_steps ,
244
- "n_episodes_rollout" : n_episodes_rollout ,
245
242
"policy_kwargs" : dict (net_arch = net_arch ),
246
243
}
247
244
@@ -274,12 +271,10 @@ def sample_ddpg_params(trial: optuna.Trial) -> Dict[str, Any]:
274
271
episodic = trial .suggest_categorical ("episodic" , [True , False ])
275
272
276
273
if episodic :
277
- n_episodes_rollout = 1
278
- train_freq , gradient_steps = - 1 , - 1
274
+ train_freq , gradient_steps = (1 , "episode" ), - 1
279
275
else :
280
276
train_freq = trial .suggest_categorical ("train_freq" , [1 , 16 , 128 , 256 , 1000 , 2000 ])
281
277
gradient_steps = train_freq
282
- n_episodes_rollout = - 1
283
278
284
279
noise_type = trial .suggest_categorical ("noise_type" , ["ornstein-uhlenbeck" , "normal" , None ])
285
280
noise_std = trial .suggest_uniform ("noise_std" , 0 , 1 )
@@ -302,7 +297,6 @@ def sample_ddpg_params(trial: optuna.Trial) -> Dict[str, Any]:
302
297
"buffer_size" : buffer_size ,
303
298
"train_freq" : train_freq ,
304
299
"gradient_steps" : gradient_steps ,
305
- "n_episodes_rollout" : n_episodes_rollout ,
306
300
"policy_kwargs" : dict (net_arch = net_arch ),
307
301
}
308
302
@@ -337,7 +331,6 @@ def sample_dqn_params(trial: optuna.Trial) -> Dict[str, Any]:
337
331
train_freq = trial .suggest_categorical ("train_freq" , [1 , 4 , 8 , 16 , 128 , 256 , 1000 ])
338
332
subsample_steps = trial .suggest_categorical ("subsample_steps" , [1 , 2 , 4 , 8 ])
339
333
gradient_steps = max (train_freq // subsample_steps , 1 )
340
- n_episodes_rollout = - 1
341
334
342
335
net_arch = trial .suggest_categorical ("net_arch" , ["tiny" , "small" , "medium" ])
343
336
@@ -350,7 +343,6 @@ def sample_dqn_params(trial: optuna.Trial) -> Dict[str, Any]:
350
343
"buffer_size" : buffer_size ,
351
344
"train_freq" : train_freq ,
352
345
"gradient_steps" : gradient_steps ,
353
- "n_episodes_rollout" : n_episodes_rollout ,
354
346
"exploration_fraction" : exploration_fraction ,
355
347
"exploration_final_eps" : exploration_final_eps ,
356
348
"target_update_interval" : target_update_interval ,
0 commit comments