Describe the bug

Training crashes at the start of the first epoch: the `LearningRateScheduler` callback requires the optimizer to expose an `lr` attribute, but the `GradientAccumulateOptimizer` wrapper does not forward it from the wrapped Adam optimizer (see Additional context below).

```
Traceback (most recent call last):
  File "train_moat_tfrecord.py", line 424, in <module>
    history = model.fit(
  File "/home/c/anaconda3/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/home/c/anaconda3/lib/python3.8/site-packages/keras/callbacks.py", line 2240, in on_epoch_begin
    raise ValueError('Optimizer must have a "lr" attribute.')
ValueError: Optimizer must have a "lr" attribute.
```
To Reproduce

code:

```python
# Excerpt from train_moat_tfrecord.py; MoAt, AsymmetricLoss, F1Score,
# CustomCallback, metrics_csv_logger, scheduler, and the config variables
# (image_size, total_labels, ...) are defined or imported elsewhere in the script.
with strategy.scope():
    model = MoAt(
        in_shape=(image_size, image_size, 3),
        out_classes=total_labels,
        definition_name=definition_name,
        window_sides=window_sides,
        input_scaling="inception",
        stochdepth_rate=stochdepth_rate,
    )
    if args.checkpoint is None and args.origin == False:
        save_path = "outputs/%s_%s" % (definition_name, date_time)
    elif args.origin == False and args.checkpoint is not None:
        save_path = f"{checkpoint_dir}"
        """# Before training starts, check whether there is a saved checkpoint; if so, load the weights
        print("Resuming training from checkpoint:", args.checkpoint)
        # Create a checkpoint object
        checkpoint = tf.train.Checkpoint(model=model)
        # Path of the checkpoint file to load
        # checkpoint.restore(args.checkpoint)
        # model = tf.keras.models.load_model(args.checkpoint) """
        model = tf.keras.models.load_model(args.checkpoint, compile=False)  # skip compilation for now
    elif args.origin == True:
        save_path = "outputs/%s_%s" % (definition_name, date_time)
        origin_model = tf.keras.models.load_model("results/reference")
        i = 0
        for layer_original, layer_modified in zip(origin_model.layers[:-2], model.layers[:-2]):
            if layer_original.get_weights():
                i += 1
                layer_modified.set_weights(layer_original.get_weights())
        print("load from reference:", i)

    f1 = F1Score(total_labels, "micro", 0.4)  # scores > 0.4 map to 1, the rest to 0; computed globally
    rec_at_p65 = tf.keras.metrics.RecallAtPrecision(0.65, num_thresholds=1024)  # best recall at precision >= 0.65; threshold picked from 1024 candidates
    loss = AsymmetricLoss(
        reduction=tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE,
        gamma_neg=asl_gamma_neg,
        gamma_pos=asl_gamma_pos,
        clip=asl_clip,
    )
    curr_opt = Adam(
        learning_rate=warmup_learning_rate,
        weight_decay=weight_decay_rate,
    )
    curr_opt.exclude_from_weight_decay(var_names=[
        r".*(gamma|beta|bias|mean|variance|embedding):0$"
    ])
    opt = GradientAccumulateOptimizer(optimizer=curr_opt, accum_steps=6)  # 6*6=36
    model.compile(optimizer=opt, loss=loss, metrics=[f1, rec_at_p65])

    t800 = tf.keras.callbacks.TerminateOnNaN()
    sched = tf.keras.callbacks.LearningRateScheduler(scheduler, verbose=True)
    rmc_loss = tf.keras.callbacks.ModelCheckpoint(
        "%s/variables/best_model/best" % save_path,
        save_best_only=True,
        save_freq="epoch",
        save_weights_only=True,
    )
    # Set up TensorBoard writers
    tensorboard_step_writer = tf.summary.create_file_writer(f"{save_path}/tensorboard_step")
    tensorboard_epoch_writer = tf.summary.create_file_writer(f"{save_path}/tensorboard_epoch")
    """ if args.wandb:
        cb_list = [t800, rmc_loss, sched, WandbCallback(save_model=False), CustomCallback(), metrics_csv_logger]
    else: """
    cb_list = [t800, rmc_loss, sched, CustomCallback(), metrics_csv_logger]
    print("initial_epoch:", initial_epoch)
    history = model.fit(
        training_dataset,
        validation_data=validation_dataset,
        initial_epoch=initial_epoch,
        epochs=total_epochs,
        steps_per_epoch=math.ceil(train_dataset_len / global_batch_size),
        validation_steps=math.ceil(val_dataset_len / global_batch_size),
        callbacks=cb_list,
    )
```
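For triage, the failure should be reproducible without the MoAt model or the TFRecord pipeline. The script below is a minimal sketch, not the original code: the tiny Dense model, random data, identity schedule, and the `gradient_accumulator` import path are my stand-ins, and the report above does not show whether the legacy or the new Adam is used.

```python
import numpy as np
import tensorflow as tf
from gradient_accumulator import GradientAccumulateOptimizer  # assumed import path

# Tiny stand-in model and data; only the optimizer/callback combination matters.
model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
inner = tf.keras.optimizers.Adam(learning_rate=1e-3)
opt = GradientAccumulateOptimizer(optimizer=inner, accum_steps=2)
model.compile(optimizer=opt, loss="mse")

x = np.random.rand(32, 4).astype("float32")
y = np.random.rand(32, 1).astype("float32")

# Identity schedule: the callback only has to read optimizer.lr to hit the check.
sched = tf.keras.callbacks.LearningRateScheduler(lambda epoch, lr: lr)

# Expected crash: ValueError: Optimizer must have a "lr" attribute.
model.fit(x, y, epochs=1, callbacks=[sched])
```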
Expected behavior

model.fit should train normally: LearningRateScheduler should be able to read and update the learning rate of the Adam optimizer wrapped inside GradientAccumulateOptimizer, instead of failing the `hasattr(optimizer, "lr")` check.
Error logs/Screenshots

See the full traceback under "Describe the bug" above.
Desktop (please complete the following information):

- OS: CentOS Linux release 7.8.2003 (Core)
- Python: 3.8.5
- TensorFlow: 2.12.0
Additional context

error source (keras/callbacks.py):

```python
@keras_export("keras.callbacks.LearningRateScheduler")
class LearningRateScheduler(Callback):
    ...

    def on_epoch_begin(self, epoch, logs=None):
        if not hasattr(self.model.optimizer, "lr"):
            raise ValueError('Optimizer must have a "lr" attribute.')
        try:  # new API
            lr = float(backend.get_value(self.model.optimizer.lr))
            lr = self.schedule(epoch, lr)
```
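A possible workaround until the wrapper exposes the attribute itself is a thin subclass that forwards `lr` to the wrapped optimizer. This is only a sketch under an assumption: I am guessing the inner optimizer is reachable as `self.optimizer` (mirroring the `optimizer=` constructor keyword); the real attribute name needs to be checked against the library source.

```python
from gradient_accumulator import GradientAccumulateOptimizer  # assumed import path

class GradientAccumulateOptimizerWithLr(GradientAccumulateOptimizer):
    """Forwards `lr` so LearningRateScheduler's hasattr(optimizer, "lr") check passes."""

    @property
    def lr(self):
        # Assumption: the wrapper stores the wrapped optimizer as `self.optimizer`.
        return self.optimizer.lr

    @lr.setter
    def lr(self, value):
        self.optimizer.lr = value
```

Alternatively, the callback can be sidestepped entirely by giving the inner Adam a `tf.keras.optimizers.schedules.LearningRateSchedule` as its `learning_rate`, at the cost of converting the epoch-based `scheduler` logic into a step-based schedule.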