diff --git a/README.md b/README.md index 9565fe30..ddc6a445 100644 --- a/README.md +++ b/README.md @@ -346,7 +346,7 @@ python scripts/live_inference.py --model model.onnx # Path to the ONNX model file --webcam # Use webcam as input source --classes classes.txt # Path to the classes file with each name on a new row - --video-width 720 # Input size for the model + --inference-size 720 # Input size for the model --provider tensorrt # Execution provider (cpu/cuda/tensorrt) --threshold 0.3 # Detection confidence threshold ``` @@ -385,7 +385,7 @@ python scripts/live_inference.py --model model.onnx # Path to the ONNX model file --video video.mp4 # Path to the input video file --classes classes.txt # Path to the classes file with each name on a new row - --video-width 320 # Input size for the model + --inference-size 320 # Input size for the model (renamed from --video-width) --provider cpu # Execution provider (cpu/cuda/tensorrt) --threshold 0.3 # Detection confidence threshold ``` @@ -422,7 +422,7 @@ The following is a demo of image inference > --onnx model.onnx > --webcam > --class-names classes.txt -> --input-size 320 +> --inference-size 320 > ``` > Under the hood, this automatically pulls the `onnxruntime-gpu` package into the `cuda` environment and uses the GPU for inference! 
> @@ -456,7 +456,7 @@ pixi run -e cpu train-model Run live inference ```bash -pixi run -e cuda live-inference --onnx model.onnx --webcam --provider cuda --class-names classes.txt --input-size 640 +pixi run -e cuda live-inference --onnx model.onnx --webcam --provider cuda --class-names classes.txt --inference-size 640 ``` > [!TIP] @@ -468,7 +468,7 @@ pixi run -e cuda live-inference --onnx model.onnx --webcam --provider cuda --cla > ``` ```bash -pixi run -e cpu live-inference --onnx model.onnx --input video.mp4 --class-names classes.txt --input-size 320 +pixi run -e cpu live-inference --onnx model.onnx --input video.mp4 --class-names classes.txt --inference-size 320 ``` Launch Gradio app diff --git a/nbs/progressive-resising.ipynb b/nbs/progressive-resising.ipynb new file mode 100644 index 00000000..635cbcfe --- /dev/null +++ b/nbs/progressive-resising.ipynb @@ -0,0 +1,5926 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The aim of this notebook is to test the progressive resizing strategy." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train at 128px for 10 epochs" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-03 14:28:42.355\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.model\u001b[0m:\u001b[36mupdate_setting\u001b[0m:\u001b[36m83\u001b[0m - \u001b[1mSetting 'yaml_cfg.DFINETransformer.num_queries' to: 100\u001b[0m\n", + "\u001b[32m2025-04-03 14:28:42.356\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.model\u001b[0m:\u001b[36mupdate_setting\u001b[0m:\u001b[36m83\u001b[0m - \u001b[1mSetting 'yaml_cfg.PostProcessor.num_top_queries' to: 100\u001b[0m\n", + "\u001b[32m2025-04-03 14:28:42.356\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.model\u001b[0m:\u001b[36mupdate_setting\u001b[0m:\u001b[36m83\u001b[0m - \u001b[1mSetting 'yaml_cfg.HGNetv2.pretrained' to: True\u001b[0m\n", + "\u001b[32m2025-04-03 14:28:42.356\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.model\u001b[0m:\u001b[36mupdate_setting\u001b[0m:\u001b[36m83\u001b[0m - \u001b[1mSetting 'yaml_cfg.HGNetv2.freeze_at' to: 0\u001b[0m\n", + "\u001b[32m2025-04-03 14:28:42.402\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_init_process_group\u001b[0m:\u001b[36m60\u001b[0m - \u001b[1mInitializing process group for single-process training\u001b[0m\n", + "\u001b[32m2025-04-03 14:28:42.404\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_init_process_group\u001b[0m:\u001b[36m80\u001b[0m - \u001b[1mProcess group initialized successfully\u001b[0m\n", + "\u001b[32m2025-04-03 14:28:42.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m239\u001b[0m - \u001b[1mStarting training...\u001b[0m\n", + "\u001b[32m2025-04-03 14:28:42.405\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m242\u001b[0m - \u001b[1mOverriding epochs to 10\u001b[0m\n", + "\u001b[32m2025-04-03 14:28:42.406\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m276\u001b[0m - \u001b[1mSetting pretrained flag to True for HGNetv2\u001b[0m\n", + "\u001b[32m2025-04-03 14:28:42.406\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m296\u001b[0m - \u001b[1mAutomatically calculated mixup epochs: [1, 5]\u001b[0m\n", + "\u001b[32m2025-04-03 14:28:42.406\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m306\u001b[0m - \u001b[1mAutomatically calculated data augmentation epochs: 1, 5, 9\u001b[0m\n", + "\u001b[32m2025-04-03 14:28:42.406\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mAutomatically calculated stop epoch: 9\u001b[0m\n", + "\u001b[32m2025-04-03 14:28:42.407\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m323\u001b[0m - \u001b[1mAutomatically calculated no augmentation epochs: 1\u001b[0m\n", + "\u001b[32m2025-04-03 14:28:42.407\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m336\u001b[0m - \u001b[1mAutomatically calculated flat epochs: 5\u001b[0m\n", + "\u001b[32m2025-04-03 14:28:42.410\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m363\u001b[0m - \u001b[1mAutomatically calculated warmup iterations: 403 (1.0 epochs)\u001b[0m\n", + "\u001b[32m2025-04-03 14:28:42.410\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m376\u001b[0m - \u001b[1mAutomatically calculated EMA warmups: 403 (1.0 epochs)\u001b[0m\n", + "\u001b[32m2025-04-03 
14:28:42.411\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_setup\u001b[0m:\u001b[36m152\u001b[0m - \u001b[1mDisabling sync_bn and find_unused_parameters for single-process training\u001b[0m\n", + "\u001b[32m2025-04-03 14:28:42.411\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_setup\u001b[0m:\u001b[36m164\u001b[0m - \u001b[1mSet device in config: cuda\u001b[0m\n", + "\u001b[32m2025-04-03 14:28:42.411\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_setup\u001b[0m:\u001b[36m174\u001b[0m - \u001b[1mInitializing solver for task: detection\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loaded stage1 B0 HGNetV2 from local file.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/dnth/Desktop/DEIMKit/src/deimkit/engine/core/workspace.py:180: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " return module(**module_kwargs)\n", + "\u001b[32m2025-04-03 14:28:42.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_setup\u001b[0m:\u001b[36m201\u001b[0m - \u001b[1mTraining setup complete. 
Output directory: outputs/rock-paper-scissors/deim_hgnetv2_s_10ep_128px\u001b[0m\n", + "\u001b[32m2025-04-03 14:28:42.895\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_setup\u001b[0m:\u001b[36m203\u001b[0m - \u001b[1mSaving config to outputs/rock-paper-scissors/deim_hgnetv2_s_10ep_128px/config.yml\u001b[0m\n", + "\u001b[32m2025-04-03 14:28:42.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m384\u001b[0m - \u001b[1mUsing device: cuda\u001b[0m\n", + "\u001b[32m2025-04-03 14:28:43.026\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_save_checkpoint\u001b[0m:\u001b[36m613\u001b[0m - \u001b[1mCheckpoint saved to outputs/rock-paper-scissors/deim_hgnetv2_s_10ep_128px/best.pth\u001b[0m\n", + "\u001b[32m2025-04-03 14:28:43.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m393\u001b[0m - \u001b[1mInitial model saved as best.pth\u001b[0m\n", + "\u001b[32m2025-04-03 14:28:43.028\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m403\u001b[0m - \u001b[1mNumber of trainable parameters: 10217817\u001b[0m\n", + "\u001b[32m2025-04-03 14:28:43.029\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m409\u001b[0m - \u001b[1mUsing custom scheduler: flatcosine\u001b[0m\n", + "\u001b[32m2025-04-03 14:28:43.029\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 0/10\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Initial lr: [0.0002, 0.0004, 0.0004]\n", + "building train_dataloader with batch_size=16...\n", + " ### Transform @Mosaic ### \n", + " ### Transform @RandomPhotometricDistort ### \n", + " ### Transform @RandomZoomOut ### \n", + " ### Transform @RandomIoUCrop ### \n", 
+ " ### Transform @SanitizeBoundingBoxes ### \n", + " ### Transform @RandomHorizontalFlip ### \n", + " ### Transform @Resize ### \n", + " ### Transform @SanitizeBoundingBoxes ### \n", + " ### Transform @ConvertPILImage ### \n", + " ### Transform @ConvertBoxes ### \n", + " ### Mosaic with Prob.@0.5 and ZoomOut/IoUCrop existed ### \n", + " ### ImgTransforms Epochs: [1, 5, 9] ### \n", + " ### Policy_ops@['Mosaic', 'RandomPhotometricDistort', 'RandomZoomOut', 'RandomIoUCrop'] ###\n", + " ### Using MixUp with Prob@0.5 in [1, 5] epochs ### \n", + " ### Multi-scale Training until 9 epochs ### \n", + " ### Multi-scales@ [96, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 160] ### \n", + "building val_dataloader with batch_size=16...\n", + " ### Transform @Resize ### \n", + " ### Transform @ConvertPILImage ### \n", + "[0.0002, 0.0004, 0.0004] [0.0001, 0.0002, 0.0002] 4030 403 2015 403\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Epoch 0: 100%|██████████| 403/403 [01:12<00:00, 5.58it/s, loss=31.8230]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000199 loss: 33.3829 (39.1596) loss_mal: 1.7744 (1.1150) loss_bbox: 0.6207 (1.6377) loss_giou: 0.5784 (1.2010) loss_fgl: 1.2619 (0.8419) loss_mal_aux_0: 1.5762 (1.0511) loss_bbox_aux_0: 0.6617 (1.6652) loss_giou_aux_0: 0.6207 (1.2228) loss_fgl_aux_0: 1.2456 (0.8445) loss_mal_aux_1: 1.6826 (1.0775) loss_bbox_aux_1: 0.6528 (1.6499) loss_giou_aux_1: 0.6150 (1.2112) loss_fgl_aux_1: 1.2573 (0.8424) loss_mal_pre: 1.5498 (1.0360) loss_bbox_pre: 0.6626 (1.6757) loss_giou_pre: 0.6109 (1.2275) loss_mal_enc_0: 1.2812 (0.9584) loss_bbox_enc_0: 1.1488 (1.8270) loss_giou_enc_0: 0.9773 (1.3395) loss_mal_dn_0: 0.7905 (0.8399) loss_bbox_dn_0: 1.0637 (1.4471) loss_giou_dn_0: 0.8955 (1.2364) loss_fgl_dn_0: 1.1151 (0.9050) loss_mal_dn_1: 0.7656 (0.8051) loss_bbox_dn_1: 1.0062 (1.4335) loss_giou_dn_1: 0.8753 (1.2316) 
loss_fgl_dn_1: 1.1129 (0.9027) loss_mal_dn_2: 0.7974 (0.8108) loss_bbox_dn_2: 0.9348 (1.4187) loss_giou_dn_2: 0.8050 (1.2191) loss_fgl_dn_2: 1.1289 (0.9075) loss_mal_dn_pre: 0.7905 (0.8407) loss_bbox_dn_pre: 1.0606 (1.4641) loss_giou_dn_pre: 0.8919 (1.2363) loss_ddf_aux_0: 0.0282 (0.0144) loss_ddf_aux_1: 0.0126 (0.0041) loss_ddf_dn_0: 0.0351 (0.0141) loss_ddf_dn_1: 0.0187 (0.0048)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:02<00:00, 15.33it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.06s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.101\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.221\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.075\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.093\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.112\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.348\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.535\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.601\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.256\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.645\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.947\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.657\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-03 
14:29:57.898\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_save_checkpoint\u001b[0m:\u001b[36m613\u001b[0m - \u001b[1mCheckpoint saved to outputs/rock-paper-scissors/deim_hgnetv2_s_10ep_128px/best.pth\u001b[0m\n", + "\u001b[32m2025-04-03 14:29:57.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m505\u001b[0m - \u001b[1m🏆 NEW BEST MODEL! Epoch 0 / mAP: 0.10130532838347801\u001b[0m\n", + "\u001b[32m2025-04-03 14:29:57.900\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 0, 'coco_eval_bbox': 0.10130532838347801}\u001b[0m\n", + "\u001b[32m2025-04-03 14:29:57.900\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 1/10\u001b[0m\n", + "Epoch 1: 100%|██████████| 403/403 [01:27<00:00, 4.58it/s, loss=29.8709]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000200 loss: 29.8709 (30.9488) loss_mal: 1.2617 (1.3347) loss_bbox: 0.5714 (0.6373) loss_giou: 0.8725 (0.9386) loss_fgl: 1.0942 (1.0593) loss_mal_aux_0: 1.2705 (1.2934) loss_bbox_aux_0: 0.5838 (0.6428) loss_giou_aux_0: 0.8864 (0.9509) loss_fgl_aux_0: 1.0926 (1.0572) loss_ddf_aux_0: 0.0164 (0.0179) loss_mal_aux_1: 1.3223 (1.3216) loss_bbox_aux_1: 0.5724 (0.6371) loss_giou_aux_1: 0.8750 (0.9424) loss_fgl_aux_1: 1.0938 (1.0581) loss_ddf_aux_1: 0.0021 (0.0046) loss_mal_pre: 1.2695 (1.2897) loss_bbox_pre: 0.5786 (0.6401) loss_giou_pre: 0.8854 (0.9500) loss_mal_enc_0: 1.1875 (1.2199) loss_bbox_enc_0: 0.6338 (0.7317) loss_giou_enc_0: 0.9623 (1.0437) loss_mal_dn_0: 0.7544 (0.7467) loss_bbox_dn_0: 0.6393 (0.7217) loss_giou_dn_0: 1.0265 (1.0891) loss_fgl_dn_0: 1.0154 (0.9957) loss_ddf_dn_0: 0.0583 (0.0393) loss_mal_dn_1: 0.7676 (0.7579) loss_bbox_dn_1: 0.6050 (0.6992) loss_giou_dn_1: 0.9681 
(1.0558) loss_fgl_dn_1: 1.0213 (1.0022) loss_ddf_dn_1: 0.0079 (0.0084) loss_mal_dn_2: 0.7690 (0.7622) loss_bbox_dn_2: 0.6077 (0.6920) loss_giou_dn_2: 0.9574 (1.0429) loss_fgl_dn_2: 1.0297 (1.0067) loss_mal_dn_pre: 0.7534 (0.7459) loss_bbox_dn_pre: 0.6494 (0.7232) loss_giou_dn_pre: 1.0280 (1.0888)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:02<00:00, 16.88it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.06s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.190\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.367\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.181\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.124\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.210\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.386\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.586\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.650\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.316\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.698\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.939\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.762\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-03 14:31:28.304\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_save_checkpoint\u001b[0m:\u001b[36m613\u001b[0m - \u001b[1mCheckpoint saved to outputs/rock-paper-scissors/deim_hgnetv2_s_10ep_128px/best.pth\u001b[0m\n", + "\u001b[32m2025-04-03 14:31:28.306\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m505\u001b[0m - \u001b[1m🏆 NEW BEST MODEL! Epoch 1 / mAP: 0.19035324202785167\u001b[0m\n", + "\u001b[32m2025-04-03 14:31:28.307\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 1, 'coco_eval_bbox': 0.19035324202785167}\u001b[0m\n", + "\u001b[32m2025-04-03 14:31:28.307\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 2/10\u001b[0m\n", + "Epoch 2: 100%|██████████| 403/403 [01:28<00:00, 4.54it/s, loss=26.4475]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000200 loss: 28.7592 (29.4460) loss_mal: 1.2188 (1.2884) loss_bbox: 0.5466 (0.5429) loss_giou: 0.8605 (0.8229) loss_fgl: 1.0984 (1.1086) loss_mal_aux_0: 1.2676 (1.2806) loss_bbox_aux_0: 0.5509 (0.5556) loss_giou_aux_0: 0.8617 (0.8410) loss_fgl_aux_0: 1.0982 (1.1088) loss_ddf_aux_0: 0.0214 (0.0260) loss_mal_aux_1: 1.2930 (1.2940) loss_bbox_aux_1: 0.5473 (0.5432) loss_giou_aux_1: 0.8556 (0.8240) loss_fgl_aux_1: 1.0951 (1.1083) loss_ddf_aux_1: 0.0019 (0.0025) loss_mal_pre: 1.2451 (1.2787) loss_bbox_pre: 0.5486 (0.5545) loss_giou_pre: 0.8613 (0.8399) loss_mal_enc_0: 1.2539 (1.2478) loss_bbox_enc_0: 0.5823 (0.6355) loss_giou_enc_0: 0.9374 (0.9270) loss_mal_dn_0: 0.7769 (0.7741) loss_bbox_dn_0: 0.6160 (0.6357) loss_giou_dn_0: 0.9413 (0.9675) loss_fgl_dn_0: 1.0602 (1.0643) loss_ddf_dn_0: 0.0689 (0.0811) loss_mal_dn_1: 0.7871 (0.7926) loss_bbox_dn_1: 0.5592 (0.5909) loss_giou_dn_1: 0.8707 (0.9019) loss_fgl_dn_1: 1.0756 (1.0764) loss_ddf_dn_1: 
0.0057 (0.0083) loss_mal_dn_2: 0.7881 (0.7879) loss_bbox_dn_2: 0.5563 (0.5864) loss_giou_dn_2: 0.8650 (0.8934) loss_fgl_dn_2: 1.0790 (1.0790) loss_mal_dn_pre: 0.7769 (0.7738) loss_bbox_dn_pre: 0.6140 (0.6361) loss_giou_dn_pre: 0.9378 (0.9662)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:02<00:00, 17.24it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.07s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.295\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.497\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.314\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.026\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.074\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.328\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.476\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.653\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.689\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.025\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.294\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.739\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.944\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.816\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-03 14:32:59.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_save_checkpoint\u001b[0m:\u001b[36m613\u001b[0m - 
\u001b[1mCheckpoint saved to outputs/rock-paper-scissors/deim_hgnetv2_s_10ep_128px/best.pth\u001b[0m\n", + "\u001b[32m2025-04-03 14:32:59.675\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m505\u001b[0m - \u001b[1m🏆 NEW BEST MODEL! Epoch 2 / mAP: 0.2948400366804452\u001b[0m\n", + "\u001b[32m2025-04-03 14:32:59.676\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 2, 'coco_eval_bbox': 0.2948400366804452}\u001b[0m\n", + "\u001b[32m2025-04-03 14:32:59.676\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 3/10\u001b[0m\n", + "Epoch 3: 100%|██████████| 403/403 [01:36<00:00, 4.16it/s, loss=25.9623]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000200 loss: 26.9043 (28.4187) loss_mal: 1.1396 (1.2073) loss_bbox: 0.4422 (0.5063) loss_giou: 0.7112 (0.7890) loss_fgl: 1.1122 (1.1132) loss_mal_aux_0: 1.1641 (1.2253) loss_bbox_aux_0: 0.4593 (0.5196) loss_giou_aux_0: 0.7318 (0.8075) loss_fgl_aux_0: 1.1107 (1.1159) loss_ddf_aux_0: 0.0228 (0.0247) loss_mal_aux_1: 1.1855 (1.2253) loss_bbox_aux_1: 0.4396 (0.5065) loss_giou_aux_1: 0.7127 (0.7897) loss_fgl_aux_1: 1.1112 (1.1135) loss_ddf_aux_1: 0.0018 (0.0018) loss_mal_pre: 1.1621 (1.2233) loss_bbox_pre: 0.4574 (0.5188) loss_giou_pre: 0.7309 (0.8060) loss_mal_enc_0: 1.1992 (1.2192) loss_bbox_enc_0: 0.5098 (0.5897) loss_giou_enc_0: 0.7970 (0.8871) loss_mal_dn_0: 0.7812 (0.7826) loss_bbox_dn_0: 0.5000 (0.5819) loss_giou_dn_0: 0.8297 (0.9044) loss_fgl_dn_0: 1.1120 (1.0937) loss_ddf_dn_0: 0.0999 (0.1005) loss_mal_dn_1: 0.7939 (0.7927) loss_bbox_dn_1: 0.4189 (0.5289) loss_giou_dn_1: 0.7284 (0.8289) loss_fgl_dn_1: 1.1072 (1.1024) loss_ddf_dn_1: 0.0069 (0.0071) loss_mal_dn_2: 0.7798 (0.7868) loss_bbox_dn_2: 0.4150 (0.5251) loss_giou_dn_2: 
0.7208 (0.8227) loss_fgl_dn_2: 1.1082 (1.1029) loss_mal_dn_pre: 0.7812 (0.7823) loss_bbox_dn_pre: 0.5012 (0.5825) loss_giou_dn_pre: 0.8256 (0.9038)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:02<00:00, 14.40it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.07s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.361\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.613\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.379\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.180\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.392\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.502\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.634\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.696\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.358\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.742\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.958\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.813\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-03 14:34:39.413\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_save_checkpoint\u001b[0m:\u001b[36m613\u001b[0m - \u001b[1mCheckpoint saved to 
outputs/rock-paper-scissors/deim_hgnetv2_s_10ep_128px/best.pth\u001b[0m\n", + "\u001b[32m2025-04-03 14:34:39.415\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m505\u001b[0m - \u001b[1m🏆 NEW BEST MODEL! Epoch 3 / mAP: 0.3606628641695955\u001b[0m\n", + "\u001b[32m2025-04-03 14:34:39.416\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 3, 'coco_eval_bbox': 0.3606628641695955}\u001b[0m\n", + "\u001b[32m2025-04-03 14:34:39.417\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 4/10\u001b[0m\n", + "Epoch 4: 100%|██████████| 403/403 [01:34<00:00, 4.26it/s, loss=25.1240]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000200 loss: 25.9052 (27.6389) loss_mal: 1.0967 (1.1710) loss_bbox: 0.4150 (0.4730) loss_giou: 0.7091 (0.7484) loss_fgl: 1.1109 (1.1232) loss_mal_aux_0: 1.1406 (1.1900) loss_bbox_aux_0: 0.4522 (0.4865) loss_giou_aux_0: 0.7479 (0.7674) loss_fgl_aux_0: 1.1302 (1.1280) loss_ddf_aux_0: 0.0254 (0.0274) loss_mal_aux_1: 1.1074 (1.1850) loss_bbox_aux_1: 0.4285 (0.4733) loss_giou_aux_1: 0.7104 (0.7490) loss_fgl_aux_1: 1.1139 (1.1235) loss_ddf_aux_1: 0.0017 (0.0019) loss_mal_pre: 1.1387 (1.1882) loss_bbox_pre: 0.4568 (0.4861) loss_giou_pre: 0.7429 (0.7660) loss_mal_enc_0: 1.1523 (1.2117) loss_bbox_enc_0: 0.5232 (0.5594) loss_giou_enc_0: 0.8067 (0.8494) loss_mal_dn_0: 0.7837 (0.7863) loss_bbox_dn_0: 0.4453 (0.5478) loss_giou_dn_0: 0.7641 (0.8538) loss_fgl_dn_0: 1.1316 (1.1156) loss_ddf_dn_0: 0.1077 (0.1085) loss_mal_dn_1: 0.7832 (0.7867) loss_bbox_dn_1: 0.3746 (0.4916) loss_giou_dn_1: 0.6921 (0.7735) loss_fgl_dn_1: 1.1261 (1.1188) loss_ddf_dn_1: 0.0078 (0.0071) loss_mal_dn_2: 0.7686 (0.7804) loss_bbox_dn_2: 0.3820 (0.4876) loss_giou_dn_2: 0.6834 (0.7677) loss_fgl_dn_2: 
1.1219 (1.1182) loss_mal_dn_pre: 0.7832 (0.7857) loss_bbox_dn_pre: 0.4410 (0.5478) loss_giou_dn_pre: 0.7657 (0.8534)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:02<00:00, 15.16it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.07s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.441\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.682\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.511\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.056\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.180\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.480\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.567\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.706\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.743\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.062\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.397\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.788\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.972\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.863\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-03 14:36:16.855\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_save_checkpoint\u001b[0m:\u001b[36m613\u001b[0m - \u001b[1mCheckpoint saved to outputs/rock-paper-scissors/deim_hgnetv2_s_10ep_128px/best.pth\u001b[0m\n", + 
"\u001b[32m2025-04-03 14:36:16.857\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m505\u001b[0m - \u001b[1m🏆 NEW BEST MODEL! Epoch 4 / mAP: 0.4408642088712889\u001b[0m\n", + "\u001b[32m2025-04-03 14:36:16.858\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 4, 'coco_eval_bbox': 0.4408642088712889}\u001b[0m\n", + "\u001b[32m2025-04-03 14:36:16.859\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 5/10\u001b[0m\n", + "Epoch 5: 0%| | 0/403 [00:00 1:\n", + "/home/dnth/Desktop/DEIMKit/src/deimkit/engine/deim/dfine_decoder.py:129: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n", + " if reference_points.shape[-1] == 2:\n", + "/home/dnth/Desktop/DEIMKit/src/deimkit/engine/deim/dfine_decoder.py:133: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n", + " elif reference_points.shape[-1] == 4:\n", + "/home/dnth/Desktop/DEIMKit/.pixi/envs/cuda/lib/python3.11/site-packages/torch/onnx/_internal/jit_utils.py:308: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. 
(Triggered internally at /pytorch/torch/csrc/jit/passes/onnx/constant_fold.cpp:178.)\n", + " _C._jit_pass_onnx_node_shape_type_inference(node, params_dict, opset_version)\n", + "/home/dnth/Desktop/DEIMKit/.pixi/envs/cuda/lib/python3.11/site-packages/torch/onnx/utils.py:657: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. (Triggered internally at /pytorch/torch/csrc/jit/passes/onnx/constant_fold.cpp:178.)\n", + " _C._jit_pass_onnx_graph_shape_type_inference(\n", + "/home/dnth/Desktop/DEIMKit/.pixi/envs/cuda/lib/python3.11/site-packages/torch/onnx/utils.py:1127: UserWarning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied. (Triggered internally at /pytorch/torch/csrc/jit/passes/onnx/constant_fold.cpp:178.)\n", + " _C._jit_pass_onnx_graph_shape_type_inference(\n", + "\u001b[32m2025-04-03 18:31:07.853\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mdeimkit.exporter\u001b[0m:\u001b[36mto_onnx\u001b[0m:\u001b[36m285\u001b[0m - \u001b[32m\u001b[1mONNX export completed successfully: /home/dnth/Desktop/DEIMKit/nbs/outputs/rock-paper-scissors/deim_hgnetv2_s_30ep_640px/best.onnx\u001b[0m\n", + "\u001b[32m2025-04-03 18:31:07.854\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.exporter\u001b[0m:\u001b[36mto_onnx\u001b[0m:\u001b[36m296\u001b[0m - \u001b[1mSimplifying ONNX model with input shapes: {'input_bgr': torch.Size([1, 3, 640, 640]), 'orig_target_sizes': torch.Size([1, 2])}\u001b[0m\n", + "\u001b[32m2025-04-03 18:31:07.890\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.exporter\u001b[0m:\u001b[36m_simplify_onnx_model\u001b[0m:\u001b[36m401\u001b[0m - \u001b[1mSimplifying ONNX model: /home/dnth/Desktop/DEIMKit/nbs/outputs/rock-paper-scissors/deim_hgnetv2_s_30ep_640px/best.onnx -> /home/dnth/Desktop/DEIMKit/nbs/outputs/rock-paper-scissors/deim_hgnetv2_s_30ep_640px/best.onnx\u001b[0m\n", + 
"\u001b[32m2025-04-03 18:31:10.010\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mdeimkit.exporter\u001b[0m:\u001b[36m_simplify_onnx_model\u001b[0m:\u001b[36m411\u001b[0m - \u001b[32m\u001b[1mONNX model simplification successful: /home/dnth/Desktop/DEIMKit/nbs/outputs/rock-paper-scissors/deim_hgnetv2_s_30ep_640px/best.onnx\u001b[0m\n", + "\u001b[32m2025-04-03 18:31:10.092\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.exporter\u001b[0m:\u001b[36m_check_onnx_model\u001b[0m:\u001b[36m368\u001b[0m - \u001b[1mONNX model validation successful: /home/dnth/Desktop/DEIMKit/nbs/outputs/rock-paper-scissors/deim_hgnetv2_s_30ep_640px/best.onnx\u001b[0m\n" + ] + } + ], + "source": [ + "from deimkit.exporter import Exporter\n", + "from deimkit.config import Config\n", + "\n", + "config = Config(\"/home/dnth/Desktop/DEIMKit/nbs/outputs/rock-paper-scissors/deim_hgnetv2_s_30ep_640px/config.yml\")\n", + "exporter = Exporter(config)\n", + "\n", + "output_path = exporter.to_onnx(\n", + " checkpoint_path=\"/home/dnth/Desktop/DEIMKit/nbs/outputs/rock-paper-scissors/deim_hgnetv2_s_30ep_640px/best.pth\",\n", + " output_path=\"/home/dnth/Desktop/DEIMKit/nbs/outputs/rock-paper-scissors/deim_hgnetv2_s_30ep_640px/best.onnx\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train at 640px for 30 epochs with model trained at smaller sizes image" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-03 21:01:56.724\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.model\u001b[0m:\u001b[36mupdate_setting\u001b[0m:\u001b[36m83\u001b[0m - \u001b[1mSetting 'yaml_cfg.DFINETransformer.num_queries' to: 100\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:56.725\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.model\u001b[0m:\u001b[36mupdate_setting\u001b[0m:\u001b[36m83\u001b[0m - \u001b[1mSetting 
'yaml_cfg.PostProcessor.num_top_queries' to: 100\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:56.725\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.model\u001b[0m:\u001b[36mupdate_setting\u001b[0m:\u001b[36m83\u001b[0m - \u001b[1mSetting 'yaml_cfg.HGNetv2.pretrained' to: True\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:56.725\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.model\u001b[0m:\u001b[36mupdate_setting\u001b[0m:\u001b[36m83\u001b[0m - \u001b[1mSetting 'yaml_cfg.HGNetv2.freeze_at' to: 0\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:56.771\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_init_process_group\u001b[0m:\u001b[36m60\u001b[0m - \u001b[1mInitializing process group for single-process training\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:56.773\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_init_process_group\u001b[0m:\u001b[36m80\u001b[0m - \u001b[1mProcess group initialized successfully\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:56.774\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mload_checkpoint\u001b[0m:\u001b[36m624\u001b[0m - \u001b[1mLoading checkpoint from outputs/rock-paper-scissors/deim_hgnetv2_s_10ep_320px/best.pth\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:56.985\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_setup\u001b[0m:\u001b[36m152\u001b[0m - \u001b[1mDisabling sync_bn and find_unused_parameters for single-process training\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:56.986\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_setup\u001b[0m:\u001b[36m164\u001b[0m - \u001b[1mSet device in config: cuda\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:56.986\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_setup\u001b[0m:\u001b[36m174\u001b[0m - \u001b[1mInitializing solver for task: detection\u001b[0m\n" + ] + }, + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "Loaded stage1 B0 HGNetV2 from local file.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/dnth/Desktop/DEIMKit/src/deimkit/engine/core/workspace.py:180: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n", + " return module(**module_kwargs)\n", + "\u001b[32m2025-04-03 21:01:57.515\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_setup\u001b[0m:\u001b[36m201\u001b[0m - \u001b[1mTraining setup complete. Output directory: outputs/rock-paper-scissors/deim_hgnetv2_s_30ep_640px_progressive_resizing\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:57.516\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_setup\u001b[0m:\u001b[36m203\u001b[0m - \u001b[1mSaving config to outputs/rock-paper-scissors/deim_hgnetv2_s_30ep_640px_progressive_resizing/config.yml\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:57.556\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mload_state_dict_with_mismatch\u001b[0m:\u001b[36m642\u001b[0m - \u001b[33m\u001b[1mShape mismatch, loading compatible parameters only: Error(s) in loading state_dict for DistributedDataParallel:\n", + "\tsize mismatch for module.decoder.anchors: copying a param with shape torch.Size([1, 2100, 4]) from checkpoint, the shape in current model is torch.Size([1, 8400, 4]).\n", + "\tsize mismatch for module.decoder.valid_mask: copying a param with shape torch.Size([1, 2100, 1]) from checkpoint, the shape in current model is torch.Size([1, 8400, 1]).\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:57.603\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mload_state_dict_with_mismatch\u001b[0m:\u001b[36m649\u001b[0m - \u001b[1mLoaded parameters with matching shapes\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:57.637\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mload_checkpoint\u001b[0m:\u001b[36m659\u001b[0m - \u001b[1mAttempting to load EMA parameters with matching shapes...\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:57.668\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mload_state_dict_with_mismatch\u001b[0m:\u001b[36m642\u001b[0m - \u001b[33m\u001b[1mShape mismatch, loading compatible parameters only: Error(s) in loading state_dict for DEIM:\n", + "\tsize mismatch for decoder.anchors: copying a param with shape torch.Size([1, 2100, 4]) from checkpoint, the shape in current model is torch.Size([1, 8400, 4]).\n", + "\tsize mismatch for decoder.valid_mask: copying a param with shape torch.Size([1, 2100, 1]) from checkpoint, the shape in current model is torch.Size([1, 8400, 1]).\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Initial lr: [0.0002, 0.0004, 0.0004]\n", + "building train_dataloader with batch_size=16...\n", + " ### Transform @Mosaic ### \n", + " ### Transform @RandomPhotometricDistort ### \n", + " ### Transform @RandomZoomOut ### \n", + " ### Transform @RandomIoUCrop ### \n", + " ### Transform @SanitizeBoundingBoxes ### \n", + " ### Transform @RandomHorizontalFlip ### \n", + " ### Transform @Resize ### \n", + " ### Transform @SanitizeBoundingBoxes ### \n", + " ### Transform @ConvertPILImage ### \n", + " ### Transform @ConvertBoxes ### \n", + " ### Mosaic with Prob.@0.5 and ZoomOut/IoUCrop existed ### \n", + " ### ImgTransforms Epochs: [4, 64, 120] ### \n", + " ### Policy_ops@['Mosaic', 'RandomPhotometricDistort', 'RandomZoomOut', 'RandomIoUCrop'] ###\n", + " ### Using MixUp with Prob@0.5 in [4, 64] epochs ### \n", + " ### Multi-scale Training until 120 epochs ### \n", + " ### Multi-scales@ [480, 512, 544, 576, 608, 640, 640, 640, 640, 640, 640, 640, 640, 640, 640, 640, 640, 640, 640, 640, 640, 640, 640, 640, 640, 800, 768, 736, 704, 672] ### \n", + "building 
val_dataloader with batch_size=16...\n", + " ### Transform @Resize ### \n", + " ### Transform @ConvertPILImage ### \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-03 21:01:57.702\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mload_state_dict_with_mismatch\u001b[0m:\u001b[36m649\u001b[0m - \u001b[1mLoaded parameters with matching shapes\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:57.739\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mload_checkpoint\u001b[0m:\u001b[36m671\u001b[0m - \u001b[1mLoaded checkpoint from epoch 9\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:57.743\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m239\u001b[0m - \u001b[1mStarting training...\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:57.743\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m242\u001b[0m - \u001b[1mOverriding epochs to 30\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:57.743\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m276\u001b[0m - \u001b[1mSetting pretrained flag to True for HGNetv2\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:57.744\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m296\u001b[0m - \u001b[1mAutomatically calculated mixup epochs: [1, 15]\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:57.744\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m306\u001b[0m - \u001b[1mAutomatically calculated data augmentation epochs: 1, 15, 27\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:57.744\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m315\u001b[0m - \u001b[1mAutomatically calculated stop epoch: 27\u001b[0m\n", + "\u001b[32m2025-04-03 
21:01:57.744\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m323\u001b[0m - \u001b[1mAutomatically calculated no augmentation epochs: 3\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:57.745\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m336\u001b[0m - \u001b[1mAutomatically calculated flat epochs: 15\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:57.748\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m363\u001b[0m - \u001b[1mAutomatically calculated warmup iterations: 605 (1.5 epochs)\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:57.749\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m376\u001b[0m - \u001b[1mAutomatically calculated EMA warmups: 605 (1.5 epochs)\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:57.749\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_setup\u001b[0m:\u001b[36m152\u001b[0m - \u001b[1mDisabling sync_bn and find_unused_parameters for single-process training\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:57.750\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_setup\u001b[0m:\u001b[36m164\u001b[0m - \u001b[1mSet device in config: cuda\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:57.750\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_setup\u001b[0m:\u001b[36m174\u001b[0m - \u001b[1mInitializing solver for task: detection\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:57.785\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_setup\u001b[0m:\u001b[36m201\u001b[0m - \u001b[1mTraining setup complete. 
Output directory: outputs/rock-paper-scissors/deim_hgnetv2_s_30ep_640px_progressive_resizing\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:57.786\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_setup\u001b[0m:\u001b[36m203\u001b[0m - \u001b[1mSaving config to outputs/rock-paper-scissors/deim_hgnetv2_s_30ep_640px_progressive_resizing/config.yml\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:57.794\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m384\u001b[0m - \u001b[1mUsing device: cuda\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:58.114\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_save_checkpoint\u001b[0m:\u001b[36m613\u001b[0m - \u001b[1mCheckpoint saved to outputs/rock-paper-scissors/deim_hgnetv2_s_30ep_640px_progressive_resizing/best.pth\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:58.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m393\u001b[0m - \u001b[1mInitial model saved as best.pth\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:58.119\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m403\u001b[0m - \u001b[1mNumber of trainable parameters: 10217817\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:58.119\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m409\u001b[0m - \u001b[1mUsing custom scheduler: flatcosine\u001b[0m\n", + "\u001b[32m2025-04-03 21:01:58.119\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 0/30\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.0002, 0.0004, 0.0004] [0.0001, 0.0002, 0.0002] 12090 605 6045 1209\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Epoch 0: 100%|██████████| 403/403 [01:56<00:00, 3.45it/s, 
loss=17.1592]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000088 loss: 19.4117 (24.3900) loss_mal: 0.6772 (1.0626) loss_bbox: 0.2405 (0.3639) loss_giou: 0.2484 (0.3325) loss_fgl: 1.0165 (1.0863) loss_mal_aux_0: 0.8638 (1.2681) loss_bbox_aux_0: 0.2830 (0.4363) loss_giou_aux_0: 0.2865 (0.3983) loss_fgl_aux_0: 1.1088 (1.1692) loss_ddf_aux_0: 0.0908 (0.1043) loss_mal_aux_1: 0.7036 (1.1360) loss_bbox_aux_1: 0.2601 (0.3721) loss_giou_aux_1: 0.2459 (0.3398) loss_fgl_aux_1: 1.0167 (1.0943) loss_ddf_aux_1: 0.0062 (0.0066) loss_mal_pre: 0.8599 (1.2658) loss_bbox_pre: 0.2772 (0.4368) loss_giou_pre: 0.2818 (0.3981) loss_mal_enc_0: 1.1650 (1.7527) loss_bbox_enc_0: 0.4331 (0.7851) loss_giou_enc_0: 0.3969 (0.6513) loss_mal_dn_0: 0.6680 (0.7325) loss_bbox_dn_0: 0.3921 (0.5426) loss_giou_dn_0: 0.3716 (0.4963) loss_fgl_dn_0: 1.2100 (1.2478) loss_ddf_dn_0: 0.2250 (0.2516) loss_mal_dn_1: 0.5215 (0.6138) loss_bbox_dn_1: 0.2814 (0.3962) loss_giou_dn_1: 0.2554 (0.3513) loss_fgl_dn_1: 1.0249 (1.1073) loss_ddf_dn_1: 0.0143 (0.0178) loss_mal_dn_2: 0.5054 (0.5944) loss_bbox_dn_2: 0.2540 (0.3746) loss_giou_dn_2: 0.2448 (0.3320) loss_fgl_dn_2: 1.0178 (1.0930) loss_mal_dn_pre: 0.6699 (0.7307) loss_bbox_dn_pre: 0.4011 (0.5520) loss_giou_dn_pre: 0.3765 (0.4960)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:04<00:00, 7.54it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.06s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.680\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.897\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.809\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 
0.309\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.390\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.712\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.698\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.819\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.856\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.350\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.666\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.882\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.986\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.963\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-03 21:04:00.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_save_checkpoint\u001b[0m:\u001b[36m613\u001b[0m - \u001b[1mCheckpoint saved to outputs/rock-paper-scissors/deim_hgnetv2_s_30ep_640px_progressive_resizing/best.pth\u001b[0m\n", + "\u001b[32m2025-04-03 21:04:00.102\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m505\u001b[0m - \u001b[1m🏆 NEW BEST MODEL! 
Epoch 0 / mAP: 0.6798089462087499\u001b[0m\n", + "\u001b[32m2025-04-03 21:04:00.102\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 0, 'coco_eval_bbox': 0.6798089462087499}\u001b[0m\n", + "\u001b[32m2025-04-03 21:04:00.103\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 1/30\u001b[0m\n", + "Epoch 1: 100%|██████████| 403/403 [01:52<00:00, 3.58it/s, loss=16.7986]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000200 loss: 19.0964 (19.9460) loss_mal: 0.7119 (0.7438) loss_bbox: 0.2425 (0.2663) loss_giou: 0.2257 (0.2603) loss_fgl: 1.0231 (1.0613) loss_mal_aux_0: 0.8218 (0.9122) loss_bbox_aux_0: 0.2701 (0.3185) loss_giou_aux_0: 0.2917 (0.3090) loss_fgl_aux_0: 1.1437 (1.1533) loss_ddf_aux_0: 0.1002 (0.1035) loss_mal_aux_1: 0.6982 (0.7807) loss_bbox_aux_1: 0.2364 (0.2698) loss_giou_aux_1: 0.2298 (0.2636) loss_fgl_aux_1: 1.0328 (1.0665) loss_ddf_aux_1: 0.0053 (0.0067) loss_mal_pre: 0.8198 (0.9133) loss_bbox_pre: 0.2750 (0.3166) loss_giou_pre: 0.2906 (0.3072) loss_mal_enc_0: 0.9219 (1.1057) loss_bbox_enc_0: 0.5084 (0.5127) loss_giou_enc_0: 0.4887 (0.4624) loss_mal_dn_0: 0.6802 (0.6847) loss_bbox_dn_0: 0.4299 (0.4165) loss_giou_dn_0: 0.3998 (0.4019) loss_fgl_dn_0: 1.2197 (1.2256) loss_ddf_dn_0: 0.2925 (0.2692) loss_mal_dn_1: 0.5312 (0.5465) loss_bbox_dn_1: 0.2642 (0.2832) loss_giou_dn_1: 0.2579 (0.2736) loss_fgl_dn_1: 1.0604 (1.0674) loss_ddf_dn_1: 0.0180 (0.0173) loss_mal_dn_2: 0.5049 (0.5324) loss_bbox_dn_2: 0.2344 (0.2695) loss_giou_dn_2: 0.2450 (0.2624) loss_fgl_dn_2: 1.0509 (1.0537) loss_mal_dn_pre: 0.6777 (0.6834) loss_bbox_dn_pre: 0.4417 (0.4233) loss_giou_dn_pre: 0.3987 (0.4019)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:04<00:00, 7.63it/s]\n", + 
"\u001b[32m2025-04-03 21:05:57.377\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 0, 'coco_eval_bbox': 0.6798089462087499}\u001b[0m\n", + "\u001b[32m2025-04-03 21:05:57.378\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 2/30\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.07s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.653\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.860\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.755\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.298\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.462\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.681\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.712\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.823\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.864\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.338\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.698\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.889\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.986\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.965\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Epoch 2: 100%|██████████| 403/403 [01:53<00:00, 3.56it/s, loss=17.3891]\n" + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000200 loss: 17.3891 (18.5843) loss_mal: 0.5771 (0.6440) loss_bbox: 0.1931 (0.2315) loss_giou: 0.2077 (0.2319) loss_fgl: 0.9996 (1.0271) loss_mal_aux_0: 0.8125 (0.8174) loss_bbox_aux_0: 0.2317 (0.2855) loss_giou_aux_0: 0.2815 (0.2843) loss_fgl_aux_0: 1.0868 (1.1348) loss_ddf_aux_0: 0.1145 (0.1257) loss_mal_aux_1: 0.5854 (0.6763) loss_bbox_aux_1: 0.1987 (0.2356) loss_giou_aux_1: 0.2110 (0.2355) loss_fgl_aux_1: 0.9985 (1.0324) loss_ddf_aux_1: 0.0059 (0.0079) loss_mal_pre: 0.8101 (0.8155) loss_bbox_pre: 0.2409 (0.2823) loss_giou_pre: 0.2783 (0.2814) loss_mal_enc_0: 0.9028 (0.9669) loss_bbox_enc_0: 0.4617 (0.4543) loss_giou_enc_0: 0.3874 (0.4214) loss_mal_dn_0: 0.6440 (0.6576) loss_bbox_dn_0: 0.3518 (0.3762) loss_giou_dn_0: 0.3601 (0.3723) loss_fgl_dn_0: 1.1981 (1.2186) loss_ddf_dn_0: 0.2885 (0.3184) loss_mal_dn_1: 0.4863 (0.5075) loss_bbox_dn_1: 0.2131 (0.2451) loss_giou_dn_1: 0.2277 (0.2445) loss_fgl_dn_1: 1.0092 (1.0376) loss_ddf_dn_1: 0.0151 (0.0210) loss_mal_dn_2: 0.4636 (0.4926) loss_bbox_dn_2: 0.2014 (0.2333) loss_giou_dn_2: 0.2077 (0.2343) loss_fgl_dn_2: 0.9792 (1.0241) loss_mal_dn_pre: 0.6416 (0.6562) loss_bbox_dn_pre: 0.3544 (0.3815) loss_giou_dn_pre: 0.3604 (0.3717)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:04<00:00, 7.58it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.06s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.691\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.877\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.820\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.334\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | 
area=medium | maxDets=100 ] = 0.523\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.718\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.736\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.843\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.875\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.400\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.762\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.895\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.988\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.973\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-03 21:07:55.769\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_save_checkpoint\u001b[0m:\u001b[36m613\u001b[0m - \u001b[1mCheckpoint saved to outputs/rock-paper-scissors/deim_hgnetv2_s_30ep_640px_progressive_resizing/best.pth\u001b[0m\n", + "\u001b[32m2025-04-03 21:07:55.771\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m505\u001b[0m - \u001b[1m🏆 NEW BEST MODEL! 
Epoch 2 / mAP: 0.691391135911459\u001b[0m\n", + "\u001b[32m2025-04-03 21:07:55.772\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 2, 'coco_eval_bbox': 0.691391135911459}\u001b[0m\n", + "\u001b[32m2025-04-03 21:07:55.772\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 3/30\u001b[0m\n", + "Epoch 3: 100%|██████████| 403/403 [01:53<00:00, 3.54it/s, loss=15.9501]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000200 loss: 16.6024 (17.4524) loss_mal: 0.4771 (0.5614) loss_bbox: 0.1940 (0.2038) loss_giou: 0.1937 (0.2099) loss_fgl: 0.9501 (0.9986) loss_mal_aux_0: 0.6143 (0.7272) loss_bbox_aux_0: 0.2331 (0.2581) loss_giou_aux_0: 0.2447 (0.2598) loss_fgl_aux_0: 1.0667 (1.1120) loss_ddf_aux_0: 0.1020 (0.1324) loss_mal_aux_1: 0.5474 (0.5936) loss_bbox_aux_1: 0.1979 (0.2071) loss_giou_aux_1: 0.1989 (0.2126) loss_fgl_aux_1: 0.9624 (1.0052) loss_ddf_aux_1: 0.0058 (0.0073) loss_mal_pre: 0.6094 (0.7254) loss_bbox_pre: 0.2260 (0.2562) loss_giou_pre: 0.2385 (0.2575) loss_mal_enc_0: 0.8101 (0.8968) loss_bbox_enc_0: 0.3553 (0.4576) loss_giou_enc_0: 0.3555 (0.4180) loss_mal_dn_0: 0.6201 (0.6333) loss_bbox_dn_0: 0.3183 (0.3371) loss_giou_dn_0: 0.3339 (0.3411) loss_fgl_dn_0: 1.1815 (1.2030) loss_ddf_dn_0: 0.3056 (0.3314) loss_mal_dn_1: 0.4683 (0.4752) loss_bbox_dn_1: 0.2069 (0.2113) loss_giou_dn_1: 0.2044 (0.2183) loss_fgl_dn_1: 0.9730 (1.0061) loss_ddf_dn_1: 0.0178 (0.0194) loss_mal_dn_2: 0.4324 (0.4575) loss_bbox_dn_2: 0.1940 (0.2010) loss_giou_dn_2: 0.1981 (0.2097) loss_fgl_dn_2: 0.9572 (0.9916) loss_mal_dn_pre: 0.6182 (0.6318) loss_bbox_dn_pre: 0.3171 (0.3429) loss_giou_dn_pre: 0.3323 (0.3410)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:04<00:00, 7.61it/s]\n" + ] + 
}, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.07s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.726\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.918\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.847\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.320\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.536\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.753\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.741\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.850\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.880\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.412\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.680\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.903\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.982\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.973\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-03 21:09:54.745\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_save_checkpoint\u001b[0m:\u001b[36m613\u001b[0m - \u001b[1mCheckpoint saved to outputs/rock-paper-scissors/deim_hgnetv2_s_30ep_640px_progressive_resizing/best.pth\u001b[0m\n", + "\u001b[32m2025-04-03 21:09:54.747\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m505\u001b[0m - \u001b[1m🏆 NEW BEST MODEL! 
Epoch 3 / mAP: 0.7259081464387614\u001b[0m\n", + "\u001b[32m2025-04-03 21:09:54.748\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 3, 'coco_eval_bbox': 0.7259081464387614}\u001b[0m\n", + "\u001b[32m2025-04-03 21:09:54.748\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 4/30\u001b[0m\n", + "Epoch 4: 100%|██████████| 403/403 [02:25<00:00, 2.78it/s, loss=19.3678]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000200 loss: 20.2978 (22.2310) loss_mal: 0.8252 (0.9117) loss_bbox: 0.2634 (0.3225) loss_giou: 0.3821 (0.4551) loss_fgl: 1.0853 (1.1024) loss_mal_aux_0: 0.9697 (0.9881) loss_bbox_aux_0: 0.2536 (0.3419) loss_giou_aux_0: 0.4120 (0.4907) loss_fgl_aux_0: 1.1306 (1.1486) loss_ddf_aux_0: 0.1087 (0.0983) loss_mal_aux_1: 0.8892 (0.9266) loss_bbox_aux_1: 0.2539 (0.3237) loss_giou_aux_1: 0.3816 (0.4570) loss_fgl_aux_1: 1.0915 (1.1038) loss_ddf_aux_1: 0.0057 (0.0048) loss_mal_pre: 0.9492 (0.9880) loss_bbox_pre: 0.2593 (0.3412) loss_giou_pre: 0.4101 (0.4899) loss_mal_enc_0: 1.0078 (1.0455) loss_bbox_enc_0: 0.3553 (0.4392) loss_giou_enc_0: 0.5174 (0.6276) loss_mal_dn_0: 0.6914 (0.7252) loss_bbox_dn_0: 0.3359 (0.3552) loss_giou_dn_0: 0.4855 (0.5336) loss_fgl_dn_0: 1.1948 (1.1998) loss_ddf_dn_0: 0.2889 (0.2724) loss_mal_dn_1: 0.5991 (0.6425) loss_bbox_dn_1: 0.2358 (0.2993) loss_giou_dn_1: 0.3924 (0.4201) loss_fgl_dn_1: 1.0839 (1.1045) loss_ddf_dn_1: 0.0153 (0.0132) loss_mal_dn_2: 0.5908 (0.6348) loss_bbox_dn_2: 0.2314 (0.2956) loss_giou_dn_2: 0.3856 (0.4129) loss_fgl_dn_2: 1.0809 (1.1007) loss_mal_dn_pre: 0.6919 (0.7252) loss_bbox_dn_pre: 0.3338 (0.3557) loss_giou_dn_pre: 0.4848 (0.5338)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:04<00:00, 7.26it/s]\n", + 
"\u001b[32m2025-04-03 21:12:24.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 3, 'coco_eval_bbox': 0.7259081464387614}\u001b[0m\n", + "\u001b[32m2025-04-03 21:12:24.984\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 5/30\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.08s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.692\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.893\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.816\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.363\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.450\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.722\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.724\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.816\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.859\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.388\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.667\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.884\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.988\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.966\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Epoch 5: 100%|██████████| 403/403 [02:44<00:00, 2.46it/s, loss=23.1062]\n" + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000200 loss: 18.8316 (21.0811) loss_mal: 0.6812 (0.8446) loss_bbox: 0.1924 (0.2854) loss_giou: 0.3190 (0.4123) loss_fgl: 1.0881 (1.0935) loss_mal_aux_0: 0.8096 (0.9187) loss_bbox_aux_0: 0.2293 (0.3045) loss_giou_aux_0: 0.3414 (0.4459) loss_fgl_aux_0: 1.1478 (1.1453) loss_ddf_aux_0: 0.0782 (0.0997) loss_mal_aux_1: 0.6938 (0.8631) loss_bbox_aux_1: 0.1994 (0.2868) loss_giou_aux_1: 0.3155 (0.4140) loss_fgl_aux_1: 1.0902 (1.0955) loss_ddf_aux_1: 0.0045 (0.0047) loss_mal_pre: 0.8081 (0.9175) loss_bbox_pre: 0.2267 (0.3034) loss_giou_pre: 0.3420 (0.4445) loss_mal_enc_0: 0.9688 (1.0007) loss_bbox_enc_0: 0.3923 (0.3976) loss_giou_enc_0: 0.5582 (0.5740) loss_mal_dn_0: 0.6777 (0.7064) loss_bbox_dn_0: 0.2606 (0.3261) loss_giou_dn_0: 0.4089 (0.4943) loss_fgl_dn_0: 1.2006 (1.1978) loss_ddf_dn_0: 0.2393 (0.2668) loss_mal_dn_1: 0.5610 (0.6144) loss_bbox_dn_1: 0.1970 (0.2703) loss_giou_dn_1: 0.2913 (0.3856) loss_fgl_dn_1: 1.0958 (1.0903) loss_ddf_dn_1: 0.0132 (0.0117) loss_mal_dn_2: 0.5625 (0.6066) loss_bbox_dn_2: 0.1962 (0.2667) loss_giou_dn_2: 0.2901 (0.3792) loss_fgl_dn_2: 1.0954 (1.0858) loss_mal_dn_pre: 0.6782 (0.7060) loss_bbox_dn_pre: 0.2656 (0.3273) loss_giou_dn_pre: 0.4119 (0.4942)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:05<00:00, 6.99it/s]\n", + "\u001b[32m2025-04-03 21:15:14.245\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 3, 'coco_eval_bbox': 0.7259081464387614}\u001b[0m\n", + "\u001b[32m2025-04-03 21:15:14.245\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 6/30\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + 
"COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.07s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.715\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.917\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.838\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.376\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.483\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.742\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.734\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.820\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.854\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.400\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.705\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.876\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.988\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.961\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Epoch 6: 100%|██████████| 403/403 [02:31<00:00, 2.67it/s, loss=19.1424]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000200 loss: 21.9460 (20.7154) loss_mal: 0.8564 (0.8238) loss_bbox: 0.2540 (0.2755) loss_giou: 0.4078 (0.3985) loss_fgl: 1.0752 (1.0866) loss_mal_aux_0: 0.8892 (0.9039) loss_bbox_aux_0: 0.2655 (0.2948) loss_giou_aux_0: 0.4377 (0.4322) loss_fgl_aux_0: 1.1321 (1.1409) loss_ddf_aux_0: 0.1046 (0.1020) loss_mal_aux_1: 0.8726 (0.8372) loss_bbox_aux_1: 0.2540 (0.2769) loss_giou_aux_1: 0.4057 (0.4002) loss_fgl_aux_1: 1.0798 (1.0886) loss_ddf_aux_1: 0.0042 (0.0046) loss_mal_pre: 0.8955 (0.9031) loss_bbox_pre: 0.2654 
(0.2940) loss_giou_pre: 0.4352 (0.4310) loss_mal_enc_0: 0.9897 (0.9886) loss_bbox_enc_0: 0.3811 (0.3888) loss_giou_enc_0: 0.5357 (0.5628) loss_mal_dn_0: 0.7114 (0.6981) loss_bbox_dn_0: 0.2608 (0.3190) loss_giou_dn_0: 0.5006 (0.4777) loss_fgl_dn_0: 1.1892 (1.1928) loss_ddf_dn_0: 0.2887 (0.2673) loss_mal_dn_1: 0.6226 (0.6033) loss_bbox_dn_1: 0.2137 (0.2641) loss_giou_dn_1: 0.3919 (0.3736) loss_fgl_dn_1: 1.0684 (1.0801) loss_ddf_dn_1: 0.0117 (0.0113) loss_mal_dn_2: 0.6143 (0.5951) loss_bbox_dn_2: 0.2157 (0.2604) loss_giou_dn_2: 0.3855 (0.3675) loss_fgl_dn_2: 1.0640 (1.0755) loss_mal_dn_pre: 0.7114 (0.6976) loss_bbox_dn_pre: 0.2631 (0.3203) loss_giou_dn_pre: 0.5040 (0.4776)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:04<00:00, 7.31it/s]\n", + "\u001b[32m2025-04-03 21:17:50.343\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 3, 'coco_eval_bbox': 0.7259081464387614}\u001b[0m\n", + "\u001b[32m2025-04-03 21:17:50.344\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 7/30\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.06s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.714\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.925\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.834\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.347\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.497\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.741\n", + " 
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.727\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.812\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.837\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.375\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.650\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.860\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.985\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.938\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Epoch 7: 100%|██████████| 403/403 [02:28<00:00, 2.72it/s, loss=22.2579]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000200 loss: 20.2974 (20.3609) loss_mal: 0.7793 (0.8013) loss_bbox: 0.2563 (0.2723) loss_giou: 0.3413 (0.3899) loss_fgl: 1.0684 (1.0790) loss_mal_aux_0: 0.8608 (0.8813) loss_bbox_aux_0: 0.2658 (0.2875) loss_giou_aux_0: 0.3770 (0.4179) loss_fgl_aux_0: 1.1174 (1.1320) loss_ddf_aux_0: 0.0964 (0.0964) loss_mal_aux_1: 0.7637 (0.8161) loss_bbox_aux_1: 0.2603 (0.2731) loss_giou_aux_1: 0.3386 (0.3914) loss_fgl_aux_1: 1.0714 (1.0805) loss_ddf_aux_1: 0.0057 (0.0043) loss_mal_pre: 0.8652 (0.8800) loss_bbox_pre: 0.2627 (0.2867) loss_giou_pre: 0.3748 (0.4165) loss_mal_enc_0: 0.9302 (0.9725) loss_bbox_enc_0: 0.4163 (0.3727) loss_giou_enc_0: 0.5384 (0.5363) loss_mal_dn_0: 0.6924 (0.6915) loss_bbox_dn_0: 0.2967 (0.3135) loss_giou_dn_0: 0.4098 (0.4659) loss_fgl_dn_0: 1.1892 (1.1892) loss_ddf_dn_0: 0.2791 (0.2632) loss_mal_dn_1: 0.5918 (0.5950) loss_bbox_dn_1: 0.2446 (0.2597) loss_giou_dn_1: 0.3051 (0.3657) loss_fgl_dn_1: 1.0717 (1.0747) loss_ddf_dn_1: 0.0139 (0.0108) loss_mal_dn_2: 0.5767 (0.5865) loss_bbox_dn_2: 0.2422 (0.2561) loss_giou_dn_2: 0.2986 (0.3600) loss_fgl_dn_2: 1.0633 (1.0704) loss_mal_dn_pre: 0.6924 
(0.6909) loss_bbox_dn_pre: 0.2977 (0.3146) loss_giou_dn_pre: 0.4106 (0.4654)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:04<00:00, 7.67it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.06s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.726\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.930\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.858\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.317\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.518\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.754\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.737\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.807\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.838\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.375\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.667\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.862\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.988\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.943\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-03 21:20:23.548\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_save_checkpoint\u001b[0m:\u001b[36m613\u001b[0m - \u001b[1mCheckpoint saved to outputs/rock-paper-scissors/deim_hgnetv2_s_30ep_640px_progressive_resizing/best.pth\u001b[0m\n", + "\u001b[32m2025-04-03 
21:20:23.550\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m505\u001b[0m - \u001b[1m🏆 NEW BEST MODEL! Epoch 7 / mAP: 0.7260832526743252\u001b[0m\n", + "\u001b[32m2025-04-03 21:20:23.551\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 7, 'coco_eval_bbox': 0.7260832526743252}\u001b[0m\n", + "\u001b[32m2025-04-03 21:20:23.551\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 8/30\u001b[0m\n", + "Epoch 8: 100%|██████████| 403/403 [02:32<00:00, 2.64it/s, loss=17.1242]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000200 loss: 21.6777 (20.4901) loss_mal: 0.8745 (0.8108) loss_bbox: 0.3251 (0.2733) loss_giou: 0.4515 (0.3991) loss_fgl: 1.1010 (1.0796) loss_mal_aux_0: 0.9541 (0.8844) loss_bbox_aux_0: 0.3321 (0.2897) loss_giou_aux_0: 0.4776 (0.4298) loss_fgl_aux_0: 1.1496 (1.1326) loss_ddf_aux_0: 0.0865 (0.0959) loss_mal_aux_1: 0.8887 (0.8260) loss_bbox_aux_1: 0.3247 (0.2742) loss_giou_aux_1: 0.4522 (0.4007) loss_fgl_aux_1: 1.1008 (1.0812) loss_ddf_aux_1: 0.0039 (0.0040) loss_mal_pre: 0.9629 (0.8836) loss_bbox_pre: 0.3324 (0.2892) loss_giou_pre: 0.4756 (0.4285) loss_mal_enc_0: 1.0000 (0.9726) loss_bbox_enc_0: 0.4584 (0.3801) loss_giou_enc_0: 0.6104 (0.5591) loss_mal_dn_0: 0.7319 (0.6940) loss_bbox_dn_0: 0.3036 (0.3136) loss_giou_dn_0: 0.5309 (0.4716) loss_fgl_dn_0: 1.1770 (1.1889) loss_ddf_dn_0: 0.2386 (0.2561) loss_mal_dn_1: 0.6445 (0.5963) loss_bbox_dn_1: 0.2567 (0.2588) loss_giou_dn_1: 0.4305 (0.3706) loss_fgl_dn_1: 1.0822 (1.0764) loss_ddf_dn_1: 0.0091 (0.0099) loss_mal_dn_2: 0.6387 (0.5876) loss_bbox_dn_2: 0.2582 (0.2553) loss_giou_dn_2: 0.4164 (0.3648) loss_fgl_dn_2: 1.0809 (1.0720) loss_mal_dn_pre: 0.7319 (0.6935) loss_bbox_dn_pre: 0.3098 (0.3150) loss_giou_dn_pre: 
0.5310 (0.4712)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:04<00:00, 7.37it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.06s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.732\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.921\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.851\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.314\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.542\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.759\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.740\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.815\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.852\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.362\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.674\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.876\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.985\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.954\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-03 21:23:01.237\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_save_checkpoint\u001b[0m:\u001b[36m613\u001b[0m - \u001b[1mCheckpoint saved to outputs/rock-paper-scissors/deim_hgnetv2_s_30ep_640px_progressive_resizing/best.pth\u001b[0m\n", + "\u001b[32m2025-04-03 21:23:01.238\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m505\u001b[0m - \u001b[1m🏆 NEW BEST MODEL! Epoch 8 / mAP: 0.7323541495261611\u001b[0m\n", + "\u001b[32m2025-04-03 21:23:01.239\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 8, 'coco_eval_bbox': 0.7323541495261611}\u001b[0m\n", + "\u001b[32m2025-04-03 21:23:01.239\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 9/30\u001b[0m\n", + "Epoch 9: 100%|██████████| 403/403 [02:33<00:00, 2.63it/s, loss=24.2352]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000200 loss: 17.5129 (19.8934) loss_mal: 0.6226 (0.7701) loss_bbox: 0.1912 (0.2600) loss_giou: 0.3206 (0.3829) loss_fgl: 1.0420 (1.0718) loss_mal_aux_0: 0.7148 (0.8410) loss_bbox_aux_0: 0.2442 (0.2766) loss_giou_aux_0: 0.3919 (0.4143) loss_fgl_aux_0: 1.1105 (1.1273) loss_ddf_aux_0: 0.0927 (0.0952) loss_mal_aux_1: 0.6431 (0.7831) loss_bbox_aux_1: 0.1911 (0.2610) loss_giou_aux_1: 0.3255 (0.3846) loss_fgl_aux_1: 1.0444 (1.0739) loss_ddf_aux_1: 0.0051 (0.0043) loss_mal_pre: 0.7178 (0.8404) loss_bbox_pre: 0.2402 (0.2758) loss_giou_pre: 0.3940 (0.4129) loss_mal_enc_0: 0.8496 (0.9453) loss_bbox_enc_0: 0.3304 (0.3641) loss_giou_enc_0: 0.5038 (0.5388) loss_mal_dn_0: 0.6396 (0.6813) loss_bbox_dn_0: 0.2285 (0.2980) loss_giou_dn_0: 0.3740 (0.4492) loss_fgl_dn_0: 1.1789 (1.1845) loss_ddf_dn_0: 0.2732 (0.2475) loss_mal_dn_1: 0.5171 (0.5804) loss_bbox_dn_1: 0.1727 (0.2447) loss_giou_dn_1: 0.2718 (0.3524) loss_fgl_dn_1: 1.0588 (1.0686) loss_ddf_dn_1: 0.0095 (0.0098) loss_mal_dn_2: 0.5107 (0.5721) loss_bbox_dn_2: 0.1660 (0.2414) loss_giou_dn_2: 0.2722 (0.3471) loss_fgl_dn_2: 1.0532 (1.0646) loss_mal_dn_pre: 0.6406 (0.6807) loss_bbox_dn_pre: 0.2287 (0.2993) loss_giou_dn_pre: 0.3762 (0.4486)\n" + ] + }, + { + "name": 
"stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:05<00:00, 7.09it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.07s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.748\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.935\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.870\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.338\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.534\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.773\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.746\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.824\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.859\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.400\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.706\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.879\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.988\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.951\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-03 21:25:39.750\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_save_checkpoint\u001b[0m:\u001b[36m613\u001b[0m - \u001b[1mCheckpoint saved to outputs/rock-paper-scissors/deim_hgnetv2_s_30ep_640px_progressive_resizing/best.pth\u001b[0m\n", + "\u001b[32m2025-04-03 21:25:39.751\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m505\u001b[0m - \u001b[1m🏆 NEW BEST MODEL! Epoch 9 / mAP: 0.7475741830997051\u001b[0m\n", + "\u001b[32m2025-04-03 21:25:39.753\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 9, 'coco_eval_bbox': 0.7475741830997051}\u001b[0m\n", + "\u001b[32m2025-04-03 21:25:39.753\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 10/30\u001b[0m\n", + "Epoch 10: 100%|██████████| 403/403 [02:46<00:00, 2.42it/s, loss=23.4202]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000200 loss: 21.2283 (19.9574) loss_mal: 0.7847 (0.7787) loss_bbox: 0.3108 (0.2647) loss_giou: 0.4071 (0.3812) loss_fgl: 1.0865 (1.0708) loss_mal_aux_0: 0.8721 (0.8529) loss_bbox_aux_0: 0.3235 (0.2823) loss_giou_aux_0: 0.4252 (0.4126) loss_fgl_aux_0: 1.1323 (1.1259) loss_ddf_aux_0: 0.0945 (0.0985) loss_mal_aux_1: 0.8262 (0.7969) loss_bbox_aux_1: 0.3145 (0.2654) loss_giou_aux_1: 0.4080 (0.3827) loss_fgl_aux_1: 1.0867 (1.0727) loss_ddf_aux_1: 0.0046 (0.0044) loss_mal_pre: 0.8643 (0.8521) loss_bbox_pre: 0.3231 (0.2816) loss_giou_pre: 0.4187 (0.4112) loss_mal_enc_0: 0.9507 (0.9381) loss_bbox_enc_0: 0.3810 (0.3744) loss_giou_enc_0: 0.5506 (0.5364) loss_mal_dn_0: 0.6948 (0.6810) loss_bbox_dn_0: 0.3442 (0.3004) loss_giou_dn_0: 0.4954 (0.4474) loss_fgl_dn_0: 1.1825 (1.1825) loss_ddf_dn_0: 0.2327 (0.2524) loss_mal_dn_1: 0.6279 (0.5802) loss_bbox_dn_1: 0.2883 (0.2469) loss_giou_dn_1: 0.4076 (0.3519) loss_fgl_dn_1: 1.0920 (1.0672) loss_ddf_dn_1: 0.0108 (0.0105) loss_mal_dn_2: 0.6187 (0.5723) loss_bbox_dn_2: 0.2844 (0.2432) loss_giou_dn_2: 0.3965 (0.3463) loss_fgl_dn_2: 1.0865 (1.0629) loss_mal_dn_pre: 0.6958 (0.6805) loss_bbox_dn_pre: 0.3398 (0.3014) loss_giou_dn_pre: 0.4996 (0.4465)\n" + ] + }, + { + "name": 
"stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:05<00:00, 7.06it/s]\n", + "\u001b[32m2025-04-03 21:28:31.416\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 9, 'coco_eval_bbox': 0.7475741830997051}\u001b[0m\n", + "\u001b[32m2025-04-03 21:28:31.417\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 11/30\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.06s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.737\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.926\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.847\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.339\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.535\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.763\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.744\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.830\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.865\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.375\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.685\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.889\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.985\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.966\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": 
[ + "Epoch 11: 100%|██████████| 403/403 [02:42<00:00, 2.48it/s, loss=20.7901]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000200 loss: 20.7901 (19.7131) loss_mal: 0.7749 (0.7595) loss_bbox: 0.2926 (0.2601) loss_giou: 0.3893 (0.3807) loss_fgl: 1.0507 (1.0655) loss_mal_aux_0: 0.7627 (0.8264) loss_bbox_aux_0: 0.2995 (0.2778) loss_giou_aux_0: 0.4171 (0.4119) loss_fgl_aux_0: 1.1098 (1.1213) loss_ddf_aux_0: 0.0929 (0.0970) loss_mal_aux_1: 0.7485 (0.7726) loss_bbox_aux_1: 0.2936 (0.2612) loss_giou_aux_1: 0.3923 (0.3824) loss_fgl_aux_1: 1.0521 (1.0674) loss_ddf_aux_1: 0.0044 (0.0044) loss_mal_pre: 0.7593 (0.8255) loss_bbox_pre: 0.3014 (0.2770) loss_giou_pre: 0.4216 (0.4105) loss_mal_enc_0: 0.8857 (0.9276) loss_bbox_enc_0: 0.3917 (0.3650) loss_giou_enc_0: 0.5637 (0.5315) loss_mal_dn_0: 0.6978 (0.6750) loss_bbox_dn_0: 0.2905 (0.2917) loss_giou_dn_0: 0.4589 (0.4421) loss_fgl_dn_0: 1.1724 (1.1798) loss_ddf_dn_0: 0.2246 (0.2453) loss_mal_dn_1: 0.6040 (0.5751) loss_bbox_dn_1: 0.2393 (0.2406) loss_giou_dn_1: 0.3701 (0.3495) loss_fgl_dn_1: 1.0831 (1.0632) loss_ddf_dn_1: 0.0090 (0.0097) loss_mal_dn_2: 0.6030 (0.5669) loss_bbox_dn_2: 0.2371 (0.2371) loss_giou_dn_2: 0.3660 (0.3444) loss_fgl_dn_2: 1.0753 (1.0591) loss_mal_dn_pre: 0.6973 (0.6744) loss_bbox_dn_pre: 0.2948 (0.2929) loss_giou_dn_pre: 0.4595 (0.4413)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:05<00:00, 6.87it/s]\n", + "\u001b[32m2025-04-03 21:31:19.108\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 9, 'coco_eval_bbox': 0.7475741830997051}\u001b[0m\n", + "\u001b[32m2025-04-03 21:31:19.108\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 12/30\u001b[0m\n" + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.06s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.743\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.928\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.857\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.351\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.550\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.769\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.746\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.836\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.871\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.375\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.720\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.892\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.988\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.968\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Epoch 12: 100%|██████████| 403/403 [02:41<00:00, 2.49it/s, loss=22.2182]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000200 loss: 20.4810 (19.5051) loss_mal: 0.7764 (0.7492) loss_bbox: 0.3034 (0.2511) loss_giou: 0.4024 (0.3757) loss_fgl: 1.0823 (1.0609) loss_mal_aux_0: 0.8008 (0.8168) loss_bbox_aux_0: 0.3222 (0.2667) loss_giou_aux_0: 0.4142 (0.4052) loss_fgl_aux_0: 1.1308 (1.1142) loss_ddf_aux_0: 0.0823 (0.0939) loss_mal_aux_1: 0.7725 (0.7636) loss_bbox_aux_1: 0.2972 (0.2520) loss_giou_aux_1: 0.4005 (0.3774) loss_fgl_aux_1: 
1.0769 (1.0628) loss_ddf_aux_1: 0.0037 (0.0042) loss_mal_pre: 0.7974 (0.8159) loss_bbox_pre: 0.3207 (0.2658) loss_giou_pre: 0.4102 (0.4036) loss_mal_enc_0: 0.8584 (0.9239) loss_bbox_enc_0: 0.4310 (0.3546) loss_giou_enc_0: 0.5432 (0.5348) loss_mal_dn_0: 0.6758 (0.6719) loss_bbox_dn_0: 0.3165 (0.2871) loss_giou_dn_0: 0.4197 (0.4352) loss_fgl_dn_0: 1.1684 (1.1763) loss_ddf_dn_0: 0.2249 (0.2484) loss_mal_dn_1: 0.5664 (0.5710) loss_bbox_dn_1: 0.2950 (0.2356) loss_giou_dn_1: 0.3561 (0.3424) loss_fgl_dn_1: 1.0626 (1.0569) loss_ddf_dn_1: 0.0084 (0.0096) loss_mal_dn_2: 0.5557 (0.5621) loss_bbox_dn_2: 0.2957 (0.2323) loss_giou_dn_2: 0.3553 (0.3372) loss_fgl_dn_2: 1.0616 (1.0525) loss_mal_dn_pre: 0.6738 (0.6712) loss_bbox_dn_pre: 0.3304 (0.2886) loss_giou_dn_pre: 0.4178 (0.4346)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:05<00:00, 7.14it/s]\n", + "\u001b[32m2025-04-03 21:34:06.108\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 9, 'coco_eval_bbox': 0.7475741830997051}\u001b[0m\n", + "\u001b[32m2025-04-03 21:34:06.109\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 13/30\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.07s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.745\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.928\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.866\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.326\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 
0.512\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.772\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.748\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.833\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.865\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.375\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.700\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.885\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.988\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.973\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Epoch 13: 100%|██████████| 403/403 [02:44<00:00, 2.46it/s, loss=21.5508]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000200 loss: 17.5323 (19.6634) loss_mal: 0.6670 (0.7606) loss_bbox: 0.2543 (0.2608) loss_giou: 0.2418 (0.3758) loss_fgl: 1.0425 (1.0617) loss_mal_aux_0: 0.7637 (0.8311) loss_bbox_aux_0: 0.2865 (0.2775) loss_giou_aux_0: 0.3080 (0.4053) loss_fgl_aux_0: 1.0939 (1.1181) loss_ddf_aux_0: 0.1041 (0.0966) loss_mal_aux_1: 0.6592 (0.7758) loss_bbox_aux_1: 0.2368 (0.2619) loss_giou_aux_1: 0.2425 (0.3773) loss_fgl_aux_1: 1.0422 (1.0635) loss_ddf_aux_1: 0.0042 (0.0042) loss_mal_pre: 0.7480 (0.8302) loss_bbox_pre: 0.2819 (0.2769) loss_giou_pre: 0.3007 (0.4041) loss_mal_enc_0: 0.8726 (0.9314) loss_bbox_enc_0: 0.3876 (0.3757) loss_giou_enc_0: 0.4783 (0.5388) loss_mal_dn_0: 0.6567 (0.6719) loss_bbox_dn_0: 0.2829 (0.2938) loss_giou_dn_0: 0.3664 (0.4359) loss_fgl_dn_0: 1.1751 (1.1750) loss_ddf_dn_0: 0.2308 (0.2451) loss_mal_dn_1: 0.5122 (0.5695) loss_bbox_dn_1: 0.2490 (0.2420) loss_giou_dn_1: 0.2446 (0.3440) loss_fgl_dn_1: 1.0578 (1.0565) loss_ddf_dn_1: 0.0085 (0.0093) loss_mal_dn_2: 0.5156 (0.5615) loss_bbox_dn_2: 
0.2490 (0.2387) loss_giou_dn_2: 0.2398 (0.3389) loss_fgl_dn_2: 1.0560 (1.0524) loss_mal_dn_pre: 0.6528 (0.6713) loss_bbox_dn_pre: 0.2824 (0.2948) loss_giou_dn_pre: 0.3619 (0.4353)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:05<00:00, 7.13it/s]\n", + "\u001b[32m2025-04-03 21:36:55.420\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 9, 'coco_eval_bbox': 0.7475741830997051}\u001b[0m\n", + "\u001b[32m2025-04-03 21:36:55.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 14/30\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.08s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.745\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.930\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.853\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.313\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.538\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.771\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.750\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.826\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.859\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.375\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.675\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.882\n", + " Average 
Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.985\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.957\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Epoch 14: 100%|██████████| 403/403 [02:37<00:00, 2.55it/s, loss=20.7375]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000200 loss: 16.9232 (18.9275) loss_mal: 0.6763 (0.7167) loss_bbox: 0.1492 (0.2341) loss_giou: 0.2787 (0.3514) loss_fgl: 1.0302 (1.0538) loss_mal_aux_0: 0.7036 (0.7911) loss_bbox_aux_0: 0.1743 (0.2515) loss_giou_aux_0: 0.3084 (0.3817) loss_fgl_aux_0: 1.0909 (1.1125) loss_ddf_aux_0: 0.0837 (0.1003) loss_mal_aux_1: 0.6875 (0.7358) loss_bbox_aux_1: 0.1526 (0.2353) loss_giou_aux_1: 0.2796 (0.3531) loss_fgl_aux_1: 1.0330 (1.0558) loss_ddf_aux_1: 0.0036 (0.0045) loss_mal_pre: 0.7012 (0.7902) loss_bbox_pre: 0.1770 (0.2507) loss_giou_pre: 0.3061 (0.3801) loss_mal_enc_0: 0.8633 (0.9020) loss_bbox_enc_0: 0.3189 (0.3445) loss_giou_enc_0: 0.4827 (0.5096) loss_mal_dn_0: 0.6299 (0.6583) loss_bbox_dn_0: 0.2121 (0.2689) loss_giou_dn_0: 0.3581 (0.4136) loss_fgl_dn_0: 1.1894 (1.1733) loss_ddf_dn_0: 0.2285 (0.2506) loss_mal_dn_1: 0.5156 (0.5511) loss_bbox_dn_1: 0.1607 (0.2179) loss_giou_dn_1: 0.2551 (0.3225) loss_fgl_dn_1: 1.0211 (1.0479) loss_ddf_dn_1: 0.0090 (0.0095) loss_mal_dn_2: 0.5059 (0.5432) loss_bbox_dn_2: 0.1490 (0.2148) loss_giou_dn_2: 0.2496 (0.3176) loss_fgl_dn_2: 1.0089 (1.0433) loss_mal_dn_pre: 0.6294 (0.6575) loss_bbox_dn_pre: 0.2119 (0.2699) loss_giou_dn_pre: 0.3547 (0.4127)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:04<00:00, 7.31it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.07s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | 
area= all | maxDets=100 ] = 0.751\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.931\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.860\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.307\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.543\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.778\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.753\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.829\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.862\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.362\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.700\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.884\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.982\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.959\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-03 21:39:38.605\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_save_checkpoint\u001b[0m:\u001b[36m613\u001b[0m - \u001b[1mCheckpoint saved to outputs/rock-paper-scissors/deim_hgnetv2_s_30ep_640px_progressive_resizing/best.pth\u001b[0m\n", + "\u001b[32m2025-04-03 21:39:38.607\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m505\u001b[0m - \u001b[1m🏆 NEW BEST MODEL! 
Epoch 14 / mAP: 0.7513420307969209\u001b[0m\n", + "\u001b[32m2025-04-03 21:39:38.608\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 14, 'coco_eval_bbox': 0.7513420307969209}\u001b[0m\n", + "\u001b[32m2025-04-03 21:39:38.608\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 15/30\u001b[0m\n", + "Epoch 15: 100%|██████████| 403/403 [02:40<00:00, 2.51it/s, loss=21.6808]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000198 loss: 20.0360 (19.0545) loss_mal: 0.7422 (0.7207) loss_bbox: 0.2305 (0.2455) loss_giou: 0.3337 (0.3650) loss_fgl: 1.0644 (1.0518) loss_mal_aux_0: 0.8940 (0.7885) loss_bbox_aux_0: 0.2575 (0.2614) loss_giou_aux_0: 0.3601 (0.3924) loss_fgl_aux_0: 1.1190 (1.1062) loss_ddf_aux_0: 0.0839 (0.0890) loss_mal_aux_1: 0.8120 (0.7350) loss_bbox_aux_1: 0.2313 (0.2465) loss_giou_aux_1: 0.3327 (0.3666) loss_fgl_aux_1: 1.0662 (1.0535) loss_ddf_aux_1: 0.0040 (0.0041) loss_mal_pre: 0.8677 (0.7883) loss_bbox_pre: 0.2607 (0.2604) loss_giou_pre: 0.3580 (0.3908) loss_mal_enc_0: 0.9121 (0.8934) loss_bbox_enc_0: 0.3698 (0.3519) loss_giou_enc_0: 0.5661 (0.5200) loss_mal_dn_0: 0.6943 (0.6599) loss_bbox_dn_0: 0.2914 (0.2747) loss_giou_dn_0: 0.4334 (0.4189) loss_fgl_dn_0: 1.1868 (1.1695) loss_ddf_dn_0: 0.2195 (0.2402) loss_mal_dn_1: 0.6060 (0.5551) loss_bbox_dn_1: 0.2290 (0.2246) loss_giou_dn_1: 0.3554 (0.3308) loss_fgl_dn_1: 1.0650 (1.0479) loss_ddf_dn_1: 0.0088 (0.0093) loss_mal_dn_2: 0.5962 (0.5479) loss_bbox_dn_2: 0.2248 (0.2215) loss_giou_dn_2: 0.3522 (0.3260) loss_fgl_dn_2: 1.0636 (1.0439) loss_mal_dn_pre: 0.6934 (0.6591) loss_bbox_dn_pre: 0.2881 (0.2759) loss_giou_dn_pre: 0.4314 (0.4182)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:05<00:00, 7.10it/s]\n" 
+ ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.07s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.756\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.932\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.858\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.307\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.559\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.782\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.756\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.834\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.860\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.350\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.692\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.884\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.982\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.948\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-03 21:42:24.467\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_save_checkpoint\u001b[0m:\u001b[36m613\u001b[0m - \u001b[1mCheckpoint saved to outputs/rock-paper-scissors/deim_hgnetv2_s_30ep_640px_progressive_resizing/best.pth\u001b[0m\n", + "\u001b[32m2025-04-03 21:42:24.469\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m505\u001b[0m - \u001b[1m🏆 NEW BEST MODEL! 
Epoch 15 / mAP: 0.7564426878028832\u001b[0m\n", + "\u001b[32m2025-04-03 21:42:24.469\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 15, 'coco_eval_bbox': 0.7564426878028832}\u001b[0m\n", + "\u001b[32m2025-04-03 21:42:24.470\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 16/30\u001b[0m\n", + "Epoch 16: 100%|██████████| 403/403 [02:30<00:00, 2.67it/s, loss=22.1689]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000193 loss: 17.4362 (18.6813) loss_mal: 0.6055 (0.6961) loss_bbox: 0.2045 (0.2380) loss_giou: 0.2907 (0.3481) loss_fgl: 1.0429 (1.0441) loss_mal_aux_0: 0.6670 (0.7689) loss_bbox_aux_0: 0.2399 (0.2546) loss_giou_aux_0: 0.3235 (0.3763) loss_fgl_aux_0: 1.0792 (1.1019) loss_ddf_aux_0: 0.0946 (0.0941) loss_mal_aux_1: 0.6465 (0.7139) loss_bbox_aux_1: 0.2017 (0.2389) loss_giou_aux_1: 0.2917 (0.3494) loss_fgl_aux_1: 1.0374 (1.0460) loss_ddf_aux_1: 0.0043 (0.0043) loss_mal_pre: 0.6670 (0.7676) loss_bbox_pre: 0.2292 (0.2542) loss_giou_pre: 0.3209 (0.3750) loss_mal_enc_0: 0.8677 (0.8751) loss_bbox_enc_0: 0.2955 (0.3434) loss_giou_enc_0: 0.4520 (0.4984) loss_mal_dn_0: 0.6230 (0.6507) loss_bbox_dn_0: 0.2099 (0.2700) loss_giou_dn_0: 0.3517 (0.4067) loss_fgl_dn_0: 1.1646 (1.1646) loss_ddf_dn_0: 0.2561 (0.2500) loss_mal_dn_1: 0.5171 (0.5424) loss_bbox_dn_1: 0.1655 (0.2195) loss_giou_dn_1: 0.2846 (0.3185) loss_fgl_dn_1: 1.0450 (1.0374) loss_ddf_dn_1: 0.0092 (0.0091) loss_mal_dn_2: 0.5142 (0.5341) loss_bbox_dn_2: 0.1610 (0.2164) loss_giou_dn_2: 0.2800 (0.3139) loss_fgl_dn_2: 1.0420 (1.0328) loss_mal_dn_pre: 0.6211 (0.6498) loss_bbox_dn_pre: 0.2105 (0.2712) loss_giou_dn_pre: 0.3493 (0.4058)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:04<00:00, 
7.42it/s]\n", + "\u001b[32m2025-04-03 21:45:00.249\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 15, 'coco_eval_bbox': 0.7564426878028832}\u001b[0m\n", + "\u001b[32m2025-04-03 21:45:00.250\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 17/30\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.06s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.756\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.933\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.858\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.307\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.560\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.782\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.755\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.841\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.865\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.350\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.692\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.889\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.985\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.951\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Epoch 17: 100%|██████████| 403/403 [02:26<00:00, 2.75it/s, loss=21.8055]\n" + ] + }, + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000185 loss: 18.2861 (18.7120) loss_mal: 0.6323 (0.7151) loss_bbox: 0.1774 (0.2339) loss_giou: 0.3066 (0.3483) loss_fgl: 1.0000 (1.0429) loss_mal_aux_0: 0.6899 (0.7800) loss_bbox_aux_0: 0.2129 (0.2505) loss_giou_aux_0: 0.3484 (0.3763) loss_fgl_aux_0: 1.0679 (1.1003) loss_ddf_aux_0: 0.0839 (0.0938) loss_mal_aux_1: 0.6616 (0.7302) loss_bbox_aux_1: 0.1912 (0.2349) loss_giou_aux_1: 0.3080 (0.3498) loss_fgl_aux_1: 1.0026 (1.0451) loss_ddf_aux_1: 0.0035 (0.0044) loss_mal_pre: 0.6978 (0.7791) loss_bbox_pre: 0.2082 (0.2497) loss_giou_pre: 0.3524 (0.3751) loss_mal_enc_0: 0.8569 (0.8865) loss_bbox_enc_0: 0.2990 (0.3422) loss_giou_enc_0: 0.4672 (0.5002) loss_mal_dn_0: 0.6387 (0.6495) loss_bbox_dn_0: 0.2195 (0.2672) loss_giou_dn_0: 0.3783 (0.4052) loss_fgl_dn_0: 1.1427 (1.1601) loss_ddf_dn_0: 0.2204 (0.2403) loss_mal_dn_1: 0.5347 (0.5451) loss_bbox_dn_1: 0.1787 (0.2181) loss_giou_dn_1: 0.3054 (0.3205) loss_fgl_dn_1: 1.0373 (1.0367) loss_ddf_dn_1: 0.0071 (0.0093) loss_mal_dn_2: 0.5298 (0.5368) loss_bbox_dn_2: 0.1726 (0.2150) loss_giou_dn_2: 0.3061 (0.3159) loss_fgl_dn_2: 1.0327 (1.0324) loss_mal_dn_pre: 0.6406 (0.6489) loss_bbox_dn_pre: 0.2270 (0.2683) loss_giou_dn_pre: 0.3769 (0.4045)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:04<00:00, 7.41it/s]\n", + "\u001b[32m2025-04-03 21:47:31.718\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 15, 'coco_eval_bbox': 0.7564426878028832}\u001b[0m\n", + "\u001b[32m2025-04-03 21:47:31.719\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 18/30\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + 
"COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.06s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.753\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.931\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.865\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.316\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.576\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.779\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.754\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.837\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.857\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.338\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.686\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.882\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.985\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.946\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Epoch 18: 100%|██████████| 403/403 [02:30<00:00, 2.68it/s, loss=19.9531]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000175 loss: 16.3511 (18.7280) loss_mal: 0.5474 (0.7040) loss_bbox: 0.1725 (0.2433) loss_giou: 0.2686 (0.3537) loss_fgl: 1.0324 (1.0429) loss_mal_aux_0: 0.6177 (0.7677) loss_bbox_aux_0: 0.1975 (0.2595) loss_giou_aux_0: 0.3048 (0.3798) loss_fgl_aux_0: 1.0775 (1.0961) loss_ddf_aux_0: 0.0861 (0.0893) loss_mal_aux_1: 0.5933 (0.7193) loss_bbox_aux_1: 0.1745 (0.2444) loss_giou_aux_1: 0.2717 (0.3552) loss_fgl_aux_1: 1.0359 (1.0450) loss_ddf_aux_1: 0.0037 (0.0040) loss_mal_pre: 0.6206 (0.7674) loss_bbox_pre: 0.1987 
(0.2587) loss_giou_pre: 0.3050 (0.3785) loss_mal_enc_0: 0.7915 (0.8763) loss_bbox_enc_0: 0.2600 (0.3441) loss_giou_enc_0: 0.4267 (0.4963) loss_mal_dn_0: 0.6138 (0.6497) loss_bbox_dn_0: 0.2152 (0.2737) loss_giou_dn_0: 0.3277 (0.4067) loss_fgl_dn_0: 1.1507 (1.1617) loss_ddf_dn_0: 0.2418 (0.2424) loss_mal_dn_1: 0.4932 (0.5436) loss_bbox_dn_1: 0.1654 (0.2229) loss_giou_dn_1: 0.2364 (0.3210) loss_fgl_dn_1: 1.0178 (1.0375) loss_ddf_dn_1: 0.0077 (0.0090) loss_mal_dn_2: 0.4805 (0.5354) loss_bbox_dn_2: 0.1619 (0.2196) loss_giou_dn_2: 0.2321 (0.3163) loss_fgl_dn_2: 1.0127 (1.0333) loss_mal_dn_pre: 0.6118 (0.6490) loss_bbox_dn_pre: 0.2158 (0.2749) loss_giou_dn_pre: 0.3273 (0.4059)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:04<00:00, 7.46it/s]\n", + "\u001b[32m2025-04-03 21:50:07.274\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 15, 'coco_eval_bbox': 0.7564426878028832}\u001b[0m\n", + "\u001b[32m2025-04-03 21:50:07.274\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 19/30\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.06s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.751\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.934\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.862\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.316\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.568\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.776\n", + " 
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.754\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.833\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.856\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.350\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.686\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.880\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.985\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.946\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Epoch 19: 100%|██████████| 403/403 [02:27<00:00, 2.74it/s, loss=20.6627]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000163 loss: 18.1969 (18.4216) loss_mal: 0.6689 (0.7059) loss_bbox: 0.1912 (0.2243) loss_giou: 0.2931 (0.3384) loss_fgl: 1.0259 (1.0403) loss_mal_aux_0: 0.7114 (0.7711) loss_bbox_aux_0: 0.1940 (0.2396) loss_giou_aux_0: 0.3187 (0.3641) loss_fgl_aux_0: 1.0951 (1.0946) loss_ddf_aux_0: 0.0787 (0.0923) loss_mal_aux_1: 0.6821 (0.7209) loss_bbox_aux_1: 0.1868 (0.2253) loss_giou_aux_1: 0.2909 (0.3399) loss_fgl_aux_1: 1.0311 (1.0422) loss_ddf_aux_1: 0.0033 (0.0040) loss_mal_pre: 0.7036 (0.7701) loss_bbox_pre: 0.1962 (0.2390) loss_giou_pre: 0.3194 (0.3629) loss_mal_enc_0: 0.8369 (0.8687) loss_bbox_enc_0: 0.3076 (0.3229) loss_giou_enc_0: 0.5046 (0.4853) loss_mal_dn_0: 0.6514 (0.6437) loss_bbox_dn_0: 0.2567 (0.2613) loss_giou_dn_0: 0.3493 (0.3937) loss_fgl_dn_0: 1.1500 (1.1547) loss_ddf_dn_0: 0.2235 (0.2396) loss_mal_dn_1: 0.5435 (0.5383) loss_bbox_dn_1: 0.1929 (0.2129) loss_giou_dn_1: 0.2866 (0.3117) loss_fgl_dn_1: 1.0215 (1.0319) loss_ddf_dn_1: 0.0080 (0.0085) loss_mal_dn_2: 0.5410 (0.5306) loss_bbox_dn_2: 0.1900 (0.2099) loss_giou_dn_2: 0.2844 (0.3071) loss_fgl_dn_2: 1.0180 (1.0279) loss_mal_dn_pre: 
0.6504 (0.6429) loss_bbox_dn_pre: 0.2626 (0.2622) loss_giou_dn_pre: 0.3505 (0.3929)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:04<00:00, 7.44it/s]\n", + "\u001b[32m2025-04-03 21:52:39.404\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 15, 'coco_eval_bbox': 0.7564426878028832}\u001b[0m\n", + "\u001b[32m2025-04-03 21:52:39.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 20/30\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.06s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.754\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.936\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.867\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.316\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.570\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.779\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.755\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.831\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.858\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.350\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.694\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.881\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.985\n", + " Average Recall (AR) @[ 
IoU=0.75 | area= all | maxDets=100 ] = 0.951\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Epoch 20: 100%|██████████| 403/403 [02:29<00:00, 2.69it/s, loss=15.8849]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000150 loss: 15.8849 (17.9574) loss_mal: 0.6152 (0.6703) loss_bbox: 0.1689 (0.2214) loss_giou: 0.2844 (0.3260) loss_fgl: 1.0422 (1.0251) loss_mal_aux_0: 0.6802 (0.7370) loss_bbox_aux_0: 0.1987 (0.2367) loss_giou_aux_0: 0.3054 (0.3513) loss_fgl_aux_0: 1.0892 (1.0805) loss_ddf_aux_0: 0.0794 (0.0926) loss_mal_aux_1: 0.5957 (0.6852) loss_bbox_aux_1: 0.1696 (0.2225) loss_giou_aux_1: 0.2902 (0.3274) loss_fgl_aux_1: 1.0394 (1.0270) loss_ddf_aux_1: 0.0035 (0.0039) loss_mal_pre: 0.6787 (0.7363) loss_bbox_pre: 0.1947 (0.2364) loss_giou_pre: 0.3031 (0.3501) loss_mal_enc_0: 0.7681 (0.8416) loss_bbox_enc_0: 0.2533 (0.3107) loss_giou_enc_0: 0.3954 (0.4547) loss_mal_dn_0: 0.6099 (0.6318) loss_bbox_dn_0: 0.1812 (0.2565) loss_giou_dn_0: 0.3357 (0.3808) loss_fgl_dn_0: 1.1353 (1.1453) loss_ddf_dn_0: 0.2253 (0.2443) loss_mal_dn_1: 0.5024 (0.5243) loss_bbox_dn_1: 0.1449 (0.2084) loss_giou_dn_1: 0.2589 (0.3009) loss_fgl_dn_1: 1.0256 (1.0184) loss_ddf_dn_1: 0.0074 (0.0083) loss_mal_dn_2: 0.4910 (0.5162) loss_bbox_dn_2: 0.1413 (0.2053) loss_giou_dn_2: 0.2552 (0.2966) loss_fgl_dn_2: 1.0217 (1.0144) loss_mal_dn_pre: 0.6069 (0.6311) loss_bbox_dn_pre: 0.1849 (0.2579) loss_giou_dn_pre: 0.3324 (0.3801)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:04<00:00, 7.36it/s]\n", + "\u001b[32m2025-04-03 21:55:13.943\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 15, 'coco_eval_bbox': 0.7564426878028832}\u001b[0m\n", + "\u001b[32m2025-04-03 21:55:13.943\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 21/30\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.07s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.755\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.938\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.866\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.316\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.580\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.780\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.756\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.831\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.856\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.350\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.688\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.879\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.985\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.948\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Epoch 21: 100%|██████████| 403/403 [02:26<00:00, 2.75it/s, loss=14.8098]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000137 loss: 15.9672 (17.8664) loss_mal: 0.5620 (0.6703) loss_bbox: 0.1941 (0.2187) loss_giou: 0.3436 (0.3258) loss_fgl: 0.9852 (1.0242) loss_mal_aux_0: 0.6499 (0.7328) loss_bbox_aux_0: 0.2008 (0.2317) loss_giou_aux_0: 0.3679 (0.3487) loss_fgl_aux_0: 1.0657 
(1.0767) loss_ddf_aux_0: 0.0840 (0.0842) loss_mal_aux_1: 0.5688 (0.6861) loss_bbox_aux_1: 0.1942 (0.2196) loss_giou_aux_1: 0.3412 (0.3272) loss_fgl_aux_1: 0.9893 (1.0258) loss_ddf_aux_1: 0.0033 (0.0036) loss_mal_pre: 0.6470 (0.7321) loss_bbox_pre: 0.2015 (0.2313) loss_giou_pre: 0.3696 (0.3478) loss_mal_enc_0: 0.8413 (0.8400) loss_bbox_enc_0: 0.2617 (0.3086) loss_giou_enc_0: 0.4640 (0.4593) loss_mal_dn_0: 0.5835 (0.6294) loss_bbox_dn_0: 0.2006 (0.2512) loss_giou_dn_0: 0.3600 (0.3773) loss_fgl_dn_0: 1.1325 (1.1426) loss_ddf_dn_0: 0.2502 (0.2373) loss_mal_dn_1: 0.4639 (0.5221) loss_bbox_dn_1: 0.1561 (0.2046) loss_giou_dn_1: 0.2814 (0.2988) loss_fgl_dn_1: 0.9738 (1.0178) loss_ddf_dn_1: 0.0074 (0.0079) loss_mal_dn_2: 0.4595 (0.5145) loss_bbox_dn_2: 0.1547 (0.2017) loss_giou_dn_2: 0.2787 (0.2947) loss_fgl_dn_2: 0.9716 (1.0139) loss_mal_dn_pre: 0.5806 (0.6287) loss_bbox_dn_pre: 0.1988 (0.2527) loss_giou_dn_pre: 0.3548 (0.3769)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:04<00:00, 7.58it/s]\n", + "\u001b[32m2025-04-03 21:57:45.447\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 15, 'coco_eval_bbox': 0.7564426878028832}\u001b[0m\n", + "\u001b[32m2025-04-03 21:57:45.448\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 22/30\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.07s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.753\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.936\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.867\n", + " 
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.316\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.561\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.778\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.752\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.830\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.854\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.350\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.698\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.877\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.988\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.946\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Epoch 22: 100%|██████████| 403/403 [02:44<00:00, 2.45it/s, loss=13.3271]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000125 loss: 15.3005 (17.6047) loss_mal: 0.5840 (0.6581) loss_bbox: 0.1579 (0.2156) loss_giou: 0.2382 (0.3184) loss_fgl: 0.9761 (1.0170) loss_mal_aux_0: 0.6548 (0.7240) loss_bbox_aux_0: 0.2000 (0.2300) loss_giou_aux_0: 0.2617 (0.3428) loss_fgl_aux_0: 1.0290 (1.0698) loss_ddf_aux_0: 0.0954 (0.0867) loss_mal_aux_1: 0.5674 (0.6728) loss_bbox_aux_1: 0.1583 (0.2164) loss_giou_aux_1: 0.2421 (0.3197) loss_fgl_aux_1: 0.9732 (1.0187) loss_ddf_aux_1: 0.0041 (0.0036) loss_mal_pre: 0.6621 (0.7222) loss_bbox_pre: 0.1989 (0.2299) loss_giou_pre: 0.2676 (0.3418) loss_mal_enc_0: 0.8140 (0.8223) loss_bbox_enc_0: 0.2339 (0.2981) loss_giou_enc_0: 0.3836 (0.4388) loss_mal_dn_0: 0.5747 (0.6217) loss_bbox_dn_0: 0.2135 (0.2436) loss_giou_dn_0: 0.2814 (0.3667) loss_fgl_dn_0: 1.1162 (1.1347) loss_ddf_dn_0: 0.2533 (0.2431) loss_mal_dn_1: 0.4475 (0.5143) loss_bbox_dn_1: 
0.1405 (0.1981) loss_giou_dn_1: 0.2058 (0.2909) loss_fgl_dn_1: 0.9721 (1.0104) loss_ddf_dn_1: 0.0079 (0.0074) loss_mal_dn_2: 0.4463 (0.5071) loss_bbox_dn_2: 0.1384 (0.1954) loss_giou_dn_2: 0.2031 (0.2869) loss_fgl_dn_2: 0.9651 (1.0064) loss_mal_dn_pre: 0.5737 (0.6209) loss_bbox_dn_pre: 0.2204 (0.2448) loss_giou_dn_pre: 0.2803 (0.3660)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:05<00:00, 6.92it/s]\n", + "\u001b[32m2025-04-03 22:00:35.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 15, 'coco_eval_bbox': 0.7564426878028832}\u001b[0m\n", + "\u001b[32m2025-04-03 22:00:35.473\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 23/30\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.08s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.755\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.938\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.869\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.316\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.568\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.781\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.753\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.826\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.853\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.350\n", + " Average Recall (AR) 
@[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.691\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.876\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.988\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.946\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Epoch 23: 100%|██████████| 403/403 [02:45<00:00, 2.43it/s, loss=18.9006]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000115 loss: 15.4950 (17.2954) loss_mal: 0.5771 (0.6394) loss_bbox: 0.1661 (0.2108) loss_giou: 0.2864 (0.3120) loss_fgl: 0.9641 (1.0105) loss_mal_aux_0: 0.6587 (0.7040) loss_bbox_aux_0: 0.2104 (0.2241) loss_giou_aux_0: 0.3072 (0.3356) loss_fgl_aux_0: 1.0514 (1.0651) loss_ddf_aux_0: 0.0917 (0.0865) loss_mal_aux_1: 0.5972 (0.6520) loss_bbox_aux_1: 0.1762 (0.2118) loss_giou_aux_1: 0.2892 (0.3137) loss_fgl_aux_1: 0.9736 (1.0128) loss_ddf_aux_1: 0.0036 (0.0039) loss_mal_pre: 0.6450 (0.7037) loss_bbox_pre: 0.2070 (0.2230) loss_giou_pre: 0.3055 (0.3343) loss_mal_enc_0: 0.7852 (0.8005) loss_bbox_enc_0: 0.2726 (0.2845) loss_giou_enc_0: 0.4349 (0.4246) loss_mal_dn_0: 0.5977 (0.6159) loss_bbox_dn_0: 0.2266 (0.2357) loss_giou_dn_0: 0.3033 (0.3573) loss_fgl_dn_0: 1.1055 (1.1292) loss_ddf_dn_0: 0.2253 (0.2349) loss_mal_dn_1: 0.4675 (0.5062) loss_bbox_dn_1: 0.1612 (0.1917) loss_giou_dn_1: 0.2399 (0.2834) loss_fgl_dn_1: 0.9780 (1.0039) loss_ddf_dn_1: 0.0071 (0.0079) loss_mal_dn_2: 0.4612 (0.4989) loss_bbox_dn_2: 0.1524 (0.1887) loss_giou_dn_2: 0.2387 (0.2792) loss_fgl_dn_2: 0.9750 (0.9998) loss_mal_dn_pre: 0.5938 (0.6153) loss_bbox_dn_pre: 0.2313 (0.2375) loss_giou_dn_pre: 0.3031 (0.3569)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:05<00:00, 6.61it/s]\n", + "\u001b[32m2025-04-03 22:03:26.898\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 15, 'coco_eval_bbox': 0.7564426878028832}\u001b[0m\n", + "\u001b[32m2025-04-03 22:03:26.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 24/30\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.06s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.752\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.937\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.864\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.316\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.562\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.777\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.752\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.826\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.851\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.350\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.664\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.875\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.983\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.946\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Epoch 24: 100%|██████████| 403/403 [02:37<00:00, 2.56it/s, loss=15.2482]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000107 
loss: 15.8278 (17.0718) loss_mal: 0.5669 (0.6287) loss_bbox: 0.1428 (0.2037) loss_giou: 0.2277 (0.3085) loss_fgl: 0.9997 (1.0002) loss_mal_aux_0: 0.6738 (0.6925) loss_bbox_aux_0: 0.1719 (0.2170) loss_giou_aux_0: 0.2584 (0.3308) loss_fgl_aux_0: 1.0436 (1.0515) loss_ddf_aux_0: 0.0775 (0.0834) loss_mal_aux_1: 0.5845 (0.6435) loss_bbox_aux_1: 0.1470 (0.2045) loss_giou_aux_1: 0.2347 (0.3097) loss_fgl_aux_1: 1.0003 (1.0022) loss_ddf_aux_1: 0.0031 (0.0038) loss_mal_pre: 0.6709 (0.6923) loss_bbox_pre: 0.1753 (0.2168) loss_giou_pre: 0.2599 (0.3297) loss_mal_enc_0: 0.7651 (0.7922) loss_bbox_enc_0: 0.2583 (0.2774) loss_giou_enc_0: 0.3557 (0.4185) loss_mal_dn_0: 0.5918 (0.6082) loss_bbox_dn_0: 0.1973 (0.2331) loss_giou_dn_0: 0.2981 (0.3533) loss_fgl_dn_0: 1.1224 (1.1220) loss_ddf_dn_0: 0.2304 (0.2345) loss_mal_dn_1: 0.4617 (0.5003) loss_bbox_dn_1: 0.1570 (0.1887) loss_giou_dn_1: 0.2270 (0.2808) loss_fgl_dn_1: 0.9893 (0.9949) loss_ddf_dn_1: 0.0069 (0.0076) loss_mal_dn_2: 0.4626 (0.4926) loss_bbox_dn_2: 0.1535 (0.1859) loss_giou_dn_2: 0.2246 (0.2769) loss_fgl_dn_2: 0.9935 (0.9909) loss_mal_dn_pre: 0.5898 (0.6075) loss_bbox_dn_pre: 0.2047 (0.2349) loss_giou_dn_pre: 0.2972 (0.3529)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:04<00:00, 7.50it/s]\n", + "\u001b[32m2025-04-03 22:06:09.330\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 15, 'coco_eval_bbox': 0.7564426878028832}\u001b[0m\n", + "\u001b[32m2025-04-03 22:06:09.330\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 25/30\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.06s).\n", + "IoU metric: bbox\n", 
+ " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.755\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.939\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.866\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.316\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.558\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.780\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.753\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.827\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.849\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.350\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.664\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.873\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.983\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.946\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Epoch 25: 100%|██████████| 403/403 [02:28<00:00, 2.71it/s, loss=12.6402]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000102 loss: 14.0983 (16.9975) loss_mal: 0.4976 (0.6265) loss_bbox: 0.1304 (0.2037) loss_giou: 0.2113 (0.3053) loss_fgl: 0.9336 (0.9998) loss_mal_aux_0: 0.5562 (0.6901) loss_bbox_aux_0: 0.1439 (0.2162) loss_giou_aux_0: 0.2265 (0.3264) loss_fgl_aux_0: 0.9912 (1.0482) loss_ddf_aux_0: 0.0758 (0.0789) loss_mal_aux_1: 0.4983 (0.6391) loss_bbox_aux_1: 0.1308 (0.2045) loss_giou_aux_1: 0.2137 (0.3064) loss_fgl_aux_1: 0.9360 (1.0017) loss_ddf_aux_1: 0.0031 (0.0033) loss_mal_pre: 0.5547 (0.6894) loss_bbox_pre: 0.1424 (0.2160) loss_giou_pre: 0.2267 (0.3254) loss_mal_enc_0: 0.7144 (0.7895) loss_bbox_enc_0: 0.1816 
(0.2741) loss_giou_enc_0: 0.3255 (0.4138) loss_mal_dn_0: 0.5479 (0.6070) loss_bbox_dn_0: 0.1669 (0.2331) loss_giou_dn_0: 0.2627 (0.3503) loss_fgl_dn_0: 1.0858 (1.1176) loss_ddf_dn_0: 0.2313 (0.2327) loss_mal_dn_1: 0.4246 (0.4978) loss_bbox_dn_1: 0.1198 (0.1900) loss_giou_dn_1: 0.1872 (0.2793) loss_fgl_dn_1: 0.9241 (0.9917) loss_ddf_dn_1: 0.0069 (0.0073) loss_mal_dn_2: 0.4172 (0.4905) loss_bbox_dn_2: 0.1178 (0.1873) loss_giou_dn_2: 0.1860 (0.2755) loss_fgl_dn_2: 0.9183 (0.9877) loss_mal_dn_pre: 0.5483 (0.6064) loss_bbox_dn_pre: 0.1665 (0.2348) loss_giou_dn_pre: 0.2616 (0.3500)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:04<00:00, 7.40it/s]\n", + "\u001b[32m2025-04-03 22:08:43.054\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 15, 'coco_eval_bbox': 0.7564426878028832}\u001b[0m\n", + "\u001b[32m2025-04-03 22:08:43.055\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 26/30\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.06s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.756\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.939\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.871\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.316\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.563\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.782\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.754\n", + " Average Recall 
(AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.825\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.847\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.350\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.664\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.870\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.983\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.944\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Epoch 26: 100%|██████████| 403/403 [02:30<00:00, 2.68it/s, loss=14.8032]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000100 loss: 18.7728 (16.8171) loss_mal: 0.7183 (0.6201) loss_bbox: 0.2123 (0.1957) loss_giou: 0.3541 (0.3014) loss_fgl: 1.0298 (0.9948) loss_mal_aux_0: 0.7910 (0.6776) loss_bbox_aux_0: 0.2346 (0.2073) loss_giou_aux_0: 0.3762 (0.3223) loss_fgl_aux_0: 1.0804 (1.0450) loss_ddf_aux_0: 0.0826 (0.0800) loss_mal_aux_1: 0.7422 (0.6333) loss_bbox_aux_1: 0.2165 (0.1965) loss_giou_aux_1: 0.3545 (0.3028) loss_fgl_aux_1: 1.0304 (0.9970) loss_ddf_aux_1: 0.0039 (0.0035) loss_mal_pre: 0.7905 (0.6778) loss_bbox_pre: 0.2322 (0.2072) loss_giou_pre: 0.3775 (0.3215) loss_mal_enc_0: 0.8286 (0.7789) loss_bbox_enc_0: 0.2908 (0.2659) loss_giou_enc_0: 0.4605 (0.4109) loss_mal_dn_0: 0.6646 (0.6029) loss_bbox_dn_0: 0.2548 (0.2275) loss_giou_dn_0: 0.4193 (0.3466) loss_fgl_dn_0: 1.1236 (1.1164) loss_ddf_dn_0: 0.2232 (0.2355) loss_mal_dn_1: 0.5469 (0.4930) loss_bbox_dn_1: 0.2211 (0.1844) loss_giou_dn_1: 0.3367 (0.2752) loss_fgl_dn_1: 1.0245 (0.9881) loss_ddf_dn_1: 0.0085 (0.0075) loss_mal_dn_2: 0.5444 (0.4859) loss_bbox_dn_2: 0.2221 (0.1818) loss_giou_dn_2: 0.3290 (0.2712) loss_fgl_dn_2: 1.0243 (0.9838) loss_mal_dn_pre: 0.6636 (0.6024) loss_bbox_dn_pre: 0.2575 (0.2288) loss_giou_dn_pre: 0.4182 (0.3462)\n" + ] + }, + { 
+ "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:04<00:00, 7.43it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.06s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.758\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.939\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.872\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.316\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.568\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.783\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.755\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.827\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.848\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.350\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.664\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.872\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.983\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.944\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-03 22:11:18.511\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_save_checkpoint\u001b[0m:\u001b[36m613\u001b[0m - \u001b[1mCheckpoint saved to outputs/rock-paper-scissors/deim_hgnetv2_s_30ep_640px_progressive_resizing/best.pth\u001b[0m\n", + "\u001b[32m2025-04-03 22:11:18.512\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m505\u001b[0m - \u001b[1m🏆 NEW BEST MODEL! Epoch 26 / mAP: 0.7580810398692926\u001b[0m\n", + "\u001b[32m2025-04-03 22:11:18.513\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 26, 'coco_eval_bbox': 0.7580810398692926}\u001b[0m\n", + "\u001b[32m2025-04-03 22:11:18.514\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 27/30\u001b[0m\n", + "Epoch 27: 100%|██████████| 403/403 [02:31<00:00, 2.67it/s, loss=15.8156]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000100 loss: 18.5853 (16.9391) loss_mal: 0.7373 (0.6309) loss_bbox: 0.1907 (0.2015) loss_giou: 0.3578 (0.3068) loss_fgl: 1.0085 (0.9986) loss_mal_aux_0: 0.7598 (0.6869) loss_bbox_aux_0: 0.1908 (0.2129) loss_giou_aux_0: 0.3862 (0.3273) loss_fgl_aux_0: 1.0392 (1.0451) loss_ddf_aux_0: 0.0691 (0.0782) loss_mal_aux_1: 0.7588 (0.6445) loss_bbox_aux_1: 0.1888 (0.2025) loss_giou_aux_1: 0.3602 (0.3082) loss_fgl_aux_1: 1.0063 (1.0007) loss_ddf_aux_1: 0.0033 (0.0038) loss_mal_pre: 0.7471 (0.6864) loss_bbox_pre: 0.1935 (0.2127) loss_giou_pre: 0.3794 (0.3265) loss_mal_enc_0: 0.7998 (0.7767) loss_bbox_enc_0: 0.2983 (0.2668) loss_giou_enc_0: 0.4626 (0.4095) loss_mal_dn_0: 0.6548 (0.6052) loss_bbox_dn_0: 0.2384 (0.2298) loss_giou_dn_0: 0.4170 (0.3494) loss_fgl_dn_0: 1.1262 (1.1150) loss_ddf_dn_0: 0.2239 (0.2335) loss_mal_dn_1: 0.5566 (0.4975) loss_bbox_dn_1: 0.1921 (0.1864) loss_giou_dn_1: 0.3427 (0.2787) loss_fgl_dn_1: 1.0288 (0.9899) loss_ddf_dn_1: 0.0073 (0.0078) loss_mal_dn_2: 0.5459 (0.4905) loss_bbox_dn_2: 0.1905 (0.1838) loss_giou_dn_2: 0.3364 (0.2746) loss_fgl_dn_2: 1.0277 (0.9858) loss_mal_dn_pre: 0.6548 (0.6046) loss_bbox_dn_pre: 0.2427 (0.2314) loss_giou_dn_pre: 0.4149 (0.3489)\n" + ] + }, + { + "name": 
"stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:04<00:00, 7.26it/s]\n", + "\u001b[32m2025-04-03 22:13:54.596\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 26, 'coco_eval_bbox': 0.7580810398692926}\u001b[0m\n", + "\u001b[32m2025-04-03 22:13:54.596\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 28/30\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.08s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.758\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.938\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.871\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.316\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.567\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.783\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.755\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.826\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.847\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.350\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.663\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.871\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.985\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.944\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": 
[ + "Epoch 28: 100%|██████████| 403/403 [02:29<00:00, 2.69it/s, loss=17.8688]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000100 loss: 17.8688 (16.7956) loss_mal: 0.7007 (0.6240) loss_bbox: 0.2133 (0.1999) loss_giou: 0.3395 (0.3005) loss_fgl: 0.9896 (0.9952) loss_mal_aux_0: 0.7612 (0.6772) loss_bbox_aux_0: 0.2178 (0.2117) loss_giou_aux_0: 0.3528 (0.3200) loss_fgl_aux_0: 1.0348 (1.0418) loss_ddf_aux_0: 0.0771 (0.0751) loss_mal_aux_1: 0.7109 (0.6367) loss_bbox_aux_1: 0.2150 (0.2007) loss_giou_aux_1: 0.3411 (0.3017) loss_fgl_aux_1: 0.9957 (0.9971) loss_ddf_aux_1: 0.0034 (0.0036) loss_mal_pre: 0.7578 (0.6773) loss_bbox_pre: 0.2143 (0.2115) loss_giou_pre: 0.3516 (0.3191) loss_mal_enc_0: 0.8091 (0.7760) loss_bbox_enc_0: 0.2516 (0.2659) loss_giou_enc_0: 0.4302 (0.4007) loss_mal_dn_0: 0.6284 (0.6016) loss_bbox_dn_0: 0.2574 (0.2282) loss_giou_dn_0: 0.3779 (0.3434) loss_fgl_dn_0: 1.1205 (1.1121) loss_ddf_dn_0: 0.2222 (0.2298) loss_mal_dn_1: 0.5493 (0.4934) loss_bbox_dn_1: 0.2188 (0.1854) loss_giou_dn_1: 0.3137 (0.2743) loss_fgl_dn_1: 1.0091 (0.9871) loss_ddf_dn_1: 0.0085 (0.0073) loss_mal_dn_2: 0.5405 (0.4862) loss_bbox_dn_2: 0.2111 (0.1828) loss_giou_dn_2: 0.3094 (0.2705) loss_fgl_dn_2: 1.0063 (0.9833) loss_mal_dn_pre: 0.6299 (0.6011) loss_bbox_dn_pre: 0.2571 (0.2301) loss_giou_dn_pre: 0.3792 (0.3431)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:05<00:00, 7.19it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.07s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.760\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.939\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.872\n", + " Average 
Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.316\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.567\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.785\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.757\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.828\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.850\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.350\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.674\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.874\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.985\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.951\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-03 22:16:29.835\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36m_save_checkpoint\u001b[0m:\u001b[36m613\u001b[0m - \u001b[1mCheckpoint saved to outputs/rock-paper-scissors/deim_hgnetv2_s_30ep_640px_progressive_resizing/best.pth\u001b[0m\n", + "\u001b[32m2025-04-03 22:16:29.838\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m505\u001b[0m - \u001b[1m🏆 NEW BEST MODEL! 
Epoch 28 / mAP: 0.7595313026974617\u001b[0m\n", + "\u001b[32m2025-04-03 22:16:29.839\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 28, 'coco_eval_bbox': 0.7595313026974617}\u001b[0m\n", + "\u001b[32m2025-04-03 22:16:29.840\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m422\u001b[0m - \u001b[1mEpoch 29/30\u001b[0m\n", + "Epoch 29: 100%|██████████| 403/403 [02:43<00:00, 2.47it/s, loss=16.3756]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: lr: 0.000100 loss: 17.9363 (16.6991) loss_mal: 0.7114 (0.6088) loss_bbox: 0.1932 (0.1989) loss_giou: 0.3335 (0.2993) loss_fgl: 1.0021 (0.9955) loss_mal_aux_0: 0.7686 (0.6703) loss_bbox_aux_0: 0.1977 (0.2107) loss_giou_aux_0: 0.3601 (0.3190) loss_fgl_aux_0: 1.0700 (1.0406) loss_ddf_aux_0: 0.0617 (0.0721) loss_mal_aux_1: 0.7119 (0.6253) loss_bbox_aux_1: 0.1925 (0.1998) loss_giou_aux_1: 0.3370 (0.3007) loss_fgl_aux_1: 1.0055 (0.9974) loss_ddf_aux_1: 0.0035 (0.0036) loss_mal_pre: 0.7676 (0.6700) loss_bbox_pre: 0.1955 (0.2103) loss_giou_pre: 0.3634 (0.3180) loss_mal_enc_0: 0.8315 (0.7700) loss_bbox_enc_0: 0.2532 (0.2628) loss_giou_enc_0: 0.4028 (0.3990) loss_mal_dn_0: 0.6177 (0.5992) loss_bbox_dn_0: 0.2262 (0.2257) loss_giou_dn_0: 0.3957 (0.3416) loss_fgl_dn_0: 1.1261 (1.1122) loss_ddf_dn_0: 0.2142 (0.2252) loss_mal_dn_1: 0.5283 (0.4898) loss_bbox_dn_1: 0.1945 (0.1828) loss_giou_dn_1: 0.3228 (0.2721) loss_fgl_dn_1: 1.0103 (0.9879) loss_ddf_dn_1: 0.0067 (0.0071) loss_mal_dn_2: 0.5278 (0.4830) loss_bbox_dn_2: 0.1927 (0.1803) loss_giou_dn_2: 0.3187 (0.2686) loss_fgl_dn_2: 1.0059 (0.9840) loss_mal_dn_pre: 0.6167 (0.5986) loss_bbox_dn_pre: 0.2250 (0.2274) loss_giou_dn_pre: 0.3939 (0.3414)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Evaluating: 100%|██████████| 36/36 [00:05<00:00, 
7.04it/s]\n", + "\u001b[32m2025-04-03 22:19:18.228\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m544\u001b[0m - \u001b[1m✅ Current best stats: {'epoch': 28, 'coco_eval_bbox': 0.7595313026974617}\u001b[0m\n", + "\u001b[32m2025-04-03 22:19:18.228\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m555\u001b[0m - \u001b[1mTraining completed in 1:17:20\u001b[0m\n", + "\u001b[32m2025-04-03 22:19:18.229\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdeimkit.trainer\u001b[0m:\u001b[36mfit\u001b[0m:\u001b[36m556\u001b[0m - \u001b[1mBest stats: {'epoch': 28, 'coco_eval_bbox': 0.7595313026974617}\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Averaged stats: \n", + "Accumulating evaluation results...\n", + "COCOeval_opt.accumulate() finished...\n", + "DONE (t=0.07s).\n", + "IoU metric: bbox\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.758\n", + " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.938\n", + " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.870\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.316\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.567\n", + " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.784\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.756\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.826\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.849\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.350\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.674\n", + " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.873\n", + " Average Recall (AR) @[ IoU=0.50 | area= all | 
maxDets=100 ] = 0.985\n", + " Average Recall (AR) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.948\n" + ] + } + ], + "source": [ + "from deimkit import Config, Trainer, configure_dataset, configure_model\n", + "\n", + "conf = Config.from_model_name(\"deim_hgnetv2_s\")\n", + "\n", + "conf = configure_model(conf, num_queries=100, freeze_at=0, pretrained=True)\n", + "\n", + "conf = configure_dataset(\n", + " config=conf,\n", + " image_size=(640, 640),\n", + " train_ann_file=\"/home/dnth/Desktop/DEIMKit/dataset_collections/Rock Paper Scissors SXSW.v14i.coco/train/_annotations.coco.json\",\n", + " train_img_folder=\"/home/dnth/Desktop/DEIMKit/dataset_collections/Rock Paper Scissors SXSW.v14i.coco/train\",\n", + " val_ann_file=\"/home/dnth/Desktop/DEIMKit/dataset_collections/Rock Paper Scissors SXSW.v14i.coco/valid/_annotations.coco.json\",\n", + " val_img_folder=\"/home/dnth/Desktop/DEIMKit/dataset_collections/Rock Paper Scissors SXSW.v14i.coco/valid\",\n", + " train_batch_size=16,\n", + " val_batch_size=16,\n", + " num_classes=4,\n", + " remap_mscoco=False,\n", + " output_dir=\"./outputs/rock-paper-scissors/deim_hgnetv2_s_30ep_640px_progressive_resizing\",\n", + ")\n", + "\n", + "trainer = Trainer(conf)\n", + "\n", + "trainer.load_checkpoint(\"./outputs/rock-paper-scissors/deim_hgnetv2_s_10ep_320px/best.pth\")\n", + "trainer.fit(epochs=30, save_best_only=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cuda", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/scripts/live_inference.py b/scripts/live_inference.py index 
3070b4f6..3b911bb2 100644 --- a/scripts/live_inference.py +++ b/scripts/live_inference.py @@ -1,5 +1,6 @@ import colorsys import time +from typing import Optional import cv2 import numpy as np @@ -19,9 +20,26 @@ def generate_colors(num_classes): def draw_boxes( - image, labels, boxes, scores, ratio, padding, threshold=0.3, class_names=None -): - """Draw bounding boxes on the image.""" + image: np.ndarray, + labels: np.ndarray, + boxes: np.ndarray, + scores: np.ndarray, + threshold: float = 0.3, + class_names: Optional[list[str]] = None, +) -> np.ndarray: + """Draw bounding boxes on the image with detection results. + + Args: + image: Input image array in BGR format + labels: Array of class labels + boxes: Array of bounding box coordinates [x1, y1, x2, y2] + scores: Array of confidence scores + threshold: Minimum confidence threshold for displaying detections + class_names: Optional list of class names for labels + + Returns: + np.ndarray: Image with drawn bounding boxes in BGR format + """ # Generate colors for classes num_classes = len(class_names) if class_names else 91 colors = generate_colors(num_classes) @@ -92,10 +110,20 @@ def draw_boxes( return image -def run_inference( - model_path, image_path, class_names_path=None, threshold=0.3, provider="cpu" -): - # Set up providers based on selection +def load_model(model_path: str, provider: str = "cpu") -> ort.InferenceSession: + """Initialize and load the ONNX model with specified provider. 
+ + Args: + model_path: Path to the ONNX model file + provider: Provider to use for inference ("cpu", "cuda", or "tensorrt") + + Returns: + ort.InferenceSession: Initialized ONNX Runtime session + + Raises: + RuntimeError: If model loading fails with specified provider + """ + if provider == "cpu": providers = ["CPUExecutionProvider"] elif provider == "cuda": @@ -116,7 +144,7 @@ def run_inference( ( "TensorrtExecutionProvider", { - "trt_fp16_enable": True, + "trt_fp16_enable": False, "trt_engine_cache_enable": True, "trt_engine_cache_path": "./trt_cache", "trt_timing_cache_enable": True, @@ -124,17 +152,41 @@ def run_inference( ), "CPUExecutionProvider", ] + else: + raise ValueError(f"Unsupported provider: {provider}") try: print(f"Loading ONNX model with providers: {providers}...") session = ort.InferenceSession(model_path, providers=providers) print(f"Using provider: {session.get_providers()[0]}") + return session except Exception as e: print(f"Error creating inference session with providers {providers}: {e}") print("Attempting to fall back to CPU execution...") session = ort.InferenceSession(model_path, providers=["CPUExecutionProvider"]) + return session + - # Load class names if provided +def run_inference( + model_path: str, + input_source: str | int, # Can be image path, video path, or camera index + class_names_path: str | None = None, + threshold: float = 0.3, + provider: str = "cpu", + inference_size: int = 640, # renamed from video_width +) -> None: + """Run object detection on images or video streams. + + Args: + model_path: Path to the ONNX model file + input_source: Path to image/video file or camera index (usually 0 for webcam) + class_names_path: Optional path to class names file + threshold: Detection confidence threshold + provider: ONNXRuntime provider ("cpu", "cuda", "tensorrt") + inference_size: Size for inference processing (default: 640). 
Larger dimension will be scaled to this size while preserving aspect ratio + """ + # Load model and class names + session = load_model(model_path, provider) class_names = None if class_names_path: try: @@ -144,25 +196,140 @@ def run_inference( except Exception as e: print(f"Error loading class names: {e}") - # Load image - image = cv2.imread(image_path) # Load as BGR - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # Convert BGR to RGB - original_image = image.copy() + # Determine if input is image file or video source + if isinstance(input_source, str) and any( + input_source.lower().endswith(ext) + for ext in ['.jpg', '.jpeg', '.png', '.bmp'] + ): + # Handle single image + image = cv2.imread(input_source) + if image is None: + raise RuntimeError(f"Failed to load image: {input_source}") + + result = process_frame( + image, + session, + class_names, + threshold, + inference_size + ) + + # Save and display result + output_path = "detection_result.jpg" + cv2.imwrite(output_path, result) + print(f"Detection complete. 
Result saved to {output_path}") + + cv2.imshow("Detection Result", result) + cv2.waitKey(0) + cv2.destroyAllWindows() + + else: + # Handle video/webcam + cap = cv2.VideoCapture(input_source) + if not cap.isOpened(): + raise RuntimeError(f"Failed to open video source: {input_source}") + + # Configure video capture + if isinstance(input_source, int): # Webcam settings + cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc("M", "J", "P", "G")) + cap.set(cv2.CAP_PROP_FPS, 100) + cap.set(cv2.CAP_PROP_FRAME_WIDTH, inference_size) + cap.set(cv2.CAP_PROP_FRAME_HEIGHT, int(inference_size * 9 / 16)) + + prev_time = time.time() + fps_display = 0 + show_overlay = True + try: + while True: + ret, frame = cap.read() + if not ret: + break + + # Calculate FPS + current_time = time.time() + fps_display = 1 / (current_time - prev_time) if current_time - prev_time > 0 else 0 + prev_time = current_time + + # Process frame + result = process_frame( + frame, + session, + class_names, + threshold, + inference_size + ) + + # Add overlay + result = draw_text_overlay( + result, + fps_display, + session.get_providers()[0], + inference_size, + show_overlay + ) + + # Display result + cv2.imshow("Detection", result) + + # Handle key presses + key = cv2.waitKey(1) & 0xFF + if key == ord("q"): + break + elif key == ord("t"): + show_overlay = not show_overlay + + finally: + cap.release() + cv2.destroyAllWindows() + + +def process_frame( + frame: np.ndarray, + session: ort.InferenceSession, + class_names: list[str] | None, + threshold: float, + target_width: int, +) -> np.ndarray: + """Process a single frame through the object detection model. 
+ + Args: + frame: Input frame in BGR format + session: ONNX Runtime inference session + class_names: Optional list of class names + threshold: Detection confidence threshold + target_width: Target width for processing + + Returns: + np.ndarray: Processed frame with detections + """ + # Calculate scaling and padding + height, width = frame.shape[:2] + scale = target_width / max(height, width) + new_height = int(height * scale) + new_width = int(width * scale) + + # Calculate padding + y_offset = (target_width - new_height) // 2 + x_offset = (target_width - new_width) // 2 + + # Create model input with padding + model_input = np.zeros((target_width, target_width, 3), dtype=np.uint8) + model_input[ + y_offset : y_offset + new_height, + x_offset : x_offset + new_width + ] = cv2.resize(frame, (new_width, new_height)) + + # Prepare input data im_data = np.ascontiguousarray( - image.transpose(2, 0, 1), # HWC to CHW format + model_input.transpose(2, 0, 1), dtype=np.float32, ) - im_data = np.expand_dims(im_data, axis=0) # Add batch dimension - orig_size = np.array([[image.shape[0], image.shape[1]]], dtype=np.int64) - - print(f"Image frame shape: {image.shape}") - print(f"Processed input shape: {im_data.shape}") - - # Get input name from model metadata - input_name = session.get_inputs()[0].name + im_data = np.expand_dims(im_data, axis=0) + orig_size = np.array([[target_width, target_width]], dtype=np.int64) # Run inference + input_name = session.get_inputs()[0].name outputs = session.run( output_names=None, input_feed={input_name: im_data, "orig_target_sizes": orig_size}, @@ -170,528 +337,122 @@ def run_inference( # Process outputs labels, boxes, scores = outputs - - # print(outputs) - - # Draw bounding boxes on the image - result_image = draw_boxes( - original_image, + + # Scale boxes back to original frame size + boxes = boxes[0] # Remove batch dimension + boxes[:, [0, 2]] = (boxes[:, [0, 2]] - x_offset) / scale # x coordinates + boxes[:, [1, 3]] = (boxes[:, [1, 3]] - 
y_offset) / scale # y coordinates + + # Draw detections + return draw_boxes( + frame, labels[0], - boxes[0], + boxes, scores[0], - 1.0, # No ratio needed since we're not resizing - (0, 0), # No padding needed threshold=threshold, class_names=class_names, ) - # Save and show result - output_path = "detection_result.jpg" - result_bgr = cv2.cvtColor( - result_image, cv2.COLOR_RGB2BGR - ) # Convert back to BGR for OpenCV - cv2.imwrite(output_path, result_bgr) - print(f"Detection complete. Result saved to {output_path}") - - # Display the result - cv2.imshow("Detection Result", result_bgr) - cv2.waitKey(0) - cv2.destroyAllWindows() - - return result_image - - -def run_inference_webcam( - model_path, class_names_path=None, provider="cpu", threshold=0.3, video_width=640 -): - """Run real-time object detection on webcam feed.""" - # Set up providers based on selection - if provider == "cpu": - providers = ["CPUExecutionProvider"] - elif provider == "cuda": - providers = [ - ( - "CUDAExecutionProvider", - { - "arena_extend_strategy": "kNextPowerOfTwo", - "gpu_mem_limit": 2 * 1024 * 1024 * 1024, - "cudnn_conv_algo_search": "EXHAUSTIVE", - "do_copy_in_default_stream": True, - }, - ), - "CPUExecutionProvider", - ] - elif provider == "tensorrt": - providers = [ - ( - "TensorrtExecutionProvider", - { - "trt_fp16_enable": False, - "trt_engine_cache_enable": True, - "trt_engine_cache_path": "./trt_cache", - "trt_timing_cache_enable": True, - }, - ), - "CPUExecutionProvider", - ] - - try: - print(f"Loading ONNX model with providers: {providers}...") - session = ort.InferenceSession(model_path, providers=providers) - print(f"Using provider: {session.get_providers()[0]}") - except Exception as e: - print(f"Error creating inference session with providers {providers}: {e}") - print("Attempting to fall back to CPU execution...") - session = ort.InferenceSession(model_path, providers=["CPUExecutionProvider"]) - - # Update FPS calculation variables - prev_time = time.time() - fps_display 
= 0 - - # Load class names if provided - class_names = None - if class_names_path: - try: - with open(class_names_path, "r") as f: - class_names = [line.strip() for line in f.readlines()] - print(f"Loaded {len(class_names)} class names") - except Exception as e: - print(f"Error loading class names: {e}") - # Initialize webcam - cap = cv2.VideoCapture(0) - if not cap.isOpened(): - raise RuntimeError("Failed to open webcam") - - # Set camera to maximum possible FPS - cap.set( - cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc("M", "J", "P", "G") - ) # Use MJPG format for higher FPS - cap.set( - cv2.CAP_PROP_FPS, 1000 - ) # Request very high FPS - will default to max supported - cap.set(cv2.CAP_PROP_FRAME_WIDTH, video_width) - cap.set(cv2.CAP_PROP_FRAME_HEIGHT, int(video_width * 9 / 16)) # 16:9 aspect ratio - - # Print actual camera properties - actual_fps = cap.get(cv2.CAP_PROP_FPS) - actual_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) - actual_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) - print( - f"Camera settings - FPS: {actual_fps}, Resolution: {actual_width}x{actual_height}" +def draw_text_overlay( + image: np.ndarray, + fps: float, + provider: str, + video_width: int, + show_overlay: bool = True, +) -> np.ndarray: + """Draw text overlays (FPS, width, provider) on the detection frame. 
+ + Args: + image: Input image array in BGR format + fps: Current FPS value to display + provider: Provider name being used + video_width: Current video width in pixels + show_overlay: Whether to show the text overlay + + Returns: + np.ndarray: Image with text overlays + """ + if not show_overlay: + return image + + # Add video width display at top left with dark green background + width_text = f"Width: {int(video_width)}px" + text_size = cv2.getTextSize(width_text, cv2.FONT_HERSHEY_SIMPLEX, 0.8, 2)[0] + + # Draw dark green background rectangle + cv2.rectangle( + image, + (5, 5), # Slight padding from corner + (text_size[0] + 15, 35), # Add padding around text + (0, 100, 0), # Dark green in BGR + -1, # Filled rectangle ) - try: - while True: - ret, frame = cap.read() - if not ret: - print("Failed to grab frame") - break - - # Calculate FPS - current_time = time.time() - if current_time - prev_time > 0: # Avoid division by zero - fps_display = 1 / (current_time - prev_time) - prev_time = current_time - - # Calculate scaling and padding - height, width = frame.shape[:2] - scale = 640.0 / max(height, width) - new_height = int(height * scale) - new_width = int(width * scale) - - # Calculate padding - y_offset = (640 - new_height) // 2 - x_offset = (640 - new_width) // 2 - - # Create model input with padding - model_input = np.zeros((640, 640, 3), dtype=np.uint8) - model_input[ - y_offset : y_offset + new_height, x_offset : x_offset + new_width - ] = cv2.resize(frame, (new_width, new_height)) - - # Convert BGR to RGB for model input - image = cv2.cvtColor(model_input, cv2.COLOR_BGR2RGB) - - # Prepare input data - im_data = np.ascontiguousarray( - image.transpose(2, 0, 1), - dtype=np.float32, - ) - im_data = np.expand_dims(im_data, axis=0) - orig_size = np.array([[640, 640]], dtype=np.int64) # Use padded size - - # Get input name and run inference - input_name = session.get_inputs()[0].name - outputs = session.run( - output_names=None, - input_feed={input_name: im_data, 
"orig_target_sizes": orig_size}, - ) - - # Process outputs - labels, boxes, scores = outputs - - # Scale boxes from padded 640x640 to original frame size - boxes = boxes[0] # Remove batch dimension - boxes[:, [0, 2]] = (boxes[:, [0, 2]] - x_offset) / scale # x coordinates - boxes[:, [1, 3]] = (boxes[:, [1, 3]] - y_offset) / scale # y coordinates - - # Draw bounding boxes on the original frame - result_image = draw_boxes( - frame, # Use original frame - labels[0], - boxes, - scores[0], - 1.0, # No additional scaling needed - (0, 0), # No additional padding needed - threshold=threshold, - class_names=class_names, - ) - - # No need to convert back to BGR since we're using the original frame - result_bgr = result_image - - # Add video width display at top left with dark green background - width_text = f"Width: {int(actual_width)}px" - text_size = cv2.getTextSize(width_text, cv2.FONT_HERSHEY_SIMPLEX, 0.8, 2)[0] - - # Draw dark green background rectangle - cv2.rectangle( - result_bgr, - (5, 5), # Slight padding from corner - (text_size[0] + 15, 35), # Add padding around text - (0, 100, 0), # Dark green in BGR - -1, # Filled rectangle - ) - - # Draw text - cv2.putText( - result_bgr, - width_text, - (10, 30), - cv2.FONT_HERSHEY_SIMPLEX, - 0.8, - (255, 255, 255), # White text - 2, - ) - - # Add FPS display (existing code) - fps_text = f"FPS: {fps_display:.1f}" - text_size = cv2.getTextSize(fps_text, cv2.FONT_HERSHEY_SIMPLEX, 0.8, 2)[0] - text_x = result_bgr.shape[1] - text_size[0] - 10 - text_y = 30 - - # Draw FPS background rectangle - cv2.rectangle( - result_bgr, - (text_x - 5, text_y - text_size[1] - 5), - (text_x + text_size[0] + 5, text_y + 5), - (139, 0, 0), - -1, - ) - - # Draw FPS text - cv2.putText( - result_bgr, - fps_text, - (text_x, text_y), - cv2.FONT_HERSHEY_SIMPLEX, - 0.8, - (255, 255, 255), - 2, - ) - - # Add provider display - provider_text = f"Provider: {session.get_providers()[0]}" - text_size = cv2.getTextSize( - provider_text, cv2.FONT_HERSHEY_SIMPLEX, 
0.8, 2 - )[0] - text_x = (result_bgr.shape[1] - text_size[0]) // 2 - text_y = result_bgr.shape[0] - 20 - - # Draw provider background rectangle - cv2.rectangle( - result_bgr, - (text_x - 5, text_y - text_size[1] - 5), - (text_x + text_size[0] + 5, text_y + 5), - (0, 0, 139), - -1, - ) - - # Draw provider text - cv2.putText( - result_bgr, - provider_text, - (text_x, text_y), - cv2.FONT_HERSHEY_SIMPLEX, - 0.8, - (255, 255, 255), - 2, - ) - - # Display the result - cv2.imshow("Webcam Detection", result_bgr) - - # Handle key presses - key = cv2.waitKey(1) & 0xFF - if key == ord("q"): - break - - finally: - cap.release() - cv2.destroyAllWindows() - - -def run_inference_video( - model_path, - video_path, - class_names_path=None, - provider="cpu", - threshold=0.3, - video_width=640, -): - """Run object detection on a video file.""" - # Set up providers (same as webcam function) - if provider == "cpu": - providers = ["CPUExecutionProvider"] - elif provider == "cuda": - providers = [ - ( - "CUDAExecutionProvider", - { - "arena_extend_strategy": "kNextPowerOfTwo", - "gpu_mem_limit": 2 * 1024 * 1024 * 1024, - "cudnn_conv_algo_search": "EXHAUSTIVE", - "do_copy_in_default_stream": True, - }, - ), - "CPUExecutionProvider", - ] - elif provider == "tensorrt": - providers = [ - ( - "TensorrtExecutionProvider", - { - "trt_fp16_enable": False, - "trt_engine_cache_enable": True, - "trt_engine_cache_path": "./trt_cache", - "trt_timing_cache_enable": True, - }, - ), - "CPUExecutionProvider", - ] - - # Initialize model session - try: - print(f"Loading ONNX model with providers: {providers}...") - session = ort.InferenceSession(model_path, providers=providers) - print(f"Using provider: {session.get_providers()[0]}") - except Exception as e: - print(f"Error creating inference session with providers {providers}: {e}") - print("Attempting to fall back to CPU execution...") - session = ort.InferenceSession(model_path, providers=["CPUExecutionProvider"]) - - # Load class names - class_names = 
None - if class_names_path: - try: - with open(class_names_path, "r") as f: - class_names = [line.strip() for line in f.readlines()] - print(f"Loaded {len(class_names)} class names") - except Exception as e: - print(f"Error loading class names: {e}") - - # Open video file - cap = cv2.VideoCapture(video_path) - if not cap.isOpened(): - raise RuntimeError(f"Failed to open video file: {video_path}") + # Draw width text + cv2.putText( + image, + width_text, + (10, 30), + cv2.FONT_HERSHEY_SIMPLEX, + 0.8, + (255, 255, 255), # White text + 2, + ) - # Get video properties - fps = int(cap.get(cv2.CAP_PROP_FPS)) - frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) - frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) - total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + # Add FPS display + fps_text = f"FPS: {fps:.1f}" + text_size = cv2.getTextSize(fps_text, cv2.FONT_HERSHEY_SIMPLEX, 0.8, 2)[0] + text_x = image.shape[1] - text_size[0] - 10 + text_y = 30 + + # Draw FPS background rectangle + cv2.rectangle( + image, + (text_x - 5, text_y - text_size[1] - 5), + (text_x + text_size[0] + 5, text_y + 5), + (139, 0, 0), + -1, + ) - # Calculate output dimensions based on video_width - scale = video_width / frame_width - output_width = video_width - output_height = int(frame_height * scale) + # Draw FPS text + cv2.putText( + image, + fps_text, + (text_x, text_y), + cv2.FONT_HERSHEY_SIMPLEX, + 0.8, + (255, 255, 255), + 2, + ) - # Create video writer with new dimensions - output_path = "detection_output.mp4" - fourcc = cv2.VideoWriter_fourcc(*"mp4v") - out = cv2.VideoWriter(output_path, fourcc, fps, (output_width, output_height)) + # Add provider display + provider_text = f"Provider: {provider}" + text_size = cv2.getTextSize(provider_text, cv2.FONT_HERSHEY_SIMPLEX, 0.8, 2)[0] + text_x = (image.shape[1] - text_size[0]) // 2 + text_y = image.shape[0] - 20 + + # Draw provider background rectangle + cv2.rectangle( + image, + (text_x - 5, text_y - text_size[1] - 5), + (text_x + 
text_size[0] + 5, text_y + 5), + (0, 0, 139), + -1, + ) - # Initialize FPS calculation - prev_time = time.time() - fps_display = 0 - frame_count = 0 + # Draw provider text + cv2.putText( + image, + provider_text, + (text_x, text_y), + cv2.FONT_HERSHEY_SIMPLEX, + 0.8, + (255, 255, 255), + 2, + ) - try: - while cap.isOpened(): - ret, frame = cap.read() - if not ret: - break - - frame_count += 1 - if frame_count % 10 == 0: - progress = (frame_count / total_frames) * 100 - print(f"Processing: {progress:.1f}% complete", end="\r") - - # Calculate FPS - current_time = time.time() - if current_time - prev_time > 0: - fps_display = 1 / (current_time - prev_time) - prev_time = current_time - - # Calculate scaling and padding using video_width parameter - height, width = frame.shape[:2] - scale = video_width / max(height, width) - new_height = int(height * scale) - new_width = int(width * scale) - - # Calculate padding - y_offset = (video_width - new_height) // 2 - x_offset = (video_width - new_width) // 2 - - # Create model input with padding using video_width - model_input = np.zeros((video_width, video_width, 3), dtype=np.uint8) - model_input[ - y_offset : y_offset + new_height, x_offset : x_offset + new_width - ] = cv2.resize(frame, (new_width, new_height)) - - # Convert BGR to RGB for model input - image = cv2.cvtColor(model_input, cv2.COLOR_BGR2RGB) - - # Prepare input data - im_data = np.ascontiguousarray( - image.transpose(2, 0, 1), - dtype=np.float32, - ) - im_data = np.expand_dims(im_data, axis=0) - orig_size = np.array( - [[video_width, video_width]], dtype=np.int64 - ) # Use padded size - - # Run inference - input_name = session.get_inputs()[0].name - outputs = session.run( - output_names=None, - input_feed={input_name: im_data, "orig_target_sizes": orig_size}, - ) - - # Process outputs - labels, boxes, scores = outputs - - # Scale boxes from padded 640x640 to original frame size - boxes = boxes[0] # Remove batch dimension - boxes[:, [0, 2]] = (boxes[:, [0, 2]] - 
x_offset) / scale # x coordinates - boxes[:, [1, 3]] = (boxes[:, [1, 3]] - y_offset) / scale # y coordinates - - # Draw bounding boxes on the original frame - result_image = draw_boxes( - frame, # Use original frame - labels[0], - boxes, - scores[0], - 1.0, # No additional scaling needed - (0, 0), # No additional padding needed - threshold=threshold, - class_names=class_names, - ) - - # Before writing the frame, resize it - result_image = cv2.resize(result_image, (output_width, output_height)) - out.write(result_image) - - # Add video width display at top left with dark green background - width_text = f"Width: {output_width}px" - text_size = cv2.getTextSize(width_text, cv2.FONT_HERSHEY_SIMPLEX, 0.8, 2)[0] - - # Draw dark green background rectangle - cv2.rectangle( - result_image, - (5, 5), # Slight padding from corner - (text_size[0] + 15, 35), # Add padding around text - (0, 100, 0), # Dark green in BGR - -1, # Filled rectangle - ) - - # Draw text - cv2.putText( - result_image, - width_text, - (10, 30), - cv2.FONT_HERSHEY_SIMPLEX, - 0.8, - (255, 255, 255), # White text - 2, - ) - - # Add FPS counter and provider info (existing code) - fps_text = f"FPS: {fps_display:.1f}" - text_size = cv2.getTextSize(fps_text, cv2.FONT_HERSHEY_SIMPLEX, 0.8, 2)[0] - text_x = result_image.shape[1] - text_size[0] - 10 - text_y = 30 - - # Draw FPS background rectangle - cv2.rectangle( - result_image, - (text_x - 5, text_y - text_size[1] - 5), - (text_x + text_size[0] + 5, text_y + 5), - (139, 0, 0), - -1, - ) - - # Draw FPS text - cv2.putText( - result_image, - fps_text, - (text_x, text_y), - cv2.FONT_HERSHEY_SIMPLEX, - 0.8, - (255, 255, 255), - 2, - ) - - # Add provider display at bottom (matching webcam style) - provider_text = f"Provider: {session.get_providers()[0]}" - text_size = cv2.getTextSize( - provider_text, cv2.FONT_HERSHEY_SIMPLEX, 0.8, 2 - )[0] - text_x = (result_image.shape[1] - text_size[0]) // 2 - text_y = result_image.shape[0] - 20 - - # Draw provider background 
rectangle - cv2.rectangle( - result_image, - (text_x - 5, text_y - text_size[1] - 5), - (text_x + text_size[0] + 5, text_y + 5), - (139, 0, 0), - -1, - ) - - # Draw provider text - cv2.putText( - result_image, - provider_text, - (text_x, text_y), - cv2.FONT_HERSHEY_SIMPLEX, - 0.8, - (255, 255, 255), - 2, - ) - - # Display frame (optional) - cv2.imshow("Video Detection", result_image) - if cv2.waitKey(1) & 0xFF == ord("q"): - break - - finally: - cap.release() - out.release() - cv2.destroyAllWindows() - print(f"\nVideo processing complete. Output saved to {output_path}") + return image if __name__ == "__main__": @@ -704,10 +465,10 @@ def run_inference_video( parser.add_argument("--image", type=str, help="Path to input image (optional)") parser.add_argument("--webcam", action="store_true", help="Use webcam input") parser.add_argument( - "--video-width", + "--inference-size", type=int, default=640, - help="Width of the video input in pixels (default: 640). Height will be adjusted to maintain aspect ratio", + help="Size for inference processing (default: 640). 
Larger dimension will be scaled to this size while preserving aspect ratio", ) parser.add_argument( "--classes", type=str, help="Path to class names file (optional)" @@ -730,21 +491,21 @@ def run_inference_video( args = parser.parse_args() if args.webcam: - run_inference_webcam( - args.model, args.classes, args.provider, args.threshold, args.video_width + run_inference( + args.model, 0, args.classes, args.threshold, args.provider, args.inference_size ) elif args.video: - run_inference_video( + run_inference( args.model, args.video, args.classes, - args.provider, args.threshold, - args.video_width, + args.provider, + args.inference_size, ) elif args.image: run_inference( - args.model, args.image, args.classes, args.threshold, args.provider + args.model, args.image, args.classes, args.threshold, args.provider, args.inference_size ) else: parser.error("Either --image, --video, or --webcam must be specified") diff --git a/scripts/train.py b/scripts/train.py index ace183b9..d43fccc1 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -6,18 +6,19 @@ conf = configure_dataset( config=conf, - image_size=(640, 640), + image_size=(800, 800), train_ann_file="/home/dnth/Desktop/DEIMKit/dataset_collections/Rock Paper Scissors SXSW.v14i.coco/train/_annotations.coco.json", train_img_folder="/home/dnth/Desktop/DEIMKit/dataset_collections/Rock Paper Scissors SXSW.v14i.coco/train", val_ann_file="/home/dnth/Desktop/DEIMKit/dataset_collections/Rock Paper Scissors SXSW.v14i.coco/valid/_annotations.coco.json", val_img_folder="/home/dnth/Desktop/DEIMKit/dataset_collections/Rock Paper Scissors SXSW.v14i.coco/valid", - train_batch_size=20, - val_batch_size=20, + train_batch_size=16, + val_batch_size=16, num_classes=4, remap_mscoco=False, - output_dir="./outputs/rock-paper-scissors/deim_hgnetv2_s_30ep_640px_num_queries_pinto", + output_dir="./outputs/rock-paper-scissors/deim_hgnetv2_s_30ep_640px_num_queries_resume_3", ) trainer = Trainer(conf) -trainer.fit(epochs=30, save_best_only=True) 
+trainer.load_checkpoint("/home/dnth/Desktop/DEIMKit/outputs/rock-paper-scissors/deim_hgnetv2_s_30ep_640px_num_queries_resume_2/best.pth") +trainer.fit(epochs=10, save_best_only=True, lr=0.00004) diff --git a/src/deimkit/trainer.py b/src/deimkit/trainer.py index 7c3e4b29..e3f5be26 100644 --- a/src/deimkit/trainer.py +++ b/src/deimkit/trainer.py @@ -612,43 +612,62 @@ def _save_checkpoint( torch.save(state, checkpoint_path) logger.info(f"Checkpoint saved to {checkpoint_path}") - def load_checkpoint(self, checkpoint_path: Union[str, Path]) -> None: + def load_checkpoint(self, checkpoint_path: Union[str, Path], strict: bool = False) -> None: """ - Load a model checkpoint. + Load a model checkpoint, handling potential image size differences. Args: checkpoint_path: Path to the checkpoint file. + strict: Whether to strictly enforce that the keys in state_dict match. """ checkpoint_path = Path(checkpoint_path) logger.info(f"Loading checkpoint from {checkpoint_path}") # Load checkpoint - if str(checkpoint_path).startswith("http"): - state = torch.hub.load_state_dict_from_url( - str(checkpoint_path), map_location="cpu" - ) - else: - state = torch.load(checkpoint_path, map_location="cpu") + state = (torch.hub.load_state_dict_from_url(str(checkpoint_path), map_location="cpu") + if str(checkpoint_path).startswith("http") + else torch.load(checkpoint_path, map_location="cpu")) # Setup if not already done if self.model is None: self._setup() - # Load model state - self.model.load_state_dict(state["model"]) + def load_state_dict_with_mismatch(model, state_dict): + """Helper function to load state dict handling shape mismatches""" + try: + missing, unexpected = model.load_state_dict(state_dict, strict=False) + if missing or unexpected: + logger.warning(f"Missing keys: {missing}\nUnexpected keys: {unexpected}") + except RuntimeError as e: + logger.warning(f"Shape mismatch, loading compatible parameters only: {e}") + current_state = model.state_dict() + matched_state = { + k: v for 
k, v in state_dict.items() + if k in current_state and current_state[k].shape == v.shape + } + model.load_state_dict(matched_state, strict=False) + logger.info("Loaded parameters with matching shapes") - # Load optimizer state if available - if "optimizer" in state and self.optimizer is not None: - self.optimizer.load_state_dict(state["optimizer"]) + # Load model state + load_state_dict_with_mismatch(self.model, state["model"]) # Load EMA state if available if "ema" in state and self.ema is not None: - self.ema.load_state_dict(state["ema"]) + try: + self.ema.load_state_dict(state["ema"]) + except RuntimeError: + logger.info("Attempting to load EMA parameters with matching shapes...") + load_state_dict_with_mismatch(self.ema.module, state["ema"]["module"]) - # Update last epoch - if "last_epoch" in state: - self.last_epoch = state["last_epoch"] + # Load optimizer state if available + if "optimizer" in state and self.optimizer is not None: + try: + self.optimizer.load_state_dict(state["optimizer"]) + except ValueError as e: + logger.warning(f"Could not load optimizer state: {e}") + # Update last epoch + self.last_epoch = state.get("last_epoch", -1) logger.info(f"Loaded checkpoint from epoch {self.last_epoch}") def test(self) -> Dict[str, Any]: