From 2c3fd2b7cd379b42823cf0c59c8c82254baef7e5 Mon Sep 17 00:00:00 2001
From: Matvezy <mpopov@trinity.edu>
Date: Sat, 29 Mar 2025 00:53:04 +0000
Subject: [PATCH 01/20] early stopping

---
 rfdetr/main.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/rfdetr/main.py b/rfdetr/main.py
index 7c18a25..4025094 100644
--- a/rfdetr/main.py
+++ b/rfdetr/main.py
@@ -133,10 +133,19 @@ def __init__(self, **kwargs):
             self.model.backbone[0].encoder = get_peft_model(self.model.backbone[0].encoder, lora_config)
         self.model = self.model.to(self.device)
         self.criterion, self.postprocessors = build_criterion_and_postprocessors(args)
+        self.stop_early = False
+
+    def request_early_stop(self):
+        self.stop_early = True
+        print("Early stopping requested, will complete current epoch and stop")
     
     def reinitialize_detection_head(self, num_classes):
         self.model.reinitialize_detection_head(num_classes)
 
+    def request_early_stop(self):
+        self.stop_early = True
+        print("Early stopping requested, will complete current epoch and stop")
+
     def train(self, callbacks: DefaultDict[str, List[Callable]], **kwargs):
         currently_supported_callbacks = ["on_fit_epoch_end", "on_train_batch_start", "on_train_end"]
         for key in callbacks.keys():
@@ -398,6 +407,10 @@ def lr_lambda(current_step: int):
             for callback in callbacks["on_fit_epoch_end"]:
                 callback(log_stats)
 
+            if self.stop_early:
+                print(f"Early stopping requested, stopping at epoch {epoch}")
+                break
+
         best_is_ema = best_map_ema_5095 > best_map_5095
         if best_is_ema:
             shutil.copy2(output_dir / 'checkpoint_best_ema.pth', output_dir / 'checkpoint_best_total.pth')

From 5e466521cbbdfe6eda10c73b53f9a2b80fb4248a Mon Sep 17 00:00:00 2001
From: Matvezy <mpopov@trinity.edu>
Date: Sat, 29 Mar 2025 21:38:25 +0000
Subject: [PATCH 02/20] mp upd

---
 rfdetr/main.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/rfdetr/main.py b/rfdetr/main.py
index 4025094..a62d8bf 100644
--- a/rfdetr/main.py
+++ b/rfdetr/main.py
@@ -45,6 +45,8 @@
 import shutil
 from rfdetr.util.files import download_file
 import os
+import torch.multiprocessing
+torch.multiprocessing.set_sharing_strategy('file_system')
 
 logger = getLogger(__name__)
 

From 84ad2ac7a461a036dd33fff860aeeaf74c53ea92 Mon Sep 17 00:00:00 2001
From: Matvezy <mpopov@trinity.edu>
Date: Mon, 31 Mar 2025 16:57:31 +0000
Subject: [PATCH 03/20] 100 eps

---
 rfdetr/main.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/rfdetr/main.py b/rfdetr/main.py
index a62d8bf..0a32f5a 100644
--- a/rfdetr/main.py
+++ b/rfdetr/main.py
@@ -136,10 +136,6 @@ def __init__(self, **kwargs):
         self.model = self.model.to(self.device)
         self.criterion, self.postprocessors = build_criterion_and_postprocessors(args)
         self.stop_early = False
-
-    def request_early_stop(self):
-        self.stop_early = True
-        print("Early stopping requested, will complete current epoch and stop")
     
     def reinitialize_detection_head(self, num_classes):
         self.model.reinitialize_detection_head(num_classes)

From b15c33a9a89475de18b6576b18a26cbb2f997f30 Mon Sep 17 00:00:00 2001
From: Matvezy <mpopov@trinity.edu>
Date: Mon, 31 Mar 2025 17:03:21 +0000
Subject: [PATCH 04/20] early stopping

---
 rfdetr/main.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/rfdetr/main.py b/rfdetr/main.py
index 0a32f5a..4f61f65 100644
--- a/rfdetr/main.py
+++ b/rfdetr/main.py
@@ -45,8 +45,9 @@
 import shutil
 from rfdetr.util.files import download_file
 import os
-import torch.multiprocessing
-torch.multiprocessing.set_sharing_strategy('file_system')
+if os.environ.get("USE_FILE_SYSTEM_SHARING", "0") == "1":
+    import torch.multiprocessing
+    torch.multiprocessing.set_sharing_strategy('file_system')
 
 logger = getLogger(__name__)
 

From 4503135ab2d181ded9a691b6d11f63fc28185154 Mon Sep 17 00:00:00 2001
From: Matvezy <mpopov@trinity.edu>
Date: Mon, 31 Mar 2025 19:37:10 +0000
Subject: [PATCH 05/20] early stopping callback

---
 rfdetr/config.py              |  4 +++
 rfdetr/main.py                | 29 +++++++++++++++++++
 rfdetr/util/early_stopping.py | 54 +++++++++++++++++++++++++++++++++++
 3 files changed, 87 insertions(+)
 create mode 100644 rfdetr/util/early_stopping.py

diff --git a/rfdetr/config.py b/rfdetr/config.py
index fcea48c..86079b0 100644
--- a/rfdetr/config.py
+++ b/rfdetr/config.py
@@ -70,3 +70,7 @@ class TrainConfig(BaseModel):
     use_ema: bool = True
     num_workers: int = 2
     weight_decay: float = 1e-4
+    early_stopping: bool = False
+    early_stopping_patience: int = 5
+    early_stopping_min_delta: float = 0.001
+    early_stopping_use_ema: bool = False
diff --git a/rfdetr/main.py b/rfdetr/main.py
index 4f61f65..894b6e2 100644
--- a/rfdetr/main.py
+++ b/rfdetr/main.py
@@ -159,6 +159,17 @@ def train(self, callbacks: DefaultDict[str, List[Callable]], **kwargs):
         print(args)
         device = torch.device(args.device)
 
+        # Initialize early stopping if enabled
+        if args.early_stopping:
+            from rfdetr.util.early_stopping import EarlyStoppingCallback
+            early_stopping_callback = EarlyStoppingCallback(
+                patience=args.early_stopping_patience,
+                min_delta=args.early_stopping_min_delta,
+                use_ema=args.early_stopping_use_ema
+            )
+            early_stopping_callback.set_model(self)
+            callbacks["on_fit_epoch_end"].append(early_stopping_callback.update)
+        
         # fix the seed for reproducibility
         seed = args.seed + utils.get_rank()
         torch.manual_seed(seed)
@@ -752,6 +763,15 @@ def get_args_parser():
     )
     parser.add_argument('--lr_min_factor', default=0.0, type=float, 
         help='Minimum learning rate factor (as a fraction of initial lr) at the end of cosine annealing')
+    # Early stopping parameters
+    parser.add_argument('--early_stopping', action='store_true',
+                        help='Enable early stopping based on mAP improvement')
+    parser.add_argument('--early_stopping_patience', default=5, type=int,
+                        help='Number of epochs with no improvement after which training will be stopped')
+    parser.add_argument('--early_stopping_min_delta', default=0.001, type=float,
+                        help='Minimum change in mAP to qualify as an improvement')
+    parser.add_argument('--early_stopping_use_ema', action='store_true',
+                        help='Use EMA model metrics for early stopping')
     # subparsers
     subparsers = parser.add_subparsers(title='sub-commands', dest='subcommand',
         description='valid subcommands', help='additional help')
@@ -882,6 +902,11 @@ def populate_args(
     warmup_epochs=1,
     lr_scheduler='step',
     lr_min_factor=0.0,
+    # Early stopping parameters
+    early_stopping=False,
+    early_stopping_patience=5,
+    early_stopping_min_delta=0.001,
+    early_stopping_use_ema=False,
     # Additional
     subcommand=None,
     **extra_kwargs  # To handle any unexpected arguments
@@ -976,6 +1001,10 @@ def populate_args(
         warmup_epochs=warmup_epochs,
         lr_scheduler=lr_scheduler,
         lr_min_factor=lr_min_factor,
+        early_stopping=early_stopping,
+        early_stopping_patience=early_stopping_patience,
+        early_stopping_min_delta=early_stopping_min_delta,
+        early_stopping_use_ema=early_stopping_use_ema,
         **extra_kwargs
     )
     return args
\ No newline at end of file
diff --git a/rfdetr/util/early_stopping.py b/rfdetr/util/early_stopping.py
new file mode 100644
index 0000000..19ce3af
--- /dev/null
+++ b/rfdetr/util/early_stopping.py
@@ -0,0 +1,54 @@
+"""
+Early stopping callback for RF-DETR training
+"""
+
+class EarlyStoppingCallback:
+    """
+    Early stopping callback that monitors mAP and stops training if no improvement 
+    over a threshold is observed for a specified number of epochs.
+    
+    Args:
+        patience (int): Number of epochs with no improvement to wait before stopping
+        min_delta (float): Minimum change in mAP to qualify as improvement
+        use_ema (bool): Whether to use EMA model metrics for early stopping
+        verbose (bool): Whether to print early stopping messages
+    """
+    
+    def __init__(self, patience=5, min_delta=0.001, use_ema=False, verbose=True):
+        self.patience = patience
+        self.min_delta = min_delta
+        self.use_ema = use_ema
+        self.verbose = verbose
+        self.best_map = 0.0
+        self.counter = 0
+        self.stop_training = False
+        self.model = None
+        
+    def update(self, log_stats):
+        """Update early stopping state based on epoch validation metrics"""
+        if self.use_ema and 'ema_test_coco_eval_bbox' in log_stats:
+            current_map = log_stats['ema_test_coco_eval_bbox'][0]
+        elif 'test_coco_eval_bbox' in log_stats:
+            current_map = log_stats['test_coco_eval_bbox'][0]
+        else:
+            return
+        
+        if current_map > self.best_map + self.min_delta:
+            self.best_map = current_map
+            self.counter = 0
+            if self.verbose:
+                print(f"Early stopping: mAP improved to {current_map:.4f}")
+        else:
+            self.counter += 1
+            if self.verbose:
+                print(f"Early stopping: No improvement in mAP for {self.counter} epochs (best: {self.best_map:.4f}, current: {current_map:.4f})")
+
+            if self.counter >= self.patience:
+                self.stop_training = True
+                print(f"Early stopping triggered: No improvement above {self.min_delta} threshold for {self.patience} epochs")
+                if self.model:
+                    self.model.request_early_stop()
+                
+    def set_model(self, model):
+        """Set the model reference to call request_early_stop when needed"""
+        self.model = model
\ No newline at end of file

From c8c576a1040592a087f0b957ea09c2c81c40cdd2 Mon Sep 17 00:00:00 2001
From: Matvezy <mpopov@trinity.edu>
Date: Mon, 31 Mar 2025 20:38:10 +0000
Subject: [PATCH 06/20] early stopping callback

---
 rfdetr/detr.py                | 10 ++++++++++
 rfdetr/test_output/log.txt    | 35 +++++++++++++++++++++++++++++++++++
 rfdetr/util/early_stopping.py | 21 +++++++++++----------
 3 files changed, 56 insertions(+), 10 deletions(-)
 create mode 100644 rfdetr/test_output/log.txt

diff --git a/rfdetr/detr.py b/rfdetr/detr.py
index 10c2e28..a38bc4c 100644
--- a/rfdetr/detr.py
+++ b/rfdetr/detr.py
@@ -71,6 +71,16 @@ def train_from_config(self, config: TrainConfig, **kwargs):
         self.callbacks["on_fit_epoch_end"].append(metrics_tensor_board_sink.update)
         self.callbacks["on_train_end"].append(metrics_tensor_board_sink.close)
 
+        if config.early_stopping:
+            from rfdetr.util.early_stopping import EarlyStoppingCallback
+            early_stopping_callback = EarlyStoppingCallback(
+                model=self.model,
+                patience=config.early_stopping_patience,
+                min_delta=config.early_stopping_min_delta,
+                use_ema=config.early_stopping_use_ema
+            )
+            self.callbacks["on_fit_epoch_end"].append(early_stopping_callback.update)
+
         self.model.train(
             **all_kwargs,
             callbacks=self.callbacks,
diff --git a/rfdetr/test_output/log.txt b/rfdetr/test_output/log.txt
new file mode 100644
index 0000000..0731320
--- /dev/null
+++ b/rfdetr/test_output/log.txt
@@ -0,0 +1,35 @@
+{'epoch': 0, 'train_loss': 1.0, 'train_class_error': 0.5, 'test_loss': 1.2, 'test_coco_eval_bbox': [0.3, 0.24, 0.0, 0.0, 0.0, 0.0, 0.27], 'n_parameters': 1000000}
+{'epoch': 1, 'train_loss': 0.5, 'train_class_error': 0.25, 'test_loss': 0.6, 'test_coco_eval_bbox': [0.32, 0.256, 0.0, 0.0, 0.0, 0.0, 0.28800000000000003], 'n_parameters': 1000000}
+{'epoch': 2, 'train_loss': 0.3333333333333333, 'train_class_error': 0.16666666666666666, 'test_loss': 0.39999999999999997, 'test_coco_eval_bbox': [0.34, 0.272, 0.0, 0.0, 0.0, 0.0, 0.30600000000000005], 'n_parameters': 1000000}
+{'epoch': 3, 'train_loss': 0.25, 'train_class_error': 0.125, 'test_loss': 0.3, 'test_coco_eval_bbox': [0.36, 0.288, 0.0, 0.0, 0.0, 0.0, 0.324], 'n_parameters': 1000000}
+{'epoch': 4, 'train_loss': 0.2, 'train_class_error': 0.1, 'test_loss': 0.24, 'test_coco_eval_bbox': [0.38, 0.30400000000000005, 0.0, 0.0, 0.0, 0.0, 0.342], 'n_parameters': 1000000}
+{'epoch': 5, 'train_loss': 0.16666666666666666, 'train_class_error': 0.08333333333333333, 'test_loss': 0.19999999999999998, 'test_coco_eval_bbox': [0.4, 0.32000000000000006, 0.0, 0.0, 0.0, 0.0, 0.36000000000000004], 'n_parameters': 1000000}
+{'epoch': 6, 'train_loss': 0.14285714285714285, 'train_class_error': 0.07142857142857142, 'test_loss': 0.17142857142857143, 'test_coco_eval_bbox': [0.42, 0.336, 0.0, 0.0, 0.0, 0.0, 0.378], 'n_parameters': 1000000}
+{'epoch': 7, 'train_loss': 0.125, 'train_class_error': 0.0625, 'test_loss': 0.15, 'test_coco_eval_bbox': [0.44, 0.35200000000000004, 0.0, 0.0, 0.0, 0.0, 0.396], 'n_parameters': 1000000}
+{'epoch': 8, 'train_loss': 0.1111111111111111, 'train_class_error': 0.05555555555555555, 'test_loss': 0.13333333333333333, 'test_coco_eval_bbox': [0.46, 0.36800000000000005, 0.0, 0.0, 0.0, 0.0, 0.41400000000000003], 'n_parameters': 1000000}
+{'epoch': 9, 'train_loss': 0.1, 'train_class_error': 0.05, 'test_loss': 0.12, 'test_coco_eval_bbox': [0.48, 0.384, 0.0, 0.0, 0.0, 0.0, 0.432], 'n_parameters': 1000000}
+{'epoch': 0, 'train_loss': 1.0, 'train_class_error': 0.5, 'test_loss': 1.2, 'test_coco_eval_bbox': [0.3, 0.24, 0.0, 0.0, 0.0, 0.0, 0.27], 'n_parameters': 1000000}
+{'epoch': 1, 'train_loss': 0.5, 'train_class_error': 0.25, 'test_loss': 0.6, 'test_coco_eval_bbox': [0.32, 0.256, 0.0, 0.0, 0.0, 0.0, 0.28800000000000003], 'n_parameters': 1000000}
+{'epoch': 2, 'train_loss': 0.3333333333333333, 'train_class_error': 0.16666666666666666, 'test_loss': 0.39999999999999997, 'test_coco_eval_bbox': [0.34, 0.272, 0.0, 0.0, 0.0, 0.0, 0.30600000000000005], 'n_parameters': 1000000}
+{'epoch': 3, 'train_loss': 0.25, 'train_class_error': 0.125, 'test_loss': 0.3, 'test_coco_eval_bbox': [0.341, 0.27280000000000004, 0.0, 0.0, 0.0, 0.0, 0.3069], 'n_parameters': 1000000}
+{'epoch': 4, 'train_loss': 0.2, 'train_class_error': 0.1, 'test_loss': 0.24, 'test_coco_eval_bbox': [0.342, 0.2736, 0.0, 0.0, 0.0, 0.0, 0.3078], 'n_parameters': 1000000}
+{'epoch': 5, 'train_loss': 0.16666666666666666, 'train_class_error': 0.08333333333333333, 'test_loss': 0.19999999999999998, 'test_coco_eval_bbox': [0.342, 0.2736, 0.0, 0.0, 0.0, 0.0, 0.3078], 'n_parameters': 1000000}
+{'epoch': 0, 'train_loss': 1.0, 'train_class_error': 0.5, 'test_loss': 1.2, 'test_coco_eval_bbox': [0.3, 0.24, 0.0, 0.0, 0.0, 0.0, 0.27], 'n_parameters': 1000000}
+{'epoch': 1, 'train_loss': 0.5, 'train_class_error': 0.25, 'test_loss': 0.6, 'test_coco_eval_bbox': [0.35, 0.27999999999999997, 0.0, 0.0, 0.0, 0.0, 0.315], 'n_parameters': 1000000}
+{'epoch': 2, 'train_loss': 0.3333333333333333, 'train_class_error': 0.16666666666666666, 'test_loss': 0.39999999999999997, 'test_coco_eval_bbox': [0.4, 0.32000000000000006, 0.0, 0.0, 0.0, 0.0, 0.36000000000000004], 'n_parameters': 1000000}
+{'epoch': 3, 'train_loss': 0.25, 'train_class_error': 0.125, 'test_loss': 0.3, 'test_coco_eval_bbox': [0.45, 0.36000000000000004, 0.0, 0.0, 0.0, 0.0, 0.405], 'n_parameters': 1000000}
+{'epoch': 4, 'train_loss': 0.2, 'train_class_error': 0.1, 'test_loss': 0.24, 'test_coco_eval_bbox': [0.451, 0.3608, 0.0, 0.0, 0.0, 0.0, 0.40590000000000004], 'n_parameters': 1000000}
+{'epoch': 5, 'train_loss': 0.16666666666666666, 'train_class_error': 0.08333333333333333, 'test_loss': 0.19999999999999998, 'test_coco_eval_bbox': [0.452, 0.36160000000000003, 0.0, 0.0, 0.0, 0.0, 0.4068], 'n_parameters': 1000000}
+{'epoch': 6, 'train_loss': 0.14285714285714285, 'train_class_error': 0.07142857142857142, 'test_loss': 0.17142857142857143, 'test_coco_eval_bbox': [0.452, 0.36160000000000003, 0.0, 0.0, 0.0, 0.0, 0.4068], 'n_parameters': 1000000}
+{'epoch': 0, 'train_loss': 1.0, 'train_class_error': 0.5, 'test_loss': 1.2, 'test_coco_eval_bbox': [0.3, 0.24, 0.0, 0.0, 0.0, 0.0, 0.27], 'n_parameters': 1000000}
+{'epoch': 1, 'train_loss': 0.5, 'train_class_error': 0.25, 'test_loss': 0.6, 'test_coco_eval_bbox': [0.32, 0.256, 0.0, 0.0, 0.0, 0.0, 0.28800000000000003], 'n_parameters': 1000000}
+{'epoch': 2, 'train_loss': 0.3333333333333333, 'train_class_error': 0.16666666666666666, 'test_loss': 0.39999999999999997, 'test_coco_eval_bbox': [0.34, 0.272, 0.0, 0.0, 0.0, 0.0, 0.30600000000000005], 'n_parameters': 1000000}
+{'epoch': 3, 'train_loss': 0.25, 'train_class_error': 0.125, 'test_loss': 0.3, 'test_coco_eval_bbox': [0.33, 0.264, 0.0, 0.0, 0.0, 0.0, 0.29700000000000004], 'n_parameters': 1000000}
+{'epoch': 4, 'train_loss': 0.2, 'train_class_error': 0.1, 'test_loss': 0.24, 'test_coco_eval_bbox': [0.32, 0.256, 0.0, 0.0, 0.0, 0.0, 0.28800000000000003], 'n_parameters': 1000000}
+{'epoch': 5, 'train_loss': 0.16666666666666666, 'train_class_error': 0.08333333333333333, 'test_loss': 0.19999999999999998, 'test_coco_eval_bbox': [0.31, 0.248, 0.0, 0.0, 0.0, 0.0, 0.279], 'n_parameters': 1000000}
+{'epoch': 0, 'train_loss': 1.0, 'train_class_error': 0.5, 'test_loss': 1.2, 'test_coco_eval_bbox': [0.3, 0.24, 0.0, 0.0, 0.0, 0.0, 0.27], 'n_parameters': 1000000, 'ema_test_coco_eval_bbox': [0.315, 0.252, 0.0, 0.0, 0.0, 0.0, 0.28350000000000003]}
+{'epoch': 1, 'train_loss': 0.5, 'train_class_error': 0.25, 'test_loss': 0.6, 'test_coco_eval_bbox': [0.32, 0.256, 0.0, 0.0, 0.0, 0.0, 0.28800000000000003], 'n_parameters': 1000000, 'ema_test_coco_eval_bbox': [0.336, 0.26880000000000004, 0.0, 0.0, 0.0, 0.0, 0.3024]}
+{'epoch': 2, 'train_loss': 0.3333333333333333, 'train_class_error': 0.16666666666666666, 'test_loss': 0.39999999999999997, 'test_coco_eval_bbox': [0.34, 0.272, 0.0, 0.0, 0.0, 0.0, 0.30600000000000005], 'n_parameters': 1000000, 'ema_test_coco_eval_bbox': [0.35700000000000004, 0.2856, 0.0, 0.0, 0.0, 0.0, 0.32130000000000003]}
+{'epoch': 3, 'train_loss': 0.25, 'train_class_error': 0.125, 'test_loss': 0.3, 'test_coco_eval_bbox': [0.341, 0.27280000000000004, 0.0, 0.0, 0.0, 0.0, 0.3069], 'n_parameters': 1000000, 'ema_test_coco_eval_bbox': [0.35805000000000003, 0.28644000000000003, 0.0, 0.0, 0.0, 0.0, 0.32224500000000006]}
+{'epoch': 4, 'train_loss': 0.2, 'train_class_error': 0.1, 'test_loss': 0.24, 'test_coco_eval_bbox': [0.342, 0.2736, 0.0, 0.0, 0.0, 0.0, 0.3078], 'n_parameters': 1000000, 'ema_test_coco_eval_bbox': [0.35910000000000003, 0.28728000000000004, 0.0, 0.0, 0.0, 0.0, 0.32319000000000003]}
+{'epoch': 5, 'train_loss': 0.16666666666666666, 'train_class_error': 0.08333333333333333, 'test_loss': 0.19999999999999998, 'test_coco_eval_bbox': [0.342, 0.2736, 0.0, 0.0, 0.0, 0.0, 0.3078], 'n_parameters': 1000000, 'ema_test_coco_eval_bbox': [0.35910000000000003, 0.28728000000000004, 0.0, 0.0, 0.0, 0.0, 0.32319000000000003]}
diff --git a/rfdetr/util/early_stopping.py b/rfdetr/util/early_stopping.py
index 19ce3af..413dc58 100644
--- a/rfdetr/util/early_stopping.py
+++ b/rfdetr/util/early_stopping.py
@@ -14,41 +14,42 @@ class EarlyStoppingCallback:
         verbose (bool): Whether to print early stopping messages
     """
     
-    def __init__(self, patience=5, min_delta=0.001, use_ema=False, verbose=True):
+    def __init__(self, model, patience=5, min_delta=0.001, use_ema=False, verbose=True):
         self.patience = patience
         self.min_delta = min_delta
         self.use_ema = use_ema
         self.verbose = verbose
         self.best_map = 0.0
         self.counter = 0
-        self.stop_training = False
-        self.model = None
+        self.model = model
         
     def update(self, log_stats):
         """Update early stopping state based on epoch validation metrics"""
+        # Get the mAP value from the log stats
         if self.use_ema and 'ema_test_coco_eval_bbox' in log_stats:
             current_map = log_stats['ema_test_coco_eval_bbox'][0]
         elif 'test_coco_eval_bbox' in log_stats:
             current_map = log_stats['test_coco_eval_bbox'][0]
         else:
+            # No valid mAP metric found, skip early stopping check
             return
         
+        # Check if current mAP is better than best so far (by at least min_delta)
         if current_map > self.best_map + self.min_delta:
+            # We have an improvement
             self.best_map = current_map
             self.counter = 0
             if self.verbose:
                 print(f"Early stopping: mAP improved to {current_map:.4f}")
         else:
+            # No improvement
             self.counter += 1
             if self.verbose:
                 print(f"Early stopping: No improvement in mAP for {self.counter} epochs (best: {self.best_map:.4f}, current: {current_map:.4f})")
-
+            
+            # Check if early stopping criteria met
             if self.counter >= self.patience:
-                self.stop_training = True
                 print(f"Early stopping triggered: No improvement above {self.min_delta} threshold for {self.patience} epochs")
+                # Request model to stop early
                 if self.model:
-                    self.model.request_early_stop()
-                
-    def set_model(self, model):
-        """Set the model reference to call request_early_stop when needed"""
-        self.model = model
\ No newline at end of file
+                    self.model.request_early_stop()
\ No newline at end of file

From 468c5251330a1b513c64f546e7de3cd1bd747a7f Mon Sep 17 00:00:00 2001
From: Matvezy <mpopov@trinity.edu>
Date: Mon, 31 Mar 2025 22:18:21 +0000
Subject: [PATCH 07/20] turn early stopping on by default

---
 rfdetr/main.py                |   2 +-
 rfdetr/test_early_stopping.py | 261 ++++++++++++++++++++++++++++++++++
 rfdetr/test_output/log.txt    |  35 +++++
 3 files changed, 297 insertions(+), 1 deletion(-)
 create mode 100644 rfdetr/test_early_stopping.py

diff --git a/rfdetr/main.py b/rfdetr/main.py
index 894b6e2..04527e9 100644
--- a/rfdetr/main.py
+++ b/rfdetr/main.py
@@ -903,7 +903,7 @@ def populate_args(
     lr_scheduler='step',
     lr_min_factor=0.0,
     # Early stopping parameters
-    early_stopping=False,
+    early_stopping=True,
     early_stopping_patience=5,
     early_stopping_min_delta=0.001,
     early_stopping_use_ema=False,
diff --git a/rfdetr/test_early_stopping.py b/rfdetr/test_early_stopping.py
new file mode 100644
index 0000000..31b995b
--- /dev/null
+++ b/rfdetr/test_early_stopping.py
@@ -0,0 +1,261 @@
+import sys
+import os
+import time
+import torch
+import numpy as np
+from pathlib import Path
+from collections import defaultdict
+
+# Add the project root to path so we can import the code
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from rfdetr.main import Model, populate_args
+from rfdetr.util.early_stopping import EarlyStoppingCallback
+
+class MockModel:
+    """Mock model that simulates the Model class but doesn't build a real model"""
+    
+    def __init__(self, map_values, **kwargs):
+        """
+        Args:
+            map_values: List of mAP values to return for each epoch
+            **kwargs: Arguments to pass to populate_args
+        """
+        self.map_values = map_values
+        self.args = populate_args(**kwargs)
+        self.stop_early = False
+        self.current_epoch = 0
+    
+    def request_early_stop(self):
+        """Same method as Model.request_early_stop"""
+        self.stop_early = True
+        print("Early stopping requested, will complete current epoch and stop")
+    
+    def train(self, callbacks=None, **kwargs):
+        """Simulated train method that follows the same pattern as Model.train"""
+        if callbacks is None:
+            callbacks = defaultdict(list)
+        
+        # Set up the parameters
+        args = populate_args(**kwargs)
+        
+        # We need a valid output directory for logs
+        output_dir = Path(args.output_dir)
+        output_dir.mkdir(parents=True, exist_ok=True)
+        
+        print("\n===== Testing Early Stopping with Mock Model =====")
+        print(f"Using map_values: {self.map_values}")
+        if hasattr(args, 'early_stopping') and args.early_stopping:
+            print(f"Early stopping params: patience={args.early_stopping_patience}, min_delta={args.early_stopping_min_delta}")
+        
+        print("\nStarting mock training...")
+        start_time = time.time()
+        
+        for epoch in range(min(args.epochs, len(self.map_values))):
+            self.current_epoch = epoch
+            
+            # Simulate one epoch of training
+            epoch_start_time = time.time()
+            time.sleep(0.2)  # To make output more readable
+            
+            # Generate mock train stats
+            train_stats = {
+                'loss': 1.0 / (epoch + 1),  # Decreasing loss
+                'class_error': 0.5 / (epoch + 1)
+            }
+            
+            # Generate mock evaluation stats with the pre-defined mAP
+            map_value = self.map_values[epoch]
+            test_stats = {
+                'loss': 1.2 / (epoch + 1),
+                'coco_eval_bbox': [map_value, map_value * 0.8, 0.0, 0.0, 0.0, 0.0, map_value * 0.9]
+            }
+            
+            # Create log stats dictionary similar to the real train method
+            log_stats = {
+                'epoch': epoch,
+                **{f'train_{k}': v for k, v in train_stats.items()},
+                **{f'test_{k}': v for k, v in test_stats.items()},
+                'n_parameters': 1000000  # Dummy value
+            }
+            
+            if args.use_ema:
+                # Add EMA metrics (slightly better than regular metrics)
+                ema_map = map_value * 1.05
+                log_stats['ema_test_coco_eval_bbox'] = [
+                    ema_map, ema_map * 0.8, 0.0, 0.0, 0.0, 0.0, ema_map * 0.9
+                ]
+            
+            print(f"Epoch {epoch}: mAP = {map_value:.4f}")
+            
+            # Write the log file similar to the real train method
+            if args.output_dir:
+                with (output_dir / "log.txt").open("a") as f:
+                    f.write(f"{str(log_stats)}\n")
+            
+            # Call the on_fit_epoch_end callbacks
+            for callback in callbacks["on_fit_epoch_end"]:
+                callback(log_stats)
+            
+            # Check if early stopping was triggered
+            if self.stop_early:
+                print(f"\n✅ Early stopping triggered after epoch {epoch}")
+                break
+        else:
+            print("\n❌ Early stopping was not triggered")
+        
+        total_time = time.time() - start_time
+        print(f"Training completed in {total_time:.2f} seconds")
+
+# Test scenarios with different mAP patterns
+
+def test_scenario_1():
+    """Steady improvement, no early stopping expected"""
+    map_values = [0.30, 0.32, 0.34, 0.36, 0.38, 0.40, 0.42, 0.44, 0.46, 0.48]
+    model = MockModel(map_values=map_values, num_classes=2)
+    
+    # Initialize callbacks - this simulates what happens in detr.py
+    callbacks = defaultdict(list)
+    
+    # Initialize early stopping callback - similar to how it would be done in detr.py
+    early_stopping_callback = EarlyStoppingCallback(
+        model=model,  # Pass model directly now
+        patience=3,
+        min_delta=0.005,
+        use_ema=False
+    )
+    callbacks["on_fit_epoch_end"].append(early_stopping_callback.update)
+    
+    model.train(
+        callbacks=callbacks, 
+        epochs=10, 
+        output_dir="test_output",
+        early_stopping=True,
+        early_stopping_patience=3,
+        early_stopping_min_delta=0.005
+    )
+
+def test_scenario_2():
+    """Early plateau, should trigger early stopping"""
+    map_values = [0.30, 0.32, 0.34, 0.341, 0.342, 0.342, 0.343, 0.343, 0.344, 0.344]
+    model = MockModel(map_values=map_values, num_classes=2)
+    
+    # Initialize callbacks
+    callbacks = defaultdict(list)
+    
+    # Initialize early stopping callback
+    early_stopping_callback = EarlyStoppingCallback(
+        model=model,
+        patience=3,
+        min_delta=0.005,
+        use_ema=False
+    )
+    callbacks["on_fit_epoch_end"].append(early_stopping_callback.update)
+    
+    model.train(
+        callbacks=callbacks, 
+        epochs=10, 
+        output_dir="test_output",
+        early_stopping=True,
+        early_stopping_patience=3,
+        early_stopping_min_delta=0.005
+    )
+
+def test_scenario_3():
+    """Initial improvement then plateau"""
+    map_values = [0.30, 0.35, 0.40, 0.45, 0.451, 0.452, 0.452, 0.453, 0.453, 0.454]
+    model = MockModel(map_values=map_values, num_classes=2)
+    
+    # Initialize callbacks
+    callbacks = defaultdict(list)
+    
+    # Initialize early stopping callback
+    early_stopping_callback = EarlyStoppingCallback(
+        model=model,
+        patience=3,
+        min_delta=0.005,
+        use_ema=False
+    )
+    callbacks["on_fit_epoch_end"].append(early_stopping_callback.update)
+    
+    model.train(
+        callbacks=callbacks, 
+        epochs=10, 
+        output_dir="test_output",
+        early_stopping=True,
+        early_stopping_patience=3,
+        early_stopping_min_delta=0.005
+    )
+
+def test_scenario_4():
+    """Decreasing performance"""
+    map_values = [0.30, 0.32, 0.34, 0.33, 0.32, 0.31, 0.30, 0.29, 0.28, 0.27]
+    model = MockModel(map_values=map_values, num_classes=2)
+    
+    # Initialize callbacks
+    callbacks = defaultdict(list)
+    
+    # Initialize early stopping callback
+    early_stopping_callback = EarlyStoppingCallback(
+        model=model,
+        patience=3,
+        min_delta=0.005,
+        use_ema=False
+    )
+    callbacks["on_fit_epoch_end"].append(early_stopping_callback.update)
+    
+    model.train(
+        callbacks=callbacks, 
+        epochs=10, 
+        output_dir="test_output",
+        early_stopping=True,
+        early_stopping_patience=3,
+        early_stopping_min_delta=0.005
+    )
+
+def test_scenario_5():
+    """With EMA metrics"""
+    map_values = [0.30, 0.32, 0.34, 0.341, 0.342, 0.342, 0.343, 0.343, 0.344, 0.344]
+    model = MockModel(map_values=map_values, num_classes=2)
+    
+    # Initialize callbacks
+    callbacks = defaultdict(list)
+    
+    # Initialize early stopping callback with EMA
+    early_stopping_callback = EarlyStoppingCallback(
+        model=model,
+        patience=3,
+        min_delta=0.005,
+        use_ema=True
+    )
+    callbacks["on_fit_epoch_end"].append(early_stopping_callback.update)
+    
+    model.train(
+        callbacks=callbacks, 
+        epochs=10, 
+        output_dir="test_output", 
+        use_ema=True, 
+        early_stopping=True,
+        early_stopping_patience=3,
+        early_stopping_min_delta=0.005,
+        early_stopping_use_ema=True
+    )
+
+if __name__ == "__main__":
+    # Make sure the output directory exists
+    os.makedirs("test_output", exist_ok=True)
+    
+    print("\n\n🔍 SCENARIO 1: Steady improvement, no early stopping")
+    test_scenario_1()
+    
+    print("\n\n🔍 SCENARIO 2: Early plateau, should trigger early stopping")
+    test_scenario_2()
+    
+    print("\n\n🔍 SCENARIO 3: Initial improvement then plateau")
+    test_scenario_3()
+    
+    print("\n\n🔍 SCENARIO 4: Decreasing performance")
+    test_scenario_4()
+    
+    print("\n\n🔍 SCENARIO 5: Using EMA metrics")
+    test_scenario_5()
\ No newline at end of file
diff --git a/rfdetr/test_output/log.txt b/rfdetr/test_output/log.txt
index 0731320..67e9ee3 100644
--- a/rfdetr/test_output/log.txt
+++ b/rfdetr/test_output/log.txt
@@ -33,3 +33,38 @@
 {'epoch': 3, 'train_loss': 0.25, 'train_class_error': 0.125, 'test_loss': 0.3, 'test_coco_eval_bbox': [0.341, 0.27280000000000004, 0.0, 0.0, 0.0, 0.0, 0.3069], 'n_parameters': 1000000, 'ema_test_coco_eval_bbox': [0.35805000000000003, 0.28644000000000003, 0.0, 0.0, 0.0, 0.0, 0.32224500000000006]}
 {'epoch': 4, 'train_loss': 0.2, 'train_class_error': 0.1, 'test_loss': 0.24, 'test_coco_eval_bbox': [0.342, 0.2736, 0.0, 0.0, 0.0, 0.0, 0.3078], 'n_parameters': 1000000, 'ema_test_coco_eval_bbox': [0.35910000000000003, 0.28728000000000004, 0.0, 0.0, 0.0, 0.0, 0.32319000000000003]}
 {'epoch': 5, 'train_loss': 0.16666666666666666, 'train_class_error': 0.08333333333333333, 'test_loss': 0.19999999999999998, 'test_coco_eval_bbox': [0.342, 0.2736, 0.0, 0.0, 0.0, 0.0, 0.3078], 'n_parameters': 1000000, 'ema_test_coco_eval_bbox': [0.35910000000000003, 0.28728000000000004, 0.0, 0.0, 0.0, 0.0, 0.32319000000000003]}
+{'epoch': 0, 'train_loss': 1.0, 'train_class_error': 0.5, 'test_loss': 1.2, 'test_coco_eval_bbox': [0.3, 0.24, 0.0, 0.0, 0.0, 0.0, 0.27], 'n_parameters': 1000000}
+{'epoch': 1, 'train_loss': 0.5, 'train_class_error': 0.25, 'test_loss': 0.6, 'test_coco_eval_bbox': [0.32, 0.256, 0.0, 0.0, 0.0, 0.0, 0.28800000000000003], 'n_parameters': 1000000}
+{'epoch': 2, 'train_loss': 0.3333333333333333, 'train_class_error': 0.16666666666666666, 'test_loss': 0.39999999999999997, 'test_coco_eval_bbox': [0.34, 0.272, 0.0, 0.0, 0.0, 0.0, 0.30600000000000005], 'n_parameters': 1000000}
+{'epoch': 3, 'train_loss': 0.25, 'train_class_error': 0.125, 'test_loss': 0.3, 'test_coco_eval_bbox': [0.36, 0.288, 0.0, 0.0, 0.0, 0.0, 0.324], 'n_parameters': 1000000}
+{'epoch': 4, 'train_loss': 0.2, 'train_class_error': 0.1, 'test_loss': 0.24, 'test_coco_eval_bbox': [0.38, 0.30400000000000005, 0.0, 0.0, 0.0, 0.0, 0.342], 'n_parameters': 1000000}
+{'epoch': 5, 'train_loss': 0.16666666666666666, 'train_class_error': 0.08333333333333333, 'test_loss': 0.19999999999999998, 'test_coco_eval_bbox': [0.4, 0.32000000000000006, 0.0, 0.0, 0.0, 0.0, 0.36000000000000004], 'n_parameters': 1000000}
+{'epoch': 6, 'train_loss': 0.14285714285714285, 'train_class_error': 0.07142857142857142, 'test_loss': 0.17142857142857143, 'test_coco_eval_bbox': [0.42, 0.336, 0.0, 0.0, 0.0, 0.0, 0.378], 'n_parameters': 1000000}
+{'epoch': 7, 'train_loss': 0.125, 'train_class_error': 0.0625, 'test_loss': 0.15, 'test_coco_eval_bbox': [0.44, 0.35200000000000004, 0.0, 0.0, 0.0, 0.0, 0.396], 'n_parameters': 1000000}
+{'epoch': 8, 'train_loss': 0.1111111111111111, 'train_class_error': 0.05555555555555555, 'test_loss': 0.13333333333333333, 'test_coco_eval_bbox': [0.46, 0.36800000000000005, 0.0, 0.0, 0.0, 0.0, 0.41400000000000003], 'n_parameters': 1000000}
+{'epoch': 9, 'train_loss': 0.1, 'train_class_error': 0.05, 'test_loss': 0.12, 'test_coco_eval_bbox': [0.48, 0.384, 0.0, 0.0, 0.0, 0.0, 0.432], 'n_parameters': 1000000}
+{'epoch': 0, 'train_loss': 1.0, 'train_class_error': 0.5, 'test_loss': 1.2, 'test_coco_eval_bbox': [0.3, 0.24, 0.0, 0.0, 0.0, 0.0, 0.27], 'n_parameters': 1000000}
+{'epoch': 1, 'train_loss': 0.5, 'train_class_error': 0.25, 'test_loss': 0.6, 'test_coco_eval_bbox': [0.32, 0.256, 0.0, 0.0, 0.0, 0.0, 0.28800000000000003], 'n_parameters': 1000000}
+{'epoch': 2, 'train_loss': 0.3333333333333333, 'train_class_error': 0.16666666666666666, 'test_loss': 0.39999999999999997, 'test_coco_eval_bbox': [0.34, 0.272, 0.0, 0.0, 0.0, 0.0, 0.30600000000000005], 'n_parameters': 1000000}
+{'epoch': 3, 'train_loss': 0.25, 'train_class_error': 0.125, 'test_loss': 0.3, 'test_coco_eval_bbox': [0.341, 0.27280000000000004, 0.0, 0.0, 0.0, 0.0, 0.3069], 'n_parameters': 1000000}
+{'epoch': 4, 'train_loss': 0.2, 'train_class_error': 0.1, 'test_loss': 0.24, 'test_coco_eval_bbox': [0.342, 0.2736, 0.0, 0.0, 0.0, 0.0, 0.3078], 'n_parameters': 1000000}
+{'epoch': 5, 'train_loss': 0.16666666666666666, 'train_class_error': 0.08333333333333333, 'test_loss': 0.19999999999999998, 'test_coco_eval_bbox': [0.342, 0.2736, 0.0, 0.0, 0.0, 0.0, 0.3078], 'n_parameters': 1000000}
+{'epoch': 0, 'train_loss': 1.0, 'train_class_error': 0.5, 'test_loss': 1.2, 'test_coco_eval_bbox': [0.3, 0.24, 0.0, 0.0, 0.0, 0.0, 0.27], 'n_parameters': 1000000}
+{'epoch': 1, 'train_loss': 0.5, 'train_class_error': 0.25, 'test_loss': 0.6, 'test_coco_eval_bbox': [0.35, 0.27999999999999997, 0.0, 0.0, 0.0, 0.0, 0.315], 'n_parameters': 1000000}
+{'epoch': 2, 'train_loss': 0.3333333333333333, 'train_class_error': 0.16666666666666666, 'test_loss': 0.39999999999999997, 'test_coco_eval_bbox': [0.4, 0.32000000000000006, 0.0, 0.0, 0.0, 0.0, 0.36000000000000004], 'n_parameters': 1000000}
+{'epoch': 3, 'train_loss': 0.25, 'train_class_error': 0.125, 'test_loss': 0.3, 'test_coco_eval_bbox': [0.45, 0.36000000000000004, 0.0, 0.0, 0.0, 0.0, 0.405], 'n_parameters': 1000000}
+{'epoch': 4, 'train_loss': 0.2, 'train_class_error': 0.1, 'test_loss': 0.24, 'test_coco_eval_bbox': [0.451, 0.3608, 0.0, 0.0, 0.0, 0.0, 0.40590000000000004], 'n_parameters': 1000000}
+{'epoch': 5, 'train_loss': 0.16666666666666666, 'train_class_error': 0.08333333333333333, 'test_loss': 0.19999999999999998, 'test_coco_eval_bbox': [0.452, 0.36160000000000003, 0.0, 0.0, 0.0, 0.0, 0.4068], 'n_parameters': 1000000}
+{'epoch': 6, 'train_loss': 0.14285714285714285, 'train_class_error': 0.07142857142857142, 'test_loss': 0.17142857142857143, 'test_coco_eval_bbox': [0.452, 0.36160000000000003, 0.0, 0.0, 0.0, 0.0, 0.4068], 'n_parameters': 1000000}
+{'epoch': 0, 'train_loss': 1.0, 'train_class_error': 0.5, 'test_loss': 1.2, 'test_coco_eval_bbox': [0.3, 0.24, 0.0, 0.0, 0.0, 0.0, 0.27], 'n_parameters': 1000000}
+{'epoch': 1, 'train_loss': 0.5, 'train_class_error': 0.25, 'test_loss': 0.6, 'test_coco_eval_bbox': [0.32, 0.256, 0.0, 0.0, 0.0, 0.0, 0.28800000000000003], 'n_parameters': 1000000}
+{'epoch': 2, 'train_loss': 0.3333333333333333, 'train_class_error': 0.16666666666666666, 'test_loss': 0.39999999999999997, 'test_coco_eval_bbox': [0.34, 0.272, 0.0, 0.0, 0.0, 0.0, 0.30600000000000005], 'n_parameters': 1000000}
+{'epoch': 3, 'train_loss': 0.25, 'train_class_error': 0.125, 'test_loss': 0.3, 'test_coco_eval_bbox': [0.33, 0.264, 0.0, 0.0, 0.0, 0.0, 0.29700000000000004], 'n_parameters': 1000000}
+{'epoch': 4, 'train_loss': 0.2, 'train_class_error': 0.1, 'test_loss': 0.24, 'test_coco_eval_bbox': [0.32, 0.256, 0.0, 0.0, 0.0, 0.0, 0.28800000000000003], 'n_parameters': 1000000}
+{'epoch': 5, 'train_loss': 0.16666666666666666, 'train_class_error': 0.08333333333333333, 'test_loss': 0.19999999999999998, 'test_coco_eval_bbox': [0.31, 0.248, 0.0, 0.0, 0.0, 0.0, 0.279], 'n_parameters': 1000000}
+{'epoch': 0, 'train_loss': 1.0, 'train_class_error': 0.5, 'test_loss': 1.2, 'test_coco_eval_bbox': [0.3, 0.24, 0.0, 0.0, 0.0, 0.0, 0.27], 'n_parameters': 1000000, 'ema_test_coco_eval_bbox': [0.315, 0.252, 0.0, 0.0, 0.0, 0.0, 0.28350000000000003]}
+{'epoch': 1, 'train_loss': 0.5, 'train_class_error': 0.25, 'test_loss': 0.6, 'test_coco_eval_bbox': [0.32, 0.256, 0.0, 0.0, 0.0, 0.0, 0.28800000000000003], 'n_parameters': 1000000, 'ema_test_coco_eval_bbox': [0.336, 0.26880000000000004, 0.0, 0.0, 0.0, 0.0, 0.3024]}
+{'epoch': 2, 'train_loss': 0.3333333333333333, 'train_class_error': 0.16666666666666666, 'test_loss': 0.39999999999999997, 'test_coco_eval_bbox': [0.34, 0.272, 0.0, 0.0, 0.0, 0.0, 0.30600000000000005], 'n_parameters': 1000000, 'ema_test_coco_eval_bbox': [0.35700000000000004, 0.2856, 0.0, 0.0, 0.0, 0.0, 0.32130000000000003]}
+{'epoch': 3, 'train_loss': 0.25, 'train_class_error': 0.125, 'test_loss': 0.3, 'test_coco_eval_bbox': [0.341, 0.27280000000000004, 0.0, 0.0, 0.0, 0.0, 0.3069], 'n_parameters': 1000000, 'ema_test_coco_eval_bbox': [0.35805000000000003, 0.28644000000000003, 0.0, 0.0, 0.0, 0.0, 0.32224500000000006]}
+{'epoch': 4, 'train_loss': 0.2, 'train_class_error': 0.1, 'test_loss': 0.24, 'test_coco_eval_bbox': [0.342, 0.2736, 0.0, 0.0, 0.0, 0.0, 0.3078], 'n_parameters': 1000000, 'ema_test_coco_eval_bbox': [0.35910000000000003, 0.28728000000000004, 0.0, 0.0, 0.0, 0.0, 0.32319000000000003]}
+{'epoch': 5, 'train_loss': 0.16666666666666666, 'train_class_error': 0.08333333333333333, 'test_loss': 0.19999999999999998, 'test_coco_eval_bbox': [0.342, 0.2736, 0.0, 0.0, 0.0, 0.0, 0.3078], 'n_parameters': 1000000, 'ema_test_coco_eval_bbox': [0.35910000000000003, 0.28728000000000004, 0.0, 0.0, 0.0, 0.0, 0.32319000000000003]}

From ecc0ee9c504934f96c3aca735576f092a9d84fe9 Mon Sep 17 00:00:00 2001
From: Matvei Popov <46304340+Matvezy@users.noreply.github.com>
Date: Mon, 31 Mar 2025 15:19:11 -0700
Subject: [PATCH 08/20] Delete rfdetr/test_early_stopping.py

---
 rfdetr/test_early_stopping.py | 261 ----------------------------------
 1 file changed, 261 deletions(-)
 delete mode 100644 rfdetr/test_early_stopping.py

diff --git a/rfdetr/test_early_stopping.py b/rfdetr/test_early_stopping.py
deleted file mode 100644
index 31b995b..0000000
--- a/rfdetr/test_early_stopping.py
+++ /dev/null
@@ -1,261 +0,0 @@
-import sys
-import os
-import time
-import torch
-import numpy as np
-from pathlib import Path
-from collections import defaultdict
-
-# Add the project root to path so we can import the code
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-from rfdetr.main import Model, populate_args
-from rfdetr.util.early_stopping import EarlyStoppingCallback
-
-class MockModel:
-    """Mock model that simulates the Model class but doesn't build a real model"""
-    
-    def __init__(self, map_values, **kwargs):
-        """
-        Args:
-            map_values: List of mAP values to return for each epoch
-            **kwargs: Arguments to pass to populate_args
-        """
-        self.map_values = map_values
-        self.args = populate_args(**kwargs)
-        self.stop_early = False
-        self.current_epoch = 0
-    
-    def request_early_stop(self):
-        """Same method as Model.request_early_stop"""
-        self.stop_early = True
-        print("Early stopping requested, will complete current epoch and stop")
-    
-    def train(self, callbacks=None, **kwargs):
-        """Simulated train method that follows the same pattern as Model.train"""
-        if callbacks is None:
-            callbacks = defaultdict(list)
-        
-        # Set up the parameters
-        args = populate_args(**kwargs)
-        
-        # We need a valid output directory for logs
-        output_dir = Path(args.output_dir)
-        output_dir.mkdir(parents=True, exist_ok=True)
-        
-        print("\n===== Testing Early Stopping with Mock Model =====")
-        print(f"Using map_values: {self.map_values}")
-        if hasattr(args, 'early_stopping') and args.early_stopping:
-            print(f"Early stopping params: patience={args.early_stopping_patience}, min_delta={args.early_stopping_min_delta}")
-        
-        print("\nStarting mock training...")
-        start_time = time.time()
-        
-        for epoch in range(min(args.epochs, len(self.map_values))):
-            self.current_epoch = epoch
-            
-            # Simulate one epoch of training
-            epoch_start_time = time.time()
-            time.sleep(0.2)  # To make output more readable
-            
-            # Generate mock train stats
-            train_stats = {
-                'loss': 1.0 / (epoch + 1),  # Decreasing loss
-                'class_error': 0.5 / (epoch + 1)
-            }
-            
-            # Generate mock evaluation stats with the pre-defined mAP
-            map_value = self.map_values[epoch]
-            test_stats = {
-                'loss': 1.2 / (epoch + 1),
-                'coco_eval_bbox': [map_value, map_value * 0.8, 0.0, 0.0, 0.0, 0.0, map_value * 0.9]
-            }
-            
-            # Create log stats dictionary similar to the real train method
-            log_stats = {
-                'epoch': epoch,
-                **{f'train_{k}': v for k, v in train_stats.items()},
-                **{f'test_{k}': v for k, v in test_stats.items()},
-                'n_parameters': 1000000  # Dummy value
-            }
-            
-            if args.use_ema:
-                # Add EMA metrics (slightly better than regular metrics)
-                ema_map = map_value * 1.05
-                log_stats['ema_test_coco_eval_bbox'] = [
-                    ema_map, ema_map * 0.8, 0.0, 0.0, 0.0, 0.0, ema_map * 0.9
-                ]
-            
-            print(f"Epoch {epoch}: mAP = {map_value:.4f}")
-            
-            # Write the log file similar to the real train method
-            if args.output_dir:
-                with (output_dir / "log.txt").open("a") as f:
-                    f.write(f"{str(log_stats)}\n")
-            
-            # Call the on_fit_epoch_end callbacks
-            for callback in callbacks["on_fit_epoch_end"]:
-                callback(log_stats)
-            
-            # Check if early stopping was triggered
-            if self.stop_early:
-                print(f"\n✅ Early stopping triggered after epoch {epoch}")
-                break
-        else:
-            print("\n❌ Early stopping was not triggered")
-        
-        total_time = time.time() - start_time
-        print(f"Training completed in {total_time:.2f} seconds")
-
-# Test scenarios with different mAP patterns
-
-def test_scenario_1():
-    """Steady improvement, no early stopping expected"""
-    map_values = [0.30, 0.32, 0.34, 0.36, 0.38, 0.40, 0.42, 0.44, 0.46, 0.48]
-    model = MockModel(map_values=map_values, num_classes=2)
-    
-    # Initialize callbacks - this simulates what happens in detr.py
-    callbacks = defaultdict(list)
-    
-    # Initialize early stopping callback - similar to how it would be done in detr.py
-    early_stopping_callback = EarlyStoppingCallback(
-        model=model,  # Pass model directly now
-        patience=3,
-        min_delta=0.005,
-        use_ema=False
-    )
-    callbacks["on_fit_epoch_end"].append(early_stopping_callback.update)
-    
-    model.train(
-        callbacks=callbacks, 
-        epochs=10, 
-        output_dir="test_output",
-        early_stopping=True,
-        early_stopping_patience=3,
-        early_stopping_min_delta=0.005
-    )
-
-def test_scenario_2():
-    """Early plateau, should trigger early stopping"""
-    map_values = [0.30, 0.32, 0.34, 0.341, 0.342, 0.342, 0.343, 0.343, 0.344, 0.344]
-    model = MockModel(map_values=map_values, num_classes=2)
-    
-    # Initialize callbacks
-    callbacks = defaultdict(list)
-    
-    # Initialize early stopping callback
-    early_stopping_callback = EarlyStoppingCallback(
-        model=model,
-        patience=3,
-        min_delta=0.005,
-        use_ema=False
-    )
-    callbacks["on_fit_epoch_end"].append(early_stopping_callback.update)
-    
-    model.train(
-        callbacks=callbacks, 
-        epochs=10, 
-        output_dir="test_output",
-        early_stopping=True,
-        early_stopping_patience=3,
-        early_stopping_min_delta=0.005
-    )
-
-def test_scenario_3():
-    """Initial improvement then plateau"""
-    map_values = [0.30, 0.35, 0.40, 0.45, 0.451, 0.452, 0.452, 0.453, 0.453, 0.454]
-    model = MockModel(map_values=map_values, num_classes=2)
-    
-    # Initialize callbacks
-    callbacks = defaultdict(list)
-    
-    # Initialize early stopping callback
-    early_stopping_callback = EarlyStoppingCallback(
-        model=model,
-        patience=3,
-        min_delta=0.005,
-        use_ema=False
-    )
-    callbacks["on_fit_epoch_end"].append(early_stopping_callback.update)
-    
-    model.train(
-        callbacks=callbacks, 
-        epochs=10, 
-        output_dir="test_output",
-        early_stopping=True,
-        early_stopping_patience=3,
-        early_stopping_min_delta=0.005
-    )
-
-def test_scenario_4():
-    """Decreasing performance"""
-    map_values = [0.30, 0.32, 0.34, 0.33, 0.32, 0.31, 0.30, 0.29, 0.28, 0.27]
-    model = MockModel(map_values=map_values, num_classes=2)
-    
-    # Initialize callbacks
-    callbacks = defaultdict(list)
-    
-    # Initialize early stopping callback
-    early_stopping_callback = EarlyStoppingCallback(
-        model=model,
-        patience=3,
-        min_delta=0.005,
-        use_ema=False
-    )
-    callbacks["on_fit_epoch_end"].append(early_stopping_callback.update)
-    
-    model.train(
-        callbacks=callbacks, 
-        epochs=10, 
-        output_dir="test_output",
-        early_stopping=True,
-        early_stopping_patience=3,
-        early_stopping_min_delta=0.005
-    )
-
-def test_scenario_5():
-    """With EMA metrics"""
-    map_values = [0.30, 0.32, 0.34, 0.341, 0.342, 0.342, 0.343, 0.343, 0.344, 0.344]
-    model = MockModel(map_values=map_values, num_classes=2)
-    
-    # Initialize callbacks
-    callbacks = defaultdict(list)
-    
-    # Initialize early stopping callback with EMA
-    early_stopping_callback = EarlyStoppingCallback(
-        model=model,
-        patience=3,
-        min_delta=0.005,
-        use_ema=True
-    )
-    callbacks["on_fit_epoch_end"].append(early_stopping_callback.update)
-    
-    model.train(
-        callbacks=callbacks, 
-        epochs=10, 
-        output_dir="test_output", 
-        use_ema=True, 
-        early_stopping=True,
-        early_stopping_patience=3,
-        early_stopping_min_delta=0.005,
-        early_stopping_use_ema=True
-    )
-
-if __name__ == "__main__":
-    # Make sure the output directory exists
-    os.makedirs("test_output", exist_ok=True)
-    
-    print("\n\n🔍 SCENARIO 1: Steady improvement, no early stopping")
-    test_scenario_1()
-    
-    print("\n\n🔍 SCENARIO 2: Early plateau, should trigger early stopping")
-    test_scenario_2()
-    
-    print("\n\n🔍 SCENARIO 3: Initial improvement then plateau")
-    test_scenario_3()
-    
-    print("\n\n🔍 SCENARIO 4: Decreasing performance")
-    test_scenario_4()
-    
-    print("\n\n🔍 SCENARIO 5: Using EMA metrics")
-    test_scenario_5()
\ No newline at end of file

From 37b08321de3cd3388952a941d0b4dd5a31a289cd Mon Sep 17 00:00:00 2001
From: Matvei Popov <46304340+Matvezy@users.noreply.github.com>
Date: Mon, 31 Mar 2025 15:19:42 -0700
Subject: [PATCH 09/20] Delete rfdetr/test_output/log.txt

---
 rfdetr/test_output/log.txt | 70 --------------------------------------
 1 file changed, 70 deletions(-)
 delete mode 100644 rfdetr/test_output/log.txt

diff --git a/rfdetr/test_output/log.txt b/rfdetr/test_output/log.txt
deleted file mode 100644
index 67e9ee3..0000000
--- a/rfdetr/test_output/log.txt
+++ /dev/null
@@ -1,70 +0,0 @@
-{'epoch': 0, 'train_loss': 1.0, 'train_class_error': 0.5, 'test_loss': 1.2, 'test_coco_eval_bbox': [0.3, 0.24, 0.0, 0.0, 0.0, 0.0, 0.27], 'n_parameters': 1000000}
-{'epoch': 1, 'train_loss': 0.5, 'train_class_error': 0.25, 'test_loss': 0.6, 'test_coco_eval_bbox': [0.32, 0.256, 0.0, 0.0, 0.0, 0.0, 0.28800000000000003], 'n_parameters': 1000000}
-{'epoch': 2, 'train_loss': 0.3333333333333333, 'train_class_error': 0.16666666666666666, 'test_loss': 0.39999999999999997, 'test_coco_eval_bbox': [0.34, 0.272, 0.0, 0.0, 0.0, 0.0, 0.30600000000000005], 'n_parameters': 1000000}
-{'epoch': 3, 'train_loss': 0.25, 'train_class_error': 0.125, 'test_loss': 0.3, 'test_coco_eval_bbox': [0.36, 0.288, 0.0, 0.0, 0.0, 0.0, 0.324], 'n_parameters': 1000000}
-{'epoch': 4, 'train_loss': 0.2, 'train_class_error': 0.1, 'test_loss': 0.24, 'test_coco_eval_bbox': [0.38, 0.30400000000000005, 0.0, 0.0, 0.0, 0.0, 0.342], 'n_parameters': 1000000}
-{'epoch': 5, 'train_loss': 0.16666666666666666, 'train_class_error': 0.08333333333333333, 'test_loss': 0.19999999999999998, 'test_coco_eval_bbox': [0.4, 0.32000000000000006, 0.0, 0.0, 0.0, 0.0, 0.36000000000000004], 'n_parameters': 1000000}
-{'epoch': 6, 'train_loss': 0.14285714285714285, 'train_class_error': 0.07142857142857142, 'test_loss': 0.17142857142857143, 'test_coco_eval_bbox': [0.42, 0.336, 0.0, 0.0, 0.0, 0.0, 0.378], 'n_parameters': 1000000}
-{'epoch': 7, 'train_loss': 0.125, 'train_class_error': 0.0625, 'test_loss': 0.15, 'test_coco_eval_bbox': [0.44, 0.35200000000000004, 0.0, 0.0, 0.0, 0.0, 0.396], 'n_parameters': 1000000}
-{'epoch': 8, 'train_loss': 0.1111111111111111, 'train_class_error': 0.05555555555555555, 'test_loss': 0.13333333333333333, 'test_coco_eval_bbox': [0.46, 0.36800000000000005, 0.0, 0.0, 0.0, 0.0, 0.41400000000000003], 'n_parameters': 1000000}
-{'epoch': 9, 'train_loss': 0.1, 'train_class_error': 0.05, 'test_loss': 0.12, 'test_coco_eval_bbox': [0.48, 0.384, 0.0, 0.0, 0.0, 0.0, 0.432], 'n_parameters': 1000000}
-{'epoch': 0, 'train_loss': 1.0, 'train_class_error': 0.5, 'test_loss': 1.2, 'test_coco_eval_bbox': [0.3, 0.24, 0.0, 0.0, 0.0, 0.0, 0.27], 'n_parameters': 1000000}
-{'epoch': 1, 'train_loss': 0.5, 'train_class_error': 0.25, 'test_loss': 0.6, 'test_coco_eval_bbox': [0.32, 0.256, 0.0, 0.0, 0.0, 0.0, 0.28800000000000003], 'n_parameters': 1000000}
-{'epoch': 2, 'train_loss': 0.3333333333333333, 'train_class_error': 0.16666666666666666, 'test_loss': 0.39999999999999997, 'test_coco_eval_bbox': [0.34, 0.272, 0.0, 0.0, 0.0, 0.0, 0.30600000000000005], 'n_parameters': 1000000}
-{'epoch': 3, 'train_loss': 0.25, 'train_class_error': 0.125, 'test_loss': 0.3, 'test_coco_eval_bbox': [0.341, 0.27280000000000004, 0.0, 0.0, 0.0, 0.0, 0.3069], 'n_parameters': 1000000}
-{'epoch': 4, 'train_loss': 0.2, 'train_class_error': 0.1, 'test_loss': 0.24, 'test_coco_eval_bbox': [0.342, 0.2736, 0.0, 0.0, 0.0, 0.0, 0.3078], 'n_parameters': 1000000}
-{'epoch': 5, 'train_loss': 0.16666666666666666, 'train_class_error': 0.08333333333333333, 'test_loss': 0.19999999999999998, 'test_coco_eval_bbox': [0.342, 0.2736, 0.0, 0.0, 0.0, 0.0, 0.3078], 'n_parameters': 1000000}
-{'epoch': 0, 'train_loss': 1.0, 'train_class_error': 0.5, 'test_loss': 1.2, 'test_coco_eval_bbox': [0.3, 0.24, 0.0, 0.0, 0.0, 0.0, 0.27], 'n_parameters': 1000000}
-{'epoch': 1, 'train_loss': 0.5, 'train_class_error': 0.25, 'test_loss': 0.6, 'test_coco_eval_bbox': [0.35, 0.27999999999999997, 0.0, 0.0, 0.0, 0.0, 0.315], 'n_parameters': 1000000}
-{'epoch': 2, 'train_loss': 0.3333333333333333, 'train_class_error': 0.16666666666666666, 'test_loss': 0.39999999999999997, 'test_coco_eval_bbox': [0.4, 0.32000000000000006, 0.0, 0.0, 0.0, 0.0, 0.36000000000000004], 'n_parameters': 1000000}
-{'epoch': 3, 'train_loss': 0.25, 'train_class_error': 0.125, 'test_loss': 0.3, 'test_coco_eval_bbox': [0.45, 0.36000000000000004, 0.0, 0.0, 0.0, 0.0, 0.405], 'n_parameters': 1000000}
-{'epoch': 4, 'train_loss': 0.2, 'train_class_error': 0.1, 'test_loss': 0.24, 'test_coco_eval_bbox': [0.451, 0.3608, 0.0, 0.0, 0.0, 0.0, 0.40590000000000004], 'n_parameters': 1000000}
-{'epoch': 5, 'train_loss': 0.16666666666666666, 'train_class_error': 0.08333333333333333, 'test_loss': 0.19999999999999998, 'test_coco_eval_bbox': [0.452, 0.36160000000000003, 0.0, 0.0, 0.0, 0.0, 0.4068], 'n_parameters': 1000000}
-{'epoch': 6, 'train_loss': 0.14285714285714285, 'train_class_error': 0.07142857142857142, 'test_loss': 0.17142857142857143, 'test_coco_eval_bbox': [0.452, 0.36160000000000003, 0.0, 0.0, 0.0, 0.0, 0.4068], 'n_parameters': 1000000}
-{'epoch': 0, 'train_loss': 1.0, 'train_class_error': 0.5, 'test_loss': 1.2, 'test_coco_eval_bbox': [0.3, 0.24, 0.0, 0.0, 0.0, 0.0, 0.27], 'n_parameters': 1000000}
-{'epoch': 1, 'train_loss': 0.5, 'train_class_error': 0.25, 'test_loss': 0.6, 'test_coco_eval_bbox': [0.32, 0.256, 0.0, 0.0, 0.0, 0.0, 0.28800000000000003], 'n_parameters': 1000000}
-{'epoch': 2, 'train_loss': 0.3333333333333333, 'train_class_error': 0.16666666666666666, 'test_loss': 0.39999999999999997, 'test_coco_eval_bbox': [0.34, 0.272, 0.0, 0.0, 0.0, 0.0, 0.30600000000000005], 'n_parameters': 1000000}
-{'epoch': 3, 'train_loss': 0.25, 'train_class_error': 0.125, 'test_loss': 0.3, 'test_coco_eval_bbox': [0.33, 0.264, 0.0, 0.0, 0.0, 0.0, 0.29700000000000004], 'n_parameters': 1000000}
-{'epoch': 4, 'train_loss': 0.2, 'train_class_error': 0.1, 'test_loss': 0.24, 'test_coco_eval_bbox': [0.32, 0.256, 0.0, 0.0, 0.0, 0.0, 0.28800000000000003], 'n_parameters': 1000000}
-{'epoch': 5, 'train_loss': 0.16666666666666666, 'train_class_error': 0.08333333333333333, 'test_loss': 0.19999999999999998, 'test_coco_eval_bbox': [0.31, 0.248, 0.0, 0.0, 0.0, 0.0, 0.279], 'n_parameters': 1000000}
-{'epoch': 0, 'train_loss': 1.0, 'train_class_error': 0.5, 'test_loss': 1.2, 'test_coco_eval_bbox': [0.3, 0.24, 0.0, 0.0, 0.0, 0.0, 0.27], 'n_parameters': 1000000, 'ema_test_coco_eval_bbox': [0.315, 0.252, 0.0, 0.0, 0.0, 0.0, 0.28350000000000003]}
-{'epoch': 1, 'train_loss': 0.5, 'train_class_error': 0.25, 'test_loss': 0.6, 'test_coco_eval_bbox': [0.32, 0.256, 0.0, 0.0, 0.0, 0.0, 0.28800000000000003], 'n_parameters': 1000000, 'ema_test_coco_eval_bbox': [0.336, 0.26880000000000004, 0.0, 0.0, 0.0, 0.0, 0.3024]}
-{'epoch': 2, 'train_loss': 0.3333333333333333, 'train_class_error': 0.16666666666666666, 'test_loss': 0.39999999999999997, 'test_coco_eval_bbox': [0.34, 0.272, 0.0, 0.0, 0.0, 0.0, 0.30600000000000005], 'n_parameters': 1000000, 'ema_test_coco_eval_bbox': [0.35700000000000004, 0.2856, 0.0, 0.0, 0.0, 0.0, 0.32130000000000003]}
-{'epoch': 3, 'train_loss': 0.25, 'train_class_error': 0.125, 'test_loss': 0.3, 'test_coco_eval_bbox': [0.341, 0.27280000000000004, 0.0, 0.0, 0.0, 0.0, 0.3069], 'n_parameters': 1000000, 'ema_test_coco_eval_bbox': [0.35805000000000003, 0.28644000000000003, 0.0, 0.0, 0.0, 0.0, 0.32224500000000006]}
-{'epoch': 4, 'train_loss': 0.2, 'train_class_error': 0.1, 'test_loss': 0.24, 'test_coco_eval_bbox': [0.342, 0.2736, 0.0, 0.0, 0.0, 0.0, 0.3078], 'n_parameters': 1000000, 'ema_test_coco_eval_bbox': [0.35910000000000003, 0.28728000000000004, 0.0, 0.0, 0.0, 0.0, 0.32319000000000003]}
-{'epoch': 5, 'train_loss': 0.16666666666666666, 'train_class_error': 0.08333333333333333, 'test_loss': 0.19999999999999998, 'test_coco_eval_bbox': [0.342, 0.2736, 0.0, 0.0, 0.0, 0.0, 0.3078], 'n_parameters': 1000000, 'ema_test_coco_eval_bbox': [0.35910000000000003, 0.28728000000000004, 0.0, 0.0, 0.0, 0.0, 0.32319000000000003]}
-{'epoch': 0, 'train_loss': 1.0, 'train_class_error': 0.5, 'test_loss': 1.2, 'test_coco_eval_bbox': [0.3, 0.24, 0.0, 0.0, 0.0, 0.0, 0.27], 'n_parameters': 1000000}
-{'epoch': 1, 'train_loss': 0.5, 'train_class_error': 0.25, 'test_loss': 0.6, 'test_coco_eval_bbox': [0.32, 0.256, 0.0, 0.0, 0.0, 0.0, 0.28800000000000003], 'n_parameters': 1000000}
-{'epoch': 2, 'train_loss': 0.3333333333333333, 'train_class_error': 0.16666666666666666, 'test_loss': 0.39999999999999997, 'test_coco_eval_bbox': [0.34, 0.272, 0.0, 0.0, 0.0, 0.0, 0.30600000000000005], 'n_parameters': 1000000}
-{'epoch': 3, 'train_loss': 0.25, 'train_class_error': 0.125, 'test_loss': 0.3, 'test_coco_eval_bbox': [0.36, 0.288, 0.0, 0.0, 0.0, 0.0, 0.324], 'n_parameters': 1000000}
-{'epoch': 4, 'train_loss': 0.2, 'train_class_error': 0.1, 'test_loss': 0.24, 'test_coco_eval_bbox': [0.38, 0.30400000000000005, 0.0, 0.0, 0.0, 0.0, 0.342], 'n_parameters': 1000000}
-{'epoch': 5, 'train_loss': 0.16666666666666666, 'train_class_error': 0.08333333333333333, 'test_loss': 0.19999999999999998, 'test_coco_eval_bbox': [0.4, 0.32000000000000006, 0.0, 0.0, 0.0, 0.0, 0.36000000000000004], 'n_parameters': 1000000}
-{'epoch': 6, 'train_loss': 0.14285714285714285, 'train_class_error': 0.07142857142857142, 'test_loss': 0.17142857142857143, 'test_coco_eval_bbox': [0.42, 0.336, 0.0, 0.0, 0.0, 0.0, 0.378], 'n_parameters': 1000000}
-{'epoch': 7, 'train_loss': 0.125, 'train_class_error': 0.0625, 'test_loss': 0.15, 'test_coco_eval_bbox': [0.44, 0.35200000000000004, 0.0, 0.0, 0.0, 0.0, 0.396], 'n_parameters': 1000000}
-{'epoch': 8, 'train_loss': 0.1111111111111111, 'train_class_error': 0.05555555555555555, 'test_loss': 0.13333333333333333, 'test_coco_eval_bbox': [0.46, 0.36800000000000005, 0.0, 0.0, 0.0, 0.0, 0.41400000000000003], 'n_parameters': 1000000}
-{'epoch': 9, 'train_loss': 0.1, 'train_class_error': 0.05, 'test_loss': 0.12, 'test_coco_eval_bbox': [0.48, 0.384, 0.0, 0.0, 0.0, 0.0, 0.432], 'n_parameters': 1000000}
-{'epoch': 0, 'train_loss': 1.0, 'train_class_error': 0.5, 'test_loss': 1.2, 'test_coco_eval_bbox': [0.3, 0.24, 0.0, 0.0, 0.0, 0.0, 0.27], 'n_parameters': 1000000}
-{'epoch': 1, 'train_loss': 0.5, 'train_class_error': 0.25, 'test_loss': 0.6, 'test_coco_eval_bbox': [0.32, 0.256, 0.0, 0.0, 0.0, 0.0, 0.28800000000000003], 'n_parameters': 1000000}
-{'epoch': 2, 'train_loss': 0.3333333333333333, 'train_class_error': 0.16666666666666666, 'test_loss': 0.39999999999999997, 'test_coco_eval_bbox': [0.34, 0.272, 0.0, 0.0, 0.0, 0.0, 0.30600000000000005], 'n_parameters': 1000000}
-{'epoch': 3, 'train_loss': 0.25, 'train_class_error': 0.125, 'test_loss': 0.3, 'test_coco_eval_bbox': [0.341, 0.27280000000000004, 0.0, 0.0, 0.0, 0.0, 0.3069], 'n_parameters': 1000000}
-{'epoch': 4, 'train_loss': 0.2, 'train_class_error': 0.1, 'test_loss': 0.24, 'test_coco_eval_bbox': [0.342, 0.2736, 0.0, 0.0, 0.0, 0.0, 0.3078], 'n_parameters': 1000000}
-{'epoch': 5, 'train_loss': 0.16666666666666666, 'train_class_error': 0.08333333333333333, 'test_loss': 0.19999999999999998, 'test_coco_eval_bbox': [0.342, 0.2736, 0.0, 0.0, 0.0, 0.0, 0.3078], 'n_parameters': 1000000}
-{'epoch': 0, 'train_loss': 1.0, 'train_class_error': 0.5, 'test_loss': 1.2, 'test_coco_eval_bbox': [0.3, 0.24, 0.0, 0.0, 0.0, 0.0, 0.27], 'n_parameters': 1000000}
-{'epoch': 1, 'train_loss': 0.5, 'train_class_error': 0.25, 'test_loss': 0.6, 'test_coco_eval_bbox': [0.35, 0.27999999999999997, 0.0, 0.0, 0.0, 0.0, 0.315], 'n_parameters': 1000000}
-{'epoch': 2, 'train_loss': 0.3333333333333333, 'train_class_error': 0.16666666666666666, 'test_loss': 0.39999999999999997, 'test_coco_eval_bbox': [0.4, 0.32000000000000006, 0.0, 0.0, 0.0, 0.0, 0.36000000000000004], 'n_parameters': 1000000}
-{'epoch': 3, 'train_loss': 0.25, 'train_class_error': 0.125, 'test_loss': 0.3, 'test_coco_eval_bbox': [0.45, 0.36000000000000004, 0.0, 0.0, 0.0, 0.0, 0.405], 'n_parameters': 1000000}
-{'epoch': 4, 'train_loss': 0.2, 'train_class_error': 0.1, 'test_loss': 0.24, 'test_coco_eval_bbox': [0.451, 0.3608, 0.0, 0.0, 0.0, 0.0, 0.40590000000000004], 'n_parameters': 1000000}
-{'epoch': 5, 'train_loss': 0.16666666666666666, 'train_class_error': 0.08333333333333333, 'test_loss': 0.19999999999999998, 'test_coco_eval_bbox': [0.452, 0.36160000000000003, 0.0, 0.0, 0.0, 0.0, 0.4068], 'n_parameters': 1000000}
-{'epoch': 6, 'train_loss': 0.14285714285714285, 'train_class_error': 0.07142857142857142, 'test_loss': 0.17142857142857143, 'test_coco_eval_bbox': [0.452, 0.36160000000000003, 0.0, 0.0, 0.0, 0.0, 0.4068], 'n_parameters': 1000000}
-{'epoch': 0, 'train_loss': 1.0, 'train_class_error': 0.5, 'test_loss': 1.2, 'test_coco_eval_bbox': [0.3, 0.24, 0.0, 0.0, 0.0, 0.0, 0.27], 'n_parameters': 1000000}
-{'epoch': 1, 'train_loss': 0.5, 'train_class_error': 0.25, 'test_loss': 0.6, 'test_coco_eval_bbox': [0.32, 0.256, 0.0, 0.0, 0.0, 0.0, 0.28800000000000003], 'n_parameters': 1000000}
-{'epoch': 2, 'train_loss': 0.3333333333333333, 'train_class_error': 0.16666666666666666, 'test_loss': 0.39999999999999997, 'test_coco_eval_bbox': [0.34, 0.272, 0.0, 0.0, 0.0, 0.0, 0.30600000000000005], 'n_parameters': 1000000}
-{'epoch': 3, 'train_loss': 0.25, 'train_class_error': 0.125, 'test_loss': 0.3, 'test_coco_eval_bbox': [0.33, 0.264, 0.0, 0.0, 0.0, 0.0, 0.29700000000000004], 'n_parameters': 1000000}
-{'epoch': 4, 'train_loss': 0.2, 'train_class_error': 0.1, 'test_loss': 0.24, 'test_coco_eval_bbox': [0.32, 0.256, 0.0, 0.0, 0.0, 0.0, 0.28800000000000003], 'n_parameters': 1000000}
-{'epoch': 5, 'train_loss': 0.16666666666666666, 'train_class_error': 0.08333333333333333, 'test_loss': 0.19999999999999998, 'test_coco_eval_bbox': [0.31, 0.248, 0.0, 0.0, 0.0, 0.0, 0.279], 'n_parameters': 1000000}
-{'epoch': 0, 'train_loss': 1.0, 'train_class_error': 0.5, 'test_loss': 1.2, 'test_coco_eval_bbox': [0.3, 0.24, 0.0, 0.0, 0.0, 0.0, 0.27], 'n_parameters': 1000000, 'ema_test_coco_eval_bbox': [0.315, 0.252, 0.0, 0.0, 0.0, 0.0, 0.28350000000000003]}
-{'epoch': 1, 'train_loss': 0.5, 'train_class_error': 0.25, 'test_loss': 0.6, 'test_coco_eval_bbox': [0.32, 0.256, 0.0, 0.0, 0.0, 0.0, 0.28800000000000003], 'n_parameters': 1000000, 'ema_test_coco_eval_bbox': [0.336, 0.26880000000000004, 0.0, 0.0, 0.0, 0.0, 0.3024]}
-{'epoch': 2, 'train_loss': 0.3333333333333333, 'train_class_error': 0.16666666666666666, 'test_loss': 0.39999999999999997, 'test_coco_eval_bbox': [0.34, 0.272, 0.0, 0.0, 0.0, 0.0, 0.30600000000000005], 'n_parameters': 1000000, 'ema_test_coco_eval_bbox': [0.35700000000000004, 0.2856, 0.0, 0.0, 0.0, 0.0, 0.32130000000000003]}
-{'epoch': 3, 'train_loss': 0.25, 'train_class_error': 0.125, 'test_loss': 0.3, 'test_coco_eval_bbox': [0.341, 0.27280000000000004, 0.0, 0.0, 0.0, 0.0, 0.3069], 'n_parameters': 1000000, 'ema_test_coco_eval_bbox': [0.35805000000000003, 0.28644000000000003, 0.0, 0.0, 0.0, 0.0, 0.32224500000000006]}
-{'epoch': 4, 'train_loss': 0.2, 'train_class_error': 0.1, 'test_loss': 0.24, 'test_coco_eval_bbox': [0.342, 0.2736, 0.0, 0.0, 0.0, 0.0, 0.3078], 'n_parameters': 1000000, 'ema_test_coco_eval_bbox': [0.35910000000000003, 0.28728000000000004, 0.0, 0.0, 0.0, 0.0, 0.32319000000000003]}
-{'epoch': 5, 'train_loss': 0.16666666666666666, 'train_class_error': 0.08333333333333333, 'test_loss': 0.19999999999999998, 'test_coco_eval_bbox': [0.342, 0.2736, 0.0, 0.0, 0.0, 0.0, 0.3078], 'n_parameters': 1000000, 'ema_test_coco_eval_bbox': [0.35910000000000003, 0.28728000000000004, 0.0, 0.0, 0.0, 0.0, 0.32319000000000003]}

From 52bf804e1300a57ceb60b81007acc0741bbb9a91 Mon Sep 17 00:00:00 2001
From: Matvezy <mpopov@trinity.edu>
Date: Mon, 31 Mar 2025 22:29:40 +0000
Subject: [PATCH 10/20] fix callback saving

---
 rfdetr/util/misc.py | 24 +++++-------------------
 1 file changed, 5 insertions(+), 19 deletions(-)

diff --git a/rfdetr/util/misc.py b/rfdetr/util/misc.py
index 9169317..2587fcf 100644
--- a/rfdetr/util/misc.py
+++ b/rfdetr/util/misc.py
@@ -422,25 +422,11 @@ def save_on_master(obj, f, *args, **kwargs):
     Safely save objects, removing any callbacks that can't be pickled
     """
     if is_main_process():
-        try:
-            if isinstance(obj, dict):
-                obj_copy = {}
-                for k, v in obj.items():
-                    if k == 'args' and hasattr(v, '__dict__'):
-                        args_dict = copy.copy(v.__dict__)
-                        if 'callbacks' in args_dict:
-                            del args_dict['callbacks']
-                        obj_copy[k] = argparse.Namespace(**args_dict)
-                    elif k != 'callbacks':
-                        obj_copy[k] = v
-                obj = obj_copy
-            
-            torch.save(obj, f, *args, **kwargs)
-        except Exception as e:
-            print(f"Error in safe_save_on_master: {e}")
-            if isinstance(obj, dict) and 'model' in obj:
-                print("Falling back to saving only model state_dict")
-                torch.save({'model': obj['model']}, f, *args, **kwargs)
+        if isinstance(obj, dict) and 'model' in obj:
+            print("Falling back to saving only model state_dict")
+            torch.save({'model': obj['model']}, f, *args, **kwargs)
+        else:
+            raise ValueError("Invalid object type for saving")
 
 
 def init_distributed_mode(args):

From c4c4197abcca81c07fd6ad2f5a22b3a7c13b505b Mon Sep 17 00:00:00 2001
From: Matvezy <mpopov@trinity.edu>
Date: Mon, 31 Mar 2025 23:03:54 +0000
Subject: [PATCH 11/20] fix bug

---
 rfdetr/config.py              |  2 +-
 rfdetr/main.py                | 29 +++++++++--------------------
 rfdetr/util/early_stopping.py |  2 ++
 3 files changed, 12 insertions(+), 21 deletions(-)

diff --git a/rfdetr/config.py b/rfdetr/config.py
index 86079b0..ec06c7d 100644
--- a/rfdetr/config.py
+++ b/rfdetr/config.py
@@ -70,7 +70,7 @@ class TrainConfig(BaseModel):
     use_ema: bool = True
     num_workers: int = 2
     weight_decay: float = 1e-4
-    early_stopping: bool = False
+    early_stopping: bool = True
     early_stopping_patience: int = 5
     early_stopping_min_delta: float = 0.001
     early_stopping_use_ema: bool = False
diff --git a/rfdetr/main.py b/rfdetr/main.py
index 04527e9..457f4d1 100644
--- a/rfdetr/main.py
+++ b/rfdetr/main.py
@@ -29,21 +29,21 @@
 import torch
 from torch.utils.data import DataLoader, DistributedSampler
 
-from rfdetr.datasets import build_dataset, get_coco_api_from_dataset
-from rfdetr.engine import evaluate, train_one_epoch
-from rfdetr.models import build_model, build_criterion_and_postprocessors
-from rfdetr.util.drop_scheduler import drop_scheduler
-from rfdetr.util.get_param_dicts import get_param_dict
-import rfdetr.util.misc as utils
-from rfdetr.util.utils import ModelEma, BestMetricHolder, clean_state_dict
-from rfdetr.util.benchmark import benchmark
+from datasets import build_dataset, get_coco_api_from_dataset
+from engine import evaluate, train_one_epoch
+from models import build_model, build_criterion_and_postprocessors
+from util.drop_scheduler import drop_scheduler
+from util.get_param_dicts import get_param_dict
+import util.misc as utils
+from util.utils import ModelEma, BestMetricHolder, clean_state_dict
+from util.benchmark import benchmark
 from torch import nn
 import torch.nn.functional as F
 from peft import LoraConfig, get_peft_model
 from typing import DefaultDict, List, Callable
 from logging import getLogger
 import shutil
-from rfdetr.util.files import download_file
+from util.files import download_file
 import os
 if os.environ.get("USE_FILE_SYSTEM_SHARING", "0") == "1":
     import torch.multiprocessing
@@ -158,17 +158,6 @@ def train(self, callbacks: DefaultDict[str, List[Callable]], **kwargs):
         print("git:\n  {}\n".format(utils.get_sha()))
         print(args)
         device = torch.device(args.device)
-
-        # Initialize early stopping if enabled
-        if args.early_stopping:
-            from rfdetr.util.early_stopping import EarlyStoppingCallback
-            early_stopping_callback = EarlyStoppingCallback(
-                patience=args.early_stopping_patience,
-                min_delta=args.early_stopping_min_delta,
-                use_ema=args.early_stopping_use_ema
-            )
-            early_stopping_callback.set_model(self)
-            callbacks["on_fit_epoch_end"].append(early_stopping_callback.update)
         
         # fix the seed for reproducibility
         seed = args.seed + utils.get_rank()
diff --git a/rfdetr/util/early_stopping.py b/rfdetr/util/early_stopping.py
index 413dc58..aab1ca9 100644
--- a/rfdetr/util/early_stopping.py
+++ b/rfdetr/util/early_stopping.py
@@ -35,6 +35,8 @@ def update(self, log_stats):
             return
         
         # Check if current mAP is better than best so far (by at least min_delta)
+        print(f"DIFF: {current_map - self.best_map}")
+        print(f"MIN_DELTA: {self.min_delta}")
         if current_map > self.best_map + self.min_delta:
             # We have an improvement
             self.best_map = current_map

From eca71c101161f03f52b6d78fc59e5ba49208375d Mon Sep 17 00:00:00 2001
From: Matvezy <mpopov@trinity.edu>
Date: Mon, 31 Mar 2025 23:05:42 +0000
Subject: [PATCH 12/20] import fix

---
 rfdetr/main.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/rfdetr/main.py b/rfdetr/main.py
index 457f4d1..3c02b0d 100644
--- a/rfdetr/main.py
+++ b/rfdetr/main.py
@@ -29,21 +29,21 @@
 import torch
 from torch.utils.data import DataLoader, DistributedSampler
 
-from datasets import build_dataset, get_coco_api_from_dataset
-from engine import evaluate, train_one_epoch
-from models import build_model, build_criterion_and_postprocessors
-from util.drop_scheduler import drop_scheduler
-from util.get_param_dicts import get_param_dict
-import util.misc as utils
-from util.utils import ModelEma, BestMetricHolder, clean_state_dict
-from util.benchmark import benchmark
+from rfdetr.datasets import build_dataset, get_coco_api_from_dataset
+from rfdetr.engine import evaluate, train_one_epoch
+from rfdetr.models import build_model, build_criterion_and_postprocessors
+from rfdetr.util.drop_scheduler import drop_scheduler
+from rfdetr.util.get_param_dicts import get_param_dict
+import rfdetr.util.misc as utils
+from rfdetr.util.utils import ModelEma, BestMetricHolder, clean_state_dict
+from rfdetr.util.benchmark import benchmark
 from torch import nn
 import torch.nn.functional as F
 from peft import LoraConfig, get_peft_model
 from typing import DefaultDict, List, Callable
 from logging import getLogger
 import shutil
-from util.files import download_file
+from rfdetr.util.files import download_file
 import os
 if os.environ.get("USE_FILE_SYSTEM_SHARING", "0") == "1":
     import torch.multiprocessing

From d25f5e19399f9780fea5232debb613478895c58e Mon Sep 17 00:00:00 2001
From: Matvezy <mpopov@trinity.edu>
Date: Mon, 31 Mar 2025 23:33:58 +0000
Subject: [PATCH 13/20] updated based on max ema or regular

---
 rfdetr/util/early_stopping.py | 48 +++++++++++++++++++++++------------
 1 file changed, 32 insertions(+), 16 deletions(-)

diff --git a/rfdetr/util/early_stopping.py b/rfdetr/util/early_stopping.py
index aab1ca9..a6984ea 100644
--- a/rfdetr/util/early_stopping.py
+++ b/rfdetr/util/early_stopping.py
@@ -25,33 +25,49 @@ def __init__(self, model, patience=5, min_delta=0.001, use_ema=False, verbose=Tr
         
     def update(self, log_stats):
         """Update early stopping state based on epoch validation metrics"""
-        # Get the mAP value from the log stats
-        if self.use_ema and 'ema_test_coco_eval_bbox' in log_stats:
-            current_map = log_stats['ema_test_coco_eval_bbox'][0]
-        elif 'test_coco_eval_bbox' in log_stats:
-            current_map = log_stats['test_coco_eval_bbox'][0]
+        regular_map = None
+        ema_map = None
+        
+        if 'test_coco_eval_bbox' in log_stats:
+            regular_map = log_stats['test_coco_eval_bbox'][0]
+        
+        if 'ema_test_coco_eval_bbox' in log_stats:
+            ema_map = log_stats['ema_test_coco_eval_bbox'][0]
+        
+        current_map = None
+        if regular_map is not None and ema_map is not None:
+            if self.use_ema:
+                current_map = ema_map
+                metric_source = "EMA"
+            else:
+                current_map = max(regular_map, ema_map)
+                metric_source = "max(regular, EMA)"
+        elif ema_map is not None:
+            current_map = ema_map
+            metric_source = "EMA"
+        elif regular_map is not None:
+            current_map = regular_map
+            metric_source = "regular"
         else:
-            # No valid mAP metric found, skip early stopping check
+            if self.verbose:
+                print("Early stopping: No valid mAP metric found, skipping check")
             return
         
-        # Check if current mAP is better than best so far (by at least min_delta)
-        print(f"DIFF: {current_map - self.best_map}")
-        print(f"MIN_DELTA: {self.min_delta}")
+        if self.verbose:
+            print(f"Early stopping: Current mAP ({metric_source}): {current_map:.4f}, Best: {self.best_map:.4f}, Diff: {current_map - self.best_map:.4f}, Min delta: {self.min_delta}")
+        
         if current_map > self.best_map + self.min_delta:
-            # We have an improvement
             self.best_map = current_map
             self.counter = 0
             if self.verbose:
-                print(f"Early stopping: mAP improved to {current_map:.4f}")
+                print(f"Early stopping: mAP improved to {current_map:.4f} using {metric_source} metric")
         else:
-            # No improvement
             self.counter += 1
             if self.verbose:
                 print(f"Early stopping: No improvement in mAP for {self.counter} epochs (best: {self.best_map:.4f}, current: {current_map:.4f})")
             
-            # Check if early stopping criteria met
-            if self.counter >= self.patience:
-                print(f"Early stopping triggered: No improvement above {self.min_delta} threshold for {self.patience} epochs")
-                # Request model to stop early
+        if self.counter >= self.patience:
+            print(f"Early stopping triggered: No improvement above {self.min_delta} threshold for {self.patience} epochs")
+            if self.model:
                 if self.model:
                     self.model.request_early_stop()
\ No newline at end of file

From 2050872912deaf66d9c28f1f87833c1bd6732e9d Mon Sep 17 00:00:00 2001
From: Matvezy <mpopov@trinity.edu>
Date: Mon, 31 Mar 2025 23:35:34 +0000
Subject: [PATCH 14/20] updated based on max ema or regular

---
 rfdetr/util/early_stopping.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/rfdetr/util/early_stopping.py b/rfdetr/util/early_stopping.py
index a6984ea..db7dc9f 100644
--- a/rfdetr/util/early_stopping.py
+++ b/rfdetr/util/early_stopping.py
@@ -69,5 +69,4 @@ def update(self, log_stats):
         if self.counter >= self.patience:
             print(f"Early stopping triggered: No improvement above {self.min_delta} threshold for {self.patience} epochs")
             if self.model:
-                if self.model:
-                    self.model.request_early_stop()
\ No newline at end of file
+                self.model.request_early_stop()
\ No newline at end of file

From 6b8e1f89aa44aeb3def7b7fa16b42f3007ef8a60 Mon Sep 17 00:00:00 2001
From: Matvezy <mpopov@trinity.edu>
Date: Mon, 31 Mar 2025 23:59:45 +0000
Subject: [PATCH 15/20] filesystem

---
 rfdetr/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rfdetr/main.py b/rfdetr/main.py
index 3c02b0d..780cf75 100644
--- a/rfdetr/main.py
+++ b/rfdetr/main.py
@@ -45,7 +45,7 @@
 import shutil
 from rfdetr.util.files import download_file
 import os
-if os.environ.get("USE_FILE_SYSTEM_SHARING", "0") == "1":
+if str(os.environ.get("USE_FILE_SYSTEM_SHARING", "False")).lower() in ["true", "1"]:
     import torch.multiprocessing
     torch.multiprocessing.set_sharing_strategy('file_system')
 

From 49c2c5e735267f69f8e7743059882994111bc3f4 Mon Sep 17 00:00:00 2001
From: Matvezy <mpopov@trinity.edu>
Date: Tue, 1 Apr 2025 00:13:02 +0000
Subject: [PATCH 16/20] default 10 steps

---
 rfdetr/config.py | 2 +-
 rfdetr/main.py   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/rfdetr/config.py b/rfdetr/config.py
index ec06c7d..3f973e1 100644
--- a/rfdetr/config.py
+++ b/rfdetr/config.py
@@ -71,6 +71,6 @@ class TrainConfig(BaseModel):
     num_workers: int = 2
     weight_decay: float = 1e-4
     early_stopping: bool = True
-    early_stopping_patience: int = 5
+    early_stopping_patience: int = 10
     early_stopping_min_delta: float = 0.001
     early_stopping_use_ema: bool = False
diff --git a/rfdetr/main.py b/rfdetr/main.py
index 780cf75..52d954d 100644
--- a/rfdetr/main.py
+++ b/rfdetr/main.py
@@ -755,7 +755,7 @@ def get_args_parser():
     # Early stopping parameters
     parser.add_argument('--early_stopping', action='store_true',
                         help='Enable early stopping based on mAP improvement')
-    parser.add_argument('--early_stopping_patience', default=5, type=int,
+    parser.add_argument('--early_stopping_patience', default=10, type=int,
                         help='Number of epochs with no improvement after which training will be stopped')
     parser.add_argument('--early_stopping_min_delta', default=0.001, type=float,
                         help='Minimum change in mAP to qualify as an improvement')
@@ -893,7 +893,7 @@ def populate_args(
     lr_min_factor=0.0,
     # Early stopping parameters
     early_stopping=True,
-    early_stopping_patience=5,
+    early_stopping_patience=10,
     early_stopping_min_delta=0.001,
     early_stopping_use_ema=False,
     # Additional

From f8a5664a1593378db909e552ba9e0ecea10dc24b Mon Sep 17 00:00:00 2001
From: Matvezy <mpopov@trinity.edu>
Date: Tue, 1 Apr 2025 00:52:17 +0000
Subject: [PATCH 17/20] drop redundant log

---
 rfdetr/util/misc.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/rfdetr/util/misc.py b/rfdetr/util/misc.py
index 2587fcf..2b50d30 100644
--- a/rfdetr/util/misc.py
+++ b/rfdetr/util/misc.py
@@ -423,7 +423,6 @@ def save_on_master(obj, f, *args, **kwargs):
     """
     if is_main_process():
         if isinstance(obj, dict) and 'model' in obj:
-            print("Falling back to saving only model state_dict")
             torch.save({'model': obj['model']}, f, *args, **kwargs)
         else:
             raise ValueError("Invalid object type for saving")

From b67bf6e24a9d5ef0766ae55b928af0b3ad48b014 Mon Sep 17 00:00:00 2001
From: Matvezy <mpopov@trinity.edu>
Date: Tue, 1 Apr 2025 21:03:17 +0000
Subject: [PATCH 18/20] pull changes

---
 rfdetr/util/early_stopping.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/rfdetr/util/early_stopping.py b/rfdetr/util/early_stopping.py
index db7dc9f..30bf888 100644
--- a/rfdetr/util/early_stopping.py
+++ b/rfdetr/util/early_stopping.py
@@ -2,6 +2,10 @@
 Early stopping callback for RF-DETR training
 """
 
+from logging import getLogger
+
+logger = getLogger(__name__)
+
 class EarlyStoppingCallback:
     """
     Early stopping callback that monitors mAP and stops training if no improvement 
@@ -50,7 +54,7 @@ def update(self, log_stats):
             metric_source = "regular"
         else:
             if self.verbose:
-                print("Early stopping: No valid mAP metric found, skipping check")
+                raise ValueError("No valid mAP metric found!")
             return
         
         if self.verbose:
@@ -59,8 +63,7 @@ def update(self, log_stats):
         if current_map > self.best_map + self.min_delta:
             self.best_map = current_map
             self.counter = 0
-            if self.verbose:
-                print(f"Early stopping: mAP improved to {current_map:.4f} using {metric_source} metric")
+            logger.info(f"Early stopping: mAP improved to {current_map:.4f} using {metric_source} metric")
         else:
             self.counter += 1
             if self.verbose:

From 88de76391c73aafa78f11e19ac9edf3984e98b9d Mon Sep 17 00:00:00 2001
From: Matvezy <mpopov@trinity.edu>
Date: Tue, 1 Apr 2025 22:45:14 +0000
Subject: [PATCH 19/20] fix merging and model saving

---
 rfdetr/main.py      | 1 +
 rfdetr/util/misc.py | 6 +-----
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/rfdetr/main.py b/rfdetr/main.py
index f4e24e8..8b683dd 100644
--- a/rfdetr/main.py
+++ b/rfdetr/main.py
@@ -892,6 +892,7 @@ def populate_args(
     early_stopping_patience=10,
     early_stopping_min_delta=0.001,
     early_stopping_use_ema=False,
+    gradient_checkpointing=False,
     # Additional
     subcommand=None,
     **extra_kwargs  # To handle any unexpected arguments
diff --git a/rfdetr/util/misc.py b/rfdetr/util/misc.py
index 1a077bb..e73bbe0 100644
--- a/rfdetr/util/misc.py
+++ b/rfdetr/util/misc.py
@@ -425,11 +425,7 @@ def save_on_master(obj, f, *args, **kwargs):
     Safely save objects, removing any callbacks that can't be pickled
     """
     if is_main_process():
-        if isinstance(obj, dict) and 'model' in obj:
-            torch.save({'model': obj['model']}, f, *args, **kwargs)
-        else:
-            raise ValueError("Invalid object type for saving")
-
+        torch.save(obj, f, *args, **kwargs)
 
 def init_distributed_mode(args):
     if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:

From 575abe509845f84c884427f31faba413c8d1d95d Mon Sep 17 00:00:00 2001
From: Piotr Skalski <piotr.skalski92@gmail.com>
Date: Wed, 2 Apr 2025 17:02:13 +0200
Subject: [PATCH 20/20] Update rfdetr/config.py

---
 rfdetr/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rfdetr/config.py b/rfdetr/config.py
index 5b9316f..35745b6 100644
--- a/rfdetr/config.py
+++ b/rfdetr/config.py
@@ -71,7 +71,7 @@ class TrainConfig(BaseModel):
     use_ema: bool = True
     num_workers: int = 2
     weight_decay: float = 1e-4
-    early_stopping: bool = True
+    early_stopping: bool = False
     early_stopping_patience: int = 10
     early_stopping_min_delta: float = 0.001
     early_stopping_use_ema: bool = False