Add a `failed` flag to PerformanceLog and record it for failed sub-jobs

gabriel-piles · gabriel-piles · commit e657341bd206 · 2025-10-07T11:01:02.000+02:00
diff --git a/src/trainable_entity_extractor/adapters/extractors/pdf_to_multi_option_extractor/PdfToMultiOptionExtractor.py b/src/trainable_entity_extractor/adapters/extractors/pdf_to_multi_option_extractor/PdfToMultiOptionExtractor.py
@@ -166,37 +166,6 @@ def get_predictions(
 
         return method.get_samples_for_context(prediction_samples_data), prediction
 
-    def get_best_method(
-        self, multi_option_data: ExtractionData, training_set: ExtractionData, test_set: ExtractionData
-    ) -> Optional[PdfMultiOptionMethod]:
-        best_method_instance = self.METHODS[0]
-        best_performance = 0
-        performance_summary = PerformanceSummary.from_extraction_data(
-            extractor_name=self.get_name(),
-            training_samples_count=len(training_set.samples),
-            testing_samples_count=len(test_set.samples),
-            extraction_data=multi_option_data,
-        )
-        for method in self.METHODS:
-            if self.extraction_identifier.is_training_canceled():
-                self.logger.log(self.extraction_identifier, "Training canceled")
-                return None
-
-            performance = self.get_method_performance(method, training_set, test_set)
-            performance_summary.add_performance(method.get_name(), performance)
-            if performance == 100:
-                self.logger.log(self.extraction_identifier, performance_summary.to_log())
-                self.extraction_identifier.save_content("performance_log.txt", performance_summary.to_log())
-                return method
-
-            if round(performance, 2) > best_performance:
-                best_performance = round(performance, 2)
-                best_method_instance = method
-
-        self.logger.log(self.extraction_identifier, performance_summary.to_log())
-        self.extraction_identifier.save_content("performance_log.txt", performance_summary.to_log())
-        return best_method_instance
-
     def get_method_performance(
         self, method: PdfMultiOptionMethod, train_set: ExtractionData, test_set: ExtractionData
     ) -> float:
diff --git a/src/trainable_entity_extractor/domain/PerformanceLog.py b/src/trainable_entity_extractor/domain/PerformanceLog.py
@@ -5,6 +5,7 @@ class PerformanceLog(BaseModel):
     method_name: str
     performance: float
     execution_seconds: int = 0
+    failed: bool = False
 
     @staticmethod
     def get_execution_time_string(execution_seconds: int):
diff --git a/src/trainable_entity_extractor/domain/PerformanceSummary.py b/src/trainable_entity_extractor/domain/PerformanceSummary.py
@@ -19,10 +19,13 @@ class PerformanceSummary(BaseModel):
     previous_timestamp: int = Field(default_factory=lambda: int(time()))
     empty_pdf_count: int = 0
 
-    def add_performance(self, method_name: str, performance: float):
+    def add_performance(self, method_name: str, performance: float, failed: bool = False):
         current_time = int(time())
         performance = PerformanceLog(
-            method_name=method_name, performance=performance, execution_seconds=int(current_time - self.previous_timestamp)
+            method_name=method_name,
+            performance=performance,
+            execution_seconds=int(current_time - self.previous_timestamp),
+            failed=failed,
         )
         self.previous_timestamp = current_time
         self.performances.append(performance)
@@ -36,7 +39,9 @@ def add_performance_from_sub_job(self, sub_job):
         else:
             performance_score = 0.0
 
-        self.add_performance(sub_job.extractor_job.method_name, performance_score)
+        failed = sub_job.result is None or (hasattr(sub_job.result, "failed") and sub_job.result.failed)
+
+        self.add_performance(sub_job.extractor_job.method_name, performance_score, failed)
 
     def to_log(self) -> str:
         total_time = sum(performance.execution_seconds for performance in self.performances)
diff --git a/src/trainable_entity_extractor/use_cases/OrchestratorUseCase.py b/src/trainable_entity_extractor/use_cases/OrchestratorUseCase.py
@@ -154,7 +154,7 @@ def _log_performance_summary(self, distributed_job: DistributedJob) -> None:
         performance_summary = PerformanceSummary.from_distributed_job(distributed_job)
 
         for sub_job in distributed_job.sub_jobs:
-            if sub_job.status == JobStatus.SUCCESS and sub_job.result:
+            if sub_job.status in [JobStatus.SUCCESS, JobStatus.FAILURE] and sub_job.result:
                 performance_summary.add_performance_from_sub_job(sub_job)
 
         summary_log = performance_summary.to_log()