diff --git a/label_studio/core/current_request.py b/label_studio/core/current_request.py index d4bff7b00458..0727f0d25447 100644 --- a/label_studio/core/current_request.py +++ b/label_studio/core/current_request.py @@ -1,5 +1,4 @@ from threading import local -from typing import Any from django.core.signals import request_finished from django.dispatch import receiver @@ -8,104 +7,18 @@ _thread_locals = local() -class CurrentContext: - @classmethod - def set(cls, key: str, value: Any, shared: bool = True) -> None: - if not hasattr(_thread_locals, 'data'): - _thread_locals.data = {} - if not hasattr(_thread_locals, 'job_data'): - _thread_locals.job_data = {} - - if shared: - _thread_locals.job_data[key] = value - else: - _thread_locals.data[key] = value - - @classmethod - def get(cls, key: str, default=None): - return getattr(_thread_locals, 'job_data', {}).get(key, getattr(_thread_locals, 'data', {}).get(key, default)) - - @classmethod - def set_request(cls, request): - _thread_locals.request = request - if request.user: - cls.set_user(request.user) - - @classmethod - def get_organization_id(cls): - return cls.get('organization_id') - - @classmethod - def set_organization_id(cls, organization_id: int): - cls.set('organization_id', organization_id) - - @classmethod - def get_user(cls): - return cls.get('user') - - @classmethod - def set_user(cls, user): - cls.set('user', user) - if getattr(user, 'active_organization_id', None): - cls.set_organization_id(user.active_organization_id) - - @classmethod - def set_fsm_disabled(cls, disabled: bool): - """ - Temporarily disable/enable FSM for the current thread. - - This is useful for test cleanup and bulk operations where FSM state - tracking is not needed and would cause performance issues. - - Args: - disabled: True to disable FSM, False to enable it - """ - cls.set('fsm_disabled', disabled) - - @classmethod - def is_fsm_disabled(cls) -> bool: - """ - Check if FSM is disabled for the current thread. - - Returns: - True if FSM is disabled, False otherwise - """ - return cls.get('fsm_disabled', False) - - @classmethod - def get_job_data(cls) -> dict: - """ - This data will be shared to jobs spawned by the current thread. - """ - return getattr(_thread_locals, 'job_data', {}) - - @classmethod - def clear(cls) -> None: - if hasattr(_thread_locals, 'data'): - delattr(_thread_locals, 'data') - - if hasattr(_thread_locals, 'job_data'): - delattr(_thread_locals, 'job_data') - - if hasattr(_thread_locals, 'request'): - del _thread_locals.request - - @classmethod - def get_request(cls): - return getattr(_thread_locals, 'request', None) - - def get_current_request(): """returns the request object for this thread""" - result = CurrentContext.get_request() + result = getattr(_thread_locals, 'request', None) return result class ThreadLocalMiddleware(CommonMiddleware): def process_request(self, request): - CurrentContext.set_request(request) + _thread_locals.request = request @receiver(request_finished) def clean_request(sender, **kwargs): - CurrentContext.clear() + if hasattr(_thread_locals, 'request'): + del _thread_locals.request diff --git a/label_studio/core/redis.py b/label_studio/core/redis.py index af28f70b9423..459d07e836bd 100644 --- a/label_studio/core/redis.py +++ b/label_studio/core/redis.py @@ -4,11 +4,9 @@ import sys from datetime import timedelta from functools import partial -from typing import Any import django_rq import redis -from core.current_request import CurrentContext from django.conf import settings from django_rq import get_connection from rq.command import send_stop_job_command @@ -82,40 +80,6 @@ def redis_connected(): return redis_healthcheck() -def _is_serializable(value: Any) -> bool: - """Check if a value can be serialized for job context.""" - return isinstance(value, (str, int, float, bool, list, dict, type(None))) - - -def _capture_context() -> dict: - """ - Capture the current context for passing to a job. - Returns a dictionary of context data that can be serialized. - """ - context_data = {} - - # Get user information - if user := CurrentContext.get_user(): - context_data['user_id'] = user.id - - # Get organization if set separately - if org_id := CurrentContext.get_organization_id(): - context_data['organization_id'] = org_id - - # If organization_id is not set, try to get it from the user, this ensures that we have an organization_id for the job - # And it prefers the original requesting user's organization_id over the current active organization_id of the user which could change during async jobs - if not org_id and user and hasattr(user, 'active_organization_id') and user.active_organization_id: - context_data['organization_id'] = user.active_organization_id - - # Get any custom context values (exclude non-serializable objects) - job_data = CurrentContext.get_job_data() - for key, value in job_data.items(): - if key not in ['user', 'request'] and _is_serializable(value): - context_data[key] = value - - return context_data - - def redis_get(key): if not redis_healthcheck(): return @@ -148,9 +112,7 @@ def redis_delete(key): def start_job_async_or_sync(job, *args, in_seconds=0, **kwargs): """ - Start job async with redis or sync if redis is not connected. - Automatically preserves context for async jobs and clears it after completion. - + Start job async with redis or sync if redis is not connected :param job: Job function :param args: Function arguments :param in_seconds: Job will be delayed for in_seconds @@ -160,29 +122,28 @@ def start_job_async_or_sync(job, *args, in_seconds=0, **kwargs): redis = redis_connected() and kwargs.get('redis', True) queue_name = kwargs.get('queue_name', 'default') - if 'queue_name' in kwargs: del kwargs['queue_name'] if 'redis' in kwargs: del kwargs['redis'] - job_timeout = None if 'job_timeout' in kwargs: job_timeout = kwargs['job_timeout'] del kwargs['job_timeout'] - if redis: - # Async execution with Redis - wrap job for context management + # Auto-capture request_id from thread local and pass it via job meta try: - context_data = _capture_context() + from label_studio.core.current_request import _thread_locals - if context_data: + request_id = getattr(_thread_locals, 'request_id', None) + if request_id: + # Store in job meta for worker access meta = kwargs.get('meta', {}) - # Store context data in job meta for worker access - meta.update(context_data) + meta['request_id'] = request_id kwargs['meta'] = meta except Exception: - logger.info(f'Failed to capture context for job {job.__name__} on queue {queue_name}') + # Fail silently if no request context + pass try: args_info = _truncate_args_for_logging(args, kwargs) @@ -193,7 +154,6 @@ def start_job_async_or_sync(job, *args, in_seconds=0, **kwargs): enqueue_method = queue.enqueue if in_seconds > 0: enqueue_method = partial(queue.enqueue_in, timedelta(seconds=in_seconds)) - job = enqueue_method( job, *args, @@ -204,10 +164,8 @@ def start_job_async_or_sync(job, *args, in_seconds=0, **kwargs): return job else: on_failure = kwargs.pop('on_failure', None) - try: - result = job(*args, **kwargs) - return result + return job(*args, **kwargs) except Exception: exc_info = sys.exc_info() if on_failure: diff --git a/label_studio/core/settings/base.py b/label_studio/core/settings/base.py index 40cdcc042598..5e890b6133b2 100644 --- a/label_studio/core/settings/base.py +++ b/label_studio/core/settings/base.py @@ -216,7 +216,6 @@ 'rest_framework.authtoken', 'rest_framework_simplejwt.token_blacklist', 'drf_generators', - 'fsm', # MUST be before apps that register FSM transitions (projects, tasks) 'core', 'users', 'organizations', @@ -233,6 +232,7 @@ 'ml_model_providers', 'jwt_auth', 'session_policy', + 'fsm', ] MIDDLEWARE = [ diff --git a/label_studio/core/tests/test_models.py b/label_studio/core/tests/test_models.py index 69d06315ee73..40b55f907008 100644 --- a/label_studio/core/tests/test_models.py +++ b/label_studio/core/tests/test_models.py @@ -18,9 +18,6 @@ def _assert_delete_and_restore_equal(self, drow, original): original_dict.pop('_state') original_created_at = original_dict.pop('created_at') original_updated_at = original_dict.pop('updated_at') - # Pop _original_values - this is an internal FSM field that's recreated on __init__ - # and shouldn't be compared - original_dict.pop('_original_values', None) original.delete() for deserialized_object in serializers.deserialize('json', json.dumps([drow.data])): @@ -31,9 +28,6 @@ def _assert_delete_and_restore_equal(self, drow, original): new_dict.pop('_state') new_created_at = new_dict.pop('created_at') new_updated_at = new_dict.pop('updated_at') - # Pop _original_values - this is an internal FSM field that's recreated on __init__ - # and shouldn't be compared - new_dict.pop('_original_values', None) assert new_dict == original_dict # Datetime loses microsecond precision, so we can't compare them directly diff --git a/label_studio/data_manager/actions/basic.py b/label_studio/data_manager/actions/basic.py index 4dfc5b444246..367db5c363d8 100644 --- a/label_studio/data_manager/actions/basic.py +++ b/label_studio/data_manager/actions/basic.py @@ -99,9 +99,7 @@ def delete_tasks_annotations(project, queryset, **kwargs): drafts = drafts.filter(user=int(annotator_id)) project.summary.remove_created_drafts_and_labels(drafts) - # count before delete to return the number of deleted items, not including cascade deletions - count = annotations.count() - annotations.delete() + count, _ = annotations.delete() drafts.delete() # since task-level annotation drafts will not have been deleted by CASCADE emit_webhooks_for_instance(project.organization, project, WebhookAction.ANNOTATIONS_DELETED, annotations_ids) request = kwargs['request'] diff --git a/label_studio/fsm/README.md b/label_studio/fsm/README.md index e4b85681a817..d2e45c3d85c4 100644 --- a/label_studio/fsm/README.md +++ b/label_studio/fsm/README.md @@ -56,7 +56,7 @@ class OrderStateChoices(models.TextChoices): ### 2. Create State Model ```python -from fsm.state_models import BaseState +from fsm.models import BaseState from fsm.registry import register_state_model @register_state_model('order') diff --git a/label_studio/fsm/annotation_transitions.py b/label_studio/fsm/annotation_transitions.py deleted file mode 100644 index 80c0cbe85f8c..000000000000 --- a/label_studio/fsm/annotation_transitions.py +++ /dev/null @@ -1,114 +0,0 @@ -""" -FSM Transitions for Annotation model. - -This module defines declarative transitions for the Annotation entity. -Annotation transitions can update related task states via post_transition_hooks. -""" - -from typing import Any, Dict - -from fsm.registry import register_state_transition -from fsm.state_choices import AnnotationStateChoices -from fsm.transitions import ModelChangeTransition, StateModelType, TransitionContext - - -@register_state_transition('annotation', 'annotation_submitted', triggers_on_create=True, triggers_on_update=False) -class AnnotationSubmittedTransition(ModelChangeTransition): - """ - Transition when an annotation is submitted. - - This is the default transition for newly created annotations. - - Trigger: Automatically on creation only (triggers_on_create=True, triggers_on_update=False) - """ - - @property - def target_state(self) -> str: - return AnnotationStateChoices.SUBMITTED - - def get_reason(self, context: TransitionContext) -> str: - """Return detailed reason for annotation submission.""" - return 'Annotation submitted for review' - - def transition(self, context: TransitionContext) -> Dict[str, Any]: - """Execute annotation submission transition.""" - annotation = context.entity - - return { - 'reason': 'Annotation submitted for review', - 'task_id': annotation.task_id, - 'project_id': annotation.project_id, - 'completed_by_id': annotation.completed_by_id, - 'lead_time': annotation.lead_time, - } - - def post_transition_hook(self, context: TransitionContext, state_record: StateModelType) -> None: - """ - Post-transition hook for annotation submission. - - Updates task state to COMPLETED when annotation is submitted. - Then updates project state based on task completion status. - Handles "cold start" scenarios where task may not have state record yet. - """ - from fsm.project_transitions import update_project_state_after_task_change - from fsm.state_choices import TaskStateChoices - from fsm.state_manager import StateManager - from fsm.utils import get_or_initialize_state - - annotation = context.entity - task = annotation.task - project = annotation.project - - # Get current task state (initialize if needed) - current_task_state = StateManager.get_current_state_value(task) - - if current_task_state is None: - # Task has no state record - initialize it - # Since annotation was just submitted, task should be COMPLETED - current_task_state = get_or_initialize_state( - task, user=context.current_user, inferred_state=TaskStateChoices.COMPLETED - ) - - # Transition task to COMPLETED if not already - if current_task_state != TaskStateChoices.COMPLETED: - StateManager.execute_transition(entity=task, transition_name='task_completed', user=context.current_user) - - # Update project state based on task changes - update_project_state_after_task_change(project, user=context.current_user) - - -@register_state_transition( - 'annotation', 'annotation_updated', triggers_on_create=False, triggers_on_update=True, force_state_record=True -) -class AnnotationUpdatedTransition(ModelChangeTransition): - """ - Transition when an annotation is updated. - - Updates keep the annotation in SUBMITTED state but create audit trail records. - - Trigger: On update (triggers_on_create=False, triggers_on_update=True, force_state_record=True) - """ - - @property - def target_state(self) -> str: - return AnnotationStateChoices.SUBMITTED - - def get_reason(self, context: TransitionContext) -> str: - """Return detailed reason for annotation update.""" - return 'Annotation updated' - - def transition(self, context: TransitionContext) -> Dict[str, Any]: - """Execute annotation update transition.""" - annotation = context.entity - - return { - 'reason': 'Annotation updated', - 'task_id': annotation.task_id, - 'project_id': annotation.project_id, - 'updated_by_id': getattr(annotation, 'updated_by_id', None), - 'changed_fields': list(self.changed_fields.keys()) if self.changed_fields else [], - } - - def post_transition_hook(self, context: TransitionContext, state_record: StateModelType) -> None: - """Post-transition hook for annotation updates.""" - pass diff --git a/label_studio/fsm/apps.py b/label_studio/fsm/apps.py index 3ccb6275306d..fd08fc0d6da2 100644 --- a/label_studio/fsm/apps.py +++ b/label_studio/fsm/apps.py @@ -11,48 +11,3 @@ class FsmConfig(AppConfig): default_auto_field = 'django.db.models.UUIDField' name = 'fsm' verbose_name = 'Label Studio FSM' - - def ready(self): - """ - Import models and state_choices to ensure registration happens on Django startup. - - The @register_state_model and @register_state_choices decorators run during - module import, so we must import these modules to populate the registries. - This ensures state models are available throughout the application lifecycle. - - CENTRALIZED TRANSITION REGISTRATION: - This is the ONLY place in LSO where FSM transitions should be imported. - When running LSE, transitions are skipped here and registered in lse_fsm/apps.py instead. - """ - from core.utils.common import is_community - - # Always import base models and state_choices (needed for registry) - from . import ( - models, # noqa: F401 - FsmHistoryStateModel base class - state_choices, # noqa: F401 - State choice definitions - ) - - # Import state models only in community edition - # LSE will register its own extended state models in lse_fsm/apps.py - if is_community(): - from . import ( - state_models, # noqa: F401 - OSS state models (TaskState, AnnotationState, etc.) - ) - - logger.debug('FSM: Registered OSS state models') - - logger.debug('FSM models and state choices registered') - - # Only import LSO transitions when running community edition - # When running LSE, skip these entirely - LSE provides its own transitions - if is_community(): - # Import all LSO transitions centrally from fsm/ - ONLY place to do this - from . import ( - annotation_transitions, # noqa: F401 - project_transitions, # noqa: F401 - task_transitions, # noqa: F401 - ) - - logger.info('LSO FSM: Registered LSO transitions (community edition)') - else: - logger.info('LSO FSM: Skipping LSO transitions (running LSE)') diff --git a/label_studio/fsm/functions.py b/label_studio/fsm/functions.py deleted file mode 100644 index b2ea11b872c8..000000000000 --- a/label_studio/fsm/functions.py +++ /dev/null @@ -1,127 +0,0 @@ -""" -FSM utility functions for backfilling and managing state transitions. - -This module contains reusable functions for FSM state management that are -used across different parts of the codebase. -""" - -import logging - -logger = logging.getLogger(__name__) - - -def backfill_fsm_states_for_tasks(storage_id, tasks_created, link_class): - """ - Backfill initial FSM states for tasks created during storage sync. - - This function creates initial CREATED state records for all tasks that were - created during a storage sync operation. It's designed to be called after - tasks have been successfully created and linked to storage. - - Args: - storage_id: The ID of the storage that created the tasks - tasks_created: Number of tasks that were created - link_class: The link model class (e.g., S3ImportStorageLink) to query tasks - - Note: - - CurrentContext must be available before calling this function - - This function is safe to call in both LSO and LSE environments - - Failures are logged but don't propagate to prevent breaking storage sync - """ - if tasks_created <= 0: - return - - try: - from lse_fsm.state_inference import backfill_state_for_entity - from tasks.models import Task - - # Get tasks created in this sync - task_ids = list( - link_class.objects.filter(storage=storage_id) - .order_by('-created_at')[:tasks_created] - .values_list('task_id', flat=True) - ) - - tasks = Task.objects.filter(id__in=task_ids) - - logger.info(f'Storage sync: creating initial FSM states for {len(task_ids)} tasks') - - # Backfill initial CREATED state for each task - for task in tasks: - backfill_state_for_entity(task, 'task', create_record=True) - - logger.info(f'Storage sync: FSM states created for {len(task_ids)} tasks') - except ImportError: - # LSE not available (OSS), skip FSM sync - logger.debug('LSE not available, skipping FSM state backfill for storage sync') - except Exception as e: - # Don't fail storage sync if FSM sync fails - logger.error(f'FSM sync after storage sync failed: {e}', exc_info=True) - - -def update_task_state_after_annotation_deletion(task, project): - """ - Update task FSM state after an annotation has been deleted. - - This function ensures that the task's FSM state reflects its current labeled status - after an annotation has been deleted. It will: - 1. Check if FSM is enabled - 2. Get the current task state - 3. Determine the expected state based on task.is_labeled - 4. Execute appropriate transition if state doesn't match - 5. Update project state if task state was changed - - Args: - task: The Task instance whose annotation was deleted - project: The Project instance containing the task - - Note: - - Requires CurrentContext to be set with a valid user - - Failures are logged but don't propagate to prevent breaking annotation deletion - - Will initialize state if task has no FSM state record yet - """ - from core.current_request import CurrentContext - from fsm.project_transitions import update_project_state_after_task_change - from fsm.state_choices import TaskStateChoices - from fsm.state_manager import get_state_manager - from fsm.utils import is_fsm_enabled - - # Get user from context for FSM - user = CurrentContext.get_user() - - if not is_fsm_enabled(user=user): - return - - try: - StateManager = get_state_manager() - - # Get current state - may be None if entity has no state record yet - current_task_state = StateManager.get_current_state_value(task) - - # Determine what the state should be based on task's labeled status - expected_state = TaskStateChoices.COMPLETED if task.is_labeled else TaskStateChoices.IN_PROGRESS - - # If no state exists, initialize it based on current condition - if current_task_state is None: - # Initialize state for entities that existed before FSM was deployed - if task.is_labeled: - StateManager.execute_transition(entity=task, transition_name='task_completed', user=user) - else: - StateManager.execute_transition(entity=task, transition_name='task_in_progress', user=user) - # Update project state based on task changes - update_project_state_after_task_change(project, user=user) - # If state exists but doesn't match the task's labeled status, fix it - elif current_task_state != expected_state: - if expected_state == TaskStateChoices.IN_PROGRESS: - StateManager.execute_transition(entity=task, transition_name='task_in_progress', user=user) - else: - StateManager.execute_transition(entity=task, transition_name='task_completed', user=user) - # Update project state based on task changes - update_project_state_after_task_change(project, user=user) - - except Exception as e: - # Final safety net - log but don't break annotation deletion - logger.warning( - f'FSM state update failed during annotation deletion: {str(e)}', - extra={'task_id': task.id, 'project_id': project.id}, - ) diff --git a/label_studio/fsm/models.py b/label_studio/fsm/models.py index 31def79172af..2d154baea65e 100644 --- a/label_studio/fsm/models.py +++ b/label_studio/fsm/models.py @@ -1,582 +1,349 @@ """ -FSM Base Model for Label Studio. - -This module contains only FsmHistoryStateModel - the base class that models -inherit from to get FSM integration. State model definitions are in state_models.py -to avoid registration issues in LSE. +Core FSM models for Label Studio. """ -import logging -from typing import Any, Dict +from datetime import datetime +from typing import Any, Dict, Optional +from django.conf import settings from django.db import models +from django.db.models import QuerySet, UUIDField +from fsm.registry import register_state_model +from fsm.state_choices import AnnotationStateChoices, ProjectStateChoices, TaskStateChoices +from fsm.utils import UUID7Field, generate_uuid7, timestamp_from_uuid7 -logger = logging.getLogger(__name__) - - -# ============================================================================= -# FsmHistoryStateModel - Base Model for FSM Integration -# ============================================================================= - -class FsmHistoryStateModel(models.Model): +class BaseState(models.Model): """ - FSM History State Model - Base class for models that participate in FSM state tracking. - - This class provides explicit FSM integration through model lifecycle hooks, - replacing the implicit signal-based approach with predictable, testable behavior. - - Key features: - - Intercepts save operations to trigger FSM transitions - - Tracks field changes for transition logic - - Maintains CurrentContext for user/org tracking - - Provides explicit transition determination - - Fails gracefully - FSM errors don't break saves - - Usage: - class Task(FsmHistoryStateModel): - # ... model fields ... - - def _determine_fsm_transition(self) -> Optional[str]: - if self._state.adding: # Creating new instance - return 'task_created' - - changed = self._get_changed_fields() - if 'is_labeled' in changed and changed['is_labeled'][1]: - return 'task_labeled' - - return None - - def _get_fsm_transition_data(self) -> Dict[str, Any]: - return { - 'project_id': self.project_id, - 'overlap': self.overlap - } + Abstract base class for all state models using UUID7 for optimal time-series performance. + + This is the core of the FSM system, providing: + - UUID7 primary key with natural time ordering + - Standard state transition metadata + - Audit trail information + - Context data storage + - Performance-optimized helper methods + + Benefits of this architecture: + - INSERT-only operations for maximum concurrency + - Natural time ordering eliminates need for created_at indexes + - Global uniqueness enables distributed system support + - Time-based partitioning for large amounts of state records with consistent performance + - Complete audit trail by design """ + # UUID7 Primary Key - provides natural time ordering and global uniqueness + id = UUIDField( + primary_key=True, + default=generate_uuid7, + editable=False, + help_text='UUID7 provides natural time ordering and global uniqueness', + ) + + # Optional organization field - can be overridden or left null + # Applications can add their own organization/tenant fields as needed + organization_id = models.PositiveIntegerField( + null=True, + blank=True, + db_index=True, + help_text='Organization ID that owns this state record (for multi-tenant applications)', + ) + + # Core State Fields + state = models.CharField(max_length=50, db_index=True, help_text='Current state of the entity') + previous_state = models.CharField( + max_length=50, null=True, blank=True, help_text='Previous state before this transition' + ) + + # Transition Metadata + transition_name = models.CharField( + max_length=100, + null=True, + blank=True, + help_text='Name of the transition method that triggered this state change', + ) + triggered_by = models.ForeignKey( + settings.AUTH_USER_MODEL, + on_delete=models.SET_NULL, + null=True, + help_text='User who triggered this state transition', + ) + + # Context & Audit + context_data = models.JSONField( + default=dict, help_text='Additional context data for this transition (e.g., validation results, external IDs)' + ) + reason = models.TextField(blank=True, help_text='Human-readable reason for this state transition') + + # Timestamp (redundant with UUID7 but useful for human readability) + created_at = models.DateTimeField( + auto_now_add=True, + db_index=False, # UUID7 provides natural ordering, no index needed + help_text='Human-readable timestamp for debugging (UUID7 id contains precise timestamp)', + ) + class Meta: abstract = True - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - # Track original field values for change detection - # Initialize as empty dict for safe access - self._original_values = {} - - @classmethod - def from_db(cls, db, field_names, values): + # UUID7 provides natural ordering, reducing index requirements + ordering = ['-id'] # Most recent first + get_latest_by = 'id' + + def __str__(self): + entity_id = getattr(self, f'{self._get_entity_name()}_id', 'unknown') + return f'{self._get_entity_name().title()} {entity_id}: {self.previous_state} → {self.state}' + + @property + def entity(self): + """Get the related entity object""" + entity_name = self._get_entity_name() + return getattr(self, entity_name) + + @property + def timestamp_from_uuid(self) -> datetime: + """Extract timestamp from UUID7 ID""" + return timestamp_from_uuid7(self.id) + + @property + def is_terminal_state(self) -> bool: """ - Override from_db to capture original values when loading from database. + Check if this is a terminal state (no outgoing transitions). - Django calls this method instead of __init__ when loading models from the database. - We need to capture the original field values here for change detection. - """ - instance = super().from_db(db, field_names, values) - # Initialize as empty dict for safe access - instance._original_values = {} - # Capture original values immediately after loading from DB - # This ensures we have the baseline for change detection on the first save - instance._capture_original_values() - return instance - - def _capture_original_values(self): + Override in subclasses with specific terminal states. """ - Capture current field values for change detection. + return False - This allows us to detect which fields changed during save operations, - which is crucial for determining appropriate FSM transitions. + def _get_entity_name(self) -> str: + """Extract entity name from model name (e.g., TaskState → task)""" + model_name = self.__class__.__name__ + if model_name.endswith('State'): + return model_name[:-5].lower() + return 'entity' - For ForeignKey fields, we store the PK instead of the object to avoid - circular references and recursion issues. - - Deferred fields (not yet loaded from DB) are skipped to prevent infinite - recursion when accessing them would trigger refresh_from_db(). + @classmethod + def get_current_state(cls, entity) -> Optional['BaseState']: + """ + Get current state using UUID7 natural ordering. - This is called after each save to refresh the baseline for the next save. + Uses UUID7's natural time ordering to efficiently find the latest state + without requiring created_at indexes or complex queries. """ - self._original_values = {} + entity_field = f'{cls._get_entity_field_name()}' + return cls.objects.filter(**{entity_field: entity}).order_by('-id').first() - # Get deferred fields to avoid triggering recursive database loads - # Deferred fields haven't been loaded yet, so they can't have changed - deferred_fields = self.get_deferred_fields() + @classmethod + def get_current_state_value(cls, entity) -> Optional[str]: + """ + Get current state value as string using UUID7 natural ordering. - for field in self._meta.fields: - # Skip deferred fields to prevent recursion via refresh_from_db() - if field.attname in deferred_fields: - continue + Uses UUID7's natural time ordering to efficiently find the latest state + without requiring created_at indexes or complex queries. + """ + entity_field = f'{cls._get_entity_field_name()}' + current_state = cls.objects.filter(**{entity_field: entity}).order_by('-id').first() + return current_state.state if current_state else None - value = getattr(self, field.name, None) - # For ForeignKey fields, store PK to avoid circular references - if field.is_relation and field.many_to_one and value is not None: - self._original_values[field.name] = value.pk if hasattr(value, 'pk') else value - else: - self._original_values[field.name] = value + @classmethod + def get_state_history(cls, entity, limit: int = 100) -> QuerySet['BaseState']: + """Get complete state history for an entity""" + entity_field = f'{cls._get_entity_field_name()}' + return cls.objects.filter(**{entity_field: entity}).order_by('-id')[:limit] - def __reduce_ex__(self, protocol): + @classmethod + def get_states_in_range(cls, entity, start_time: datetime, end_time: datetime) -> QuerySet['BaseState']: """ - Override serialization to exclude internal FSM tracking fields. + Efficient time-range queries using UUID7. - Django's serialization uses pickle which calls __reduce_ex__. - We exclude _original_values since it's only needed for runtime - change detection, not for serialization/restoration. + Uses UUID7's embedded timestamp for direct time-based filtering + without requiring timestamp indexes. """ - # Get the default reduction - reduction = super().__reduce_ex__(protocol) - - # reduction is a tuple: (callable, args, state, ...) - # state is the instance __dict__ - if len(reduction) >= 3 and isinstance(reduction[2], dict): - state = reduction[2].copy() - # Remove internal FSM fields from serialization - state.pop('_original_values', None) - # Return new reduction with cleaned state - return (reduction[0], reduction[1], state) + reduction[3:] + entity_field = f'{cls._get_entity_field_name()}' + queryset = cls.objects.filter(**{entity_field: entity}) + return UUID7Field.filter_by_time_range(queryset, start_time, end_time).order_by('id') - return reduction + @classmethod + def get_states_since(cls, entity, since: datetime): + """Get all states since a specific timestamp""" + entity_field = f'{cls._get_entity_field_name()}' + queryset = cls.objects.filter(**{entity_field: entity}) + return UUID7Field.filter_since_time(queryset, since).order_by('id') - def _get_changed_fields(self) -> Dict[str, tuple]: + @classmethod + def get_denormalized_fields(cls, entity) -> Dict[str, Any]: """ - Get fields that changed since the last load/save. + Get denormalized fields to include in the state record. + + Override this method in subclasses to provide denormalized data + that should be stored with each state transition for performance + optimization and auditing purposes. + + Args: + entity: The entity instance being transitioned Returns: - Dict mapping field names to (old_value, new_value) tuples - Note: For ForeignKey fields, old_value will be the PK, new_value will be the object + Dictionary of field names to values that should be stored + in the state record Example: - changed = self._get_changed_fields() - if 'is_labeled' in changed: - old_val, new_val = changed['is_labeled'] - if not old_val and new_val: - # Task became labeled - pass - """ - # If no original values captured yet, nothing has changed - # Use hasattr check to handle cases where _original_values doesn't exist - if not hasattr(self, '_original_values') or not self._original_values: - return {} - - changed = {} - for field in self._meta.fields: - # Only check fields that were captured in _original_values - # Fields that were deferred during capture won't be in _original_values - # and should be considered unchanged - if field.name not in self._original_values: - continue - - old_value = self._original_values[field.name] - new_value = getattr(self, field.name, None) - - # For ForeignKey fields, old_value is stored as PK, so compare PK to PK - if field.is_relation and field.many_to_one: - new_pk = new_value.pk if new_value and hasattr(new_value, 'pk') else new_value - if old_value != new_pk: - changed[field.name] = (old_value, new_value) - elif old_value != new_value: - changed[field.name] = (old_value, new_value) - return changed - - def _determine_fsm_transitions(self, is_creating: bool = None, changed_fields: dict = None) -> list: + @classmethod + def get_denormalized_fields(cls, entity): + return { + 'project_id': entity.project_id, + 'organization_id': entity.project.organization_id, + 'task_type': entity.task_type, + 'priority': entity.priority + } """ - Determine which FSM transitions should be triggered based on model state. + return {} - This method automatically discovers registered transitions for this entity - and checks which ones should execute based on their trigger metadata. + @classmethod + def _get_entity_field_name(cls) -> str: + """Get the foreign key field name for the entity""" + model_name = cls.__name__ + if model_name.endswith('State'): + return model_name[:-5].lower() + return 'entity' - Args: - is_creating: Whether this is a creation. If None, checks self._state.adding - changed_fields: Dict of changed fields. If None, computes them. - Override this method ONLY if you need custom transition logic beyond - what the declarative triggers provide. +# Core state models for basic Label Studio entities - Returns: - List of transition names to execute (in order) - Note: - In most cases, you don't need to override this. Just register transitions - with appropriate trigger metadata using @register_state_transition decorator. +@register_state_model('task') +class TaskState(BaseState): + """ + Core task state tracking for Label Studio. + Provides basic task state management with: + - Simple 3-state workflow (CREATED → IN_PROGRESS → COMPLETED) + - High-performance queries with UUID7 ordering + """ - Example of custom override (if needed): - def _determine_fsm_transitions(self, is_creating=None, changed_fields=None) -> list: - # Get default transitions - transitions = super()._determine_fsm_transitions(is_creating, changed_fields) + # Entity Relationship + task = models.ForeignKey('tasks.Task', related_name='fsm_states', on_delete=models.CASCADE) - # Add custom logic - if self.some_complex_condition(): - transitions.append('custom_transition') + # Override state field to add choices constraint + state = models.CharField(max_length=50, choices=TaskStateChoices.choices, db_index=True) - return transitions - """ - from fsm.registry import transition_registry - - entity_name = self._meta.model_name - - # Use provided is_creating, or fall back to checking _state.adding - if is_creating is None: - is_creating = self._state.adding - - # Use provided changed_fields, or compute them - if changed_fields is None: - changed_fields = {} if is_creating else self._get_changed_fields() - - # Debug logging for transition determination - if entity_name == 'project' and not is_creating: - logger.debug( - f'FSM: Determining transitions for {entity_name}', - extra={ - 'entity_id': self.pk, - 'is_creating': is_creating, - 'changed_fields': list(changed_fields.keys()), - 'changed_fields_detail': changed_fields, - }, - ) - - # Get all registered transitions for this entity - registered_transitions = transition_registry.get_transitions_for_entity(entity_name) - if not registered_transitions: - return [] - - transitions_to_execute = [] - - for transition_name, transition_class in registered_transitions.items(): - # Check if this transition should execute based on trigger metadata - should_execute = False - - # Check creation trigger - if is_creating and getattr(transition_class, '_triggers_on_create', False): - should_execute = True - - # Check update triggers - elif not is_creating and getattr(transition_class, '_triggers_on_update', True): - trigger_fields = getattr(transition_class, '_trigger_fields', []) - - # If no specific fields, check if transition has custom logic - if not trigger_fields: - # Let the transition's should_execute method decide - # We'll add it and let it validate later - should_execute = True - else: - # Check if any trigger fields changed - for field in trigger_fields: - if field in changed_fields: - should_execute = True - break - - # If trigger metadata says we should execute, also check the transition's should_execute() method - if should_execute: - try: - # Instantiate the transition to check should_execute() if it exists - # We need a minimal context to check should_execute - from fsm.transitions import TransitionContext - - # Create a temporary transition instance with full context - # Convert changed_fields to the format expected by ModelChangeTransition - formatted_changed_fields = {k: {'old': v[0], 'new': v[1]} for k, v in changed_fields.items()} - - # Create transition with all relevant data for should_execute() check - temp_transition = transition_class( - is_creating=is_creating, changed_fields=formatted_changed_fields - ) - - # Check if should_execute is overridden (not using the base implementation) - # The base implementation always returns True, so we only check if it's been customized - from fsm.transitions import BaseTransition - - should_execute_method = getattr(type(temp_transition), 'should_execute', None) - base_should_execute = getattr(BaseTransition, 'should_execute', None) - - # Only call should_execute if it's been overridden in the subclass - if should_execute_method and should_execute_method != base_should_execute: - # Build a minimal context for should_execute check - # NOTE: We skip getting current state here to avoid recursion issues - # The actual state will be retrieved during transition execution - context = TransitionContext( - entity=self, - current_user=None, # Will be set properly during execution - current_state_object=None, # Skip to avoid recursion - current_state=None, # Skip to avoid recursion - target_state=temp_transition.target_state, - organization_id=getattr(self, 'organization_id', None), - ) - - # Call should_execute to do final filtering - if not temp_transition.should_execute(context): - should_execute = False - logger.debug( - f'FSM: Transition {transition_name} filtered out by should_execute()', - extra={ - 'entity_type': entity_name, - 'entity_id': self.pk, - 'transition_name': transition_name, - }, - ) - except Exception as e: - # If should_execute check fails, log but still add the transition - # Let it fail during actual execution with proper error handling - logger.debug( - f'FSM: Error checking should_execute for {transition_name}: {e}', - extra={ - 'entity_type': entity_name, - 'entity_id': self.pk, - 'transition_name': transition_name, - 'error': str(e), - }, - ) - - if should_execute: - transitions_to_execute.append(transition_name) - - return transitions_to_execute - - def _get_fsm_transition_data(self) -> Dict[str, Any]: - """ - Get data to pass to the FSM transition. + project_id = models.PositiveIntegerField( + db_index=True, help_text='From task.project_id - denormalized for performance' + ) - Override in subclasses to provide transition-specific data that should - be stored in the state record's context_data field. + class Meta: + app_label = 'fsm' + indexes = [ + # Critical: Latest state lookup (current state determined by latest UUID7 id) + # Index with DESC order explicitly supports ORDER BY id DESC queries + models.Index(fields=['task_id', '-id'], name='task_current_state_idx'), + # Reporting and filtering + models.Index(fields=['project_id', 'state', '-id'], name='task_project_state_idx'), + models.Index(fields=['organization_id', 'state', '-id'], name='task_org_reporting_idx'), + # History queries + models.Index(fields=['task_id', 'id'], name='task_history_idx'), + ] + # No constraints needed - INSERT-only approach + ordering = ['-id'] - Returns: - Dictionary of data to pass to transition + @classmethod + def get_denormalized_fields(cls, entity): + """Get denormalized fields for TaskState creation""" + return { + 'project_id': entity.project_id, + } - Example: - def _get_fsm_transition_data(self) -> Dict[str, Any]: - return { - 'project_id': self.project_id, - 'completed_by_id': self.completed_by_id, - 'annotation_count': self.annotations.count() - } - """ - return {} + @property + def is_terminal_state(self) -> bool: + """Check if this is a terminal task state""" + return self.state == TaskStateChoices.COMPLETED - def _should_execute_fsm(self) -> bool: - """ - Check if FSM processing should be executed. - Returns False if: - - Feature flag is disabled - - User context is unavailable (tests must set CurrentContext explicitly) - - Explicitly skipped via instance attribute +@register_state_model('annotation') +class AnnotationState(BaseState): + """ + Core annotation state tracking for Label Studio. + Provides basic annotation state management with: + - Simple 3-state workflow (DRAFT → SUBMITTED → COMPLETED) + """ - Returns: - True if FSM should execute, False otherwise + # Entity Relationship + annotation = models.ForeignKey('tasks.Annotation', on_delete=models.CASCADE, related_name='fsm_states') - Note: - CurrentContext is available in web requests and background jobs. - In tests, it must be set explicitly for the user/organization. - """ - # Check for instance-level skip flag - if getattr(self, '_skip_fsm', False): - return False - - # Use the centralized FSM enabled check from utils - # This handles feature flag and thread-local overrides - try: - from core.current_request import CurrentContext - from fsm.utils import is_fsm_enabled - - # Get user from CurrentContext - don't fall back to AnonymousUser - # If no user in context (e.g., tests without explicit setup), return False - try: - user = CurrentContext.get_user() - user_type = type(user).__name__ if user else None - user_authenticated = getattr(user, 'is_authenticated', None) if user else None - logger.info( - f'FSM check for {self.__class__.__name__}(id={getattr(self, "pk", None)}): ' - f'user_type={user_type}, authenticated={user_authenticated}' - ) - if user is None: - logger.info(f'FSM check: User is None, skipping FSM for {self.__class__.__name__}') - return False - # Check if user is authenticated (not AnonymousUser) - if not user.is_authenticated: - logger.info( - f'FSM check: User {user_type} not authenticated, skipping FSM for {self.__class__.__name__}' - ) - return False - except Exception: - # CurrentContext not available or no user set - # This is expected in tests that don't set up context - logger.info(f'FSM check: Exception getting user, skipping FSM for {self.__class__.__name__}') - return False - - return is_fsm_enabled(user=user) - except Exception as e: - logger.debug(f'FSM check failed: {e}') - return False - - def save(self, *args, **kwargs): - """ - Override save to trigger FSM transitions based on model changes. + # Override state field to add choices constraint + state = models.CharField(max_length=50, choices=AnnotationStateChoices.choices, db_index=True) - This method: - 1. Captures the current state (creating vs updating) - 2. Performs the actual database save - 3. Determines if an FSM transition is needed - 4. Executes the transition if needed - 5. Gracefully handles FSM errors without breaking the save + # Denormalized fields for performance (avoid JOINs in common queries) + task_id = models.PositiveIntegerField( + db_index=True, help_text='From annotation.task_id - denormalized for performance' + ) + project_id = models.PositiveIntegerField( + db_index=True, help_text='From annotation.task.project_id - denormalized for performance' + ) + completed_by_id = models.PositiveIntegerField( + null=True, db_index=True, help_text='From annotation.completed_by_id - denormalized for performance' + ) - Args: - *args: Positional arguments passed to super().save() - **kwargs: Keyword arguments passed to super().save() - Special kwarg: skip_fsm=True to bypass FSM processing + class Meta: + app_label = 'fsm' + indexes = [ + # Critical: Latest state lookup + models.Index(fields=['annotation_id', '-id'], name='anno_current_state_idx'), + # Filtering and reporting + models.Index(fields=['task_id', 'state', '-id'], name='anno_task_state_idx'), + models.Index(fields=['completed_by_id', 'state', '-id'], name='anno_user_report_idx'), + models.Index(fields=['project_id', 'state', '-id'], name='anno_project_report_idx'), + ] + ordering = ['-id'] - Returns: - Whatever super().save() returns - """ - # Check for explicit FSM skip flag - skip_fsm = kwargs.pop('skip_fsm', False) - - # Also check CurrentContext for skip_fsm flag (for context manager usage) - if not skip_fsm: - from core.current_request import CurrentContext - - skip_fsm = CurrentContext.get('skip_fsm', False) - - # Check if this is a creation vs update - is_creating = self._state.adding - - # Capture changed fields before save (only for updates) - # Note: _original_values should already be populated by from_db() or previous save() - changed_fields = {} if is_creating else self._get_changed_fields() - - # Perform the actual save - result = super().save(*args, **kwargs) - - # After successful save, trigger FSM transitions if enabled and not skipped - should_execute = not skip_fsm and self._should_execute_fsm() - - logger.debug( - f'FSM check for {self.__class__.__name__} {self.pk}: skip_fsm={skip_fsm}, should_execute={should_execute}', - extra={ - 'entity_type': self.__class__.__name__, - 'entity_id': self.pk, - 'skip_fsm': skip_fsm, - 'should_execute': should_execute, - }, - ) - if not skip_fsm and should_execute: - try: - # Pass is_creating and changed_fields that were captured before save() - transitions = self._determine_fsm_transitions(is_creating=is_creating, changed_fields=changed_fields) - logger.debug( - f'FSM transitions determined for {self.__class__.__name__} {self.pk}: {transitions}', - extra={ - 'entity_type': self.__class__.__name__, - 'entity_id': self.pk, - 'transitions': transitions, - 'is_creating': is_creating, - }, - ) - for transition_name in transitions: - try: - self._execute_fsm_transition( - transition_name=transition_name, is_creating=is_creating, changed_fields=changed_fields - ) - except Exception as e: - # Log error for this specific transition but continue with others - logger.error( - f'FSM transition {transition_name} failed for {self.__class__.__name__} {self.pk}', - extra={ - 'event': 'fsm.transition_failed_on_save', - 'entity_type': self.__class__.__name__, - 'entity_id': self.pk, - 'transition_name': transition_name, - 'error': str(e), - 'is_creating': is_creating, - }, - exc_info=True, - ) - except Exception as e: - # Log error in determining transitions - logger.error( - f'FSM transition discovery failed for {self.__class__.__name__} {self.pk}', - extra={ - 'event': 'fsm.transition_discovery_failed', - 'entity_type': self.__class__.__name__, - 'entity_id': self.pk, - 'error': str(e), - 'is_creating': is_creating, - }, - exc_info=True, - ) - - # Update original values after save for next time - self._capture_original_values() - - return result - - def _execute_fsm_transition(self, transition_name: str, is_creating: bool, changed_fields: Dict[str, tuple]): - """ - Execute an FSM transition. + @classmethod + def get_denormalized_fields(cls, entity): + """Get denormalized fields for AnnotationState creation""" + return { + 'task_id': entity.task.id, + 'project_id': entity.task.project_id, + 'completed_by_id': entity.completed_by_id if entity.completed_by_id else None, + } + + @property + def is_terminal_state(self) -> bool: + """Check if this is a terminal annotation state""" + return self.state == AnnotationStateChoices.COMPLETED + + +@register_state_model('project') +class ProjectState(BaseState): + """ + Core project state tracking for Label Studio. + Provides basic project state management with: + - Simple 3-state workflow (CREATED → IN_PROGRESS → COMPLETED) + - Project lifecycle tracking + """ - This method handles the actual transition execution, including: - - Getting current context (user, org_id) - - Preparing transition data - - Calling the state manager + # Entity Relationship + project = models.ForeignKey('projects.Project', on_delete=models.CASCADE, related_name='fsm_states') - Args: - transition_name: Name of the registered transition to execute - is_creating: Whether this is a new model creation - changed_fields: Dict of changed fields (field_name -> (old, new)) + # Override state field to add choices constraint + state = models.CharField(max_length=50, choices=ProjectStateChoices.choices, db_index=True) - Note: - This is only called after _should_execute_fsm() returns True, - so CurrentContext should be available with a valid user. - """ - from core.current_request import CurrentContext - from fsm.state_manager import get_state_manager - - StateManager = get_state_manager() - - # Get context - should be available since _should_execute_fsm passed - user = CurrentContext.get_user() - org_id = CurrentContext.get_organization_id() - - # Get transition-specific data from the model - transition_data = self._get_fsm_transition_data() - - # Add metadata about the change - transition_data.update( - { - 'is_creating': is_creating, - 'changed_fields': {k: {'old': v[0], 'new': v[1]} for k, v in changed_fields.items()}, - } - ) - - logger.info( - f'Executing FSM transition for {self.__class__.__name__}', - extra={ - 'event': 'fsm.transition_executing', - 'entity_type': self.__class__.__name__, - 'entity_id': self.pk, - 'transition_name': transition_name, - 'is_creating': is_creating, - 'user_id': user.id if user else None, - 'organization_id': org_id, - }, - ) - - # Execute the registered transition - try: - StateManager.execute_transition( - entity=self, - transition_name=transition_name, - transition_data=transition_data, - user=user, - organization_id=org_id, - ) - - logger.info( - f'FSM transition executed successfully for {self.__class__.__name__}', - extra={ - 'event': 'fsm.transition_success', - 'entity_type': self.__class__.__name__, - 'entity_id': self.pk, - 'transition_name': transition_name, - 'user_id': user.id if user else None, - 'organization_id': org_id, - }, - ) - except Exception: - # Re-raise to be caught by save() method - raise + created_by_id = models.PositiveIntegerField( + null=True, db_index=True, help_text='From project.created_by_id - denormalized for performance' + ) + + class Meta: + app_label = 'fsm' + indexes = [ + # Critical: Latest state lookup + models.Index(fields=['project_id', '-id'], name='project_current_state_idx'), + # Filtering and reporting + models.Index(fields=['organization_id', 'state', '-id'], name='project_org_state_idx'), + models.Index(fields=['organization_id', '-id'], name='project_org_reporting_idx'), + ] + ordering = ['-id'] + + @classmethod + def get_denormalized_fields(cls, entity): + """Get denormalized fields for ProjectState creation""" + return { + 'created_by_id': entity.created_by_id if entity.created_by_id else None, + } + + @property + def is_terminal_state(self) -> bool: + """Check if this is a terminal project state""" + return self.state == ProjectStateChoices.COMPLETED diff --git a/label_studio/fsm/project_transitions.py b/label_studio/fsm/project_transitions.py deleted file mode 100644 index 570da46d64fd..000000000000 --- a/label_studio/fsm/project_transitions.py +++ /dev/null @@ -1,222 +0,0 @@ -""" -FSM Transitions for Project model. - -This module defines declarative transitions for the Project entity, -replacing the previous signal-based approach with explicit, testable transitions. -""" - -from typing import Any, Dict - -from fsm.registry import register_state_transition -from fsm.state_choices import ProjectStateChoices -from fsm.transitions import ModelChangeTransition, TransitionContext - - -@register_state_transition('project', 'project_created', triggers_on_create=True, triggers_on_update=False) -class ProjectCreatedTransition(ModelChangeTransition): - """ - Transition when a new project is created. - - This is the initial state transition that occurs when a project is - first saved to the database. - - Trigger: Automatically on creation (triggers_on_create=True, triggers_on_update=False) - """ - - @property - def target_state(self) -> str: - return ProjectStateChoices.CREATED - - def should_execute(self, context: TransitionContext) -> bool: - """Only execute on creation, never on updates.""" - return self.is_creating - - def get_reason(self, context: TransitionContext) -> str: - """Return detailed reason for project creation.""" - return 'Project created' - - def transition(self, context: TransitionContext) -> Dict[str, Any]: - """ - Execute project creation transition. - - Args: - context: Transition context containing project and user information - - Returns: - Context data to store with the state record - """ - project = context.entity - - return { - 'reason': 'Project created', - 'organization_id': project.organization_id, - 'title': project.title, - 'created_by_id': project.created_by_id if project.created_by_id else None, - 'label_config_present': bool(project.label_config), - } - - -# Note: Project state transitions (IN_PROGRESS, COMPLETED) are triggered by task state changes -# via update_project_state_after_task_change() helper function, not by direct project model changes. - - -@register_state_transition('project', 'project_in_progress', triggers_on_create=False, triggers_on_update=False) -class ProjectInProgressTransition(ModelChangeTransition): - """ - Transition when project moves to IN_PROGRESS state. - - Triggered when: First annotation is submitted on any task - From: CREATED -> IN_PROGRESS - """ - - @property - def target_state(self) -> str: - return ProjectStateChoices.IN_PROGRESS - - def get_reason(self, context: TransitionContext) -> str: - return 'Project moved to in progress - first annotation submitted' - - def transition(self, context: TransitionContext) -> Dict[str, Any]: - project = context.entity - return { - 'reason': 'Project moved to in progress - first annotation submitted', - 'organization_id': project.organization_id, - 'total_tasks': project.tasks.count(), - } - - -@register_state_transition('project', 'project_completed', triggers_on_create=False, triggers_on_update=False) -class ProjectCompletedTransition(ModelChangeTransition): - """ - Transition when project moves to COMPLETED state. - - Triggered when: All tasks in project are COMPLETED - From: IN_PROGRESS -> COMPLETED - """ - - @property - def target_state(self) -> str: - return ProjectStateChoices.COMPLETED - - def get_reason(self, context: TransitionContext) -> str: - return 'Project completed - all tasks completed' - - def transition(self, context: TransitionContext) -> Dict[str, Any]: - project = context.entity - return { - 'reason': 'Project completed - all tasks completed', - 'organization_id': project.organization_id, - 'total_tasks': project.tasks.count(), - } - - -@register_state_transition( - 'project', 'project_in_progress_from_completed', triggers_on_create=False, triggers_on_update=False -) -class ProjectInProgressFromCompletedTransition(ModelChangeTransition): - """ - Transition when project moves back to IN_PROGRESS from COMPLETED. - - Triggered when: Any task becomes not COMPLETED (e.g., annotations deleted) - From: COMPLETED -> IN_PROGRESS - """ - - @property - def target_state(self) -> str: - return ProjectStateChoices.IN_PROGRESS - - def get_reason(self, context: TransitionContext) -> str: - return 'Project moved back to in progress - task became incomplete' - - def transition(self, context: TransitionContext) -> Dict[str, Any]: - project = context.entity - return { - 'reason': 'Project moved back to in progress - task became incomplete', - 'organization_id': project.organization_id, - 'total_tasks': project.tasks.count(), - } - - -def update_project_state_after_task_change(project, user=None): - """ - Update project FSM state based on task states. - - This helper function is called after any task state change to update the parent project's state. - It handles "cold start" scenarios where tasks or the project may not have state records yet. - - State transition logic: - - CREATED -> IN_PROGRESS: When any task becomes COMPLETED - - IN_PROGRESS -> COMPLETED: When ALL tasks are COMPLETED - - COMPLETED -> IN_PROGRESS: When ANY task is not COMPLETED - - Args: - project: Project instance to update - user: User triggering the change (for FSM context) - """ - from fsm.state_choices import ProjectStateChoices, TaskStateChoices - from fsm.state_manager import StateManager - from fsm.utils import get_or_initialize_state, infer_entity_state_from_data - - # Get task state counts - from tasks.models import Task - - tasks = Task.objects.filter(project=project) - total_tasks = tasks.count() - - if total_tasks == 0: - # No tasks - ensure project is in CREATED state - current_project_state = get_or_initialize_state(project, user=user) - return - - # Count completed tasks - handle both tasks with and without state records - completed_tasks_count = 0 - - for task in tasks: - # Get or initialize task state - task_state = StateManager.get_current_state_value(task) - - if task_state is None: - # Task has no state record - infer from data - task_state = infer_entity_state_from_data(task) - - # Initialize the task state - if task_state: - get_or_initialize_state(task, user=user, inferred_state=task_state) - - # Count completed tasks - if task_state == TaskStateChoices.COMPLETED: - completed_tasks_count += 1 - - # Determine target project state - if completed_tasks_count == 0: - # No completed tasks -> should be CREATED - target_state = ProjectStateChoices.CREATED - elif completed_tasks_count == total_tasks: - # All tasks completed -> should be COMPLETED - target_state = ProjectStateChoices.COMPLETED - else: - # Some tasks completed -> should be IN_PROGRESS - target_state = ProjectStateChoices.IN_PROGRESS - - # Get current project state (initialize if needed) - current_project_state = StateManager.get_current_state_value(project) - - if current_project_state is None: - # Project has no state - initialize with target state - get_or_initialize_state(project, user=user, inferred_state=target_state) - return - - # Execute appropriate transition if state should change - if current_project_state != target_state: - if current_project_state == ProjectStateChoices.CREATED and target_state == ProjectStateChoices.IN_PROGRESS: - StateManager.execute_transition(entity=project, transition_name='project_in_progress', user=user) - elif ( - current_project_state == ProjectStateChoices.IN_PROGRESS and target_state == ProjectStateChoices.COMPLETED - ): - StateManager.execute_transition(entity=project, transition_name='project_completed', user=user) - elif ( - current_project_state == ProjectStateChoices.COMPLETED and target_state == ProjectStateChoices.IN_PROGRESS - ): - StateManager.execute_transition( - entity=project, transition_name='project_in_progress_from_completed', user=user - ) diff --git a/label_studio/fsm/registry.py b/label_studio/fsm/registry.py index 66766663bc9f..5311bba468fd 100644 --- a/label_studio/fsm/registry.py +++ b/label_studio/fsm/registry.py @@ -12,7 +12,7 @@ from django.db.models import Model, TextChoices if typing.TYPE_CHECKING: - from fsm.state_models import BaseState + from fsm.models import BaseState from fsm.transitions import BaseTransition logger = logging.getLogger(__name__) @@ -300,54 +300,21 @@ def clear(self): transition_registry = TransitionRegistry() -def register_state_transition( - entity_name: str, - transition_name: str, - triggers_on_create: bool = False, - triggers_on_update: bool = True, - triggers_on: list = None, - force_state_record: bool = False, -): +def register_state_transition(entity_name: str, transition_name: str): """ - Decorator to register a state transition class with trigger metadata. - - This decorator not only registers the transition but also configures when - it should be triggered based on model changes. + Decorator to register a state transition class. Args: - entity_name: Name of the entity type (e.g., 'task', 'project') - transition_name: Name of the transition (e.g., 'task_created') - triggers_on_create: If True, triggers when entity is created - triggers_on_update: If True, can trigger on updates (default: True) - triggers_on: List of field names that trigger this transition - force_state_record: If True, creates state record even if state doesn't change (for audit trails) + entity_name: Name of the entity type + transition_name: Name of the transition (defaults to class name in snake_case) Example: - # Trigger only on creation - @register_state_transition('task', 'task_created', triggers_on_create=True) - class TaskCreatedTransition(ModelChangeTransition): - pass - - # Trigger when specific fields change - @register_state_transition('project', 'project_published', triggers_on=['is_published']) - class ProjectPublishedTransition(ModelChangeTransition): - pass - - # Trigger when any of several fields change - @register_state_transition('project', 'settings_changed', - triggers_on=['maximum_annotations', 'overlap_cohort_percentage']) - class ProjectSettingsChangedTransition(ModelChangeTransition): - pass + @register_state_transition('task', 'start_task') + class StartTaskTransition(BaseTransition[Task, TaskState]): + # ... implementation """ def decorator(transition_class: 'BaseTransition') -> 'BaseTransition': - # Store trigger metadata and transition name on the class - transition_class._triggers_on_create = triggers_on_create - transition_class._triggers_on_update = triggers_on_update - transition_class._trigger_fields = triggers_on or [] - transition_class._transition_name = transition_name # Store the registered transition name - transition_class._force_state_record = force_state_record # Store whether to force state record creation - transition_registry.register(entity_name, transition_name, transition_class) return transition_class diff --git a/label_studio/fsm/state_manager.py b/label_studio/fsm/state_manager.py index d19d2b5d740d..3b6e32d20957 100644 --- a/label_studio/fsm/state_manager.py +++ b/label_studio/fsm/state_manager.py @@ -9,13 +9,12 @@ from datetime import datetime from typing import Any, Dict, List, Optional, Type -from core.current_request import CurrentContext -from core.feature_flags import flag_set from django.conf import settings from django.core.cache import cache +from django.db import transaction from django.db.models import Model, QuerySet +from fsm.models import BaseState from fsm.registry import get_state_model_for_entity -from fsm.state_models import BaseState from fsm.transition_executor import execute_transition_with_state_manager logger = logging.getLogger(__name__) @@ -54,13 +53,6 @@ class StateManager: CACHE_TTL = getattr(settings, 'FSM_CACHE_TTL', 300) # 5 minutes default CACHE_PREFIX = 'fsm:current' - @classmethod - def _is_fsm_enabled(cls, user='auto') -> bool: - if user == 'auto': - user = CurrentContext.get_user() - """Check if FSM feature is enabled via feature flag.""" - return flag_set('fflag_feat_fit_568_finite_state_management', user=user) - @classmethod def get_cache_key(cls, entity: Model) -> str: """Generate cache key for entity's current state""" @@ -86,21 +78,17 @@ def get_current_state_value(cls, entity: Model) -> Optional[str]: # Task is finished pass """ - if not cls._is_fsm_enabled(): - return None # Feature disabled, return no state - cache_key = cls.get_cache_key(entity) # Try cache first cached_state = cache.get(cache_key) if cached_state is not None: logger.info( - 'FSM: Cache hit', + 'FSM cache hit', extra={ 'event': 'fsm.cache_hit', 'entity_type': entity._meta.label_lower, 'entity_id': entity.pk, - 'organization_id': CurrentContext.get_organization_id(), 'state': cached_state, }, ) @@ -118,12 +106,11 @@ def get_current_state_value(cls, entity: Model) -> Optional[str]: if current_state is not None: cache.set(cache_key, current_state, cls.CACHE_TTL) logger.info( - 'FSM: Cache miss', + 'FSM cache miss', extra={ 'event': 'fsm.cache_miss', 'entity_type': entity._meta.label_lower, 'entity_id': entity.pk, - 'organization_id': CurrentContext.get_organization_id(), }, ) @@ -131,12 +118,11 @@ def get_current_state_value(cls, entity: Model) -> Optional[str]: except Exception as e: logger.error( - 'FSM: Error getting current state', + 'Error getting current state', extra={ 'event': 'fsm.get_state_error', 'entity_type': entity._meta.label_lower, 'entity_id': entity.pk, - 'organization_id': CurrentContext.get_organization_id(), 'error': str(e), }, exc_info=True, @@ -172,10 +158,8 @@ def transition_state( new_state: str, transition_name: str = None, user=None, - organization_id=None, context: Dict[str, Any] = None, reason: str = '', - force_state_record: bool = False, ) -> bool: """ Perform state transition with audit trail. @@ -190,10 +174,8 @@ def transition_state( new_state: Target state transition_name: Name of transition method (for audit) user: User triggering the transition - organization_id: Organization ID context: Additional context data reason: Human-readable reason for transition - force_state_record: If True, creates state record even if state doesn't change (for audit trails) Returns: True if transition succeeded, False otherwise @@ -208,78 +190,32 @@ def transition_state( new_state='IN_PROGRESS', transition_name='start_annotation', user=request.user, - organization_id=request.user.active_organization_id, context={'assignment_id': assignment.id}, reason='User started annotation work' ) """ - if not cls._is_fsm_enabled(user=user): - return True # Feature disabled, silently succeed - state_model = get_state_model_for_entity(entity) if not state_model: raise StateManagerError(f'No state model found for {entity._meta.model_name} when transitioning state') current_state = cls.get_current_state_value(entity) - # Prevent same-state transitions - only create state records for actual state changes - # This avoids creating redundant data when the effective state doesn't change - # However, allow forced state records for audit trails (e.g., annotation updates) - # IMPORTANT: Also check if a state record exists in DB - if not, we must create one - # even if inferred state matches target state (to persist the inferred state) - if current_state == new_state and not force_state_record: - # Verify a state record actually exists in DB (not just inferred) - state_record_exists = state_model.objects.filter(**{entity._meta.model_name: entity}).exists() - if state_record_exists: - return True # Skip transition - record exists and state unchanged - # else: No record exists (state was inferred), continue to create record - - # Optimistic concurrency control using cache-based locking - cache_key = cls.get_cache_key(entity) - lock_key = f'{cache_key}:lock' - - if organization_id is None: - organization_id = CurrentContext.get_organization_id() - try: - # Try to acquire an optimistic lock using cache add (atomic operation) - # add() only succeeds if the key doesn't exist - lock_acquired = cache.add(lock_key, 'locked', timeout=5) # 5 second timeout - - if not lock_acquired: - # Another process is currently transitioning this entity - logger.info( - 'FSM: Concurrent transition detected, skipping', - extra={ - 'event': 'fsm.concurrent_transition_skipped', - 'entity_type': entity._meta.label_lower, - 'entity_id': entity.pk, - 'target_state': new_state, - 'organization_id': organization_id, - }, - ) - return True - - try: + with transaction.atomic(): # INSERT-only approach - no UPDATE operations needed # Get denormalized fields from the state model class denormalized_fields = state_model.get_denormalized_fields(entity) # Get organization from entity or denormalized fields, or user's active organization - if organization_id is None: - organization_id = getattr( - entity, 'organization_id', getattr(denormalized_fields, 'organization_id', None) - ) - if organization_id is not None: - CurrentContext.set_organization_id(organization_id) + organization_id = getattr( + entity, 'organization_id', getattr(denormalized_fields, 'organization_id', None) + ) if not organization_id and user and hasattr(user, 'active_organization') and user.active_organization: organization_id = user.active_organization.id - if organization_id is not None: - CurrentContext.set_organization_id(organization_id) logger.info( - 'FSM: State transition starting', + 'State transition starting', extra={ 'event': 'fsm.transition_state_start', 'entity_type': entity._meta.label_lower, @@ -294,13 +230,8 @@ def transition_state( }, ) - # CRITICAL FIX: Use state model's correct field name instead of entity._meta.model_name - # This fixes the architectural entity field mapping issue where entity._meta.model_name - # doesn't always match the actual field name defined in FSM state models - entity_field_name = state_model._get_entity_field_name() - new_state_record = state_model.objects.create( - **{entity_field_name: entity}, + **{entity._meta.model_name: entity}, state=new_state, previous_state=current_state, transition_name=transition_name, @@ -311,26 +242,28 @@ def transition_state( **denormalized_fields, ) - # Write-through cache: Update immediately - # This ensures the cache is updated atomically with the database - cache.set(cache_key, new_state, cls.CACHE_TTL) + # Update cache with new state after transaction commits + cache_key = cls.get_cache_key(entity) - logger.info( - 'FSM: Cache updated for transition state', - extra={ - 'event': 'fsm.transition_state_cache_updated', - 'entity_type': entity._meta.label_lower, - 'entity_id': entity.pk, - 'state': new_state, - **{ - 'user_id': user.id if user else None, - 'organization_id': organization_id if organization_id else None, + def update_cache(key, state, user_id, org_id): + cache.set(key, state, cls.CACHE_TTL) + logger.info( + 'Cache updated for transition state', + extra={ + 'event': 'fsm.transition_state_cache_updated', + 'entity_type': entity._meta.label_lower, + 'entity_id': entity.pk, + 'state': state, + **{'user_id': user_id if user_id else None, 'organization_id': org_id if org_id else None}, }, - }, + ) + + transaction.on_commit( + lambda: update_cache(cache_key, new_state, user.id if user else None, organization_id) ) logger.info( - 'FSM: State transition successful', + 'State transition successful', extra={ 'event': 'fsm.transition_state_success', 'entity_type': entity._meta.label_lower, @@ -345,20 +278,12 @@ def transition_state( ) return True - finally: - # Always release the lock, regardless of success or failure - cache.delete(lock_key) - except Exception as e: - # On failure, clean up lock and invalidate potentially stale cache - cache.delete(lock_key) + # On failure, invalidate potentially stale cache + cache_key = cls.get_cache_key(entity) cache.delete(cache_key) - - # Get organization_id for error logging if it wasn't set earlier - organization_id = CurrentContext.get_organization_id() - logger.error( - 'FSM: State transition failed', + 'State transition failed', extra={ 'event': 'fsm.transition_state_failed', 'entity_type': entity._meta.label_lower, @@ -423,9 +348,9 @@ def invalidate_cache(cls, entity: Model): """Invalidate cached state for an entity""" cache_key = cls.get_cache_key(entity) cache.delete(cache_key) - organization_id = CurrentContext.get_organization_id() + organization_id = getattr(entity, 'organization_id', None) logger.info( - 'FSM: Cache invalidated', + 'Cache invalidated', extra={ 'event': 'fsm.cache_invalidated', 'entity_type': entity._meta.label_lower, @@ -443,8 +368,11 @@ def warm_cache(cls, entities: List[Model]): bulk queries and advanced caching strategies. """ cache_updates = {} - organization_id = CurrentContext.get_organization_id() + organization_id = None for entity in entities: + if organization_id is None: + if hasattr(entity, 'organization_id'): + organization_id = entity.organization_id current_state = cls.get_current_state_value(entity) if current_state: cache_key = cls.get_cache_key(entity) @@ -453,7 +381,7 @@ def warm_cache(cls, entities: List[Model]): if cache_updates: cache.set_many(cache_updates, cls.CACHE_TTL) logger.info( - 'FSM: Cache warmed', + 'Cache warmed', extra={ 'event': 'fsm.cache_warmed', 'entity_count': len(cache_updates), diff --git a/label_studio/fsm/state_models.py b/label_studio/fsm/state_models.py deleted file mode 100644 index 00cd3258c96d..000000000000 --- a/label_studio/fsm/state_models.py +++ /dev/null @@ -1,367 +0,0 @@ -""" -FSM State Models for Label Studio. - -This module contains the state model definitions (BaseState and concrete state models). -These are separated from models.py to avoid registration issues in LSE where -extended state models need to be registered instead of the base OSS models. - -When importing FsmHistoryStateModel, these state models won't be automatically -imported and registered, allowing LSE to register its own extended versions. -""" - -import logging -from datetime import datetime -from typing import Any, Dict, Optional - -from django.conf import settings -from django.db import models -from django.db.models import QuerySet, UUIDField -from fsm.registry import register_state_model -from fsm.state_choices import ( - AnnotationStateChoices, - ProjectStateChoices, - TaskStateChoices, -) -from fsm.utils import UUID7Field, generate_uuid7, timestamp_from_uuid7 - -logger = logging.getLogger(__name__) - - -class BaseState(models.Model): - """ - Abstract base class for all state models using UUID7 for optimal time-series performance. - - This is the core of the FSM system, providing: - - UUID7 primary key with natural time ordering - - Standard state transition metadata - - Audit trail information - - Context data storage - - Performance-optimized helper methods - - Benefits of this architecture: - - INSERT-only operations for maximum concurrency - - Natural time ordering eliminates need for created_at indexes - - Global uniqueness enables distributed system support - - Time-based partitioning for large amounts of state records with consistent performance - - Complete audit trail by design - """ - - # UUID7 Primary Key - provides natural time ordering and global uniqueness - id = UUIDField( - primary_key=True, - default=generate_uuid7, - editable=False, - help_text='UUID7 provides natural time ordering and global uniqueness', - ) - - # Optional organization field - can be overridden or left null - # Applications can add their own organization/tenant fields as needed - organization_id = models.PositiveIntegerField( - null=True, - blank=True, - db_index=True, - help_text='Organization ID that owns this state record (for multi-tenant applications)', - ) - - # Core State Fields - state = models.CharField(max_length=50, db_index=True, help_text='Current state of the entity') - previous_state = models.CharField( - max_length=50, null=True, blank=True, help_text='Previous state before this transition' - ) - - # Transition Metadata - transition_name = models.CharField( - max_length=100, - null=True, - blank=True, - help_text='Name of the transition method that triggered this state change', - ) - triggered_by = models.ForeignKey( - settings.AUTH_USER_MODEL, - on_delete=models.SET_NULL, - null=True, - help_text='User who triggered this state transition', - ) - - # Context & Audit - context_data = models.JSONField( - default=dict, help_text='Additional context data for this transition (e.g., validation results, external IDs)' - ) - reason = models.TextField(blank=True, help_text='Human-readable reason for this state transition') - - # Timestamp (redundant with UUID7 but useful for human readability) - created_at = models.DateTimeField( - auto_now_add=True, - db_index=False, # UUID7 provides natural ordering, no index needed - help_text='Human-readable timestamp for debugging (UUID7 id contains precise timestamp)', - ) - - class Meta: - abstract = True - # UUID7 provides natural ordering, reducing index requirements - ordering = ['-id'] # Most recent first - get_latest_by = 'id' - - def __str__(self): - entity_id = getattr(self, f'{self._get_entity_name()}_id', 'unknown') - return f'{self._get_entity_name().title()} {entity_id}: {self.previous_state} → {self.state}' - - @property - def entity(self): - """Get the related entity object""" - entity_name = self._get_entity_name() - return getattr(self, entity_name) - - @property - def timestamp_from_uuid(self) -> datetime: - """Extract timestamp from UUID7 ID""" - return timestamp_from_uuid7(self.id) - - @property - def is_terminal_state(self) -> bool: - """ - Check if this is a terminal state (no outgoing transitions). - - Override in subclasses with specific terminal states. - """ - return False - - def _get_entity_name(self) -> str: - """Extract entity name from model name (e.g., TaskState → task)""" - model_name = self.__class__.__name__ - if model_name.endswith('State'): - return model_name[:-5].lower() - return 'entity' - - @classmethod - def get_current_state(cls, entity) -> Optional['BaseState']: - """ - Get current state using UUID7 natural ordering. - - Uses UUID7's natural time ordering to efficiently find the latest state - without requiring created_at indexes or complex queries. - """ - entity_field = f'{cls._get_entity_field_name()}' - return cls.objects.filter(**{entity_field: entity}).order_by('-id').first() - - @classmethod - def get_current_state_value(cls, entity) -> Optional[str]: - """ - Get current state value as string using UUID7 natural ordering. - - Uses UUID7's natural time ordering to efficiently find the latest state - without requiring created_at indexes or complex queries. - """ - entity_field = f'{cls._get_entity_field_name()}' - current_state = cls.objects.filter(**{entity_field: entity}).order_by('-id').first() - return current_state.state if current_state else None - - @classmethod - def get_state_history(cls, entity, limit: int = 100) -> QuerySet['BaseState']: - """Get complete state history for an entity""" - entity_field = f'{cls._get_entity_field_name()}' - return cls.objects.filter(**{entity_field: entity}).order_by('-id')[:limit] - - @classmethod - def get_states_in_range(cls, entity, start_time: datetime, end_time: datetime) -> QuerySet['BaseState']: - """ - Efficient time-range queries using UUID7. - - Uses UUID7's embedded timestamp for direct time-based filtering - without requiring timestamp indexes. - """ - entity_field = f'{cls._get_entity_field_name()}' - queryset = cls.objects.filter(**{entity_field: entity}) - return UUID7Field.filter_by_time_range(queryset, start_time, end_time).order_by('id') - - @classmethod - def get_states_since(cls, entity, since: datetime): - """Get all states since a specific timestamp""" - entity_field = f'{cls._get_entity_field_name()}' - queryset = cls.objects.filter(**{entity_field: entity}) - return UUID7Field.filter_since_time(queryset, since).order_by('id') - - @classmethod - def get_denormalized_fields(cls, entity) -> Dict[str, Any]: - """ - Get denormalized fields to include in the state record. - - Override this method in subclasses to provide denormalized data - that should be stored with each state transition for performance - optimization and auditing purposes. - - Args: - entity: The entity instance being transitioned - - Returns: - Dictionary of field names to values that should be stored - in the state record - - Example: - @classmethod - def get_denormalized_fields(cls, entity): - return { - 'project_id': entity.project_id, - 'organization_id': entity.project.organization_id, - 'task_type': entity.task_type, - 'priority': entity.priority - } - """ - return {} - - @classmethod - def _get_entity_field_name(cls) -> str: - """Get the foreign key field name for the entity""" - model_name = cls.__name__ - if model_name.endswith('State'): - return model_name[:-5].lower() - return 'entity' - - -# ============================================================================= -# Core State Models for Label Studio OSS -# ============================================================================= -# Note: These are registered here for OSS. LSE will register its own extended -# versions in lse_fsm/models.py instead of importing these. - - -@register_state_model('task') -class TaskState(BaseState): - """ - Core task state tracking for Label Studio. - Provides basic task state management with: - - Simple 3-state workflow (CREATED → IN_PROGRESS → COMPLETED) - - High-performance queries with UUID7 ordering - """ - - # Entity Relationship - task = models.ForeignKey('tasks.Task', related_name='fsm_states', on_delete=models.CASCADE) - - # Override state field to add choices constraint - state = models.CharField(max_length=50, choices=TaskStateChoices.choices, db_index=True) - - project_id = models.PositiveIntegerField( - db_index=True, help_text='From task.project_id - denormalized for performance' - ) - - class Meta: - app_label = 'fsm' - indexes = [ - # Critical: Latest state lookup (current state determined by latest UUID7 id) - # Index with DESC order explicitly supports ORDER BY id DESC queries - models.Index(fields=['task_id', '-id'], name='task_current_state_idx'), - # Reporting and filtering - models.Index(fields=['project_id', 'state', '-id'], name='task_project_state_idx'), - models.Index(fields=['organization_id', 'state', '-id'], name='task_org_reporting_idx'), - # History queries - models.Index(fields=['task_id', 'id'], name='task_history_idx'), - ] - # No constraints needed - INSERT-only approach - ordering = ['-id'] - - @classmethod - def get_denormalized_fields(cls, entity): - """Get denormalized fields for TaskState creation""" - return { - 'project_id': entity.project_id, - } - - @property - def is_terminal_state(self) -> bool: - """Check if this is a terminal task state""" - return self.state == TaskStateChoices.COMPLETED - - -@register_state_model('annotation') -class AnnotationState(BaseState): - """ - Core annotation state tracking for Label Studio. - Provides basic annotation state management with: - - Simple 3-state workflow (DRAFT → SUBMITTED → COMPLETED) - """ - - # Entity Relationship - annotation = models.ForeignKey('tasks.Annotation', on_delete=models.CASCADE, related_name='fsm_states') - - # Override state field to add choices constraint - state = models.CharField(max_length=50, choices=AnnotationStateChoices.choices, db_index=True) - - # Denormalized fields for performance (avoid JOINs in common queries) - task_id = models.PositiveIntegerField( - db_index=True, help_text='From annotation.task_id - denormalized for performance' - ) - project_id = models.PositiveIntegerField( - db_index=True, help_text='From annotation.task.project_id - denormalized for performance' - ) - completed_by_id = models.PositiveIntegerField( - null=True, db_index=True, help_text='From annotation.completed_by_id - denormalized for performance' - ) - - class Meta: - app_label = 'fsm' - indexes = [ - # Critical: Latest state lookup - models.Index(fields=['annotation_id', '-id'], name='anno_current_state_idx'), - # Filtering and reporting - models.Index(fields=['task_id', 'state', '-id'], name='anno_task_state_idx'), - models.Index(fields=['completed_by_id', 'state', '-id'], name='anno_user_report_idx'), - models.Index(fields=['project_id', 'state', '-id'], name='anno_project_report_idx'), - ] - ordering = ['-id'] - - @classmethod - def get_denormalized_fields(cls, entity): - """Get denormalized fields for AnnotationState creation""" - return { - 'task_id': entity.task.id, - 'project_id': entity.task.project_id, - 'completed_by_id': entity.completed_by_id if entity.completed_by_id else None, - } - - @property - def is_terminal_state(self) -> bool: - """Check if this is a terminal annotation state""" - return self.state == AnnotationStateChoices.COMPLETED - - -@register_state_model('project') -class ProjectState(BaseState): - """ - Core project state tracking for Label Studio. - Provides basic project state management with: - - Simple 3-state workflow (CREATED → IN_PROGRESS → COMPLETED) - - Project lifecycle tracking - """ - - # Entity Relationship - project = models.ForeignKey('projects.Project', on_delete=models.CASCADE, related_name='fsm_states') - - # Override state field to add choices constraint - state = models.CharField(max_length=50, choices=ProjectStateChoices.choices, db_index=True) - - created_by_id = models.PositiveIntegerField( - null=True, db_index=True, help_text='From project.created_by_id - denormalized for performance' - ) - - class Meta: - app_label = 'fsm' - indexes = [ - # Critical: Latest state lookup - models.Index(fields=['project_id', '-id'], name='project_current_state_idx'), - # Filtering and reporting - models.Index(fields=['organization_id', 'state', '-id'], name='project_org_state_idx'), - models.Index(fields=['organization_id', '-id'], name='project_org_reporting_idx'), - ] - ordering = ['-id'] - - @classmethod - def get_denormalized_fields(cls, entity): - """Get denormalized fields for ProjectState creation""" - return { - 'created_by_id': entity.created_by_id if entity.created_by_id else None, - } - - @property - def is_terminal_state(self) -> bool: - """Check if this is a terminal project state""" - return self.state == ProjectStateChoices.COMPLETED diff --git a/label_studio/fsm/task_transitions.py b/label_studio/fsm/task_transitions.py deleted file mode 100644 index 270f46eacdc5..000000000000 --- a/label_studio/fsm/task_transitions.py +++ /dev/null @@ -1,116 +0,0 @@ -""" -FSM Transitions for Task model. - -This module defines declarative transitions for the Task entity. - -Note: Most task state transitions (annotation_started, annotation_complete, completed) -are triggered by Annotation changes, not Task field changes. Those are handled by -annotation transitions via post_transition_hooks that update the parent task. -""" - -from typing import Any, Dict - -from fsm.registry import register_state_transition -from fsm.state_choices import TaskStateChoices -from fsm.transitions import ModelChangeTransition, TransitionContext - - -@register_state_transition('task', 'task_created', triggers_on_create=True, triggers_on_update=False) -class TaskCreatedTransition(ModelChangeTransition): - """ - Transition when a new task is created. - - This is the initial state transition that occurs when a task is - first saved to the database. - - Trigger: Automatically on creation (triggers_on_create=True) - - Note: Other task transitions (annotation_started, completed, etc.) are - triggered by Annotation model changes, not Task field changes. - """ - - @property - def target_state(self) -> str: - return TaskStateChoices.CREATED - - def get_reason(self, context: TransitionContext) -> str: - """Return detailed reason for task creation.""" - return 'Task created in the system' - - def transition(self, context: TransitionContext) -> Dict[str, Any]: - """ - Execute task creation transition. - - Args: - context: Transition context containing task and user information - - Returns: - Context data to store with the state record - """ - task = context.entity - - return { - 'reason': 'Task created in the system', - 'project_id': task.project_id, - 'data_keys': list(task.data.keys()) if task.data else [], - } - - -# Note: Task state transitions (COMPLETED, IN_PROGRESS) are triggered by annotation changes -# via post_transition_hooks in annotation transitions, not by direct task model changes. - - -@register_state_transition('task', 'task_completed', triggers_on_create=False, triggers_on_update=False) -class TaskCompletedTransition(ModelChangeTransition): - """ - Transition when task moves to COMPLETED state. - - Triggered when: First annotation is submitted on this task - From: CREATED -> COMPLETED or IN_PROGRESS -> COMPLETED - """ - - @property - def target_state(self) -> str: - return TaskStateChoices.COMPLETED - - def get_reason(self, context: TransitionContext) -> str: - return 'Task completed - annotation submitted' - - def transition(self, context: TransitionContext) -> Dict[str, Any]: - task = context.entity - return { - 'reason': 'Task completed - annotation submitted', - 'task_id': task.id, - 'project_id': task.project_id, - 'total_annotations': task.total_annotations, - 'cancelled_annotations': task.cancelled_annotations, - 'is_labeled': task.is_labeled, - } - - -@register_state_transition('task', 'task_in_progress', triggers_on_create=False, triggers_on_update=False) -class TaskInProgressTransition(ModelChangeTransition): - """ - Transition when task moves to IN_PROGRESS state. - - Triggered when: All annotations are deleted from a completed task - From: COMPLETED -> IN_PROGRESS - """ - - @property - def target_state(self) -> str: - return TaskStateChoices.IN_PROGRESS - - def get_reason(self, context: TransitionContext) -> str: - return 'Task moved to in progress - annotations deleted' - - def transition(self, context: TransitionContext) -> Dict[str, Any]: - task = context.entity - return { - 'reason': 'Task moved to in progress - annotations deleted', - 'task_id': task.id, - 'project_id': task.project_id, - 'total_annotations': task.total_annotations, - 'cancelled_annotations': task.cancelled_annotations, - 'is_labeled': task.is_labeled, - } diff --git a/label_studio/fsm/tests/conftest.py b/label_studio/fsm/tests/conftest.py deleted file mode 100644 index 8c41b86bd409..000000000000 --- a/label_studio/fsm/tests/conftest.py +++ /dev/null @@ -1,44 +0,0 @@ -""" -FSM Test Configuration. - -Ensures proper test isolation for FSM tests. -""" - -import logging -from copy import deepcopy - -import pytest -from django.core.cache import cache -from fsm.registry import state_choices_registry, state_model_registry, transition_registry - -logger = logging.getLogger(__name__) - - -@pytest.fixture(autouse=True, scope='function') -def fsm_test_isolation(): - """ - Ensure test isolation by: - 1. Saving and restoring the transition registry - 2. Clearing CurrentContext and cache - - This prevents test leakage when tests clear the registry for their own purposes. - """ - from core.current_request import CurrentContext - - # Restore original registry state after test to avoid test pollution - original_transitions = deepcopy(transition_registry._transitions) - original_state_choices = deepcopy(state_choices_registry._choices) - original_state_models = deepcopy(state_model_registry._models) - - yield - - # Clear context and cache after test - CurrentContext.clear() - cache.clear() - transition_registry.clear() - state_choices_registry.clear() - state_model_registry.clear() - - transition_registry._transitions = original_transitions - state_choices_registry._choices = original_state_choices - state_model_registry._models = original_state_models diff --git a/label_studio/fsm/tests/test_api_example.py b/label_studio/fsm/tests/test_api_example.py index 39826889662a..c705a371e884 100644 --- a/label_studio/fsm/tests/test_api_example.py +++ b/label_studio/fsm/tests/test_api_example.py @@ -34,8 +34,6 @@ class APIIntegrationExampleTests(TestCase): """ def setUp(self): - from copy import deepcopy - self.mock_entity = Mock() self.mock_entity.pk = 1 self.mock_entity._meta.model_name = 'task' @@ -45,14 +43,9 @@ def setUp(self): self.mock_user.id = 42 self.mock_user.username = 'api_user' - # Save registry state and clear for this test - self._original_transitions = deepcopy(transition_registry._transitions) + # Clear registry transition_registry._transitions.clear() - def tearDown(self): - # Restore original transition registry to prevent test leakage - transition_registry._transitions = self._original_transitions - def test_rest_api_task_assignment_example(self): """ API EXAMPLE: REST endpoint for task assignment diff --git a/label_studio/fsm/tests/test_fsm_integration.py b/label_studio/fsm/tests/test_fsm_integration.py index 3af49f9917b6..3c523c3d58ff 100644 --- a/label_studio/fsm/tests/test_fsm_integration.py +++ b/label_studio/fsm/tests/test_fsm_integration.py @@ -4,12 +4,13 @@ and API endpoints. """ -from datetime import datetime +from datetime import datetime, timedelta, timezone from unittest.mock import patch +import pytest from django.test import TestCase +from fsm.models import AnnotationState, ProjectState, TaskState from fsm.state_manager import get_state_manager -from fsm.state_models import AnnotationState, ProjectState, TaskState from projects.tests.factories import ProjectFactory from tasks.tests.factories import AnnotationFactory, TaskFactory from users.tests.factories import UserFactory @@ -105,17 +106,8 @@ class TestStateManager(TestCase): """Test StateManager functionality with mocked transaction support""" def setUp(self): - from core.current_request import CurrentContext - self.user = UserFactory(email='test@example.com') - - # Set up CurrentContext BEFORE creating entities that need FSM - CurrentContext.set_user(self.user) - self.project = ProjectFactory(created_by=self.user) - if hasattr(self.project, 'organization') and self.project.organization: - CurrentContext.set_organization_id(self.project.organization.id) - self.task = TaskFactory(project=self.project, data={'text': 'test'}) self.StateManager = get_state_manager() @@ -125,32 +117,29 @@ def setUp(self): cache.clear() # Ensure registry is properly initialized for TaskState + from fsm.models import TaskState from fsm.registry import state_model_registry - from fsm.state_models import TaskState if not state_model_registry.get_model('task'): state_model_registry.register_model('task', TaskState) - def tearDown(self): - from core.current_request import CurrentContext - - CurrentContext.clear() - def test_get_current_state_empty(self): - """Test getting current state when task is created""" - # With FsmHistoryStateModel, tasks automatically get a state on creation + """Test getting current state when no states exist""" current_state = self.StateManager.get_current_state_value(self.task) - assert current_state == 'CREATED' # FsmHistoryStateModel auto-creates state + assert current_state is None - @patch('fsm.state_manager.flag_set') - def test_transition_state(self, mock_flag_set): - """Test state transition functionality with immediate cache updates""" + @patch('django.db.transaction.on_commit') + def test_transition_state(self, mock_on_commit): + """Test state transition functionality with mocked transaction.on_commit""" from django.core.cache import cache cache.clear() - # Enable FSM feature flag - mock_flag_set.return_value = True + # Mock transaction.on_commit to immediately execute the callback + def execute_callback(callback): + callback() + + mock_on_commit.side_effect = execute_callback # Initial transition success = self.StateManager.transition_state( @@ -162,8 +151,10 @@ def test_transition_state(self, mock_flag_set): ) assert success + # Verify transaction.on_commit was called once for cache update + assert mock_on_commit.call_count == 1 - # Check current state - should work with immediate cache update + # Check current state - should work with mocked cache update current_state = self.StateManager.get_current_state_value(self.task) assert current_state == 'CREATED' @@ -177,19 +168,24 @@ def test_transition_state(self, mock_flag_set): ) assert success + # Verify transaction.on_commit was called again (total 2 times) + assert mock_on_commit.call_count == 2 current_state = self.StateManager.get_current_state_value(self.task) assert current_state == 'IN_PROGRESS' - @patch('fsm.state_manager.flag_set') - def test_get_current_state_object(self, mock_flag_set): + @patch('django.db.transaction.on_commit') + def test_get_current_state_object(self, mock_on_commit): """Test getting current state object with full details""" from django.core.cache import cache cache.clear() - # Enable FSM feature flag - mock_flag_set.return_value = True + # Mock transaction.on_commit to immediately execute the callback + def execute_callback(callback): + callback() + + mock_on_commit.side_effect = execute_callback # Create some state transitions self.StateManager.transition_state(entity=self.task, new_state='CREATED', user=self.user) @@ -197,6 +193,9 @@ def test_get_current_state_object(self, mock_flag_set): entity=self.task, new_state='IN_PROGRESS', user=self.user, context={'test': 'data'} ) + # Verify transaction.on_commit was called twice (once per transition) + assert mock_on_commit.call_count == 2 + current_state_obj = self.StateManager.get_current_state_object(self.task) assert current_state_obj is not None @@ -205,15 +204,18 @@ def test_get_current_state_object(self, mock_flag_set): assert current_state_obj.triggered_by == self.user assert current_state_obj.context_data == {'test': 'data'} - @patch('fsm.state_manager.flag_set') - def test_get_state_history(self, mock_flag_set): + @patch('django.db.transaction.on_commit') + def test_get_state_history(self, mock_on_commit): """Test state history retrieval""" from django.core.cache import cache cache.clear() - # Enable FSM feature flag - mock_flag_set.return_value = True + # Mock transaction.on_commit to immediately execute the callback + def execute_callback(callback): + callback() + + mock_on_commit.side_effect = execute_callback transitions = [('CREATED', 'create_task'), ('IN_PROGRESS', 'start_work'), ('COMPLETED', 'finish_work')] @@ -222,6 +224,9 @@ def test_get_state_history(self, mock_flag_set): entity=self.task, new_state=state, user=self.user, transition_name=transition ) + # Verify transaction.on_commit was called 3 times (once per transition) + assert mock_on_commit.call_count == 3 + history = self.StateManager.get_state_history(self.task, limit=10) # Should have 3 state records @@ -236,35 +241,52 @@ def test_get_state_history(self, mock_flag_set): assert history[1].previous_state == 'CREATED' assert history[0].previous_state == 'IN_PROGRESS' - @patch('fsm.state_manager.flag_set') - def test_get_states_in_time_range(self, mock_flag_set): - """Test time-based state queries using StateManager""" + @patch('django.db.transaction.on_commit') + def test_get_states_in_time_range(self, mock_on_commit): + """Test time-based state queries using UUID7""" from django.core.cache import cache cache.clear() - # Enable FSM feature flag - mock_flag_set.return_value = True + # Mock transaction.on_commit to immediately execute the callback + def execute_callback(callback): + callback() + + mock_on_commit.side_effect = execute_callback + + before_time = datetime.now(timezone.utc) - timedelta(seconds=1) # Create some states self.StateManager.transition_state(entity=self.task, new_state='CREATED', user=self.user) self.StateManager.transition_state(entity=self.task, new_state='IN_PROGRESS', user=self.user) - # Get state history (which gives us states ordered by time) - states = self.StateManager.get_state_history(self.task) + # Verify transaction.on_commit was called twice (once per transition) + assert mock_on_commit.call_count == 2 + + # Record time after creating states + after_time = datetime.now(timezone.utc) + timedelta(seconds=1) - # Should have at least the states we created - assert len(states) >= 2 + # Query states in time range + states_in_range = self.StateManager.get_states_in_time_range(self.task, before_time, after_time) - @patch('fsm.state_manager.flag_set') - def test_immediate_cache_update_success_case(self, mock_flag_set): - """Test that cache is updated immediately on successful transitions""" + # Should find both states + assert len(states_in_range) == 2 + + @patch('django.db.transaction.on_commit') + def test_transaction_on_commit_success_case(self, mock_on_commit): + """Test that transaction.on_commit is called exactly once per successful transition""" from django.core.cache import cache cache.clear() - # Enable FSM feature flag - mock_flag_set.return_value = True + # Track callback executions + callbacks_executed = [] + + def track_and_execute(callback): + callbacks_executed.append(callback) + callback() # Execute the callback + + mock_on_commit.side_effect = track_and_execute # Perform a successful transition success = self.StateManager.transition_state( @@ -275,8 +297,12 @@ def test_immediate_cache_update_success_case(self, mock_flag_set): reason='Initial task creation', ) - # Verify success and immediate cache update + # Verify success and transaction.on_commit was called assert success + assert mock_on_commit.call_count == 1 + assert len(callbacks_executed) == 1 + + # Verify the cache was properly updated by executing the callback current_state = self.StateManager.get_current_state_value(self.task) assert current_state == 'CREATED' @@ -289,62 +315,78 @@ def test_immediate_cache_update_success_case(self, mock_flag_set): ) assert success + assert mock_on_commit.call_count == 2 + assert len(callbacks_executed) == 2 + current_state = self.StateManager.get_current_state_value(self.task) assert current_state == 'IN_PROGRESS' - @patch('fsm.state_manager.flag_set') - def test_transaction_on_commit_success_case(self, mock_flag_set): - """Test successful state transitions""" + @patch('django.db.transaction.on_commit') + @patch('fsm.state_manager.get_state_model_for_entity') + def test_transaction_on_commit_failure_case(self, mock_get_state_model, mock_on_commit): + """Test that transaction.on_commit is NOT called when transition fails""" from django.core.cache import cache cache.clear() - # Enable FSM feature flag - mock_flag_set.return_value = True + # Mock get_state_model_for_entity to return None (no state model found) + mock_get_state_model.return_value = None - # Perform a successful transition - success = self.StateManager.transition_state( - entity=self.task, - new_state='CREATED', - user=self.user, - transition_name='create_task', - ) + # Attempt a transition that should fail due to missing state model + with pytest.raises(Exception): # Should raise StateManagerError + self.StateManager.transition_state( + entity=self.task, + new_state='CREATED', + user=self.user, + transition_name='create_task', + reason='This should fail', + ) - # Verify transition succeeded - assert success - assert self.StateManager.get_current_state_value(self.task) == 'CREATED' + # Verify transaction.on_commit was NOT called since transition failed + assert mock_on_commit.call_count == 0 - @patch('fsm.state_manager.flag_set') - def test_transaction_on_commit_database_failure_case(self, mock_flag_set): - """Test state transitions work correctly""" + # Verify cache was not updated (should raise exception) + with pytest.raises(Exception): # Should raise StateManagerError + self.StateManager.get_current_state_value(self.task) + + @patch('django.db.transaction.on_commit') + @patch('fsm.models.TaskState.objects.create') + def test_transaction_on_commit_database_failure_case(self, mock_create, mock_on_commit): + """Test that transaction.on_commit is NOT called when database operation fails""" from django.core.cache import cache cache.clear() - # Enable FSM feature flag - mock_flag_set.return_value = True + # Mock database create operation to fail + mock_create.side_effect = Exception('Database constraint violation') - # Perform a transition - self.StateManager.transition_state( - entity=self.task, - new_state='CREATED', - user=self.user, - transition_name='create_task', - ) + # Attempt a transition that should fail due to database error + with pytest.raises(Exception): # Should raise StateManagerError + self.StateManager.transition_state( + entity=self.task, + new_state='CREATED', + user=self.user, + transition_name='create_task', + reason='This should fail in DB', + ) - # Verify state was set correctly + # Verify transaction.on_commit was NOT called since transaction failed + assert mock_on_commit.call_count == 0 + + # Verify cache was deleted due to failure (cache.delete should be called) current_state = self.StateManager.get_current_state_value(self.task) - assert current_state == 'CREATED' + assert current_state is None - @patch('fsm.state_manager.flag_set') - def test_immediate_cache_update_content(self, mock_flag_set): - """Test that cache is immediately updated during transition""" + @patch('django.db.transaction.on_commit') + def test_transaction_on_commit_callback_content(self, mock_on_commit): + """Test that the transaction.on_commit callback properly updates the cache""" from django.core.cache import cache cache.clear() - # Enable FSM feature flag - mock_flag_set.return_value = True + # Capture the callback without executing it + captured_callbacks = [] + mock_on_commit.side_effect = lambda callback: captured_callbacks.append(callback) # Perform a transition success = self.StateManager.transition_state( @@ -354,81 +396,20 @@ def test_immediate_cache_update_content(self, mock_flag_set): ) assert success + assert len(captured_callbacks) == 1 - # Cache should be immediately updated during transition + # Before executing callback, cache should be empty cache_key = self.StateManager.get_cache_key(self.task) cached_state = cache.get(cache_key) - assert cached_state == 'CREATED' - - # Verify get_current_state_value uses the cached value - current_state = self.StateManager.get_current_state_value(self.task) - assert current_state == 'CREATED' + assert cached_state is None - @patch('fsm.state_manager.flag_set') - def test_cache_cleanup_on_transaction_rollback(self, mock_flag_set): - """Test cache behavior with state transitions""" - from django.core.cache import cache + # Execute the callback manually + captured_callbacks[0]() - cache.clear() - - # Enable FSM feature flag - mock_flag_set.return_value = True - - # Create a state transition - self.StateManager.transition_state( - entity=self.task, - new_state='CREATED', - user=self.user, - transition_name='create_task', - reason='Test transition', - ) - - # Verify cache contains the state - cache_key = self.StateManager.get_cache_key(self.task) + # After callback execution, cache should be updated cached_state = cache.get(cache_key) assert cached_state == 'CREATED' - @patch('fsm.state_manager.flag_set') - def test_same_state_transition_prevention(self, mock_flag_set): - """Test that same-state transitions are prevented""" - from django.core.cache import cache - - cache.clear() - - # Enable FSM feature flag - mock_flag_set.return_value = True - - # Create initial state - success = self.StateManager.transition_state( - entity=self.task, - new_state='CREATED', - user=self.user, - ) - assert success - - # Verify initial state is set - current_state = self.StateManager.get_current_state_value(self.task) - assert current_state == 'CREATED' - - # Get initial state count - from fsm.state_models import TaskState - - initial_count = TaskState.objects.filter(task=self.task).count() - assert initial_count == 1 - - # Attempt same-state transition (should be skipped) - success = self.StateManager.transition_state( - entity=self.task, - new_state='CREATED', # Same state as current - user=self.user, - reason='This should be skipped', - ) - assert success # Returns True but doesn't create new record - - # Verify no new state record was created - final_count = TaskState.objects.filter(task=self.task).count() - assert final_count == initial_count # Should still be 1 - - # Verify state is still CREATED + # Verify get_current_state_value uses the cached value current_state = self.StateManager.get_current_state_value(self.task) assert current_state == 'CREATED' diff --git a/label_studio/fsm/tests/test_integration_django_models.py b/label_studio/fsm/tests/test_integration_django_models.py index 24818dba47a2..2b768126b36c 100644 --- a/label_studio/fsm/tests/test_integration_django_models.py +++ b/label_studio/fsm/tests/test_integration_django_models.py @@ -4,19 +4,16 @@ Django models and the StateManager, providing realistic usage examples. """ -from datetime import datetime, timezone +from datetime import datetime from typing import Any, Dict from unittest.mock import Mock, patch -from core.current_request import CurrentContext from django.contrib.auth import get_user_model from django.test import TestCase -from fsm.registry import register_state_transition +from fsm.models import TaskState +from fsm.registry import register_state_transition, transition_registry from fsm.state_choices import AnnotationStateChoices, TaskStateChoices -from fsm.state_models import TaskState from fsm.transitions import BaseTransition, TransitionContext, TransitionValidationError -from organizations.models import Organization -from projects.models import Project from pydantic import Field @@ -76,6 +73,8 @@ def setUp(self): self.user.id = 123 self.user.username = 'integration_test_user' + transition_registry.clear() + @patch('fsm.registry.get_state_model_for_entity') @patch('fsm.state_manager.StateManager.get_current_state_object') @patch('fsm.state_manager.StateManager.transition_state') @@ -669,50 +668,3 @@ def transition(self, context: TransitionContext) -> Dict[str, Any]: valid_transition.validate_transition(context_no_user) assert 'User authentication required' in str(cm.value) - - -class TestBaseStatePropertiesCoverage(TestCase): - """Test coverage for BaseState model properties and methods""" - - def setUp(self): - """Set up test fixtures""" - self.user = User.objects.create(email='test_coverage@example.com') - self.org = Organization.objects.create(title='Test Org Coverage', created_by=self.user) - - # Set CurrentContext BEFORE creating entities that need FSM - CurrentContext.set_user(self.user) - CurrentContext.set_organization_id(self.org.id) - - self.project = Project.objects.create( - title='Test Project Coverage', created_by=self.user, organization=self.org - ) - - def tearDown(self): - """Clean up after tests""" - CurrentContext.clear() - - def test_base_state_entity_property(self): - """Test BaseState.entity property retrieves related entity""" - from fsm.state_models import ProjectState - - # Get the auto-created state - state_record = ProjectState.objects.filter(project=self.project).first() - assert state_record is not None - - # Test entity property - retrieved_entity = state_record.entity - assert retrieved_entity.id == self.project.id - - def test_base_state_timestamp_from_uuid(self): - """Test BaseState.timestamp_from_uuid property extracts timestamp from UUID7""" - from fsm.state_models import ProjectState - - before = datetime.now(timezone.utc) - state_record = ProjectState.objects.filter(project=self.project).first() - datetime.now(timezone.utc) - - # Test timestamp extraction - timestamp = state_record.timestamp_from_uuid - assert isinstance(timestamp, datetime) - # Timestamp should be within reasonable range - assert timestamp.year == before.year diff --git a/label_studio/fsm/tests/test_lso_integration.py b/label_studio/fsm/tests/test_lso_integration.py deleted file mode 100644 index c349f8843eb6..000000000000 --- a/label_studio/fsm/tests/test_lso_integration.py +++ /dev/null @@ -1,555 +0,0 @@ -""" -Label Studio Open Source FSM Integration Tests. - -Tests the core FSM functionality with real Django models in LSO, -focusing on coverage of state_manager.py and utils modules. -""" - -import logging -from datetime import datetime, timezone -from unittest.mock import patch - -import pytest -from core.current_request import CurrentContext -from django.contrib.auth import get_user_model -from django.core.cache import cache -from fsm.state_choices import AnnotationStateChoices, ProjectStateChoices, TaskStateChoices -from fsm.state_manager import StateManager, StateManagerError -from fsm.state_models import AnnotationState, ProjectState, TaskState -from fsm.utils import get_current_state_safe, is_fsm_enabled, resolve_organization_id -from organizations.tests.factories import OrganizationFactory -from projects.tests.factories import ProjectFactory -from tasks.models import Annotation -from tasks.tests.factories import AnnotationFactory, TaskFactory -from users.tests.factories import UserFactory - -User = get_user_model() -logger = logging.getLogger(__name__) - - -@pytest.mark.django_db -class TestLSOFSMIntegration: - """ - Test LSO FSM integration with real models. - - Focuses on improving coverage of state_manager.py and utils.py - by testing error paths, cache behavior, and bulk operations. - """ - - @pytest.fixture(autouse=True) - def setup_test_data(self): - """Set up test data.""" - cache.clear() - self.org = OrganizationFactory() - self.user = UserFactory() - CurrentContext.set_user(self.user) - CurrentContext.set_organization_id(self.org.id) - yield - cache.clear() - CurrentContext.clear() - - def test_project_creation_generates_state(self): - """ - Test that creating a project automatically generates a state record. - - Validates: - - Project model extends FsmHistoryStateModel - - Automatic state transition on creation - - State is CREATED for new projects - """ - project = ProjectFactory(organization=self.org) - - # Check state was created - state = StateManager.get_current_state_value(project) - assert state == ProjectStateChoices.CREATED, f'Expected CREATED, got {state}' - - # Check state history exists - history = list(ProjectState.objects.filter(project=project).order_by('created_at')) - assert len(history) == 1 - assert history[0].state == ProjectStateChoices.CREATED - assert history[0].transition_name == 'project_created' - - def test_task_creation_generates_state(self): - """ - Test that creating a task automatically generates a state record. - - Validates: - - Task model extends FsmHistoryStateModel - - Automatic state transition on creation - - State is CREATED for new tasks - """ - project = ProjectFactory(organization=self.org) - task = TaskFactory(project=project) - - # Check state was created - state = StateManager.get_current_state_value(task) - assert state == TaskStateChoices.CREATED, f'Expected CREATED, got {state}' - - # Check state history exists - history = list(TaskState.objects.filter(task=task).order_by('created_at')) - assert len(history) == 1 - assert history[0].state == TaskStateChoices.CREATED - - def test_annotation_creation_generates_state(self): - """ - Test that creating an annotation automatically generates a state record. - - Validates: - - Annotation model extends FsmHistoryStateModel - - Automatic state transition on creation - - State is SUBMITTED for new annotations in LSO - """ - project = ProjectFactory(organization=self.org) - task = TaskFactory(project=project) - annotation = AnnotationFactory(task=task, completed_by=self.user) - - # Check state was created - state = StateManager.get_current_state_value(annotation) - assert state == AnnotationStateChoices.SUBMITTED, f'Expected SUBMITTED, got {state}' - - # Check state history exists - history = list(AnnotationState.objects.filter(annotation=annotation).order_by('created_at')) - assert len(history) >= 1 - assert history[0].state == AnnotationStateChoices.SUBMITTED - - def test_cache_functionality(self): - """ - Test that StateManager caching works correctly. - - Validates: - - State retrieval works consistently - - Cache doesn't cause incorrect state returns - - Multiple accesses return same state - """ - project = ProjectFactory(organization=self.org) - - # First access - cache.clear() - state1 = StateManager.get_current_state_value(project) - assert state1 == ProjectStateChoices.CREATED - - # Second access - should return same state (whether from cache or DB) - state2 = StateManager.get_current_state_value(project) - assert state2 == ProjectStateChoices.CREATED - - # States should match - assert state1 == state2 - - def test_get_current_state_safe_with_no_state(self): - """ - Test get_current_state_safe returns None for entities without states. - - Validates: - - Utility function handles entities with no state records - - No exceptions raised - - Returns None gracefully - """ - # Create a project but delete its state records - project = ProjectFactory(organization=self.org) - ProjectState.objects.filter(project=project).delete() - cache.clear() - - # Should return None, not raise - state = get_current_state_safe(project) - assert state is None - - def test_resolve_organization_id_from_entity(self): - """ - Test resolve_organization_id utility function. - - Validates: - - Extracts organization_id from entity with direct attribute - - Extracts organization_id from entity with project relation - - Falls back to CurrentContext - """ - project = ProjectFactory(organization=self.org) - task = TaskFactory(project=project) - - # Test direct attribute - org_id = resolve_organization_id(project) - assert org_id == self.org.id - - # Test via project relation - org_id = resolve_organization_id(task) - assert org_id == self.org.id - - def test_is_fsm_enabled_in_lso(self): - """ - Test is_fsm_enabled checks feature flag correctly. - - Validates: - - Feature flag check works - - Returns True when enabled - """ - result = is_fsm_enabled(user=self.user) - assert result is True - - def test_state_manager_error_handling(self): - """ - Test StateManager error handling for invalid entities. - - Validates: - - Proper error raised for entities without state model - - Error includes helpful message - """ - # Create a mock entity that doesn't have a state model - from unittest.mock import Mock - - mock_entity = Mock() - mock_entity._meta = Mock() - mock_entity._meta.model_name = 'nonexistent_model' - mock_entity._meta.label_lower = 'test.nonexistent' - mock_entity.pk = 1 - - # Should raise StateManagerError - with pytest.raises(StateManagerError) as exc_info: - StateManager.get_current_state_value(mock_entity) - - assert 'No state model found' in str(exc_info.value) - - def test_warm_cache_bulk_operation(self): - """ - Test bulk cache warming for multiple entities. - - Validates: - - Warm cache operation works for multiple entities - - Subsequent state retrievals are faster (cached) - - Correct states for all entities - """ - project = ProjectFactory(organization=self.org) - tasks = [TaskFactory(project=project) for _ in range(5)] - - # Warm cache for all tasks - cache.clear() - StateManager.warm_cache(tasks) - - # Verify all states can be retrieved and are correct - for task in tasks: - state = StateManager.get_current_state_value(task) - assert state == TaskStateChoices.CREATED - - def test_get_state_history(self): - """ - Test retrieving state history for an entity. - - Validates: - - History retrieval works - - States are in chronological order - - All transition metadata captured - """ - project = ProjectFactory(organization=self.org) - - # Get history - history = StateManager.get_state_history(project) - - # Should have at least creation state - assert len(history) >= 1 - assert history[0].state == ProjectStateChoices.CREATED - assert history[0].transition_name == 'project_created' - - def test_get_state_history_ordering(self): - """ - Test that state history is returned in chronological order. - - Validates: - - History is ordered by creation time - - Oldest states appear first - - All state records are included - """ - project = ProjectFactory(organization=self.org) - - # Get history - history = StateManager.get_state_history(project) - - # Should have at least one state (creation) - assert len(history) >= 1 - - # Verify ordering - each timestamp should be >= the previous - timestamps = [h.created_at for h in history] - assert timestamps == sorted(timestamps) - - def test_annotation_from_draft_workflow(self): - """ - Test annotation created from draft has correct state. - - Validates: - - Draft-based annotation workflow - - Correct transition triggered - - State is SUBMITTED in LSO - """ - project = ProjectFactory(organization=self.org) - task = TaskFactory(project=project) - - # Create annotation with draft flag - annotation = Annotation( - task=task, - completed_by=self.user, - result=[{'test': 'data'}], - was_cancelled=False, - ) - annotation.save() - - # Should be in SUBMITTED state - state = StateManager.get_current_state_value(annotation) - assert state == AnnotationStateChoices.SUBMITTED - - def test_state_manager_with_multiple_transitions(self): - """ - Test that multiple state transitions are recorded correctly. - - Validates: - - Multiple transitions create multiple records - - History ordering is correct - - Each transition has correct metadata - """ - project = ProjectFactory(organization=self.org) - - # Create multiple state changes by updating project - StateManager.get_current_state_value(project) - - # Update project settings (should create a state record in LSE, but not in LSO) - project.maximum_annotations = 5 - project.save() - - # Get history - history = list(ProjectState.objects.filter(project=project).order_by('created_at')) - - # In LSO, settings changes don't create new state records - # so we should still have just the creation record - assert len(history) == 1 - assert history[0].state == ProjectStateChoices.CREATED - - -@pytest.mark.django_db -class TestLSOFSMUtilities: - """ - Test LSO FSM utility functions. - - Focuses on improving coverage of utils.py module. - """ - - @pytest.fixture(autouse=True) - def setup_test_data(self): - """Set up test data.""" - cache.clear() - self.org = OrganizationFactory() - self.user = UserFactory() - CurrentContext.set_user(self.user) - CurrentContext.set_organization_id(self.org.id) - yield - cache.clear() - CurrentContext.clear() - - def test_resolve_organization_id_with_user(self): - """ - Test resolve_organization_id with user parameter. - - Validates: - - User parameter takes precedence - - Correct organization extracted - """ - project = ProjectFactory(organization=self.org) - - org_id = resolve_organization_id(project, user=self.user) - assert org_id is not None - - def test_resolve_organization_id_fallback_to_context(self): - """ - Test resolve_organization_id falls back to CurrentContext. - - Validates: - - CurrentContext used when no other source available - - Correct ID returned - """ - CurrentContext.set_organization_id(self.org.id) - - # Create entity without organization_id attribute - from unittest.mock import Mock - - mock_entity = Mock(spec=[]) # No attributes - - org_id = resolve_organization_id(mock_entity) - assert org_id == self.org.id - - def test_get_current_state_safe_with_state(self): - """ - Test get_current_state_safe returns correct state value. - - Validates: - - Function returns state string (not None) - - State value is correct - - Works correctly with database query - """ - project = ProjectFactory(organization=self.org) - cache.clear() - - # Should return state value string, not None - state_value = get_current_state_safe(project) - assert state_value is not None - assert state_value == ProjectStateChoices.CREATED - - def test_state_manager_handles_concurrent_access(self): - """ - Test StateManager handles concurrent access correctly. - - Validates: - - No race conditions in state retrieval - - Cache consistency - - Multiple simultaneous requests work correctly - """ - project = ProjectFactory(organization=self.org) - - # Simulate multiple concurrent accesses - states = [StateManager.get_current_state_value(project) for _ in range(10)] - - # All should return the same state - assert all(s == ProjectStateChoices.CREATED for s in states) - - def test_get_current_state_value_when_fsm_disabled(self): - """ - Test get_current_state_value returns None when FSM is disabled. - - Validates: - - Returns None instead of raising when feature flag is off - - Handles disabled state gracefully - """ - project = ProjectFactory(organization=self.org) - - with patch.object(StateManager, '_is_fsm_enabled', return_value=False): - result = StateManager.get_current_state_value(project) - assert result is None - - def test_get_states_in_time_range(self): - """ - Test get_states_in_time_range for time-based queries. - - Validates: - - UUID7-based time range queries work - - Returns states within specified time range - """ - from datetime import timedelta - - project = ProjectFactory(organization=self.org) - - # Get states from the last day - start_time = datetime.now(timezone.utc) - timedelta(days=1) - end_time = datetime.now(timezone.utc) - - states = StateManager.get_states_in_time_range(project, start_time, end_time) - - # Should have at least the creation state - assert len(states) >= 1 - - def test_get_state_history_with_limit(self): - """ - Test get_state_history with limit parameter. - - Validates: - - Limit parameter restricts number of results - - Ordering is correct (most recent first) - """ - project = ProjectFactory(organization=self.org) - - # Get history with limit - history = StateManager.get_state_history(project, limit=10) - - # Should have at least the initial creation state - assert len(history) >= 1 - assert history[0].state == ProjectStateChoices.CREATED - - def test_invalidate_cache(self): - """ - Test cache invalidation for entity state. - - Validates: - - Cache is cleared for specific entity - - Subsequent lookups hit database - """ - project = ProjectFactory(organization=self.org) - - # Get state to populate cache - state = StateManager.get_current_state_value(project) - assert state == ProjectStateChoices.CREATED - - # Invalidate cache - StateManager.invalidate_cache(project) - - # Next lookup should work (will hit DB) - state_after = StateManager.get_current_state_value(project) - assert state_after == ProjectStateChoices.CREATED - - def test_get_current_state_object(self): - """ - Test get_current_state_object returns full state record. - - Validates: - - Returns BaseState instance with full audit information - - Contains all expected fields - """ - project = ProjectFactory(organization=self.org) - - # Get current state object - state_object = StateManager.get_current_state_object(project) - - assert state_object is not None - assert state_object.state == ProjectStateChoices.CREATED - assert hasattr(state_object, 'triggered_by') - assert hasattr(state_object, 'transition_name') - - def test_transition_state_fsm_disabled(self): - """ - Test transition_state returns True when FSM is disabled. - - Validates: - - Returns True without creating state record - - Handles disabled state gracefully - """ - project = ProjectFactory(organization=self.org) - - with patch.object(StateManager, '_is_fsm_enabled', return_value=False): - result = StateManager.transition_state( - entity=project, new_state='NEW_STATE', transition_name='test', user=self.user - ) - assert result is True - - def test_warm_cache_multiple_entities(self): - """ - Test warm_cache with multiple entities for bulk operations. - - Validates: - - Cache is populated for all entities - - Subsequent get_current_state_value calls are fast (from cache) - """ - projects = [ProjectFactory(organization=self.org) for _ in range(3)] - - # Warm cache for all projects - StateManager.warm_cache(projects) - - # Verify all are cached (should not hit DB) - for project in projects: - state = StateManager.get_current_state_value(project) - assert state == ProjectStateChoices.CREATED - - def test_fsm_disabled_via_current_context(self): - """ - Test CurrentContext.set_fsm_disabled() directly. - - Validates: - - Can disable FSM via CurrentContext - - is_fsm_enabled() respects the flag - - State is properly restored - """ - from core.current_request import CurrentContext - - # Initially enabled - assert is_fsm_enabled() is True - - # Disable FSM - CurrentContext.set_fsm_disabled(True) - assert is_fsm_enabled() is False - assert CurrentContext.is_fsm_disabled() is True - - # Re-enable FSM - CurrentContext.set_fsm_disabled(False) - assert is_fsm_enabled() is True - assert CurrentContext.is_fsm_disabled() is False diff --git a/label_studio/fsm/tests/test_registry.py b/label_studio/fsm/tests/test_registry.py index ee42c9ab289c..a2d498ffae2c 100644 --- a/label_studio/fsm/tests/test_registry.py +++ b/label_studio/fsm/tests/test_registry.py @@ -12,6 +12,7 @@ from fsm.registry import ( register_state_model, register_state_transition, + state_choices_registry, state_model_registry, transition_registry, ) @@ -34,6 +35,11 @@ class RegistryTests(TestCase): """Tests for registry functionality and edge cases""" def setUp(self): + # Clear registries to ensure clean state + state_choices_registry.clear() + state_model_registry.clear() + transition_registry.clear() + self.entity = MockEntity() def test_registry_state_model_with_denormalizer(self): diff --git a/label_studio/fsm/tests/test_transitions.py b/label_studio/fsm/tests/test_transitions.py index fa216f80ba9a..a8677896cc89 100644 --- a/label_studio/fsm/tests/test_transitions.py +++ b/label_studio/fsm/tests/test_transitions.py @@ -78,6 +78,13 @@ def setUp(self): self.user = User.objects.create_user(email='test@example.com', password='test123') self.mock_entity = MockEntity() + # Clear registry to avoid test pollution + transition_registry.clear() + + def tearDown(self): + """Clean up after tests""" + transition_registry.clear() + def test_base_transition_class(self): """Test BaseTransition abstract functionality""" @@ -328,6 +335,13 @@ def setUp(self): self.user = User.objects.create_user(email='test@example.com', password='test123') self.mock_entity = MockEntity() + # Clear registry to avoid test pollution + transition_registry.clear() + + def tearDown(self): + """Clean up after tests""" + transition_registry.clear() + def test_get_available_transitions(self): """Test get_available_transitions utility""" @@ -471,6 +485,13 @@ def setUp(self): self.user.id = 123 self.user.username = 'testuser' + # Clear registry to avoid conflicts + transition_registry.clear() + + def tearDown(self): + """Clean up after tests""" + transition_registry.clear() + def test_basic_transition_implementation(self): """ USAGE EXAMPLE: Basic transition implementation @@ -652,6 +673,11 @@ class ValidationAndErrorHandlingTests(TestCase): def setUp(self): self.task = MockTask() + transition_registry.clear() + + def tearDown(self): + """Clean up after tests""" + transition_registry.clear() def test_pydantic_validation_errors(self): """Test Pydantic field validation errors""" diff --git a/label_studio/fsm/tests/test_utils.py b/label_studio/fsm/tests/test_utils.py index dfbcc76fa2b3..7a2e4f798621 100644 --- a/label_studio/fsm/tests/test_utils.py +++ b/label_studio/fsm/tests/test_utils.py @@ -199,6 +199,13 @@ class TransitionUtilsTests(TestCase): """Tests for transition_utils module edge cases and error handling""" def setUp(self): + # Clear registries to ensure clean state + from fsm.registry import state_choices_registry, state_model_registry + + state_choices_registry.clear() + state_model_registry.clear() + transition_registry.clear() + self.entity = MockEntity() def test_transition_utils_unexpected_validation_error(self): @@ -361,178 +368,3 @@ def transition(self, context): flows = get_entity_state_flow(self.entity) # Should not include the transition that requires fields assert not any(f['transition_name'] == 'required_transition' for f in flows) - - -class TestUUID7FieldCoverage(TestCase): - """Test coverage for UUID7Field utility methods""" - - def test_get_latest_by_uuid7(self): - """Test UUID7Field.get_latest_by_uuid7 utility method""" - from fsm.utils import UUID7Field - - mock_first = Mock() - mock_queryset = Mock() - mock_queryset.order_by.return_value.first.return_value = mock_first - - result = UUID7Field.get_latest_by_uuid7(mock_queryset) - - mock_queryset.order_by.assert_called_once_with('-id') - assert result == mock_first - - def test_filter_by_time_range(self): - """Test UUID7Field.filter_by_time_range utility method""" - from fsm.utils import UUID7Field - - start_time = datetime(2024, 1, 1, tzinfo=timezone.utc) - end_time = datetime(2024, 1, 2, tzinfo=timezone.utc) - - mock_filtered = Mock() - mock_queryset = Mock() - mock_queryset.filter.return_value = mock_filtered - - result = UUID7Field.filter_by_time_range(mock_queryset, start_time, end_time) - - assert mock_queryset.filter.called - assert result == mock_filtered - - def test_filter_since_time(self): - """Test UUID7Field.filter_since_time utility method""" - from fsm.utils import UUID7Field - - since_time = datetime(2024, 1, 1, tzinfo=timezone.utc) - - mock_filtered = Mock() - mock_queryset = Mock() - mock_queryset.filter.return_value = mock_filtered - - result = UUID7Field.filter_since_time(mock_queryset, since_time) - - assert mock_queryset.filter.called - assert result == mock_filtered - - -class TestResolveOrganizationIdCoverage(TestCase): - """Test coverage for resolve_organization_id edge cases""" - - def setUp(self): - from core.current_request import CurrentContext - - CurrentContext.clear() - - def tearDown(self): - from core.current_request import CurrentContext - - CurrentContext.clear() - - def test_resolve_organization_id_with_none_entity(self): - """Test resolve_organization_id when entity is None""" - from fsm.utils import resolve_organization_id - - result = resolve_organization_id(entity=None, user=None) - assert result is None - - def test_resolve_organization_id_from_context(self): - """Test resolve_organization_id returns cached context value""" - from core.current_request import CurrentContext - from fsm.utils import resolve_organization_id - - CurrentContext.set_organization_id(999) - - # Even with entity, should return context value - mock_entity = Mock() - mock_entity.organization_id = 123 - - result = resolve_organization_id(entity=mock_entity) - assert result == 999 - - def test_resolve_organization_id_from_entity_direct(self): - """Test resolve_organization_id from entity.organization_id""" - from fsm.utils import resolve_organization_id - - mock_entity = Mock() - mock_entity.organization_id = 456 - - result = resolve_organization_id(entity=mock_entity) - assert result == 456 - - def test_resolve_organization_id_from_project_relationship(self): - """Test resolve_organization_id via entity.project.organization_id""" - from fsm.utils import resolve_organization_id - - mock_project = Mock() - mock_project.organization_id = 789 - - mock_entity = Mock() - mock_entity.organization_id = None - mock_entity.project = mock_project - - result = resolve_organization_id(entity=mock_entity) - assert result == 789 - - def test_resolve_organization_id_from_task_project_relationship(self): - """Test resolve_organization_id via entity.task.project.organization_id""" - from fsm.utils import resolve_organization_id - - mock_project = Mock() - mock_project.organization_id = 321 - - mock_task = Mock() - mock_task.project = mock_project - - mock_entity = Mock() - mock_entity.organization_id = None - mock_entity.project = None - mock_entity.task = mock_task - - result = resolve_organization_id(entity=mock_entity) - assert result == 321 - - def test_resolve_organization_id_from_user_active_organization(self): - """Test resolve_organization_id from user.active_organization""" - from fsm.utils import resolve_organization_id - - mock_active_org = Mock() - mock_active_org.id = 654 - - mock_user = Mock() - mock_user.active_organization = mock_active_org - - mock_entity = Mock() - mock_entity.organization_id = None - mock_entity.project = None - mock_entity.task = None - - result = resolve_organization_id(entity=mock_entity, user=mock_user) - assert result == 654 - - def test_resolve_organization_id_caches_result(self): - """Test that resolve_organization_id caches the result in CurrentContext""" - from core.current_request import CurrentContext - from fsm.utils import resolve_organization_id - - mock_entity = Mock() - mock_entity.organization_id = 987 - - result = resolve_organization_id(entity=mock_entity) - assert result == 987 - - # Verify it's cached - cached = CurrentContext.get_organization_id() - assert cached == 987 - - -class TestGetCurrentStateSafeCoverage(TestCase): - """Test coverage for get_current_state_safe error handling""" - - def test_get_current_state_safe_when_fsm_disabled(self): - """Test get_current_state_safe returns None when FSM is disabled""" - from fsm.utils import get_current_state_safe - - mock_entity = Mock() - mock_entity.pk = 1 - mock_entity._meta = Mock() - mock_entity._meta.label_lower = 'test.entity' - - with patch('fsm.utils.is_fsm_enabled', return_value=False): - result = get_current_state_safe(mock_entity) - assert result is None diff --git a/label_studio/fsm/transition_executor.py b/label_studio/fsm/transition_executor.py index 1e3773577a76..ff3a8ed5cf99 100644 --- a/label_studio/fsm/transition_executor.py +++ b/label_studio/fsm/transition_executor.py @@ -10,8 +10,8 @@ from typing import Any, Dict, Type from django.db.models import Model +from fsm.models import BaseState from fsm.registry import get_state_model_for_entity, transition_registry -from fsm.state_models import BaseState from fsm.transitions import TransitionContext logger = logging.getLogger(__name__) @@ -54,38 +54,26 @@ def execute_transition_with_state_manager( if not transition_class: raise ValueError(f"Transition '{transition_name}' not found for entity '{entity_name}'") - # Create transition instance to check if it needs state tracking - transition = transition_class(**transition_data) - - # Check if this is a "side-effect only" transition (no state tracking) - is_side_effect_only = transition.target_state is None + # Get the state model for the entity + state_model = get_state_model_for_entity(entity) + if not state_model: + raise ValueError(f"No state model registered for entity '{entity_name}'") - if is_side_effect_only: - # No state model needed for side-effect only transitions - state_model = None - current_state_object = None - current_state = None - else: - # Get the state model for the entity - state_model = get_state_model_for_entity(entity) - if not state_model: - raise ValueError(f"No state model registered for entity '{entity_name}'") + # Create transition instance with provided data + transition = transition_class(**transition_data) - # Get current state information directly from state model - current_state_object = state_model.get_current_state(entity) - current_state = current_state_object.state if current_state_object else None + # Get current state information directly from state model + current_state_object = state_model.get_current_state(entity) + current_state = current_state_object.state if current_state_object else None # Build transition context - # Extract organization_id from context_kwargs if provided, otherwise use entity's org_id - organization_id = context_kwargs.pop('organization_id', getattr(entity, 'organization_id', None)) - context = TransitionContext( entity=entity, current_user=user, current_state_object=current_state_object, current_state=current_state, target_state=transition.target_state, - organization_id=organization_id, + organization_id=getattr(entity, 'organization_id', None), **context_kwargs, ) @@ -106,24 +94,7 @@ def execute_transition_with_state_manager( # Phase 1: Prepare and validate the transition transition_context_data = transition.prepare_and_validate(context) - # Phase 2: Create the state record via StateManager methods (skip for side-effect only transitions) - if is_side_effect_only: - # For side-effect only transitions, execute hooks without creating state records - logger.info( - 'Executing side-effect only transition', - extra={ - 'event': 'fsm.side_effect_transition', - 'entity_type': entity_name, - 'entity_id': entity.pk, - 'transition_name': transition_name, - }, - ) - transition.post_transition_hook(context, None) - return None - - # Check if this transition forces state record creation (for audit trails) - force_state_record = getattr(transition, '_force_state_record', False) - + # Phase 2: Create the state record via StateManager methods success = state_manager_class.transition_state( entity=entity, new_state=transition.target_state, @@ -131,7 +102,6 @@ def execute_transition_with_state_manager( user=user, context=transition_context_data, reason=transition.get_reason(context), - force_state_record=force_state_record, ) if not success: diff --git a/label_studio/fsm/transitions.py b/label_studio/fsm/transitions.py index 0bbcd0d67ebb..ba9cbb6e2df9 100644 --- a/label_studio/fsm/transitions.py +++ b/label_studio/fsm/transitions.py @@ -10,11 +10,14 @@ from datetime import datetime from typing import TYPE_CHECKING, Any, Dict, Generic, Optional, TypeVar +from django.contrib.auth import get_user_model from django.db.models import Model from pydantic import BaseModel, ConfigDict, Field +User = get_user_model() + if TYPE_CHECKING: - from fsm.state_models import BaseState + from fsm.models import BaseState # Type variables for generic transition context EntityType = TypeVar('EntityType', bound=Model) @@ -132,15 +135,10 @@ def transition_name(self) -> str: """ Name of this transition for audit purposes. - Returns the registered transition name from the decorator, or falls back - to the class name in snake_case. + Defaults to the class name in snake_case. """ - # Use the registered name if available (set by @register_state_transition decorator) - if hasattr(self.__class__, '_transition_name'): - return self.__class__._transition_name - - # Fallback to class name in snake_case for backward compatibility class_name = self.__class__.__name__ + # Convert CamelCase to snake_case result = '' for i, char in enumerate(class_name): if char.isupper() and i > 0: @@ -324,146 +322,3 @@ def finalize(self, context: TransitionContext[EntityType, StateModelType], state finally: # Always clear context when done self.context = None - - -class ModelChangeTransition(BaseTransition, Generic[EntityType, StateModelType]): - """ - Specialized transition class for model-triggered state changes. - - This class extends BaseTransition with additional context about model changes, - making it ideal for transitions triggered by FsmHistoryStateModel.save() operations. - - Features: - - Access to changed fields (old vs new values) - - Knowledge of whether entity is being created or updated - - Automatic integration with FsmHistoryStateModel lifecycle - - Declarative trigger field specification - - Example usage: - @register_state_transition('task', 'task_created', triggers_on_create=True) - class TaskCreatedTransition(ModelChangeTransition[Task, TaskState]): - @property - def target_state(self) -> str: - return 'CREATED' - - def transition(self, context: TransitionContext) -> Dict[str, Any]: - return {'reason': 'Task created'} - - @register_state_transition('task', 'task_labeled', triggers_on=['is_labeled']) - class TaskLabeledTransition(ModelChangeTransition[Task, TaskState]): - @property - def target_state(self) -> str: - return 'ANNOTATION_COMPLETE' - - def transition(self, context: TransitionContext) -> Dict[str, Any]: - return {'reason': 'Task became labeled'} - """ - - # Additional fields specific to model changes - changed_fields: Dict[str, Dict[str, Any]] = Field( - default_factory=dict, description="Fields that changed: {field_name: {'old': value, 'new': value}}" - ) - is_creating: bool = Field(default=False, description='Whether this is a new entity creation') - - # Class-level metadata for trigger configuration (set by decorator) - _triggers_on_create: bool = False - _triggers_on_update: bool = True - _trigger_fields: list = [] # Fields that trigger this transition - - def should_execute(self, context: TransitionContext[EntityType, StateModelType]) -> bool: - """ - Determine if this transition should execute based on model changes. - - Override in subclasses to provide specific logic based on: - - Whether entity is being created (is_creating) - - Which fields changed (changed_fields) - - Current and target states - - Default implementation always returns True. - - Args: - context: The transition context with entity and state information - - Returns: - True if transition should execute, False to skip - - Example: - def should_execute(self, context: TransitionContext) -> bool: - # Only execute if is_labeled changed to True - if 'is_labeled' in self.changed_fields: - old_val = self.changed_fields['is_labeled']['old'] - new_val = self.changed_fields['is_labeled']['new'] - return not old_val and new_val - return False - """ - return True - - def validate_transition(self, context: TransitionContext[EntityType, StateModelType]) -> bool: - """ - Validate whether this transition should execute. - - Extends parent validation with should_execute() check. - - Args: - context: The transition context - - Returns: - True if transition is valid and should execute - - Raises: - TransitionValidationError: If validation fails - """ - # First check parent validation - if not super().validate_transition(context): - return False - - # Then check if we should execute based on model changes - if not self.should_execute(context): - return False - - return True - - @classmethod - def from_model_change( - cls, is_creating: bool, changed_fields: Dict[str, tuple], **extra_data - ) -> 'ModelChangeTransition': - """ - Factory method to create a transition from model change data. - - This is called by FsmHistoryStateModel when a transition needs to be executed. - - Args: - is_creating: Whether the model is being created - changed_fields: Dict of changed fields (field_name -> (old, new)) - **extra_data: Additional data to pass to the transition - - Returns: - Configured transition instance - """ - # Convert changed_fields from tuple format to dict format - converted_fields = { - field_name: {'old': old_val, 'new': new_val} for field_name, (old_val, new_val) in changed_fields.items() - } - - return cls(is_creating=is_creating, changed_fields=converted_fields, **extra_data) - - def get_reason(self, context: TransitionContext[EntityType, StateModelType]) -> str: - """ - Get a human-readable reason for this model change transition. - - Override to provide more specific reasons based on model changes. - - Args: - context: The transition context - - Returns: - Human-readable reason string - """ - if self.is_creating: - return f'{context.entity.__class__.__name__} created' - - if self.changed_fields: - fields = ', '.join(self.changed_fields.keys()) - return f'{context.entity.__class__.__name__} updated ({fields} changed)' - - return f'{context.entity.__class__.__name__} modified' diff --git a/label_studio/fsm/utils.py b/label_studio/fsm/utils.py index 8f3b9f67f992..2aa6ae733a43 100644 --- a/label_studio/fsm/utils.py +++ b/label_studio/fsm/utils.py @@ -1,29 +1,17 @@ """ -FSM utility functions. - -This module provides: -1. UUID7 utilities for time-series optimization (uses uuid-utils library) -2. FSM-specific helper functions for organization resolution and state management +UUID7 utilities for time-series optimization. UUID7 provides natural time ordering and global uniqueness, making it ideal for INSERT-only architectures with millions of records. + +Uses the uuid-utils library for RFC 9562 compliant UUID7 generation. """ -import logging import uuid from datetime import datetime, timezone from typing import Optional, Tuple import uuid_utils -from core.current_request import CurrentContext -from core.feature_flags import flag_set - -logger = logging.getLogger(__name__) - - -# ============================================================================= -# UUID7 Utilities (using uuid-utils library) -# ============================================================================= def generate_uuid7() -> uuid.UUID: @@ -200,308 +188,3 @@ def generate(self, offset_ms: int = 0) -> uuid.UUID: # UUID7 format: timestamp_ms(48) + ver(4) + rand_a(12) + var(2) + rand_b(62) uuid_int = (timestamp_ms << 80) | (0x7 << 76) | ((self._counter & 0xFFF) << 64) | (0b10 << 62) return uuid.UUID(int=uuid_int) - - -# ============================================================================= -# FSM Helper Utilities -# ============================================================================= - - -def resolve_organization_id(entity=None, user=None): - """ - Resolve organization_id using consistent logic without additional queries. - - This provides organization_id resolution for logging and state tracking - without duplicating database queries. - - Args: - entity: The entity to resolve organization_id for - user: Optional user for fallback organization resolution - - Returns: - organization_id or None - """ - # Try context cache first - organization_id = CurrentContext.get_organization_id() - if organization_id: - return organization_id - - # Allow for function calls without entity - if entity is None: - return None - - # Try direct organization_id attribute first - organization_id = getattr(entity, 'organization_id', None) - - # If entity doesn't have direct organization_id, try relationships - if not organization_id: - # For entities with project relationship (most common case) - if hasattr(entity, 'project') and entity.project: - organization_id = getattr(entity.project, 'organization_id', None) - # For entities with task.project relationship - elif hasattr(entity, 'task') and entity.task and hasattr(entity.task, 'project') and entity.task.project: - organization_id = getattr(entity.task.project, 'organization_id', None) - - # Fallback to user's active organization - if not organization_id and user and hasattr(user, 'active_organization') and user.active_organization: - organization_id = user.active_organization.id - - # Cache the result in current context if we found an organization_id - if organization_id is not None: - CurrentContext.set_organization_id(organization_id) - - return organization_id - - -def is_fsm_enabled(user=None) -> bool: - """ - Check if FSM is enabled via feature flags and thread-local override. - - The check order is: - 1. Check thread-local override (for test cleanup, bulk operations) - 2. Check feature flag - - Args: - user: User for feature flag evaluation (optional) - - Returns: - True if FSM should be active - """ - # Check thread-local override first - if CurrentContext.is_fsm_disabled(): - return False - - # Then check feature flag - return flag_set('fflag_feat_fit_568_finite_state_management', user=user) - - -def get_current_state_safe(entity, user=None) -> Optional[str]: - """ - Safely get current state with error handling. - - Args: - entity: The entity to get state for - user: The user making the request (for feature flag checking) - - Returns: - Current state string or None if failed - """ - if not is_fsm_enabled(user): - return None - - try: - from fsm.state_manager import get_state_manager - - StateManager = get_state_manager() - return StateManager.get_current_state_value(entity) - except Exception as e: - logger.warning( - f'Failed to get current state for {entity._meta.label_lower} {entity.pk}: {str(e)}', - extra={ - 'event': 'fsm.get_state_error', - 'entity_type': entity._meta.label_lower, - 'entity_id': entity.pk, - 'organization_id': resolve_organization_id(entity, user), - 'error': str(e), - }, - ) - return None - - -def infer_entity_state_from_data(entity) -> Optional[str]: - """ - Infer what the FSM state should be based on entity's current data. - - This is used for "cold start" scenarios where entities exist in the database - but don't have FSM state records yet (e.g., after FSM deployment to production - with pre-existing data). - - Args: - entity: The entity to infer state for (Task, Project, or Annotation) - - Returns: - Inferred state value, or None if entity type not supported - - Examples: - >>> task = Task.objects.get(id=123) - >>> task.is_labeled = True - >>> infer_entity_state_from_data(task) - 'COMPLETED' - - >>> project = Project.objects.get(id=456) - >>> infer_entity_state_from_data(project) - 'CREATED' - """ - from fsm.state_choices import AnnotationStateChoices, ProjectStateChoices, TaskStateChoices - - entity_type = entity._meta.model_name.lower() - - if entity_type == 'task': - # Task state depends on whether it has been labeled - return TaskStateChoices.COMPLETED if entity.is_labeled else TaskStateChoices.CREATED - elif entity_type == 'project': - # Project state depends on task completion - # If no tasks exist, project is CREATED - # If any tasks are completed, project is at least IN_PROGRESS - # If all tasks are completed, project is COMPLETED - tasks = entity.tasks.all() - if not tasks.exists(): - return ProjectStateChoices.CREATED - - # Count labeled tasks to determine project state - total_tasks = tasks.count() - labeled_tasks = tasks.filter(is_labeled=True).count() - - if labeled_tasks == 0: - return ProjectStateChoices.CREATED - elif labeled_tasks == total_tasks: - return ProjectStateChoices.COMPLETED - else: - return ProjectStateChoices.IN_PROGRESS - elif entity_type == 'annotation': - # Annotations are SUBMITTED when created - return AnnotationStateChoices.SUBMITTED - else: - logger.warning( - f'Cannot infer state for unknown entity type: {entity_type}', - extra={ - 'event': 'fsm.infer_state_unknown_type', - 'entity_type': entity_type, - 'entity_id': entity.pk, - }, - ) - return None - - -def get_or_initialize_state(entity, user=None, inferred_state=None) -> Optional[str]: - """ - Get current state, or initialize it if it doesn't exist. - - This function handles "cold start" scenarios where pre-existing entities - don't have FSM state records. It will: - 1. Try to get the current state - 2. If None, infer the state from entity data - 3. Initialize the state with an appropriate transition - 4. Return the state value (never returns None if initialization succeeds) - - Args: - entity: The entity to get or initialize state for - user: User for FSM context (optional) - inferred_state: Pre-computed inferred state (optional, will compute if not provided) - - Returns: - Current or newly initialized state value, or None if FSM disabled or failed - - Examples: - >>> task = Task.objects.get(id=123) # Pre-existing task without state - >>> state = get_or_initialize_state(task, user=request.user) - >>> # state is now 'COMPLETED' or 'CREATED' based on task.is_labeled - >>> # and a state record has been created - """ - if not is_fsm_enabled(user): - return None - - try: - from fsm.state_manager import get_state_manager - - StateManager = get_state_manager() - - # Try to get existing state - current_state = StateManager.get_current_state_value(entity) - - if current_state is not None: - # State already exists, return it - return current_state - - # No state exists - need to initialize it - if inferred_state is None: - inferred_state = infer_entity_state_from_data(entity) - - if inferred_state is None: - logger.warning( - f'Cannot initialize state for {entity._meta.model_name} {entity.pk} - inference failed', - extra={ - 'event': 'fsm.initialize_state_failed', - 'entity_type': entity._meta.model_name, - 'entity_id': entity.pk, - }, - ) - return None - - # Initialize state with appropriate transition - entity_type = entity._meta.model_name.lower() - transition_name = _get_initialization_transition_name(entity_type, inferred_state) - - if transition_name: - logger.info( - f'Initializing FSM state for pre-existing {entity_type} {entity.pk}', - extra={ - 'event': 'fsm.cold_start_initialization', - 'entity_type': entity_type, - 'entity_id': entity.pk, - 'inferred_state': inferred_state, - 'transition_name': transition_name, - }, - ) - StateManager.execute_transition(entity=entity, transition_name=transition_name, user=user) - return inferred_state - else: - logger.warning( - f'No initialization transition found for {entity_type} -> {inferred_state}', - extra={ - 'event': 'fsm.no_initialization_transition', - 'entity_type': entity_type, - 'entity_id': entity.pk, - 'inferred_state': inferred_state, - }, - ) - return None - - except Exception as e: - logger.error( - f'Failed to get or initialize state for {entity._meta.model_name} {entity.pk}: {str(e)}', - extra={ - 'event': 'fsm.get_or_initialize_error', - 'entity_type': entity._meta.model_name, - 'entity_id': entity.pk, - 'error': str(e), - }, - exc_info=True, - ) - return None - - -def _get_initialization_transition_name(entity_type: str, target_state: str) -> Optional[str]: - """ - Get the appropriate transition name for initializing an entity to a target state. - - Args: - entity_type: Type of entity ('task', 'project', 'annotation') - target_state: The target state to initialize to - - Returns: - Transition name, or None if no appropriate transition exists - """ - from fsm.state_choices import AnnotationStateChoices, ProjectStateChoices, TaskStateChoices - - if entity_type == 'task': - if target_state == TaskStateChoices.CREATED: - return 'task_created' - elif target_state == TaskStateChoices.COMPLETED: - return 'task_completed' - elif target_state == TaskStateChoices.IN_PROGRESS: - return 'task_in_progress' - elif entity_type == 'project': - if target_state == ProjectStateChoices.CREATED: - return 'project_created' - elif target_state == ProjectStateChoices.IN_PROGRESS: - return 'project_in_progress' - elif target_state == ProjectStateChoices.COMPLETED: - return 'project_completed' - elif entity_type == 'annotation': - if target_state == AnnotationStateChoices.SUBMITTED: - return 'annotation_submitted' - elif target_state == AnnotationStateChoices.COMPLETED: - return 'annotation_submitted' # Use submitted transition for initialization - - return None diff --git a/label_studio/io_storages/base_models.py b/label_studio/io_storages/base_models.py index 137d7cd3c983..d5b07501ff1b 100644 --- a/label_studio/io_storages/base_models.py +++ b/label_studio/io_storages/base_models.py @@ -18,7 +18,7 @@ import rq import rq.exceptions from core.feature_flags import flag_set -from core.redis import is_job_in_queue, is_job_on_worker, redis_connected, start_job_async_or_sync +from core.redis import is_job_in_queue, is_job_on_worker, redis_connected from core.utils.common import load_func from core.utils.iterators import iterate_queryset from data_export.serializers import ExportDataSerializer @@ -474,8 +474,7 @@ def add_task(cls, project, maximum_annotations, max_inner_id, storage, link_obje data.pop('data') with transaction.atomic(): - # Create task without skip_fsm (it's not a model field) - task = Task( + task = Task.objects.create( data=data, project=project, overlap=maximum_annotations, @@ -485,8 +484,6 @@ def add_task(cls, project, maximum_annotations, max_inner_id, storage, link_obje cancelled_annotations=cancelled_annotations, inner_id=max_inner_id, ) - # Save with skip_fsm flag to bypass FSM during bulk import - task.save(skip_fsm=True) link_class.create(task, storage=storage, **link_kwargs) logger.debug(f'Create {storage.__class__.__name__} link with {link_kwargs} for {task=}') @@ -643,12 +640,6 @@ def _scan_and_create_links(self, link_class): self.project.organization, self.project, WebhookAction.TASKS_CREATED, tasks_for_webhook ) - # Create initial FSM states for all tasks created during storage sync - # CurrentContext is now available because we use start_job_async_or_sync - from fsm.functions import backfill_fsm_states_for_tasks - - backfill_fsm_states_for_tasks(self.id, tasks_created, link_class) - self.project.update_tasks_states( maximum_annotations_changed=False, overlap_cohort_percentage_changed=False, tasks_number_changed=True ) @@ -674,13 +665,10 @@ def sync(self): ): if not self.info_set_queued(): return - # Use start_job_async_or_sync to automatically capture and restore CurrentContext - # This ensures user_id, organization_id, and request_id are available in the worker - sync_job = start_job_async_or_sync( + sync_job = queue.enqueue( import_sync_background, self.__class__, self.id, - queue_name='low', meta=meta, project_id=self.project.id, organization_id=self.project.organization.id, diff --git a/label_studio/io_storages/tests/test_multitask_import.py b/label_studio/io_storages/tests/test_multitask_import.py index 49c0be619c00..1cf591957fa2 100644 --- a/label_studio/io_storages/tests/test_multitask_import.py +++ b/label_studio/io_storages/tests/test_multitask_import.py @@ -72,9 +72,6 @@ def _test_storage_import(project, storage_class, task_data, **storage_kwargs): # Setup storage with required credentials storage = storage_class(project=project, **storage_kwargs) - # Save the storage to the database before syncing - storage.save() - # Validate connection before sync try: storage.validate_connection() @@ -82,11 +79,8 @@ def _test_storage_import(project, storage_class, task_data, **storage_kwargs): pytest.fail(f'Storage connection validation failed: {str(e)}') # Sync storage - # Mock redis_connected to force synchronous execution in tests - import mock - - with mock.patch('io_storages.base_models.redis_connected', return_value=False): - storage.sync() + # Don't have to wait for sync to complete because it's blocking without rq + storage.sync() # Validate tasks were imported correctly tasks_response = client.get(f'/api/tasks?project={project.id}') diff --git a/label_studio/jwt_auth/auth.py b/label_studio/jwt_auth/auth.py index 54f0c32b08a3..d0bcc710ec53 100644 --- a/label_studio/jwt_auth/auth.py +++ b/label_studio/jwt_auth/auth.py @@ -14,16 +14,9 @@ class TokenAuthenticationPhaseout(TokenAuthentication): def authenticate(self, request): """Authenticate the request and log if successful.""" - from core.current_request import CurrentContext from core.feature_flags import flag_set auth_result = super().authenticate(request) - - # Update CurrentContext with authenticated user - if auth_result is not None: - user, _ = auth_result - CurrentContext.set_user(user) - JWT_ACCESS_TOKEN_ENABLED = flag_set('fflag__feature_develop__prompts__dia_1829_jwt_token_auth') if JWT_ACCESS_TOKEN_ENABLED and (auth_result is not None): user, _ = auth_result diff --git a/label_studio/projects/apps.py b/label_studio/projects/apps.py deleted file mode 100644 index 0392e3f6c0c6..000000000000 --- a/label_studio/projects/apps.py +++ /dev/null @@ -1,20 +0,0 @@ -"""Projects Django App Configuration""" - -import logging - -from django.apps import AppConfig - -logger = logging.getLogger(__name__) - - -class ProjectsConfig(AppConfig): - name = 'projects' - - def ready(self): - """ - Projects app initialization. - - Note: FSM transitions are now registered centrally in fsm/apps.py. - Do NOT import transitions here to avoid duplicate registration. - """ - pass diff --git a/label_studio/projects/models.py b/label_studio/projects/models.py index 695e131f9e42..0992e1a4a13d 100644 --- a/label_studio/projects/models.py +++ b/label_studio/projects/models.py @@ -34,7 +34,6 @@ from django.db.models.expressions import RawSQL from django.utils.functional import cached_property from django.utils.translation import gettext_lazy as _ -from fsm.models import FsmHistoryStateModel from label_studio_sdk._extensions.label_studio_tools.core.label_config import parse_config from labels_manager.models import Label from projects.functions import ( @@ -126,7 +125,7 @@ def get_queryset(self): recalculate_all_stats = load_func(settings.RECALCULATE_ALL_STATS) -class Project(ProjectMixin, FsmHistoryStateModel): +class Project(ProjectMixin, models.Model): class SkipQueue(models.TextChoices): # requeue to the end of the same annotator’s queue => annotator gets this task at the end of the queue REQUEUE_FOR_ME = 'REQUEUE_FOR_ME', 'Requeue for me' @@ -853,12 +852,6 @@ def save(self, *args, update_fields=None, recalc=True, **kwargs): elif self.num_annotations == 0 and self.num_drafts == 0: summary.reset(tasks_data_based=False) - # ============================================================================ - # FSM Integration - # ============================================================================ - # Project uses FsmHistoryStateModel for FSM integration. All transition logic is defined - # in projects/transitions.py with declarative triggers. No custom methods needed. - def get_member_ids(self): if hasattr(self, 'team_link'): # project has defined team scope diff --git a/label_studio/pytest.ini b/label_studio/pytest.ini index 96fe9aff4396..4e5057f25638 100644 --- a/label_studio/pytest.ini +++ b/label_studio/pytest.ini @@ -8,5 +8,3 @@ env = D:SENTRY_DSN= D:USE_ENFORCE_CSRF_CHECKS=0 D:TEST_ENVIRONMENT=1 - COLLECT_ANALYTICS=0 - fflag_feat_fit_568_finite_state_management=1 diff --git a/label_studio/tasks/apps.py b/label_studio/tasks/apps.py deleted file mode 100644 index dc0c898fefbe..000000000000 --- a/label_studio/tasks/apps.py +++ /dev/null @@ -1,20 +0,0 @@ -"""Tasks Django App Configuration""" - -import logging - -from django.apps import AppConfig - -logger = logging.getLogger(__name__) - - -class TasksConfig(AppConfig): - name = 'tasks' - - def ready(self): - """ - Tasks app initialization. - - Note: FSM transitions are now registered centrally in fsm/apps.py. - Do NOT import transitions here to avoid duplicate registration. - """ - pass diff --git a/label_studio/tasks/models.py b/label_studio/tasks/models.py index 816b0a6b0bae..90182b5e6df3 100644 --- a/label_studio/tasks/models.py +++ b/label_studio/tasks/models.py @@ -37,7 +37,6 @@ from django.utils.timesince import timesince from django.utils.timezone import now from django.utils.translation import gettext_lazy as _ -from fsm.models import FsmHistoryStateModel from label_studio_sdk.label_interface.objects import PredictionValue from rest_framework.exceptions import ValidationError from tasks.choices import ActionType @@ -47,7 +46,7 @@ TaskMixin = load_func(settings.TASK_MIXIN) -class Task(TaskMixin, FsmHistoryStateModel): +class Task(TaskMixin, models.Model): """Business tasks from project""" id = models.AutoField( @@ -585,7 +584,7 @@ def bulk_create(self, objs, batch_size=None): AnnotationMixin = load_func(settings.ANNOTATION_MIXIN) -class Annotation(AnnotationMixin, FsmHistoryStateModel): +class Annotation(AnnotationMixin, models.Model): """Annotations & Labeling results""" objects = AnnotationManager() @@ -754,7 +753,7 @@ def update_task(self): self.task.updated_by = request.user update_fields.append('updated_by') - self.task.save(update_fields=update_fields, skip_fsm=True) + self.task.save(update_fields=update_fields) def save(self, *args, update_fields=None, **kwargs): request = get_current_request() @@ -774,24 +773,13 @@ def save(self, *args, update_fields=None, **kwargs): return result def delete(self, *args, **kwargs): - # Store task and project references before deletion - result = super().delete(*args, **kwargs) self.update_task() self.on_delete_update_counters() - return result - def _update_task_state_after_deletion(self, task, project): - """Update task FSM state after annotation deletion.""" - from fsm.functions import update_task_state_after_annotation_deletion - - update_task_state_after_annotation_deletion(task, project) - def on_delete_update_counters(self): task = self.task - project = self.project - logger.debug(f'Start updating counters for task {task.id}.') if self.was_cancelled: cancelled = task.annotations.all().filter(was_cancelled=True).count() @@ -806,15 +794,12 @@ def on_delete_update_counters(self): task.update_is_labeled() Task.objects.filter(id=task.id).update(is_labeled=task.is_labeled) - # FSM: Update task state - self._update_task_state_after_deletion(task, project) - # remove annotation counters in project summary followed by deleting an annotation logger.debug('Remove annotation counters in project summary followed by deleting an annotation') self.decrease_project_summary_counters() -class TaskLock(FsmHistoryStateModel): +class TaskLock(models.Model): task = models.ForeignKey( 'tasks.Task', on_delete=models.CASCADE, @@ -832,7 +817,7 @@ class TaskLock(FsmHistoryStateModel): created_at = models.DateTimeField(_('created at'), auto_now_add=True, help_text='Creation time', null=True) -class AnnotationDraft(FsmHistoryStateModel): +class AnnotationDraft(models.Model): result = JSONField(_('result'), help_text='Draft result in JSON format') lead_time = models.FloatField( _('lead time'), @@ -1012,7 +997,7 @@ def update_task(self): self.task.updated_by = request.user update_fields.append('updated_by') - self.task.save(update_fields=update_fields, skip_fsm=True) + self.task.save(update_fields=update_fields) def save(self, *args, update_fields=None, **kwargs): if self.project_id is None and self.task_id: @@ -1322,7 +1307,7 @@ def update_project_summary_annotations_and_is_labeled(sender, instance, created, else: instance.task.total_annotations = instance.task.annotations.all().filter(was_cancelled=False).count() instance.task.update_is_labeled() - instance.task.save(update_fields=['is_labeled', 'total_annotations', 'cancelled_annotations'], skip_fsm=True) + instance.task.save(update_fields=['is_labeled', 'total_annotations', 'cancelled_annotations']) logger.debug(f'Updated total_annotations and cancelled_annotations for {instance.task.id}.') diff --git a/label_studio/tests/test_fsm_lso_workflows.py b/label_studio/tests/test_fsm_lso_workflows.py deleted file mode 100644 index 7cab0b6255db..000000000000 --- a/label_studio/tests/test_fsm_lso_workflows.py +++ /dev/null @@ -1,684 +0,0 @@ -""" -LSO FSM Workflow Tests - -Tests FSM state tracking through realistic user workflows using the SDK/API. -Validates that FSM correctly tracks state changes during actual user journeys. - -This test file focuses on LSO-specific functionality: -- Project lifecycle: CREATED -> IN_PROGRESS -> COMPLETED -- Task lifecycle: CREATED -> COMPLETED -> IN_PROGRESS -> COMPLETED -- Annotation lifecycle: SUBMITTED (on create), SUBMITTED (on update) - -LSE-specific transitions (reviews, project settings, annotation drafts) are tested in LSE. -""" - -import pytest -from fsm.state_choices import AnnotationStateChoices, ProjectStateChoices, TaskStateChoices -from fsm.state_manager import StateManager -from label_studio_sdk.client import LabelStudio -from projects.models import Project -from tasks.models import Annotation, Task - -pytestmark = pytest.mark.django_db - - -# Helper functions - - -def assert_project_state(project_id, expected_state): - """Assert project has expected FSM state""" - project = Project.objects.get(pk=project_id) - actual = StateManager.get_current_state_value(project) - assert actual == expected_state, f'Expected project state {expected_state}, got {actual}' - - -def assert_task_state(task_id, expected_state): - """Assert task has expected FSM state""" - task = Task.objects.get(pk=task_id) - actual = StateManager.get_current_state_value(task) - assert actual == expected_state, f'Expected task state {expected_state}, got {actual}' - - -def assert_annotation_state(annotation_id, expected_state): - """Assert annotation has expected FSM state""" - annotation = Annotation.objects.get(pk=annotation_id) - actual = StateManager.get_current_state_value(annotation) - assert actual == expected_state, f'Expected annotation state {expected_state}, got {actual}' - - -class TestProjectWorkflows: - """Test project FSM state tracking through realistic workflows""" - - def test_project_creation_workflow(self, django_live_url, business_client): - """ - User creates project -> Project state = CREATED - - Validates: - - Project is created with CREATED state - - FSM captures project creation - """ - ls = LabelStudio(base_url=django_live_url, api_key=business_client.api_key) - - # Create project via SDK - project = ls.projects.create( - title='Test Project - Creation Workflow', - label_config='', - ) - - # Verify project state - assert_project_state(project.id, ProjectStateChoices.CREATED) - - def test_project_in_progress_workflow(self, django_live_url, business_client): - """ - First annotation on any task -> Project CREATED -> IN_PROGRESS - - Validates: - - Project starts in CREATED state - - First annotation submission triggers project IN_PROGRESS - - Task transitions to COMPLETED - """ - ls = LabelStudio(base_url=django_live_url, api_key=business_client.api_key) - - # Create project and tasks - project = ls.projects.create( - title='Test Project - In Progress Workflow', - label_config='', - ) - ls.tasks.create(project=project.id, data={'text': 'Task 1'}) - ls.tasks.create(project=project.id, data={'text': 'Task 2'}) - - # Verify initial states - assert_project_state(project.id, ProjectStateChoices.CREATED) - tasks = list(ls.tasks.list(project=project.id)) - assert len(tasks) == 2 - assert_task_state(tasks[0].id, TaskStateChoices.CREATED) - - # Submit annotation on first task - ls.annotations.create( - id=tasks[0].id, - result=[{'value': {'choices': ['positive']}, 'from_name': 'label', 'to_name': 'text', 'type': 'choices'}], - lead_time=5.0, - ) - - # Verify task completed and project in progress - assert_task_state(tasks[0].id, TaskStateChoices.COMPLETED) - assert_project_state(project.id, ProjectStateChoices.IN_PROGRESS) - - def test_project_completion_workflow(self, django_live_url, business_client): - """ - All tasks completed -> Project IN_PROGRESS -> COMPLETED - - Validates: - - Project moves to COMPLETED when all tasks are completed - """ - ls = LabelStudio(base_url=django_live_url, api_key=business_client.api_key) - - # Create project and tasks - project = ls.projects.create( - title='Test Project - Completion Workflow', - label_config='', - ) - ls.tasks.create(project=project.id, data={'text': 'Task 1'}) - ls.tasks.create(project=project.id, data={'text': 'Task 2'}) - - tasks = list(ls.tasks.list(project=project.id)) - - # Submit annotation on first task -> project IN_PROGRESS - ls.annotations.create( - id=tasks[0].id, - result=[{'value': {'choices': ['positive']}, 'from_name': 'label', 'to_name': 'text', 'type': 'choices'}], - lead_time=5.0, - ) - assert_project_state(project.id, ProjectStateChoices.IN_PROGRESS) - - # Submit annotation on second task -> project COMPLETED - ls.annotations.create( - id=tasks[1].id, - result=[{'value': {'choices': ['negative']}, 'from_name': 'label', 'to_name': 'text', 'type': 'choices'}], - lead_time=5.0, - ) - - # Verify all tasks completed and project completed - assert_task_state(tasks[0].id, TaskStateChoices.COMPLETED) - assert_task_state(tasks[1].id, TaskStateChoices.COMPLETED) - assert_project_state(project.id, ProjectStateChoices.COMPLETED) - - def test_project_back_to_in_progress_workflow(self, django_live_url, business_client): - """ - Task becomes incomplete -> Project COMPLETED -> IN_PROGRESS - - Validates: - - Deleting annotations from a task moves task to IN_PROGRESS - - Project transitions back to IN_PROGRESS when any task is incomplete - """ - ls = LabelStudio(base_url=django_live_url, api_key=business_client.api_key) - - # Create project and tasks - project = ls.projects.create( - title='Test Project - Back to In Progress', - label_config='', - ) - ls.tasks.create(project=project.id, data={'text': 'Task 1'}) - ls.tasks.create(project=project.id, data={'text': 'Task 2'}) - - tasks = list(ls.tasks.list(project=project.id)) - - # Complete both tasks - annotation1 = ls.annotations.create( - id=tasks[0].id, - result=[{'value': {'choices': ['positive']}, 'from_name': 'label', 'to_name': 'text', 'type': 'choices'}], - lead_time=5.0, - ) - ls.annotations.create( - id=tasks[1].id, - result=[{'value': {'choices': ['negative']}, 'from_name': 'label', 'to_name': 'text', 'type': 'choices'}], - lead_time=5.0, - ) - assert_project_state(project.id, ProjectStateChoices.COMPLETED) - - # Delete annotation from first task - ls.annotations.delete(id=annotation1.id) - - # Verify task moved to IN_PROGRESS and project back to IN_PROGRESS - assert_task_state(tasks[0].id, TaskStateChoices.IN_PROGRESS) - assert_project_state(project.id, ProjectStateChoices.IN_PROGRESS) - - -class TestTaskWorkflows: - """Test task FSM state tracking through realistic workflows""" - - def test_task_import_workflow(self, django_live_url, business_client): - """ - User imports tasks -> Each task state = CREATED - - Validates: - - Tasks are created with CREATED state - - FSM captures task creation - """ - ls = LabelStudio(base_url=django_live_url, api_key=business_client.api_key) - - project = ls.projects.create( - title='Test Project - Task Import', - label_config='', - ) - - # Create tasks via SDK - ls.tasks.create(project=project.id, data={'text': 'Task 1'}) - ls.tasks.create(project=project.id, data={'text': 'Task 2'}) - ls.tasks.create(project=project.id, data={'text': 'Task 3'}) - - # Verify all tasks are CREATED - tasks = list(ls.tasks.list(project=project.id)) - assert len(tasks) == 3 - for task in tasks: - assert_task_state(task.id, TaskStateChoices.CREATED) - - def test_task_completion_workflow(self, django_live_url, business_client): - """ - First annotation submitted -> Task CREATED -> COMPLETED - - Validates: - - Task transitions to COMPLETED when annotation is submitted - - Annotation is in SUBMITTED state - """ - ls = LabelStudio(base_url=django_live_url, api_key=business_client.api_key) - - project = ls.projects.create( - title='Test Project - Task Completion', - label_config='', - ) - ls.tasks.create(project=project.id, data={'text': 'Task 1'}) - - tasks = list(ls.tasks.list(project=project.id)) - task_id = tasks[0].id - - # Verify initial state - assert_task_state(task_id, TaskStateChoices.CREATED) - - # Submit annotation - annotation = ls.annotations.create( - id=task_id, - result=[{'value': {'choices': ['positive']}, 'from_name': 'label', 'to_name': 'text', 'type': 'choices'}], - lead_time=5.0, - ) - - # Verify task completed - assert_task_state(task_id, TaskStateChoices.COMPLETED) - assert_annotation_state(annotation.id, AnnotationStateChoices.SUBMITTED) - - def test_task_in_progress_workflow(self, django_live_url, business_client): - """ - All annotations deleted -> Task COMPLETED -> IN_PROGRESS - - Validates: - - Task transitions to IN_PROGRESS when all annotations are deleted - """ - ls = LabelStudio(base_url=django_live_url, api_key=business_client.api_key) - - project = ls.projects.create( - title='Test Project - Task In Progress', - label_config='', - ) - ls.tasks.create(project=project.id, data={'text': 'Task 1'}) - - tasks = list(ls.tasks.list(project=project.id)) - task_id = tasks[0].id - - # Submit and verify completion - annotation = ls.annotations.create( - id=task_id, - result=[{'value': {'choices': ['positive']}, 'from_name': 'label', 'to_name': 'text', 'type': 'choices'}], - lead_time=5.0, - ) - assert_task_state(task_id, TaskStateChoices.COMPLETED) - - # Delete annotation - ls.annotations.delete(id=annotation.id) - - # Verify task in progress - assert_task_state(task_id, TaskStateChoices.IN_PROGRESS) - - def test_task_re_completion_workflow(self, django_live_url, business_client): - """ - Annotation submitted on IN_PROGRESS task -> Task IN_PROGRESS -> COMPLETED - - Validates: - - Task can transition back to COMPLETED after being IN_PROGRESS - """ - ls = LabelStudio(base_url=django_live_url, api_key=business_client.api_key) - - project = ls.projects.create( - title='Test Project - Task Re-completion', - label_config='', - ) - ls.tasks.create(project=project.id, data={'text': 'Task 1'}) - - tasks = list(ls.tasks.list(project=project.id)) - task_id = tasks[0].id - - # Submit, delete, verify IN_PROGRESS - annotation1 = ls.annotations.create( - id=task_id, - result=[{'value': {'choices': ['positive']}, 'from_name': 'label', 'to_name': 'text', 'type': 'choices'}], - lead_time=5.0, - ) - ls.annotations.delete(id=annotation1.id) - assert_task_state(task_id, TaskStateChoices.IN_PROGRESS) - - # Re-submit annotation - ls.annotations.create( - id=task_id, - result=[{'value': {'choices': ['negative']}, 'from_name': 'label', 'to_name': 'text', 'type': 'choices'}], - lead_time=5.0, - ) - - # Verify task completed again - assert_task_state(task_id, TaskStateChoices.COMPLETED) - - -class TestAnnotationWorkflows: - """Test annotation FSM state tracking through realistic workflows""" - - def test_annotation_submission_workflow(self, django_live_url, business_client): - """ - User submits annotation -> Annotation state = SUBMITTED - - Validates: - - Annotation is created with SUBMITTED state - - FSM captures annotation creation - """ - ls = LabelStudio(base_url=django_live_url, api_key=business_client.api_key) - - project = ls.projects.create( - title='Test Project - Annotation Submission', - label_config='', - ) - ls.tasks.create(project=project.id, data={'text': 'Task 1'}) - - tasks = list(ls.tasks.list(project=project.id)) - - # Submit annotation - annotation = ls.annotations.create( - id=tasks[0].id, - result=[{'value': {'choices': ['positive']}, 'from_name': 'label', 'to_name': 'text', 'type': 'choices'}], - lead_time=5.0, - ) - - # Verify annotation state - assert_annotation_state(annotation.id, AnnotationStateChoices.SUBMITTED) - - # Verify FSM state record count - annotation_obj = Annotation.objects.get(pk=annotation.id) - state_count = StateManager.get_state_history(annotation_obj).count() - assert state_count == 1, f'Expected 1 state record, got {state_count}' - - def test_annotation_update_workflow(self, django_live_url, business_client): - """ - User updates annotation -> New state record (still SUBMITTED) - - Validates: - - Annotation update creates new FSM state record - - State remains SUBMITTED - """ - ls = LabelStudio(base_url=django_live_url, api_key=business_client.api_key) - - project = ls.projects.create( - title='Test Project - Annotation Update', - label_config='', - ) - ls.tasks.create(project=project.id, data={'text': 'Task 1'}) - - tasks = list(ls.tasks.list(project=project.id)) - - # Submit annotation - annotation = ls.annotations.create( - id=tasks[0].id, - result=[{'value': {'choices': ['positive']}, 'from_name': 'label', 'to_name': 'text', 'type': 'choices'}], - lead_time=5.0, - ) - assert_annotation_state(annotation.id, AnnotationStateChoices.SUBMITTED) - - # Update annotation - ls.annotations.update( - id=annotation.id, - result=[{'value': {'choices': ['negative']}, 'from_name': 'label', 'to_name': 'text', 'type': 'choices'}], - ) - - # Verify state still SUBMITTED but new state record created - assert_annotation_state(annotation.id, AnnotationStateChoices.SUBMITTED) - annotation_obj = Annotation.objects.get(pk=annotation.id) - state_count = StateManager.get_state_history(annotation_obj).count() - assert state_count == 2, f'Expected 2 state records, got {state_count}' - - -class TestEndToEndWorkflows: - """Test complete end-to-end workflows""" - - def test_complete_annotation_journey(self, django_live_url, business_client): - """ - Complete workflow: - 1. Create project -> Project CREATED - 2. Import 2 tasks -> Tasks CREATED - 3. Submit annotation on task1 -> Task1 COMPLETED, Project IN_PROGRESS - 4. Submit annotation on task2 -> Task2 COMPLETED, Project COMPLETED - 5. Delete annotation from task1 -> Task1 IN_PROGRESS, Project IN_PROGRESS - 6. Re-submit annotation on task1 -> Task1 COMPLETED, Project COMPLETED - - Validates the complete FSM state flow for a typical annotation journey. - """ - ls = LabelStudio(base_url=django_live_url, api_key=business_client.api_key) - - # Step 1: Create project - project = ls.projects.create( - title='Test Project - Complete Journey', - label_config='', - ) - assert_project_state(project.id, ProjectStateChoices.CREATED) - - # Step 2: Create 2 tasks - ls.tasks.create(project=project.id, data={'text': 'Task 1'}) - ls.tasks.create(project=project.id, data={'text': 'Task 2'}) - tasks = list(ls.tasks.list(project=project.id)) - assert len(tasks) == 2 - task1_id = tasks[0].id - task2_id = tasks[1].id - assert_task_state(task1_id, TaskStateChoices.CREATED) - assert_task_state(task2_id, TaskStateChoices.CREATED) - - # Step 3: Submit annotation on task1 - annotation1 = ls.annotations.create( - id=task1_id, - result=[{'value': {'choices': ['positive']}, 'from_name': 'label', 'to_name': 'text', 'type': 'choices'}], - lead_time=5.0, - ) - assert_task_state(task1_id, TaskStateChoices.COMPLETED) - assert_project_state(project.id, ProjectStateChoices.IN_PROGRESS) - assert_annotation_state(annotation1.id, AnnotationStateChoices.SUBMITTED) - - # Step 4: Submit annotation on task2 - annotation2 = ls.annotations.create( - id=task2_id, - result=[{'value': {'choices': ['negative']}, 'from_name': 'label', 'to_name': 'text', 'type': 'choices'}], - lead_time=5.0, - ) - assert_task_state(task2_id, TaskStateChoices.COMPLETED) - assert_project_state(project.id, ProjectStateChoices.COMPLETED) - assert_annotation_state(annotation2.id, AnnotationStateChoices.SUBMITTED) - - # Step 5: Delete annotation from task1 - ls.annotations.delete(id=annotation1.id) - assert_task_state(task1_id, TaskStateChoices.IN_PROGRESS) - assert_project_state(project.id, ProjectStateChoices.IN_PROGRESS) - - # Step 6: Re-submit annotation on task1 - annotation3 = ls.annotations.create( - id=task1_id, - result=[{'value': {'choices': ['positive']}, 'from_name': 'label', 'to_name': 'text', 'type': 'choices'}], - lead_time=5.0, - ) - assert_task_state(task1_id, TaskStateChoices.COMPLETED) - assert_project_state(project.id, ProjectStateChoices.COMPLETED) - assert_annotation_state(annotation3.id, AnnotationStateChoices.SUBMITTED) - - -class TestColdStartScenarios: - """ - Test FSM behavior when entities exist without state records. - - These tests simulate "cold start" scenarios that occur when: - 1. FSM is deployed to production with pre-existing data - 2. Entities exist in the database but have no FSM state records - 3. First FSM interaction must properly initialize states - """ - - @pytest.fixture(autouse=True) - def setup_context(self, business_client): - """Ensure CurrentContext has user set for FSM operations""" - from core.current_request import CurrentContext - - # Set the user from business_client to CurrentContext - user = business_client.user - CurrentContext.set_user(user) - if hasattr(user, 'active_organization') and user.active_organization: - CurrentContext.set_organization_id(user.active_organization.id) - - yield - - # Cleanup - CurrentContext.clear() - - def test_annotation_deletion_on_task_without_state(self, django_live_url, business_client, configured_project): - """ - Test: Annotation deletion on task that has no FSM state record. - - Steps: - 1. Create task directly (bypassing FSM auto-transitions) - 2. Add annotation directly (bypassing FSM) - 3. Delete annotation via SDK - 4. Verify states are initialized and updated correctly - """ - from fsm.state_choices import TaskStateChoices - from fsm.state_models import TaskState - from tasks.models import Annotation, Task - - ls = LabelStudio(base_url=django_live_url, api_key=business_client.api_key) - - # Step 1: Create task directly without FSM state - task = Task(data={'text': 'Test cold start'}, project=configured_project) - task.save(skip_fsm=True) - - # Verify no state exists - assert TaskState.objects.filter(task=task).count() == 0 - - # Step 2: Create annotation directly - annotation = Annotation( - task=task, - project=configured_project, - result=[{'value': {'choices': ['positive']}, 'from_name': 'label', 'to_name': 'text', 'type': 'choices'}], - ) - annotation.save(skip_fsm=True) - task.is_labeled = True - task.save(skip_fsm=True) - - # Step 3: Delete annotation via SDK (this triggers FSM logic) - ls.annotations.delete(id=annotation.id) - - # Step 4: Verify task state was initialized and updated - task.refresh_from_db() - assert not task.is_labeled # Annotation was deleted - - # Task state should now exist and be IN_PROGRESS - task_states = TaskState.objects.filter(task=task).order_by('-id') - assert task_states.count() >= 1 # At least one state record created - latest_state = task_states.first() - assert latest_state.state in [TaskStateChoices.IN_PROGRESS, TaskStateChoices.CREATED] - - def test_annotation_submission_on_task_without_state(self, django_live_url, business_client, configured_project): - """ - Test: Annotation submission on task that has no FSM state record. - - Steps: - 1. Create task directly (bypassing FSM) - 2. Submit annotation via SDK - 3. Verify task and project states are initialized correctly - """ - from fsm.state_choices import ProjectStateChoices, TaskStateChoices - from fsm.state_models import ProjectState, TaskState - from tasks.models import Task - - ls = LabelStudio(base_url=django_live_url, api_key=business_client.api_key) - - # Step 1: Create task without FSM state - task = Task(data={'text': 'Cold start annotation test'}, project=configured_project) - task.save(skip_fsm=True) - - # Verify no states exist - assert TaskState.objects.filter(task=task).count() == 0 - - # Delete any project states that might exist - ProjectState.objects.filter(project=configured_project).delete() - - # Step 2: Submit annotation via SDK - ls.annotations.create( - id=task.id, - result=[{'value': {'choices': ['positive']}, 'from_name': 'label', 'to_name': 'text', 'type': 'choices'}], - lead_time=1.0, - ) - - # Step 3: Verify states initialized - task_states = TaskState.objects.filter(task=task).order_by('-id') - assert task_states.count() >= 1 - latest_task_state = task_states.first() - assert latest_task_state.state == TaskStateChoices.COMPLETED - - project_states = ProjectState.objects.filter(project=configured_project).order_by('-id') - assert project_states.count() >= 1 - latest_project_state = project_states.first() - assert latest_project_state.state in [ProjectStateChoices.IN_PROGRESS, ProjectStateChoices.COMPLETED] - - def test_project_state_update_with_mixed_task_states(self, django_live_url, business_client, configured_project): - """ - Test: Project state update when some tasks have states and some don't. - - Steps: - 1. Create multiple tasks without FSM states - 2. Update project state via annotation submission - 3. Verify all task states are initialized - 4. Verify project state is correct - """ - from fsm.state_choices import ProjectStateChoices, TaskStateChoices - from fsm.state_manager import get_state_manager - from fsm.state_models import TaskState - from tasks.models import Task - - ls = LabelStudio(base_url=django_live_url, api_key=business_client.api_key) - StateManager = get_state_manager() - - # Step 1: Create two tasks without FSM states - task1 = Task(data={'text': 'Task 1'}, project=configured_project) - task1.save(skip_fsm=True) - - task2 = Task(data={'text': 'Task 2'}, project=configured_project) - task2.save(skip_fsm=True) - - # Verify no tasks have states initially - assert not TaskState.objects.filter(task=task1).exists() - assert not TaskState.objects.filter(task=task2).exists() - - # Step 2: Submit annotation on first task only via SDK - ls.annotations.create( - id=task1.id, - result=[{'value': {'choices': ['positive']}, 'from_name': 'label', 'to_name': 'text', 'type': 'choices'}], - lead_time=1.0, - ) - - # Step 3: Verify both tasks now have states - # task1 should have COMPLETED state (annotation submitted) - task1_state = StateManager.get_current_state_value(task1) - assert task1_state == TaskStateChoices.COMPLETED - - # task2 should also have been initialized during project state calculation - task2_state = StateManager.get_current_state_value(task2) - assert task2_state in [ - TaskStateChoices.CREATED, - TaskStateChoices.IN_PROGRESS, - None, - ] # May or may not be initialized yet - - # Step 4: Verify project state is correct (IN_PROGRESS - some tasks completed) - project_state = StateManager.get_current_state_value(configured_project) - assert project_state == ProjectStateChoices.IN_PROGRESS - - def test_bulk_task_processing_cold_start(self, django_live_url, business_client): - """ - Test: Bulk processing of tasks when none have FSM states. - - Steps: - 1. Create a new project with multiple tasks without FSM states - 2. Submit annotations on all tasks via SDK - 3. Verify states are correctly initialized for all - 4. Verify project transitions correctly through states - """ - from fsm.state_choices import ProjectStateChoices, TaskStateChoices - from fsm.state_manager import get_state_manager - from fsm.state_models import TaskState - from projects.models import Project - from tasks.models import Task - - ls = LabelStudio(base_url=django_live_url, api_key=business_client.api_key) - StateManager = get_state_manager() - - # Create a new project with FSM - project = Project( - title='Bulk Cold Start Test', - label_config='', - created_by=business_client.user, - ) - project.save() - - # Step 1: Create 3 tasks without FSM states - tasks = [] - for i in range(3): - task = Task(data={'text': f'Bulk task {i}'}, project=project) - task.save(skip_fsm=True) - tasks.append(task) - assert not TaskState.objects.filter(task=task).exists() - - # Step 2: Submit annotations on all tasks via SDK - for task in tasks: - ls.annotations.create( - id=task.id, - result=[ - {'value': {'choices': ['positive']}, 'from_name': 'label', 'to_name': 'text', 'type': 'choices'} - ], - lead_time=1.0, - ) - - # Step 3: Verify all tasks have correct states - for task in tasks: - task_state = StateManager.get_current_state_value(task) - assert task_state == TaskStateChoices.COMPLETED - - # Step 4: Verify project is COMPLETED (all tasks completed) - project_state = StateManager.get_current_state_value(project) - assert project_state == ProjectStateChoices.COMPLETED