"""Data migration: reset ``agreement_selected.annotators`` on data manager views.

For every ``View`` whose ``data['agreement_selected']`` already carries an
``annotators`` key, the stored list is replaced with the ids of all users that
have at least one annotation in the view's project.
"""
import logging
from copy import deepcopy

from core.models import AsyncMigrationStatus
from core.redis import start_job_async_or_sync
from django.apps import apps as django_apps
from django.db import migrations

migration_name = '0016_migrate_agreement_selected_annotators_to_unique'

logger = logging.getLogger(__name__)


def _normalize_annotator_ids(raw_annotators):
    """Return the stored annotator ids as a set of ints.

    Returns ``None`` when the stored value is not a list-like of int-convertible
    items. The caller treats ``None`` as "needs rewriting" instead of letting a
    single malformed view abort the whole job.
    """
    if not isinstance(raw_annotators, (list, tuple, set)):
        return None
    try:
        return {int(annotator_id) for annotator_id in raw_annotators}
    except (TypeError, ValueError):
        return None


def _unique_annotator_ids(annotation_model, project_id):
    """Return the set of user ids that completed at least one annotation in the project."""
    return set(
        annotation_model.objects
        .filter(project_id=project_id, completed_by_id__isnull=False)
        .values_list('completed_by_id', flat=True)
        .distinct()
    )


def forward_migration():
    """Rewrite ``agreement_selected.annotators`` for all affected views.

    The job registers itself in ``AsyncMigrationStatus`` under ``migration_name``;
    if a row with that name already exists the job returns immediately, so the
    body runs at most once. On success the status row is set to
    ``STATUS_FINISHED``.
    """
    migration, created = AsyncMigrationStatus.objects.get_or_create(
        name=migration_name,
        defaults={'status': AsyncMigrationStatus.STATUS_STARTED},
    )
    if not created:
        return  # already in progress or done

    # Look up models at runtime inside the worker process
    View = django_apps.get_model('data_manager', 'View')
    Annotation = django_apps.get_model('tasks', 'Annotation')

    # Cache unique annotators per project_id to avoid repetitive queries
    project_to_unique_annotators = {}

    # values() avoids building full model instances; only the needed fields are fetched.
    # Plain iteration (not .iterator()) is kept on purpose: rows of this same table
    # are updated inside the loop, and Django's docs warn against writing to a table
    # while streaming from it on SQLite.
    qs = View.objects.all().values('id', 'project_id', 'data')

    updated = 0
    for row in qs:
        view_id = row['id']
        project_id = row['project_id']
        data = row.get('data') or {}

        # Guard against unexpected JSON shapes (e.g. a list stored in `data`)
        agreement = data.get('agreement_selected') if isinstance(data, dict) else None
        if not isinstance(agreement, dict):
            continue

        # Only migrate views that actually have annotators key present
        existing_annotators = agreement.get('annotators')
        if existing_annotators is None:
            continue

        # Compute unique annotators for this project (once per project)
        if project_id not in project_to_unique_annotators:
            project_to_unique_annotators[project_id] = _unique_annotator_ids(Annotation, project_id)
        new_annotators = project_to_unique_annotators[project_id]

        # If no change, skip update; an unparseable stored value is always rewritten
        old_set = _normalize_annotator_ids(existing_annotators)
        if old_set is not None and old_set == new_annotators:
            continue

        # Sorted so the persisted JSON is deterministic across runs
        new_list = sorted(new_annotators)
        new_data = deepcopy(data)
        new_data['agreement_selected']['annotators'] = new_list

        # Update only the JSON field via update(); do not load model instance or call save()
        View.objects.filter(id=view_id).update(data=new_data)
        old_len = 'unknown' if old_set is None else len(old_set)
        logger.info(f'Updated View {view_id} agreement selected annotators to {new_list}')
        logger.info(f'Old annotator length: {old_len}, new annotator length: {len(new_annotators)}')
        updated += 1

    if updated:
        logger.info(f'{migration_name} Updated {updated} View rows')

    migration.status = AsyncMigrationStatus.STATUS_FINISHED
    migration.save(update_fields=['status'])


def forwards(apps, schema_editor):
    """Hand ``forward_migration`` to ``start_job_async_or_sync`` on the 'low' queue."""
    start_job_async_or_sync(forward_migration, queue_name='low')


def backwards(apps, schema_editor):
    """No-op reverse step."""
    # Irreversible: we cannot reconstruct the previous annotator lists safely
    pass


class Migration(migrations.Migration):
    # Run outside a wrapping transaction; the real work happens in the scheduled job.
    atomic = False

    dependencies = [
        ('data_manager', '0015_alter_view_options')
    ]

    operations = [
        migrations.RunPython(forwards, backwards),
    ]
@method_decorator(
    name='get',
    decorator=extend_schema(
        tags=['Projects'],
        summary='List unique annotators for project',
        description='Return unique users who have submitted annotations in the specified project.',
        responses={
            200: OpenApiResponse(
                description='List of annotator users',
                response=UserSimpleSerializer(many=True),
            )
        },
        extensions={
            'x-fern-sdk-group-name': 'projects',
            'x-fern-sdk-method-name': 'list_unique_annotators',
            'x-fern-audiences': ['public'],
        },
    ),
)
class ProjectAnnotatorsAPI(generics.RetrieveAPIView):
    # GET endpoint returning the users who completed at least one annotation
    # in the project addressed by the URL.
    permission_required = all_permissions.projects_view
    queryset = Project.objects.all()

    def get(self, request, *args, **kwargs):
        # Resolve the project from the URL kwargs via the generic view lookup
        project = self.get_object()

        # Distinct ids of users referenced as `completed_by` on the project's annotations
        completed_annotations = Annotation.objects.filter(project=project, completed_by_id__isnull=False)
        distinct_user_ids = completed_annotations.values_list('completed_by_id', flat=True).distinct()
        annotator_ids = list(distinct_user_ids)

        # Load the matching users ordered by id so the response order is stable
        annotators = User.objects.filter(id__in=annotator_ids)
        annotators = annotators.prefetch_related('om_through').order_by('id')

        serializer = UserSimpleSerializer(annotators, many=True, context={'request': request})
        return Response(serializer.data)
task_id=tasks[0].id, project_id=proj.id, completed_by_id=business_client.user.id, result=[{'r': 1}] + ) + Annotation.objects.create(task_id=tasks[1].id, project_id=proj.id, completed_by_id=u2.id, result=[{'r': 2}]) + + # Call annotators API via SDK wrapper + resp = list(ls.projects.list_unique_annotators(id=proj.id)) + + returned_ids = [u.id for u in resp] + assert sorted(returned_ids) == sorted([business_client.user.id, u2.id]) + assert returned_ids == sorted(returned_ids) + assert u3.id not in returned_ids # no annotations created for this user diff --git a/poetry.lock b/poetry.lock index 2f388c8a415d..e30770c797d0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2152,7 +2152,7 @@ optional = false python-versions = ">=3.9,<4" groups = ["main"] files = [ - {file = "7a4d5c352e428b059d7111086e93f108e47cc630.zip", hash = "sha256:fdc96110b7f7ab8553562ad0c4b07ea02e4c38ad69e79d863c64e6fda81e8319"}, + {file = "48b3458394e45c6e78997795c03a2be3adb2d533.zip", hash = "sha256:49481d354e72133fd78544a83aea947e683315e193f15e1826f6a57fa2288b7d"}, ] [package.dependencies] @@ -2180,7 +2180,7 @@ xmljson = "0.2.1" [package.source] type = "url" -url = "https://github.com/HumanSignal/label-studio-sdk/archive/7a4d5c352e428b059d7111086e93f108e47cc630.zip" +url = "https://github.com/HumanSignal/label-studio-sdk/archive/48b3458394e45c6e78997795c03a2be3adb2d533.zip" [[package]] name = "launchdarkly-server-sdk" @@ -5126,4 +5126,4 @@ uwsgi = ["pyuwsgi", "uwsgitop"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<4" -content-hash = "8e4c56e5d1e9d3a45d50411209c5c9340f32266603e072a11e312f93a6a2564e" +content-hash = "2fca5b38d57892103ce9ee8476e0fc0a338ec351bdde5404e3dc3e8e91c7b4d9" diff --git a/pyproject.toml b/pyproject.toml index 9c95b75d1258..4fb9333ddae1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,7 +74,7 @@ dependencies = [ "tldextract (>=5.1.3)", "uuid-utils (>=0.11.0,<1.0.0)", ## HumanSignal repo dependencies :start - "label-studio-sdk @ 
https://github.com/HumanSignal/label-studio-sdk/archive/7a4d5c352e428b059d7111086e93f108e47cc630.zip", + "label-studio-sdk @ https://github.com/HumanSignal/label-studio-sdk/archive/48b3458394e45c6e78997795c03a2be3adb2d533.zip", ## HumanSignal repo dependencies :end ]