From 77ce44374868acf255342ec42ab3adae6ebf5477 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 18 Mar 2025 09:47:39 +0100 Subject: [PATCH 001/103] Move common logic into a separate app --- cvat/apps/engine/rq.py | 132 +++---------- cvat/apps/engine/serializers.py | 15 +- cvat/apps/engine/urls.py | 1 - cvat/apps/engine/views.py | 213 --------------------- cvat/apps/redis_handler/background.py | 126 +++++++++++++ cvat/apps/redis_handler/rq.py | 81 ++++++++ cvat/apps/redis_handler/urls.py | 15 ++ cvat/apps/redis_handler/views.py | 256 ++++++++++++++++++++++++++ cvat/settings/base.py | 10 + cvat/urls.py | 1 + 10 files changed, 523 insertions(+), 327 deletions(-) create mode 100644 cvat/apps/redis_handler/background.py create mode 100644 cvat/apps/redis_handler/rq.py create mode 100644 cvat/apps/redis_handler/urls.py create mode 100644 cvat/apps/redis_handler/views.py diff --git a/cvat/apps/engine/rq.py b/cvat/apps/engine/rq.py index 246e79a0f468..6e37dded0cc9 100644 --- a/cvat/apps/engine/rq.py +++ b/cvat/apps/engine/rq.py @@ -48,6 +48,7 @@ class RequestField: STATUS = "status" PROGRESS = "progress" TASK_PROGRESS = "task_progress" + HIDDEN = "hidden" # export specific fields RESULT_URL = "result_url" RESULT = "result" @@ -206,6 +207,8 @@ def request(self): task_id: int | None = ImmutableRQMetaAttribute(RQJobMetaField.TASK_ID, optional=True) job_id: int | None = ImmutableRQMetaAttribute(RQJobMetaField.JOB_ID, optional=True) + hidden: bool | None = ImmutableRQMetaAttribute(RQJobMetaField.HIDDEN, optional=True) + # mutable && optional fields progress: float | None = MutableRQMetaAttribute( RQJobMetaField.PROGRESS, validator=lambda x: isinstance(x, float), optional=True @@ -227,6 +230,7 @@ def build( *, request: ExtendedRequest, db_obj: Model | None, + hidden: bool | None = None, ): # to prevent circular import from cvat.apps.events.handlers import job_id, organization_slug, task_id @@ -255,6 +259,7 @@ def build( RQJobMetaField.PROJECT_ID: pid, RQJobMetaField.TASK_ID: tid, RQJobMetaField.JOB_ID: jid, + **({RQJobMetaField.HIDDEN: hidden} if hidden is not None else {}), } @@ -315,42 +320,30 @@ def is_rq_job_owner(rq_job: RQJob, user_id: int) -> bool: return BaseRQMeta.for_job(rq_job).user.id == user_id -@attrs.frozen() -class RQId: - action: RequestAction = attrs.field(validator=attrs.validators.instance_of(RequestAction)) - target: RequestTarget = attrs.field(validator=attrs.validators.instance_of(RequestTarget)) - identifier: Union[int, UUID] = attrs.field(validator=attrs.validators.instance_of((int, UUID))) - subresource: Optional[RequestSubresource] = attrs.field( - validator=attrs.validators.optional(attrs.validators.instance_of(RequestSubresource)), - kw_only=True, - default=None, - ) - user_id: Optional[int] = attrs.field( - validator=attrs.validators.optional(attrs.validators.instance_of(int)), - kw_only=True, - default=None, - ) - format: Optional[str] = attrs.field( - validator=attrs.validators.optional(attrs.validators.instance_of(str)), - kw_only=True, - default=None, - ) +# TODO: +from cvat.apps.redis_handler.rq import RQId + + +class ExportRQId(RQId): + pass - _OPTIONAL_FIELD_REQUIREMENTS = { - RequestAction.AUTOANNOTATE: {"subresource": False, "format": False, "user_id": False}, - RequestAction.CREATE: {"subresource": False, "format": False, "user_id": False}, - RequestAction.EXPORT: {"subresource": True, "user_id": True}, - RequestAction.IMPORT: {"subresource": True, "format": False, "user_id": False}, - } - - def __attrs_post_init__(self) -> None: - for field, req in self._OPTIONAL_FIELD_REQUIREMENTS[self.action].items(): - if req: - if getattr(self, field) is None: - raise ValueError(f"{field} is required for the {self.action} action") - else: - if getattr(self, field) is not None: - raise ValueError(f"{field} is not allowed for the {self.action} action") + # TODO: format, user_id, subresource + + # subresource: Optional[RequestSubresource] = attrs.field( + # validator=attrs.validators.optional(attrs.validators.instance_of(RequestSubresource)), + # kw_only=True, + # default=None, + # ) + # user_id: Optional[int] = attrs.field( + # validator=attrs.validators.optional(attrs.validators.instance_of(int)), + # kw_only=True, + # default=None, + # ) + # format: Optional[str] = attrs.field( + # validator=attrs.validators.optional(attrs.validators.instance_of(str)), + # kw_only=True, + # default=None, + # ) # RQ ID templates: # autoannotate:task- @@ -359,75 +352,6 @@ def __attrs_post_init__(self) -> None: # export:---in--format-by- # export:--backup-by- - def render( - self, - ) -> str: - common_prefix = f"{self.action}:{self.target}-{self.identifier}" - - if RequestAction.IMPORT == self.action: - return f"{common_prefix}-{self.subresource}" - elif RequestAction.EXPORT == self.action: - if self.format is None: - return f"{common_prefix}-{self.subresource}-by-{self.user_id}" - - format_to_be_used_in_urls = self.format.replace(" ", "_").replace(".", "@") - return f"{common_prefix}-{self.subresource}-in-{format_to_be_used_in_urls}-format-by-{self.user_id}" - elif self.action in {RequestAction.CREATE, RequestAction.AUTOANNOTATE}: - return common_prefix - else: - assert False, f"Unsupported action {self.action!r} was found" - - @staticmethod - def parse(rq_id: str) -> RQId: - identifier: Optional[Union[UUID, int]] = None - subresource: Optional[RequestSubresource] = None - user_id: Optional[int] = None - anno_format: Optional[str] = None - - try: - action_and_resource, unparsed = rq_id.split("-", maxsplit=1) - action_str, target_str = action_and_resource.split(":") - action = RequestAction(action_str) - target = RequestTarget(target_str) - - if action in {RequestAction.CREATE, RequestAction.AUTOANNOTATE}: - identifier = unparsed - elif RequestAction.IMPORT == action: - identifier, subresource_str = unparsed.rsplit("-", maxsplit=1) - subresource = RequestSubresource(subresource_str) - else: # action == export - identifier, subresource_str, unparsed = unparsed.split("-", maxsplit=2) - subresource = RequestSubresource(subresource_str) - - if RequestSubresource.BACKUP == subresource: - _, user_id = unparsed.split("-") - else: - unparsed, _, user_id = unparsed.rsplit("-", maxsplit=2) - # remove prefix(in-), suffix(-format) and restore original format name - # by replacing special symbols: "_" -> " ", "@" -> "." - anno_format = unparsed[3:-7].replace("_", " ").replace("@", ".") - - if identifier is not None: - if identifier.isdigit(): - identifier = int(identifier) - else: - identifier = UUID(identifier) - - if user_id is not None: - user_id = int(user_id) - - return RQId( - action=action, - target=target, - identifier=identifier, - subresource=subresource, - user_id=user_id, - format=anno_format, - ) - - except Exception as ex: - raise ValueError(f"The {rq_id!r} RQ ID cannot be parsed: {str(ex)}") from ex - def define_dependent_job( queue: DjangoRQ, diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 3c9960a384be..eb7d4d2307dd 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -41,7 +41,7 @@ from cvat.apps.engine.log import ServerLogManager from cvat.apps.engine.model_utils import bulk_create from cvat.apps.engine.permissions import TaskPermission -from cvat.apps.engine.rq import BaseRQMeta, ExportRQMeta, ImportRQMeta, RequestAction, RQId +from cvat.apps.engine.rq import BaseRQMeta, ExportRQMeta, ImportRQMeta, RequestAction from cvat.apps.engine.task_validation import HoneypotFrameSelector from cvat.apps.engine.utils import ( CvatChunkTimestampMismatchError, @@ -55,6 +55,7 @@ take_by, ) from cvat.apps.lambda_manager.rq import LambdaRQMeta +from cvat.apps.redis_handler.rq import RQId from utils.dataset_manifest import ImageManifestManager slogger = ServerLogManager(__name__) @@ -3511,12 +3512,8 @@ def to_representation(self, rq_job: RQJob) -> dict[str, Any]: base_rq_job_meta = BaseRQMeta.for_job(rq_job) representation = { - "type": ":".join( - [ - parsed_rq_id.action, - parsed_rq_id.subresource or parsed_rq_id.target, - ] - ), + # TODO: update to action + subresource in export jobs + "type": parsed_rq_id.type, "target": parsed_rq_id.target, "project_id": base_rq_job_meta.project_id, "task_id": base_rq_job_meta.task_id, @@ -3525,7 +3522,7 @@ def to_representation(self, rq_job: RQJob) -> dict[str, Any]: if parsed_rq_id.action == RequestAction.AUTOANNOTATE: representation["function_id"] = LambdaRQMeta.for_job(rq_job).function_id elif parsed_rq_id.action in (RequestAction.IMPORT, RequestAction.EXPORT): - representation["format"] = parsed_rq_id.format + representation["format"] = parsed_rq_id.extra["format"] # todo: refactor return representation @@ -3610,7 +3607,7 @@ def to_representation(self, rq_job: RQJob) -> dict[str, Any]: if ( rq_job.parsed_rq_id.action == models.RequestAction.IMPORT - and rq_job.parsed_rq_id.subresource == models.RequestSubresource.BACKUP + and rq_job.parsed_rq_id.extra["subresource"] == models.RequestSubresource.BACKUP ): representation["result_id"] = rq_job.return_value() diff --git a/cvat/apps/engine/urls.py b/cvat/apps/engine/urls.py index 95888f23e2ef..7c0408b6358e 100644 --- a/cvat/apps/engine/urls.py +++ b/cvat/apps/engine/urls.py @@ -23,7 +23,6 @@ router.register('cloudstorages', views.CloudStorageViewSet) router.register('assets', views.AssetsViewSet) router.register('guides', views.AnnotationGuidesViewSet) -router.register('requests', views.RequestViewSet, basename="request") urlpatterns = [ # Entry point for a client diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 308bfabb2ab3..c7e5002170c6 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -3625,216 +3625,3 @@ def _import_project_dataset( serializer.is_valid(raise_exception=True) return Response(serializer.data, status=status.HTTP_202_ACCEPTED) - -@extend_schema(tags=['requests']) -@extend_schema_view( - list=extend_schema( - summary='List requests', - responses={ - '200': RequestSerializer(many=True), - } - ), - retrieve=extend_schema( - summary='Get request details', - responses={ - '200': RequestSerializer, - } - ), -) -class RequestViewSet(viewsets.GenericViewSet): - # FUTURE-TODO: support re-enqueue action - # FUTURE-TODO: implement endpoint to download result file - SUPPORTED_QUEUES = ( - settings.CVAT_QUEUES.IMPORT_DATA.value, - settings.CVAT_QUEUES.EXPORT_DATA.value, - ) - - serializer_class = RequestSerializer - iam_organization_field = None - filter_backends = [ - NonModelSimpleFilter, - NonModelJsonLogicFilter, - NonModelOrderingFilter, - ] - - ordering_fields = ['created_date', 'status', 'action'] - ordering = '-created_date' - - filter_fields = [ - # RQ job fields - 'status', - # derivatives fields (from meta) - 'project_id', - 'task_id', - 'job_id', - # derivatives fields (from parsed rq_id) - 'action', - 'target', - 'subresource', - 'format', - ] - - simple_filters = filter_fields + ['org'] - - lookup_fields = { - 'created_date': 'created_at', - 'action': 'parsed_rq_id.action', - 'target': 'parsed_rq_id.target', - 'subresource': 'parsed_rq_id.subresource', - 'format': 'parsed_rq_id.format', - 'status': 'get_status', - 'project_id': 'meta.project_id', - 'task_id': 'meta.task_id', - 'job_id': 'meta.job_id', - 'org': 'meta.org_slug', - } - - SchemaField = namedtuple('SchemaField', ['type', 'choices'], defaults=(None,)) - - simple_filters_schema = { - 'status': SchemaField('string', RequestStatus.choices), - 'project_id': SchemaField('integer'), - 'task_id': SchemaField('integer'), - 'job_id': SchemaField('integer'), - 'action': SchemaField('string', RequestAction.choices), - 'target': SchemaField('string', RequestTarget.choices), - 'subresource': SchemaField('string', RequestSubresource.choices), - 'format': SchemaField('string'), - 'org': SchemaField('string'), - } - - def get_queryset(self): - return None - - @property - def queues(self) -> Iterable[DjangoRQ]: - return (django_rq.get_queue(queue_name) for queue_name in self.SUPPORTED_QUEUES) - - def _get_rq_jobs_from_queue(self, queue: DjangoRQ, user_id: int) -> list[RQJob]: - job_ids = set(queue.get_job_ids() + - queue.started_job_registry.get_job_ids() + - queue.finished_job_registry.get_job_ids() + - queue.failed_job_registry.get_job_ids() + - queue.deferred_job_registry.get_job_ids() - ) - jobs = [] - for job in queue.job_class.fetch_many(job_ids, queue.connection): - if job and is_rq_job_owner(job, user_id): - try: - parsed_rq_id = RQId.parse(job.id) - except Exception: # nosec B112 - continue - job.parsed_rq_id = parsed_rq_id - jobs.append(job) - - return jobs - - - def _get_rq_jobs(self, user_id: int) -> list[RQJob]: - """ - Get all RQ jobs for a specific user and return them as a list of RQJob objects. - - Parameters: - user_id (int): The ID of the user for whom to retrieve jobs. - - Returns: - List[RQJob]: A list of RQJob objects representing all jobs for the specified user. - """ - all_jobs = [] - for queue in self.queues: - jobs = self._get_rq_jobs_from_queue(queue, user_id) - all_jobs.extend(jobs) - - return all_jobs - - def _get_rq_job_by_id(self, rq_id: str) -> Optional[RQJob]: - """ - Get a RQJob by its ID from the queues. - - Args: - rq_id (str): The ID of the RQJob to retrieve. - - Returns: - Optional[RQJob]: The retrieved RQJob, or None if not found. - """ - try: - parsed_rq_id = RQId.parse(rq_id) - except Exception: - return None - - job: Optional[RQJob] = None - - for queue in self.queues: - job = queue.fetch_job(rq_id) - if job: - job.parsed_rq_id = parsed_rq_id - break - - return job - - def _handle_redis_exceptions(func): - @functools.wraps(func) - def wrapper(*args, **kwargs): - try: - return func(*args, **kwargs) - except RedisConnectionError as ex: - msg = 'Redis service is not available' - slogger.glob.exception(f'{msg}: {str(ex)}') - return Response(msg, status=status.HTTP_503_SERVICE_UNAVAILABLE) - return wrapper - - @method_decorator(never_cache) - @_handle_redis_exceptions - def retrieve(self, request: ExtendedRequest, pk: str): - job = self._get_rq_job_by_id(pk) - - if not job: - return HttpResponseNotFound("There is no request with specified id") - - self.check_object_permissions(request, job) - - serializer = self.get_serializer(job, context={'request': request}) - return Response(data=serializer.data, status=status.HTTP_200_OK) - - @method_decorator(never_cache) - @_handle_redis_exceptions - def list(self, request: ExtendedRequest): - user_id = request.user.id - user_jobs = self._get_rq_jobs(user_id) - - filtered_jobs = self.filter_queryset(user_jobs) - - page = self.paginate_queryset(filtered_jobs) - if page is not None: - serializer = self.get_serializer(page, many=True, context={'request': request}) - return self.get_paginated_response(serializer.data) - - serializer = self.get_serializer(filtered_jobs, many=True, context={'request': request}) - return Response(data=serializer.data, status=status.HTTP_200_OK) - - @extend_schema( - summary='Cancel request', - request=None, - responses={ - '200': OpenApiResponse(description='The request has been cancelled'), - }, - ) - @method_decorator(never_cache) - @action(detail=True, methods=['POST'], url_path='cancel') - @_handle_redis_exceptions - def cancel(self, request: ExtendedRequest, pk: str): - rq_job = self._get_rq_job_by_id(pk) - - if not rq_job: - return HttpResponseNotFound("There is no request with specified id") - - self.check_object_permissions(request, rq_job) - - if rq_job.get_status(refresh=False) not in {RQJobStatus.QUEUED, RQJobStatus.DEFERRED}: - return HttpResponseBadRequest("Only requests that have not yet been started can be cancelled") - - # FUTURE-TODO: race condition is possible here - rq_job.cancel(enqueue_dependents=settings.ONE_RUNNING_JOB_IN_QUEUE_PER_USER) - rq_job.delete() - - return Response(status=status.HTTP_200_OK) diff --git a/cvat/apps/redis_handler/background.py b/cvat/apps/redis_handler/background.py new file mode 100644 index 000000000000..e90e4bb1e559 --- /dev/null +++ b/cvat/apps/redis_handler/background.py @@ -0,0 +1,126 @@ +# Copyright (C) CVAT.ai Corporation +# +# SPDX-License-Identifier: MIT + +from abc import ABCMeta, abstractmethod +from typing import ClassVar, Optional + +import django_rq +from django_rq.queues import DjangoRQ, DjangoScheduler +from rest_framework import status +from rest_framework.response import Response +from rq.job import Job as RQJob +from rq.job import JobStatus as RQJobStatus + +from cvat.apps.engine.log import ServerLogManager +from cvat.apps.engine.serializers import RqIdSerializer +from cvat.apps.engine.types import ExtendedRequest +from cvat.apps.engine.utils import get_rq_lock_for_job + +slogger = ServerLogManager(__name__) +from django.conf import settings + +from cvat.apps.engine.models import Job, Project, RequestSubresource, RequestTarget, Task + +# TODO: describe here protocol + + +class AbstractRQJobManager(metaclass=ABCMeta): + QUEUE_NAME: ClassVar[str] + SUPPORTED_RESOURCES: ClassVar[set[RequestSubresource]] + + @classmethod + def get_queue(cls) -> DjangoRQ: + return django_rq.get_queue(cls.QUEUE_NAME) + + @classmethod + # @abstractmethod + def validate_rq_id(rq_id: str, /) -> None: ... + + @classmethod + def get_job_by_id(cls, rq_id: str, /, *, validate: bool = True) -> RQJob | None: + if validate: + try: + cls.validate_rq_id(rq_id) + except Exception: + return None + + queue = cls.get_queue() + return queue.fetch_job(rq_id) + + def __init__( + self, + db_instance: Project | Task | Job, + request: ExtendedRequest, + ) -> None: + """ + Args: + db_instance (Union[models.Project, models.Task, models.Job]): Model instance + request (ExtendedRequest): Incoming HTTP request + """ + self.db_instance = db_instance + self.request = request + self.resource = db_instance.__class__.__name__.lower() + if self.resource not in self.SUPPORTED_RESOURCES: + raise ValueError("Unexpected type of db_instance: {}".format(type(db_instance))) + + def handle_existing_rq_job( + self, rq_job: Optional[RQJob], queue: DjangoRQ + ) -> Optional[Response]: + if not rq_job: + return None + + rq_job_status = rq_job.get_status(refresh=False) + + if rq_job_status in {RQJobStatus.STARTED, RQJobStatus.QUEUED}: + return Response( + data="Request is being processed", + status=status.HTTP_409_CONFLICT, + ) + + if rq_job_status == RQJobStatus.DEFERRED: + rq_job.cancel(enqueue_dependents=settings.ONE_RUNNING_JOB_IN_QUEUE_PER_USER) + + if rq_job_status == RQJobStatus.SCHEDULED: + scheduler: DjangoScheduler = django_rq.get_scheduler(queue.name, queue=queue) + # remove the job id from the set with scheduled keys + scheduler.cancel(rq_job) + rq_job.cancel(enqueue_dependents=settings.ONE_RUNNING_JOB_IN_QUEUE_PER_USER) + + rq_job.delete() + return None + + def validate_request(self) -> Response | None: + """Hook to run some validations before processing a request""" + + def after_processing(self) -> None: + """Hook to run some actions (e.g. collect events) after processing a request""" + + @abstractmethod + def setup_background_job(self, queue: DjangoRQ, rq_id: str) -> None: ... + + @abstractmethod + def build_rq_id(self): ... + + def get_response(self, rq_id: str) -> Response: + serializer = RqIdSerializer({"rq_id": rq_id}) + return Response(serializer.data, status=status.HTTP_202_ACCEPTED) + + def process(self) -> Response: + if invalid_response := self.validate_request(): + return invalid_response + + queue: DjangoRQ = django_rq.get_queue(self.QUEUE_NAME) + rq_id = self.build_rq_id() + + # ensure that there is no race condition when processing parallel requests + with get_rq_lock_for_job(queue, rq_id): + rq_job = queue.fetch_job(rq_id) + + if response := self.handle_existing_rq_job(rq_job, queue): + return response + + self.setup_background_job(queue, rq_id) + + self.after_processing() + return self.get_response(rq_id) diff --git a/cvat/apps/redis_handler/rq.py b/cvat/apps/redis_handler/rq.py new file mode 100644 index 000000000000..8b8c7e1ab478 --- /dev/null +++ b/cvat/apps/redis_handler/rq.py @@ -0,0 +1,81 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, ClassVar +from uuid import UUID + +import attrs + +from cvat.apps.engine.types import ExtendedRequest + +if TYPE_CHECKING: + from django.contrib.auth.models import User + +import base64 + +from django.conf import settings + + +def convert_id(value: int | str | UUID) -> int | UUID: + if isinstance(value, (int, UUID)): + return value + + assert isinstance(value, str) + + if value.isnumeric(): + return int(value) + + return UUID(value) + + +@attrs.frozen(kw_only=True) +class RQId: + FIELD_SEP: ClassVar[str] = "&" + KEY_VAL_SEP: ClassVar[str] = "=" + + queue: settings.CVAT_QUEUES = attrs.field(converter=settings.CVAT_QUEUES) + action: str = attrs.field(validator=attrs.validators.instance_of(str)) + target: str = attrs.field(validator=attrs.validators.instance_of(str)) + id: int | UUID = attrs.field( + validator=attrs.validators.instance_of((int, UUID)), + converter=convert_id, + ) + + # todo: dot access + extra: dict | None = attrs.field(default=None) + + @property + def type(self) -> str: + return ":".join([self.action, self.target]) + + def render(self) -> str: + bytes = self.FIELD_SEP.join( + [ + self.KEY_VAL_SEP.join([k, v]) + for k, v in { + "queue": self.queue.value, + "action": self.action, + "target": self.target, + "id": str(self.id), + **(self.extra or {}), + }.items() + ] + ).encode() + + return base64.b64encode(bytes).decode() + + # TODO: handle exceptions + @classmethod + def parse(cls, rq_id: str, /) -> RQId: + decoded_rq_id = base64.b64decode(rq_id).decode() + + keys = set(attrs.fields_dict(cls).keys()) - {"extra"} + params = {} + + for pair in decoded_rq_id.split(RQId.FIELD_SEP): + key, value = pair.split(RQId.KEY_VAL_SEP, maxsplit=1) + if key in keys: + params[key] = value + else: + params.setdefault("extra", {})[key] = value + + return cls(**params) diff --git a/cvat/apps/redis_handler/urls.py b/cvat/apps/redis_handler/urls.py new file mode 100644 index 000000000000..7f742986978c --- /dev/null +++ b/cvat/apps/redis_handler/urls.py @@ -0,0 +1,15 @@ +# Copyright (C) CVAT.ai Corporation +# +# SPDX-License-Identifier: MIT + +from django.urls import include, path +from rest_framework import routers + +from . import views + +router = routers.DefaultRouter(trailing_slash=False) +router.register("requests", views.RequestViewSet, basename="request") + +urlpatterns = [ + path("api/", include(router.urls)), +] diff --git a/cvat/apps/redis_handler/views.py b/cvat/apps/redis_handler/views.py new file mode 100644 index 000000000000..1277dd8d392b --- /dev/null +++ b/cvat/apps/redis_handler/views.py @@ -0,0 +1,256 @@ +import functools +from collections import namedtuple +from collections.abc import Iterable +from typing import Optional + +import django_rq +from django.conf import settings +from django.http import HttpResponseBadRequest, HttpResponseNotFound +from django.utils.decorators import method_decorator +from django.views.decorators.cache import never_cache +from django_rq.queues import DjangoRQ +from drf_spectacular.utils import OpenApiResponse, extend_schema, extend_schema_view +from redis.exceptions import ConnectionError as RedisConnectionError +from rest_framework import status, viewsets +from rest_framework.decorators import action +from rest_framework.exceptions import ValidationError +from rest_framework.response import Response +from rq.job import Job as RQJob +from rq.job import JobStatus as RQJobStatus + +from cvat.apps.engine.filters import ( + NonModelJsonLogicFilter, + NonModelOrderingFilter, + NonModelSimpleFilter, +) +from cvat.apps.engine.log import ServerLogManager +from cvat.apps.engine.models import RequestAction, RequestStatus, RequestSubresource, RequestTarget +from cvat.apps.engine.rq import is_rq_job_owner +from cvat.apps.engine.serializers import RequestSerializer +from cvat.apps.engine.types import ExtendedRequest +from cvat.apps.redis_handler.rq import RQId + +slogger = ServerLogManager(__name__) + + +@extend_schema(tags=["requests"]) +@extend_schema_view( + list=extend_schema( + summary="List requests", + responses={ + "200": RequestSerializer(many=True), + }, + ), + retrieve=extend_schema( + summary="Get request details", + responses={ + "200": RequestSerializer, + }, + ), +) +class RequestViewSet(viewsets.GenericViewSet): + # FUTURE-TODO: support re-enqueue action + SUPPORTED_QUEUES = { + queue_name + for queue_name, queue_conf in settings.RQ_QUEUES.items() + if queue_conf.get("VISIBLE_VIA_REQUESTS_API") + } + + serializer_class = RequestSerializer + iam_organization_field = None + filter_backends = [ + NonModelSimpleFilter, + NonModelJsonLogicFilter, + NonModelOrderingFilter, + ] + + ordering_fields = ["created_date", "status", "action"] + ordering = "-created_date" + + # TODO: fix filters + filter_fields = [ + # RQ job fields + "status", + # derivatives fields (from meta) + "project_id", + "task_id", + "job_id", + # derivatives fields (from parsed rq_id) + "action", + "target", + # "subresource", + # "format", + ] + + simple_filters = filter_fields + ["org"] + + lookup_fields = { + "created_date": "created_at", + "action": "parsed_rq_id.action", + "target": "parsed_rq_id.target", + # "subresource": "parsed_rq_id.subresource", + # "format": "parsed_rq_id.format", + "status": "get_status", + "project_id": "meta.project_id", + "task_id": "meta.task_id", + "job_id": "meta.job_id", + "org": "meta.org_slug", + } + + SchemaField = namedtuple("SchemaField", ["type", "choices"], defaults=(None,)) + + simple_filters_schema = { + "status": SchemaField("string", RequestStatus.choices), + "project_id": SchemaField("integer"), + "task_id": SchemaField("integer"), + "job_id": SchemaField("integer"), + "action": SchemaField("string", RequestAction.choices), + "target": SchemaField("string", RequestTarget.choices), + # "subresource": SchemaField("string", RequestSubresource.choices), + # "format": SchemaField("string"), + "org": SchemaField("string"), + } + + def get_queryset(self): + return None + + @property + def queues(self) -> Iterable[DjangoRQ]: + return (django_rq.get_queue(queue_name) for queue_name in self.SUPPORTED_QUEUES) + + def _get_rq_jobs_from_queue(self, queue: DjangoRQ, user_id: int) -> list[RQJob]: + job_ids = set( + queue.get_job_ids() + + queue.started_job_registry.get_job_ids() + + queue.finished_job_registry.get_job_ids() + + queue.failed_job_registry.get_job_ids() + + queue.deferred_job_registry.get_job_ids() + ) + jobs = [] + for job in queue.job_class.fetch_many(job_ids, queue.connection): + # TODO: move filtration by owner? + if job and is_rq_job_owner(job, user_id): + try: + parsed_rq_id = RQId.parse(job.id) + except Exception: # nosec B112 + continue + job.parsed_rq_id = parsed_rq_id + jobs.append(job) + + return jobs + + def _get_rq_jobs(self, user_id: int) -> list[RQJob]: + """ + Get all RQ jobs for a specific user and return them as a list of RQJob objects. + + Parameters: + user_id (int): The ID of the user for whom to retrieve jobs. + + Returns: + List[RQJob]: A list of RQJob objects representing all jobs for the specified user. + """ + all_jobs = [] + # TODO: optimize filtration here + for queue in self.queues: + jobs = self._get_rq_jobs_from_queue(queue, user_id) + all_jobs.extend(jobs) + + return all_jobs + + def _get_rq_job_by_id(self, rq_id: str) -> Optional[RQJob]: + """ + Get a RQJob by its ID from the queues. + + Args: + rq_id (str): The ID of the RQJob to retrieve. + + Returns: + Optional[RQJob]: The retrieved RQJob, or None if not found. + """ + try: + parsed_rq_id = RQId.parse(rq_id) + except Exception as ex: + return None + + job: Optional[RQJob] = None + + if parsed_rq_id.queue.value not in self.SUPPORTED_QUEUES: + raise ValidationError("Unsupported queue") + + queue: DjangoRQ = django_rq.get_queue(parsed_rq_id.queue.value) + + job = queue.fetch_job(rq_id) + if job: + job.parsed_rq_id = parsed_rq_id + + return job + + def _handle_redis_exceptions(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except RedisConnectionError as ex: + msg = "Redis service is not available" + slogger.glob.exception(f"{msg}: {str(ex)}") + return Response(msg, status=status.HTTP_503_SERVICE_UNAVAILABLE) + + return wrapper + + @method_decorator(never_cache) + @_handle_redis_exceptions + def retrieve(self, request: ExtendedRequest, pk: str): + job = self._get_rq_job_by_id(pk) + + if not job: + return HttpResponseNotFound("There is no request with specified id") + + self.check_object_permissions(request, job) + + serializer = self.get_serializer(job, context={"request": request}) + return Response(data=serializer.data, status=status.HTTP_200_OK) + + @method_decorator(never_cache) + @_handle_redis_exceptions + def list(self, request: ExtendedRequest): + user_id = request.user.id + user_jobs = self._get_rq_jobs(user_id) + + filtered_jobs = self.filter_queryset(user_jobs) + + page = self.paginate_queryset(filtered_jobs) + if page is not None: + serializer = self.get_serializer(page, many=True, context={"request": request}) + return self.get_paginated_response(serializer.data) + + serializer = self.get_serializer(filtered_jobs, many=True, context={"request": request}) + return Response(data=serializer.data, status=status.HTTP_200_OK) + + @extend_schema( + summary="Cancel request", + request=None, + responses={ + "200": OpenApiResponse(description="The request has been cancelled"), + }, + ) + @method_decorator(never_cache) + @action(detail=True, methods=["POST"], url_path="cancel") + @_handle_redis_exceptions + def cancel(self, request: ExtendedRequest, pk: str): + rq_job = self._get_rq_job_by_id(pk) + + if not rq_job: + return HttpResponseNotFound("There is no request with specified id") + + self.check_object_permissions(request, rq_job) + + if rq_job.get_status(refresh=False) not in {RQJobStatus.QUEUED, RQJobStatus.DEFERRED}: + return HttpResponseBadRequest( + "Only requests that have not yet been started can be cancelled" + ) + + # FUTURE-TODO: race condition is possible here + rq_job.cancel(enqueue_dependents=settings.ONE_RUNNING_JOB_IN_QUEUE_PER_USER) + rq_job.delete() + + return Response(status=status.HTTP_200_OK) diff --git a/cvat/settings/base.py b/cvat/settings/base.py index 80e2474cae36..731ba4fd6d5c 100644 --- a/cvat/settings/base.py +++ b/cvat/settings/base.py @@ -297,42 +297,52 @@ class CVAT_QUEUES(Enum): CVAT_QUEUES.IMPORT_DATA.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "4h", + "VISIBLE_VIA_REQUESTS_API": True, }, CVAT_QUEUES.EXPORT_DATA.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "4h", + "VISIBLE_VIA_REQUESTS_API": True, }, CVAT_QUEUES.AUTO_ANNOTATION.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "24h", + "VISIBLE_VIA_REQUESTS_API": True, }, CVAT_QUEUES.WEBHOOKS.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "1h", + "VISIBLE_VIA_REQUESTS_API": False, }, CVAT_QUEUES.NOTIFICATIONS.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "1h", + "VISIBLE_VIA_REQUESTS_API": False, }, CVAT_QUEUES.QUALITY_REPORTS.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "1h", + "VISIBLE_VIA_REQUESTS_API": True, }, CVAT_QUEUES.ANALYTICS_REPORTS.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "1h", + "VISIBLE_VIA_REQUESTS_API": True, }, CVAT_QUEUES.CLEANING.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "2h", + "VISIBLE_VIA_REQUESTS_API": False, }, CVAT_QUEUES.CHUNKS.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "5m", + "VISIBLE_VIA_REQUESTS_API": False, }, CVAT_QUEUES.CONSENSUS.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "1h", + "VISIBLE_VIA_REQUESTS_API": True, }, } diff --git a/cvat/urls.py b/cvat/urls.py index 770a5d0f3d43..05a343f42107 100644 --- a/cvat/urls.py +++ b/cvat/urls.py @@ -25,6 +25,7 @@ urlpatterns = [ path("admin/", admin.site.urls), path("", include("cvat.apps.engine.urls")), + path("", include("cvat.apps.redis_handler.urls")), path("django-rq/", include("django_rq.urls")), ] From f525c7b419cc31e998ad1e2efdde515be6cf3ad1 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 18 Mar 2025 09:52:07 +0100 Subject: [PATCH 002/103] Update quality control app --- cvat/apps/quality_control/permissions.py | 1 + cvat/apps/quality_control/quality_reports.py | 115 ++++++------------- cvat/apps/quality_control/views.py | 44 ++++--- 3 files changed, 64 insertions(+), 96 deletions(-) diff --git a/cvat/apps/quality_control/permissions.py b/cvat/apps/quality_control/permissions.py index 25677b2d0480..6e7c38944a88 100644 --- a/cvat/apps/quality_control/permissions.py +++ b/cvat/apps/quality_control/permissions.py @@ -102,6 +102,7 @@ def create(cls, request, view, obj, iam_context): return permissions def __init__(self, **kwargs): + # TODO: refactor if "rq_job_owner_id" in kwargs: self.rq_job_owner_id = int(kwargs.pop("rq_job_owner_id")) diff --git a/cvat/apps/quality_control/quality_reports.py b/cvat/apps/quality_control/quality_reports.py index fa2030410dcd..baaa6ab57054 100644 --- a/cvat/apps/quality_control/quality_reports.py +++ b/cvat/apps/quality_control/quality_reports.py @@ -10,22 +10,20 @@ from collections.abc import Hashable, Sequence from copy import deepcopy from functools import cached_property, partial -from typing import Any, Callable, Optional, TypeVar, Union, cast +from typing import Any, Callable, ClassVar, Optional, TypeVar, Union, cast import datumaro as dm import datumaro.components.annotations.matcher import datumaro.components.comparator import datumaro.util.annotation_util import datumaro.util.mask_tools -import django_rq import numpy as np -import rq from attrs import asdict, define, fields_dict from datumaro.util import dump_json, parse_json from django.conf import settings from django.db import transaction +from django.http import HttpResponseBadRequest from django_rq.queues import DjangoRQ as RqQueue -from rq.job import Job as RqJob from scipy.optimize import linear_sum_assignment from cvat.apps.dataset_manager.bindings import ( @@ -45,6 +43,7 @@ Image, Job, JobType, + RequestTarget, ShapeType, StageChoice, StatusChoice, @@ -62,6 +61,8 @@ AnnotationConflictType, AnnotationType, ) +from cvat.apps.redis_handler.background import AbstractRQJobManager +from cvat.apps.redis_handler.rq import RQId class Serializable: @@ -2264,95 +2265,49 @@ def generate_report(self) -> ComparisonReport: ) -class QualityReportUpdateManager: - _QUEUE_CUSTOM_JOB_PREFIX = "quality-check-" - _RQ_CUSTOM_QUALITY_CHECK_JOB_TYPE = "custom_quality_check" +class QualityReportUpdateManager(AbstractRQJobManager): _JOB_RESULT_TTL = 120 + _JOB_FAILURE_TTL = _JOB_RESULT_TTL - def _get_queue(self) -> RqQueue: - return django_rq.get_queue(settings.CVAT_QUEUES.QUALITY_REPORTS.value) + QUEUE_NAME = settings.CVAT_QUEUES.QUALITY_REPORTS.value + SUPPORTED_RESOURCES: ClassVar[set[RequestTarget]] = {RequestTarget.TASK} - def _make_custom_quality_check_job_id(self, task_id: int, user_id: int) -> str: - # FUTURE-TODO: it looks like job ID template should not include user_id because: - # 1. There is no need to compute quality reports several times for different users - # 2. Each user (not only rq job owner) that has permission to access a task should - # be able to check the status of the computation process - return f"{self._QUEUE_CUSTOM_JOB_PREFIX}task-{task_id}-user-{user_id}" + def build_rq_id(self): + return RQId( + queue=self.QUEUE_NAME, + action="compute", + target=self.resource, + id=self.db_instance.pk, + ).render() - class QualityReportsNotAvailable(Exception): - pass + def validate_request(self): + if self.db_instance.dimension != DimensionType.DIM_2D: + return HttpResponseBadRequest("Quality reports are only supported in 2d tasks") - def _check_quality_reporting_available(self, task: Task): - if task.dimension != DimensionType.DIM_2D: - raise self.QualityReportsNotAvailable("Quality reports are only supported in 2d tasks") - - gt_job = task.gt_job + gt_job = self.db_instance.gt_job if gt_job is None or not ( gt_job.stage == StageChoice.ACCEPTANCE and gt_job.state == StatusChoice.COMPLETED ): - raise self.QualityReportsNotAvailable( + return HttpResponseBadRequest( "Quality reports require a Ground Truth job in the task " f"at the {StageChoice.ACCEPTANCE} stage " f"and in the {StatusChoice.COMPLETED} state" ) - class JobAlreadyExists(QualityReportsNotAvailable): - def __str__(self): - return "Quality computation job for this task already enqueued" - - def schedule_custom_quality_check_job( - self, request: ExtendedRequest, task: Task, *, user_id: int - ) -> str: - """ - Schedules a quality report computation job, supposed for updates by a request. - """ - - self._check_quality_reporting_available(task) - - queue = self._get_queue() - rq_id = self._make_custom_quality_check_job_id(task_id=task.id, user_id=user_id) - - # ensure that there is no race condition when processing parallel requests - with get_rq_lock_for_job(queue, rq_id): - if rq_job := queue.fetch_job(rq_id): - if rq_job.get_status(refresh=False) in ( - rq.job.JobStatus.QUEUED, - rq.job.JobStatus.STARTED, - rq.job.JobStatus.SCHEDULED, - rq.job.JobStatus.DEFERRED, - ): - raise self.JobAlreadyExists() - - rq_job.delete() - - with get_rq_lock_by_user(queue, user_id=user_id): - dependency = define_dependent_job( - queue, user_id=user_id, rq_id=rq_id, should_be_dependent=True - ) - - queue.enqueue( - self._check_task_quality, - task_id=task.id, - job_id=rq_id, - meta=BaseRQMeta.build(request=request, db_obj=task), - result_ttl=self._JOB_RESULT_TTL, - failure_ttl=self._JOB_RESULT_TTL, - depends_on=dependency, - ) - - return rq_id - - def get_quality_check_job(self, rq_id: str) -> Optional[RqJob]: - queue = self._get_queue() - rq_job = queue.fetch_job(rq_id) - - if rq_job and not self.is_custom_quality_check_job(rq_job): - rq_job = None - - return rq_job - - def is_custom_quality_check_job(self, rq_job: RqJob) -> bool: - return isinstance(rq_job.id, str) and rq_job.id.startswith(self._QUEUE_CUSTOM_JOB_PREFIX) + def setup_background_job(self, queue: RqQueue, rq_id: str) -> None: + user_id = self.request.user.id + + with get_rq_lock_by_user(queue, user_id=user_id): + dependency = define_dependent_job(queue, user_id=user_id, rq_id=rq_id) + queue.enqueue( + self._check_task_quality, + task_id=self.db_instance.pk, + job_id=rq_id, + meta=BaseRQMeta.build(request=self.request, db_obj=self.db_instance), + result_ttl=self._JOB_RESULT_TTL, + failure_ttl=self._JOB_FAILURE_TTL, + depends_on=dependency, + ) @classmethod @silk_profile() diff --git a/cvat/apps/quality_control/views.py b/cvat/apps/quality_control/views.py index 239ff6de8144..71cd22eb14be 100644 --- a/cvat/apps/quality_control/views.py +++ b/cvat/apps/quality_control/views.py @@ -3,9 +3,11 @@ # SPDX-License-Identifier: MIT import textwrap +from datetime import datetime from django.db.models import Q -from django.http import HttpResponse +from django.http import HttpResponse, HttpResponseNotFound +from django.utils import timezone from drf_spectacular.types import OpenApiTypes from drf_spectacular.utils import ( OpenApiParameter, @@ -224,6 +226,7 @@ def get_queryset(self): @extend_schema( operation_id="quality_create_report", summary="Create a quality report", + description="Deprecation warning: Do not use this endpoint ot check the report computation status", parameters=[ OpenApiParameter( CREATE_REPORT_RQ_ID_PARAMETER, @@ -234,6 +237,7 @@ def get_queryset(self): creation status. """ ), + deprecated=True, ) ], request=QualityReportCreateSerializer(required=False), @@ -273,22 +277,17 @@ def create(self, request, *args, **kwargs): except Task.DoesNotExist as ex: raise NotFound(f"Task {task_id} does not exist") from ex - try: - rq_id = qc.QualityReportUpdateManager().schedule_custom_quality_check_job( - request=request, task=task, user_id=request.user.id - ) - serializer = RqIdSerializer({"rq_id": rq_id}) - return Response(serializer.data, status=status.HTTP_202_ACCEPTED) - except qc.QualityReportUpdateManager.QualityReportsNotAvailable as ex: - raise ValidationError(str(ex)) + manager = qc.QualityReportUpdateManager(task, request) + return manager.process() else: + deprecation_timestamp = int(datetime(2025, 3, 17, tzinfo=timezone.utc).timestamp()) + response_headers = {"Deprecation": f"@{deprecation_timestamp}"} serializer = RqIdSerializer(data={"rq_id": rq_id}) serializer.is_valid(raise_exception=True) rq_id = serializer.validated_data["rq_id"] + rq_job = qc.QualityReportUpdateManager.get_job_by_id(rq_id) - report_manager = qc.QualityReportUpdateManager() - rq_job = report_manager.get_quality_check_job(rq_id) # FUTURE-TODO: move into permissions # and allow not only rq job owner to check the status if ( @@ -300,26 +299,36 @@ def create(self, request, *args, **kwargs): .allow ): # We should not provide job existence information to unauthorized users - raise NotFound("Unknown request id") + return HttpResponseNotFound("Unknown request id", headers=response_headers) rq_job_status = rq_job.get_status(refresh=False) if rq_job_status == RqJobStatus.FAILED: message = str(rq_job.exc_info) rq_job.delete() - raise ValidationError(message) + return Response( + message, status=status.HTTP_500_INTERNAL_SERVER_ERROR, headers=response_headers + ) + elif rq_job_status in ( RqJobStatus.QUEUED, RqJobStatus.STARTED, RqJobStatus.SCHEDULED, RqJobStatus.DEFERRED, ): - return Response(serializer.data, status=status.HTTP_202_ACCEPTED) + return Response( + serializer.data, status=status.HTTP_202_ACCEPTED, headers=response_headers + ) + elif rq_job_status == RqJobStatus.FINISHED: return_value = rq_job.return_value() rq_job.delete() if not return_value: - raise ValidationError("No report has been computed") + raise Response( + "No report has been computed", + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + headers=response_headers, + ) report = self.get_queryset().get(pk=return_value) report_serializer = QualityReportSerializer( @@ -328,7 +337,10 @@ def create(self, request, *args, **kwargs): return Response( data=report_serializer.data, status=status.HTTP_201_CREATED, - headers=self.get_success_headers(report_serializer.data), + headers={ + **self.get_success_headers(report_serializer.data), + **response_headers, + }, ) raise AssertionError(f"Unexpected rq job '{rq_id}' status '{rq_job_status}'") From 77274fd7396c3ea36026b701d88635d98ab0aa15 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 18 Mar 2025 09:54:29 +0100 Subject: [PATCH 003/103] Update consensus app --- cvat-core/src/server-proxy.ts | 125 +++++++++++------------ cvat-core/src/session.ts | 4 +- cvat-ui/src/actions/consensus-actions.ts | 11 +- cvat/apps/consensus/merging_manager.py | 98 +++++++----------- cvat/apps/consensus/views.py | 33 +++--- 5 files changed, 123 insertions(+), 148 deletions(-) diff --git a/cvat-core/src/server-proxy.ts b/cvat-core/src/server-proxy.ts index 241d09b2a8a9..bd0dc058776b 100644 --- a/cvat-core/src/server-proxy.ts +++ b/cvat-core/src/server-proxy.ts @@ -612,6 +612,57 @@ const defaultRequestConfig = { fetchAll: false, }; +async function getRequestsList(): Promise> { + const { backendAPI } = config; + const params = enableOrganization(); + + try { + const response = await fetchAll(`${backendAPI}/requests`, params); + + return response.results; + } catch (errorData) { + throw generateError(errorData); + } +} + +// Temporary solution for server availability problems +const retryTimeouts = [5000, 10000, 15000]; +async function getRequestStatus(rqID: string): Promise { + const { backendAPI } = config; + let retryCount = 0; + let lastError = null; + + while (retryCount < 3) { + try { + const response = await Axios.get(`${backendAPI}/requests/${rqID}`); + + return response.data; + } catch (errorData) { + lastError = generateError(errorData); + const { response } = errorData; + if (response && [502, 503, 504].includes(response.status)) { + const timeout = retryTimeouts[retryCount]; + await new Promise((resolve) => { setTimeout(resolve, timeout); }); + retryCount++; + } else { + throw generateError(errorData); + } + } + } + + throw lastError; +} + +async function cancelRequest(requestID): Promise { + const { backendAPI } = config; + + try { + await Axios.post(`${backendAPI}/requests/${requestID}/cancel`); + } catch (errorData) { + throw generateError(errorData); + } +} + async function serverRequest( url: string, data: object, requestConfig: ServerRequestConfig = defaultRequestConfig, @@ -768,30 +819,19 @@ async function deleteTask(id: number, organizationID: string | null = null): Pro } } -async function mergeConsensusJobs(id: number, instanceType: string): Promise { +async function mergeConsensusJobs(id: number, instanceType: string): Promise { const { backendAPI } = config; const url = `${backendAPI}/consensus/merges`; - const params = { - rq_id: null, - }; - const requestBody = { - task_id: undefined, - job_id: undefined, - }; + const requestBody = (instanceType === 'task') ? { task_id: id } : { job_id: id }; - if (instanceType === 'task') requestBody.task_id = id; - else requestBody.job_id = id; - - return new Promise((resolve, reject) => { + return new Promise((resolve, reject) => { async function request() { try { - const response = await Axios.post(url, requestBody, { params }); - params.rq_id = response.data.rq_id; + const response = await Axios.post(url, requestBody); + const rqID = response.data.rq_id; const { status } = response; if (status === 202) { - setTimeout(request, 3000); - } else if (status === 201) { - resolve(); + resolve(rqID); } else { reject(generateError(response)); } @@ -2304,57 +2344,6 @@ async function getAnalyticsReports( } } -async function getRequestsList(): Promise> { - const { backendAPI } = config; - const params = enableOrganization(); - - try { - const response = await fetchAll(`${backendAPI}/requests`, params); - - return response.results; - } catch (errorData) { - throw generateError(errorData); - } -} - -// Temporary solution for server availability problems -const retryTimeouts = [5000, 10000, 15000]; -async function getRequestStatus(rqID: string): Promise { - const { backendAPI } = config; - let retryCount = 0; - let lastError = null; - - while (retryCount < 3) { - try { - const response = await Axios.get(`${backendAPI}/requests/${rqID}`); - - return response.data; - } catch (errorData) { - lastError = generateError(errorData); - const { response } = errorData; - if (response && [502, 503, 504].includes(response.status)) { - const timeout = retryTimeouts[retryCount]; - await new Promise((resolve) => { setTimeout(resolve, timeout); }); - retryCount++; - } else { - throw generateError(errorData); - } - } - } - - throw lastError; -} - -async function cancelRequest(requestID): Promise { - const { backendAPI } = config; - - try { - await Axios.post(`${backendAPI}/requests/${requestID}/cancel`); - } catch (errorData) { - throw generateError(errorData); - } -} - const listenToCreateAnalyticsReportCallbacks: { job: LongProcessListener; task: LongProcessListener; diff --git a/cvat-core/src/session.ts b/cvat-core/src/session.ts index 8deff9694ad4..cc5a19273d6b 100644 --- a/cvat-core/src/session.ts +++ b/cvat-core/src/session.ts @@ -738,7 +738,7 @@ export class Job extends Session { return result; } - async mergeConsensusJobs(): Promise { + async mergeConsensusJobs(): Promise { const result = await PluginRegistry.apiWrapper.call(this, Job.prototype.mergeConsensusJobs); return result; } @@ -1204,7 +1204,7 @@ export class Task extends Session { return result; } - async mergeConsensusJobs(): Promise { + async mergeConsensusJobs(): Promise { const result = await PluginRegistry.apiWrapper.call(this, Task.prototype.mergeConsensusJobs); return result; } diff --git a/cvat-ui/src/actions/consensus-actions.ts b/cvat-ui/src/actions/consensus-actions.ts index a06de6ef3bdc..c42d5334b725 100644 --- a/cvat-ui/src/actions/consensus-actions.ts +++ b/cvat-ui/src/actions/consensus-actions.ts @@ -3,7 +3,11 @@ // SPDX-License-Identifier: MIT import { ActionUnion, createAction, ThunkAction } from 'utils/redux'; -import { Project, ProjectOrTaskOrJob } from 'cvat-core-wrapper'; +import { Project, ProjectOrTaskOrJob, getCore } from 'cvat-core-wrapper'; + +import { updateRequestProgress } from './requests-actions'; + +const core = getCore(); export enum ConsensusActionTypes { MERGE_CONSENSUS_JOBS = 'MERGE_CONSENSUS_JOBS', @@ -28,7 +32,10 @@ export const mergeConsensusJobsAsync = ( ): ThunkAction => async (dispatch) => { try { dispatch(consensusActions.mergeConsensusJobs(instance)); - await instance.mergeConsensusJobs(); + const rqID = await instance.mergeConsensusJobs(); + await core.requests.listen(rqID, { + callback: (updatedRequest) => updateRequestProgress(updatedRequest, dispatch), + }); } catch (error) { dispatch(consensusActions.mergeConsensusJobsFailed(instance, error)); return; diff --git a/cvat/apps/consensus/merging_manager.py b/cvat/apps/consensus/merging_manager.py index b6fdf0eb75c5..303297c14d1b 100644 --- a/cvat/apps/consensus/merging_manager.py +++ b/cvat/apps/consensus/merging_manager.py @@ -6,12 +6,10 @@ from typing import Type import datumaro as dm -import django_rq from django.conf import settings from django.db import transaction +from django.http import HttpResponseBadRequest from django_rq.queues import DjangoRQ as RqQueue -from rq.job import Job as RqJob -from rq.job import JobStatus as RqJobStatus from cvat.apps.consensus.intersect_merge import IntersectMerge from cvat.apps.consensus.models import ConsensusSettings @@ -21,6 +19,7 @@ DimensionType, Job, JobType, + RequestTarget, StageChoice, StateChoice, Task, @@ -28,10 +27,11 @@ clear_annotations_in_jobs, ) from cvat.apps.engine.rq import BaseRQMeta, define_dependent_job -from cvat.apps.engine.types import ExtendedRequest from cvat.apps.engine.utils import get_rq_lock_by_user from cvat.apps.profiler import silk_profile from cvat.apps.quality_control.quality_reports import ComparisonParameters, JobDataProvider +from cvat.apps.redis_handler.background import AbstractRQJobManager +from cvat.apps.redis_handler.rq import RQId class _TaskMerger: @@ -159,84 +159,56 @@ class MergingNotAvailable(Exception): pass -class JobAlreadyExists(MergingNotAvailable): - def __init__(self, instance: Task | Job): - super().__init__() - self.instance = instance +class MergingManager(AbstractRQJobManager): + QUEUE_NAME = settings.CVAT_QUEUES.CONSENSUS.value + SUPPORTED_RESOURCES = {RequestTarget.TASK, RequestTarget.JOB} - def __str__(self): - return f"Merging for this {type(self.instance).__name__.lower()} already enqueued" - - -class MergingManager: - _QUEUE_CUSTOM_JOB_PREFIX = "consensus-merge-" _JOB_RESULT_TTL = 300 + _JOB_FAILURE_TTL = _JOB_RESULT_TTL - def _get_queue(self) -> RqQueue: - return django_rq.get_queue(settings.CVAT_QUEUES.CONSENSUS.value) + def build_rq_id(self) -> str: + # todo: add redis migration + return RQId( + queue=self.QUEUE_NAME, + action="merge", + target=self.resource, + id=self.db_instance.pk, + ).render() - def _make_job_id(self, task_id: int, job_id: int | None, user_id: int) -> str: - key = f"{self._QUEUE_CUSTOM_JOB_PREFIX}task-{task_id}" - if job_id: - key += f"-job-{job_id}" - key += f"-user-{user_id}" # TODO: remove user id, add support for non owners to get status - return key + def _split_to_task_and_job(self) -> tuple[Task, Job | None]: + if isinstance(self.db_instance, Job): + return self.db_instance.segment.task, self.db_instance - def _check_merging_available(self, task: Task, job: Job | None): - _TaskMerger(task=task).check_merging_available(parent_job_id=job.id if job else None) + return self.db_instance, None - def schedule_merge(self, target: Task | Job, *, request: ExtendedRequest) -> str: - if isinstance(target, Job): - target_task = target.segment.task - target_job = target - else: - target_task = target - target_job = None + def validate_request(self): + # FUTURE-FIXME: check that there is no indirectly dependent RQ jobs: + # e.g merge whole task and merge a particular job from the task + task, job = self._split_to_task_and_job() - self._check_merging_available(target_task, target_job) + try: + _TaskMerger(task=task).check_merging_available(parent_job_id=job.pk if job else None) + except MergingNotAvailable as ex: + return HttpResponseBadRequest(str(ex)) - queue = self._get_queue() + def setup_background_job(self, queue: RqQueue, rq_id: str) -> None: + user_id = self.request.user.id - user_id = request.user.id with get_rq_lock_by_user(queue, user_id=user_id): - rq_id = self._make_job_id( - task_id=target_task.id, - job_id=target_job.id if target_job else None, - user_id=user_id, - ) - rq_job = queue.fetch_job(rq_id) - if rq_job: - if rq_job.get_status(refresh=False) in ( - RqJobStatus.QUEUED, - RqJobStatus.STARTED, - RqJobStatus.SCHEDULED, - RqJobStatus.DEFERRED, - ): - raise JobAlreadyExists(target) - - rq_job.delete() - dependency = define_dependent_job( - queue, user_id=user_id, rq_id=rq_id, should_be_dependent=True + queue, user_id=user_id, rq_id=rq_id ) - queue.enqueue( self._merge, - target_type=type(target), - target_id=target.id, + target_type=type(self.db_instance), + target_id=self.db_instance.pk, job_id=rq_id, - meta=BaseRQMeta.build(request=request, db_obj=target), + meta=BaseRQMeta.build(request=self.request, db_obj=self.db_instance), result_ttl=self._JOB_RESULT_TTL, - failure_ttl=self._JOB_RESULT_TTL, + failure_ttl=self._JOB_FAILURE_TTL, depends_on=dependency, ) - return rq_id - - def get_job(self, rq_id: str) -> RqJob | None: - queue = self._get_queue() - return queue.fetch_job(rq_id) - @classmethod @silk_profile() def _merge(cls, *, target_type: Type[Task | Job], target_id: int) -> int: diff --git a/cvat/apps/consensus/views.py b/cvat/apps/consensus/views.py index 4a7032215fdb..bbbf3fc441a6 100644 --- a/cvat/apps/consensus/views.py +++ b/cvat/apps/consensus/views.py @@ -3,7 +3,10 @@ # SPDX-License-Identifier: MIT import textwrap +from datetime import datetime +from django.http import HttpResponseNotFound +from django.utils import timezone from drf_spectacular.utils import ( OpenApiParameter, OpenApiResponse, @@ -90,20 +93,17 @@ def create(self, request: ExtendedRequest, *args, **kwargs): except Job.DoesNotExist as ex: raise NotFound(f"Jobs {job_id} do not exist") from ex - try: - manager = merging.MergingManager() - rq_id = manager.schedule_merge(instance, request=request) - serializer = RqIdSerializer({"rq_id": rq_id}) - return Response(serializer.data, status=status.HTTP_202_ACCEPTED) - except merging.MergingNotAvailable as ex: - raise ValidationError(str(ex)) + manager = merging.MergingManager(instance, request) + return manager.process() else: + deprecation_timestamp = int(datetime(2025, 3, 17, tzinfo=timezone.utc).timestamp()) + response_headers = {"Deprecation": f"@{deprecation_timestamp}"} + serializer = RqIdSerializer(data={"rq_id": rq_id}) serializer.is_valid(raise_exception=True) rq_id = serializer.validated_data["rq_id"] + rq_job = merging.MergingManager.get_job_by_id(rq_id) - manager = merging.MergingManager() - rq_job = manager.get_job(rq_id) if ( not rq_job or not ConsensusMergePermission.create_scope_check_status( @@ -113,7 +113,7 @@ def create(self, request: ExtendedRequest, *args, **kwargs): .allow ): # We should not provide job existence information to unauthorized users - raise NotFound("Unknown request id") + return HttpResponseNotFound("Unknown request id", headers=response_headers) rq_job_status = rq_job.get_status(refresh=False) if rq_job_status == RqJobStatus.FAILED: @@ -124,19 +124,26 @@ def create(self, request: ExtendedRequest, *args, **kwargs): return Response( data=exc_info[exc_pos + len(exc_name_pattern) :].strip(), status=status.HTTP_400_BAD_REQUEST, + headers=response_headers, ) - return Response(data=str(exc_info), status=status.HTTP_500_INTERNAL_SERVER_ERROR) + return Response( + data=str(exc_info), + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + headers=response_headers, + ) elif rq_job_status in ( RqJobStatus.QUEUED, RqJobStatus.STARTED, RqJobStatus.SCHEDULED, RqJobStatus.DEFERRED, ): - return Response(serializer.data, status=status.HTTP_202_ACCEPTED) + return Response( + serializer.data, status=status.HTTP_202_ACCEPTED, headers=response_headers + ) elif rq_job_status == RqJobStatus.FINISHED: rq_job.delete() - return Response(status=status.HTTP_201_CREATED) + return Response(status=status.HTTP_201_CREATED, headers=response_headers) raise AssertionError(f"Unexpected rq job '{rq_id}' status '{rq_job_status}'") From 5bd0b8994dcfe4c2b6ef9fd798da5e83c938b216 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 18 Mar 2025 09:58:30 +0100 Subject: [PATCH 004/103] [quality/consensus] Allow users with resource access to check operation status --- cvat/apps/engine/permissions.py | 37 ----------- cvat/apps/redis_handler/apps.py | 5 ++ cvat/apps/redis_handler/permissions.py | 90 ++++++++++++++++++++++++++ 3 files changed, 95 insertions(+), 37 deletions(-) create mode 100644 cvat/apps/redis_handler/permissions.py diff --git a/cvat/apps/engine/permissions.py b/cvat/apps/engine/permissions.py index 7a0ce8c0c627..6ed84a917046 100644 --- a/cvat/apps/engine/permissions.py +++ b/cvat/apps/engine/permissions.py @@ -12,9 +12,7 @@ from django.conf import settings from django.shortcuts import get_object_or_404 from rest_framework.exceptions import PermissionDenied, ValidationError -from rq.job import Job as RQJob -from cvat.apps.engine.rq import is_rq_job_owner from cvat.apps.engine.types import ExtendedRequest from cvat.apps.engine.utils import is_dataset_export from cvat.apps.iam.permissions import ( @@ -1194,41 +1192,6 @@ def get_scopes(request: ExtendedRequest, view: ViewSet, obj: AnnotationGuide | N }[view.action]] -class RequestPermission(OpenPolicyAgentPermission): - class Scopes(StrEnum): - LIST = 'list' - VIEW = 'view' - CANCEL = 'cancel' - - @classmethod - def create(cls, request: ExtendedRequest, view: ViewSet, obj: RQJob | None, iam_context: dict) -> list[OpenPolicyAgentPermission]: - permissions = [] - if view.basename == 'request': - for scope in cls.get_scopes(request, view, obj): - if scope != cls.Scopes.LIST: - user_id = request.user.id - if not is_rq_job_owner(obj, user_id): - raise PermissionDenied('You don\'t have permission to perform this action') - - return permissions - - def __init__(self, **kwargs): - super().__init__(**kwargs) - self.url = settings.IAM_OPA_DATA_URL + '/requests/allow' - - @staticmethod - def get_scopes(request: ExtendedRequest, view: ViewSet, obj: RQJob | None) -> list[Scopes]: - Scopes = __class__.Scopes - return [{ - ('list', 'GET'): Scopes.LIST, - ('retrieve', 'GET'): Scopes.VIEW, - ('cancel', 'POST'): Scopes.CANCEL, - }[(view.action, request.method)]] - - - def get_resource(self): - return None - def get_cloud_storage_for_import_or_export( storage_id: int, *, request: ExtendedRequest, is_default: bool = False ) -> CloudStorage: diff --git a/cvat/apps/redis_handler/apps.py b/cvat/apps/redis_handler/apps.py index a00543165e7f..be23d57b5d2b 100644 --- a/cvat/apps/redis_handler/apps.py +++ b/cvat/apps/redis_handler/apps.py @@ -8,3 +8,8 @@ class RedisHandlerConfig(AppConfig): name = "cvat.apps.redis_handler" + + def ready(self) -> None: + from cvat.apps.iam.permissions import load_app_permissions + + load_app_permissions(self) diff --git a/cvat/apps/redis_handler/permissions.py b/cvat/apps/redis_handler/permissions.py new file mode 100644 index 000000000000..a717f2a2949f --- /dev/null +++ b/cvat/apps/redis_handler/permissions.py @@ -0,0 +1,90 @@ +# Copyright (C) CVAT.ai Corporation +# +# SPDX-License-Identifier: MIT + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from django.conf import settings +from rest_framework.exceptions import PermissionDenied +from rest_framework.serializers import ValidationError +from rq.job import Job as RQJob + +from cvat.apps.engine.rq import is_rq_job_owner +from cvat.apps.engine.types import ExtendedRequest +from cvat.apps.iam.permissions import OpenPolicyAgentPermission, StrEnum + +if TYPE_CHECKING: + from rest_framework.viewsets import ViewSet + +from cvat.apps.engine.models import RequestTarget +from cvat.apps.engine.permissions import JobPermission, TaskPermission +from cvat.apps.redis_handler.rq import RQId + + +class RequestPermission(OpenPolicyAgentPermission): + + class Scopes(StrEnum): + LIST = "list" + VIEW = "view" + CANCEL = "cancel" + + @classmethod + def create( + cls, request: ExtendedRequest, view: ViewSet, obj: RQJob | None, iam_context: dict + ) -> list[OpenPolicyAgentPermission]: + permissions = [] + if view.basename == "request": + user_id = request.user.id + + for scope in cls.get_scopes(request, view, obj): + if scope == cls.Scopes.LIST: + continue + elif scope == cls.Scopes.VIEW: + parsed_rq_id = obj.parsed_rq_id + + if ( + parsed_rq_id.queue + in ( + settings.CVAT_QUEUES.CONSENSUS, + settings.CVAT_QUEUES.QUALITY_REPORTS, + ) + and parsed_rq_id.target == RequestTarget.TASK + ): + permissions.append( + TaskPermission.create_scope_view(request, parsed_rq_id.id, iam_context) + ) + continue + + if ( + parsed_rq_id.queue == settings.CVAT_QUEUES.CONSENSUS + and parsed_rq_id.target == RequestTarget.JOB + ): + permissions.append( + JobPermission.create_scope_view(request, parsed_rq_id.id, iam_context) + ) + continue + + # TODO: move into OPA + if not is_rq_job_owner(obj, user_id): + raise PermissionDenied("You don't have permission to perform this action") + + return permissions + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.url = settings.IAM_OPA_DATA_URL + "/requests/allow" + + @staticmethod + def get_scopes(request: ExtendedRequest, view: ViewSet, obj: RQJob | None) -> list[Scopes]: + return [ + { + ("list", "GET"): __class__.Scopes.LIST, + ("retrieve", "GET"): __class__.Scopes.VIEW, + ("cancel", "POST"): __class__.Scopes.CANCEL, + }[(view.action, request.method)] + ] + + def get_resource(self): + return None From 622952a8aa8850ce65dad4aea8b3fbd4e4f51156 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Wed, 19 Mar 2025 11:44:35 +0100 Subject: [PATCH 005/103] Update import/export --- cvat/apps/engine/background.py | 128 +++++++++----------------------- cvat/apps/engine/backup.py | 16 +++- cvat/apps/engine/mixins.py | 13 +++- cvat/apps/engine/permissions.py | 8 +- cvat/apps/engine/rq.py | 70 ++++++++--------- cvat/apps/engine/serializers.py | 11 ++- cvat/apps/engine/task.py | 7 +- cvat/apps/engine/views.py | 32 ++++++-- 8 files changed, 133 insertions(+), 152 deletions(-) diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index 43daad8f7427..8e4d8d16b686 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -6,13 +6,13 @@ from abc import ABC, abstractmethod from dataclasses import dataclass from datetime import datetime -from typing import Any, ClassVar, Optional, Union +from typing import Any, ClassVar from urllib.parse import quote import django_rq from django.conf import settings from django.http.response import HttpResponseBadRequest -from django_rq.queues import DjangoRQ, DjangoScheduler +from django_rq.queues import DjangoRQ from rest_framework import serializers, status from rest_framework.response import Response from rest_framework.reverse import reverse @@ -23,16 +23,13 @@ from cvat.apps.dataset_manager.formats.registry import EXPORT_FORMATS from cvat.apps.dataset_manager.util import get_export_cache_lock from cvat.apps.dataset_manager.views import get_export_cache_ttl, get_export_callback -from cvat.apps.engine import models from cvat.apps.engine.backup import ProjectExporter, TaskExporter, create_backup from cvat.apps.engine.cloud_provider import export_resource_to_cloud_storage from cvat.apps.engine.location import StorageType, get_location_configuration from cvat.apps.engine.log import ServerLogManager from cvat.apps.engine.models import Location, RequestAction, RequestSubresource, RequestTarget, Task from cvat.apps.engine.permissions import get_cloud_storage_for_import_or_export -from cvat.apps.engine.rq import ExportRQMeta, RQId, define_dependent_job -from cvat.apps.engine.serializers import RqIdSerializer -from cvat.apps.engine.types import ExtendedRequest +from cvat.apps.engine.rq import ExportRQMeta, ExportRQId, define_dependent_job from cvat.apps.engine.utils import ( build_annotations_file_name, build_backup_file_name, @@ -50,28 +47,13 @@ LOCK_TTL = REQUEST_TIMEOUT - 5 LOCK_ACQUIRE_TIMEOUT = LOCK_TTL - 5 +from cvat.apps.redis_handler.background import AbstractRQJobManager -class ResourceExportManager(ABC): + +class ResourceExportManager(AbstractRQJobManager): QUEUE_NAME = settings.CVAT_QUEUES.EXPORT_DATA.value - SUPPORTED_RESOURCES: ClassVar[set[RequestSubresource]] SUPPORTED_SUBRESOURCES: ClassVar[set[RequestSubresource]] - def __init__( - self, - db_instance: Union[models.Project, models.Task, models.Job], - request: ExtendedRequest, - ) -> None: - """ - Args: - db_instance (Union[models.Project, models.Task, models.Job]): Model instance - request (ExtendedRequest): Incoming HTTP request - """ - self.db_instance = db_instance - self.request = request - self.resource = db_instance.__class__.__name__.lower() - if self.resource not in self.SUPPORTED_RESOURCES: - raise ValueError("Unexpected type of db_instance: {}".format(type(db_instance))) - ### Initialization logic ### @abstractmethod @@ -80,34 +62,6 @@ def initialize_export_args(self) -> None: ... @abstractmethod def validate_export_args(self) -> Response | None: ... - @abstractmethod - def build_rq_id(self) -> str: ... - - def handle_existing_rq_job( - self, rq_job: Optional[RQJob], queue: DjangoRQ - ) -> Optional[Response]: - if not rq_job: - return None - - rq_job_status = rq_job.get_status(refresh=False) - - if rq_job_status in {RQJobStatus.STARTED, RQJobStatus.QUEUED}: - return Response( - data="Export request is being processed", - status=status.HTTP_409_CONFLICT, - ) - - if rq_job_status == RQJobStatus.DEFERRED: - rq_job.cancel(enqueue_dependents=settings.ONE_RUNNING_JOB_IN_QUEUE_PER_USER) - - if rq_job_status == RQJobStatus.SCHEDULED: - scheduler: DjangoScheduler = django_rq.get_scheduler(queue.name, queue=queue) - # remove the job id from the set with scheduled keys - scheduler.cancel(rq_job) - rq_job.cancel(enqueue_dependents=settings.ONE_RUNNING_JOB_IN_QUEUE_PER_USER) - - rq_job.delete() - return None @abstractmethod def get_download_api_endpoint_view_name(self) -> str: ... @@ -129,39 +83,22 @@ def get_result_filename(self) -> str: ... @abstractmethod def send_events(self) -> None: ... - @abstractmethod - def setup_background_job(self, queue: DjangoRQ, rq_id: str) -> None: ... - - def export(self) -> Response: - self.initialize_export_args() - - if invalid_response := self.validate_export_args(): - return invalid_response - - queue: DjangoRQ = django_rq.get_queue(self.QUEUE_NAME) - rq_id = self.build_rq_id() - - # ensure that there is no race condition when processing parallel requests - with get_rq_lock_for_job(queue, rq_id): - rq_job = queue.fetch_job(rq_id) - if response := self.handle_existing_rq_job(rq_job, queue): - return response - self.setup_background_job(queue, rq_id) - + def after_processing(self): self.send_events() - serializer = RqIdSerializer({"rq_id": rq_id}) - return Response(serializer.data, status=status.HTTP_202_ACCEPTED) + def process(self): + self.initialize_export_args() + return super().process() ### Logic related to prepared file downloading ### def validate_rq_id(self, rq_id: str) -> None: - parsed_rq_id = RQId.parse(rq_id) + parsed_rq_id = ExportRQId.parse(rq_id) if ( parsed_rq_id.action != RequestAction.EXPORT or parsed_rq_id.target != RequestTarget(self.resource) - or parsed_rq_id.identifier != self.db_instance.pk + or parsed_rq_id.id != self.db_instance.pk or parsed_rq_id.subresource not in self.SUPPORTED_SUBRESOURCES ): raise ValueError("The provided request id does not match exported target or resource") @@ -286,17 +223,21 @@ def validate_export_args(self): return Response(status=status.HTTP_405_METHOD_NOT_ALLOWED) def build_rq_id(self): - return RQId( - RequestAction.EXPORT, - RequestTarget(self.resource), - self.db_instance.pk, - subresource=( - RequestSubresource.DATASET - if self.export_args.save_images - else RequestSubresource.ANNOTATIONS - ), - format=self.export_args.format, - user_id=self.request.user.id, + return ExportRQId( + queue=self.QUEUE_NAME, + action=RequestAction.EXPORT, + target=RequestTarget(self.resource), + id=self.db_instance.pk, + # todo: refactor + extra={ + "subresource": ( + RequestSubresource.DATASET + if self.export_args.save_images + else RequestSubresource.ANNOTATIONS + ), + "format": self.export_args.format, + "user_id": self.request.user.id, + }, ).render() def send_events(self): @@ -434,12 +375,15 @@ def get_result_filename(self) -> str: return filename def build_rq_id(self): - return RQId( - RequestAction.EXPORT, - RequestTarget(self.resource), - self.db_instance.pk, - subresource=RequestSubresource.BACKUP, - user_id=self.request.user.id, + return ExportRQId( + queue=self.QUEUE_NAME, + action=RequestAction.EXPORT, + target=RequestTarget(self.resource), + id=self.db_instance.pk, + extra={ + "subresource": RequestSubresource.BACKUP, + "user_id": self.request.user.id, + }, ).render() # FUTURE-TODO: move into ResourceExportManager diff --git a/cvat/apps/engine/backup.py b/cvat/apps/engine/backup.py index f03de29f118e..8b3ed9a31d1d 100644 --- a/cvat/apps/engine/backup.py +++ b/cvat/apps/engine/backup.py @@ -1284,8 +1284,11 @@ def import_project(request: ExtendedRequest, queue_name: str, filename: str | No rq_id = request.data['rq_id'] else: rq_id = RQId( - RequestAction.IMPORT, RequestTarget.PROJECT, uuid.uuid4(), - subresource=RequestSubresource.BACKUP, + queue=settings.CVAT_QUEUES.IMPORT_DATA.value, + action=RequestAction.IMPORT, target=RequestTarget.PROJECT, id=uuid.uuid4(), + extra={ + "subresource": RequestSubresource.BACKUP, + } ).render() Serializer = ProjectFileSerializer file_field_name = 'project_file' @@ -1310,8 +1313,13 @@ def import_project(request: ExtendedRequest, queue_name: str, filename: str | No def import_task(request: ExtendedRequest, queue_name: str, filename: str | None = None): rq_id = request.data.get('rq_id', RQId( - RequestAction.IMPORT, RequestTarget.TASK, uuid.uuid4(), - subresource=RequestSubresource.BACKUP, + queue=settings.CVAT_QUEUES.IMPORT_DATA.value, + action=RequestAction.IMPORT, + target=RequestTarget.TASK, + id=uuid.uuid4(), + extra={ + "subresource": RequestSubresource.BACKUP, + } ).render()) Serializer = TaskFileSerializer file_field_name = 'task_file' diff --git a/cvat/apps/engine/mixins.py b/cvat/apps/engine/mixins.py index 6d5c3addb68e..0917235293f8 100644 --- a/cvat/apps/engine/mixins.py +++ b/cvat/apps/engine/mixins.py @@ -280,8 +280,13 @@ def init_tus_upload(self, request: ExtendedRequest): # check whether the rq_job is in progress or has been finished/failed object_class_name = self._object.__class__.__name__.lower() template = RQId( - RequestAction.IMPORT, RequestTarget(object_class_name), self._object.pk, - subresource=RequestSubresource(import_type) + queue=settings.CVAT_QUEUES.IMPORT_DATA.value, + action=RequestAction.IMPORT, + target=RequestTarget(object_class_name), + id=self._object.pk, + extra={ + "subresource": RequestSubresource(import_type) + } ).render() queue = django_rq.get_queue(settings.CVAT_QUEUES.IMPORT_DATA.value) finished_job_ids = queue.finished_job_registry.get_job_ids() @@ -454,7 +459,7 @@ def initiate_dataset_export(self, request: ExtendedRequest, pk: int): self._object = self.get_object() # force call of check_object_permissions() export_manager = DatasetExportManager(self._object, request) - return export_manager.export() + return export_manager.process() @extend_schema(summary='Download a prepared dataset file', parameters=[ @@ -550,7 +555,7 @@ def import_backup_v1(self, request: ExtendedRequest, import_func: Callable) -> R def initiate_backup_export(self, request: ExtendedRequest, pk: int): db_object = self.get_object() # force to call check_object_permissions export_manager = BackupExportManager(db_object, request) - return export_manager.export() + return export_manager.process() @extend_schema(summary='Download a prepared backup file', diff --git a/cvat/apps/engine/permissions.py b/cvat/apps/engine/permissions.py index 584307823449..1b76dc8cf78e 100644 --- a/cvat/apps/engine/permissions.py +++ b/cvat/apps/engine/permissions.py @@ -13,7 +13,7 @@ from django.shortcuts import get_object_or_404 from rest_framework.exceptions import PermissionDenied, ValidationError -from cvat.apps.engine.rq import RQId +from cvat.apps.engine.rq import ExportRQId from cvat.apps.engine.types import ExtendedRequest from cvat.apps.engine.utils import is_dataset_export from cvat.apps.iam.permissions import ( @@ -47,7 +47,7 @@ def _get_key(d: dict[str, Any], key_path: Union[str, Sequence[str]]) -> Optional return d class DownloadExportedExtension: - rq_job_id: RQId | None + rq_job_id: ExportRQId | None class Scopes(StrEnum): DOWNLOAD_EXPORTED_FILE = 'download:exported_file' @@ -56,7 +56,7 @@ class Scopes(StrEnum): def extend_params_with_rq_job_details(*, request: ExtendedRequest, params: dict[str, Any]) -> None: if rq_id := request.query_params.get("rq_id"): try: - params["rq_job_id"] = RQId.parse(rq_id) + params["rq_job_id"] = ExportRQId.parse(rq_id) return except Exception: raise ValidationError("Unexpected request id format") @@ -66,7 +66,7 @@ def extend_params_with_rq_job_details(*, request: ExtendedRequest, params: dict[ def extend_resource_with_rq_job_details(self, data: dict[str, Any]) -> None: data["rq_job"] = { "owner": { - "id": self.rq_job_id.extra["user_id"] if self.rq_job_id else None + "id": self.rq_job_id.user_id if self.rq_job_id else None } } diff --git a/cvat/apps/engine/rq.py b/cvat/apps/engine/rq.py index 269b4a0b7d7d..54a2f52e1687 100644 --- a/cvat/apps/engine/rq.py +++ b/cvat/apps/engine/rq.py @@ -5,10 +5,8 @@ from __future__ import annotations from abc import ABCMeta, abstractmethod -from typing import TYPE_CHECKING, Any, Callable, Optional, Protocol, Union -from uuid import UUID +from typing import TYPE_CHECKING, Any, Callable, Optional, Protocol -import attrs from django.conf import settings from django.db.models import Model from django.utils import timezone @@ -19,7 +17,9 @@ from cvat.apps.engine.types import ExtendedRequest -from .models import RequestAction, RequestSubresource, RequestTarget +from .models import RequestSubresource +from cvat.apps.redis_handler.rq import RQId +from functools import cached_property if TYPE_CHECKING: from django.contrib.auth.models import User @@ -52,14 +52,14 @@ class RequestField: PROGRESS = "progress" HIDDEN = "hidden" - # export specific fields - RESULT_URL = "result_url" - RESULT = "result" - # import specific fields TMP_FILE = "tmp_file" TASK_PROGRESS = "task_progress" + # export specific fields + RESULT_URL = "result_url" + RESULT_FILENAME = "result_filename" + # lambda fields LAMBDA = "lambda" FUNCTION_ID = "function_id" @@ -334,37 +334,33 @@ def is_rq_job_owner(rq_job: RQJob, user_id: int) -> bool: return BaseRQMeta.for_job(rq_job).user.id == user_id -# TODO: -from cvat.apps.redis_handler.rq import RQId +class ExportRQId(RQId): + @cached_property + def user_id(self) -> int: + return int(self.extra["user_id"]) + @cached_property + def subresource(self) -> RequestSubresource: + return RequestSubresource(self.extra["subresource"]) -class ExportRQId(RQId): - pass - - # TODO: format, user_id, subresource - - # subresource: Optional[RequestSubresource] = attrs.field( - # validator=attrs.validators.optional(attrs.validators.instance_of(RequestSubresource)), - # kw_only=True, - # default=None, - # ) - # user_id: Optional[int] = attrs.field( - # validator=attrs.validators.optional(attrs.validators.instance_of(int)), - # kw_only=True, - # default=None, - # ) - # format: Optional[str] = attrs.field( - # validator=attrs.validators.optional(attrs.validators.instance_of(str)), - # kw_only=True, - # default=None, - # ) - - # RQ ID templates: - # autoannotate:task- - # import:-- - # create:task- - # export:---in--format-by- - # export:--backup-by- + @cached_property + def format(self) -> str | None: + # TODO: quote/unquote + return self.extra.get("format") + + +class ImportRQId(RQId): + @cached_property + def subresource(self) -> RequestSubresource | None: + if subresource := self.extra.get("subresource"): + return RequestSubresource(subresource) + + return None + + @cached_property + def format(self) -> str | None: + # TODO: quote/unquote + return self.extra.get("format") def define_dependent_job( diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index eb7d4d2307dd..96fbaaa55beb 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -40,8 +40,9 @@ from cvat.apps.engine.frame_provider import FrameQuality, TaskFrameProvider from cvat.apps.engine.log import ServerLogManager from cvat.apps.engine.model_utils import bulk_create +from cvat.apps.engine.models import RequestAction, RequestSubresource from cvat.apps.engine.permissions import TaskPermission -from cvat.apps.engine.rq import BaseRQMeta, ExportRQMeta, ImportRQMeta, RequestAction +from cvat.apps.engine.rq import BaseRQMeta, ExportRQMeta, ImportRQMeta from cvat.apps.engine.task_validation import HoneypotFrameSelector from cvat.apps.engine.utils import ( CvatChunkTimestampMismatchError, @@ -3521,8 +3522,10 @@ def to_representation(self, rq_job: RQJob) -> dict[str, Any]: } if parsed_rq_id.action == RequestAction.AUTOANNOTATE: representation["function_id"] = LambdaRQMeta.for_job(rq_job).function_id - elif parsed_rq_id.action in (RequestAction.IMPORT, RequestAction.EXPORT): - representation["format"] = parsed_rq_id.extra["format"] # todo: refactor + elif parsed_rq_id.action in ( + RequestAction.IMPORT, RequestAction.EXPORT + ) and parsed_rq_id.subresource in (RequestSubresource.ANNOTATIONS, RequestSubresource.DATASET): + representation["format"] = parsed_rq_id.format return representation @@ -3607,7 +3610,7 @@ def to_representation(self, rq_job: RQJob) -> dict[str, Any]: if ( rq_job.parsed_rq_id.action == models.RequestAction.IMPORT - and rq_job.parsed_rq_id.extra["subresource"] == models.RequestSubresource.BACKUP + and rq_job.parsed_rq_id.subresource == models.RequestSubresource.BACKUP ): representation["result_id"] = rq_job.return_value() diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index ca6819304626..dab13088bf1f 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -70,7 +70,12 @@ def create( """Schedule a background job to create a task and return that job's identifier""" q = django_rq.get_queue(settings.CVAT_QUEUES.IMPORT_DATA.value) user_id = request.user.id - rq_id = RQId(RequestAction.CREATE, RequestTarget.TASK, db_task.pk).render() + rq_id = RQId( + queue=settings.CVAT_QUEUES.IMPORT_DATA.value, + action=RequestAction.CREATE, + target=RequestTarget.TASK, + id=db_task.pk + ).render() with get_rq_lock_by_user(q, user_id): q.enqueue_call( diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index b76443891386..de4ffbb85482 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -365,7 +365,12 @@ class ProjectViewSet(viewsets.GenericViewSet, mixins.ListModelMixin, lookup_fields = {'owner': 'owner__username', 'assignee': 'assignee__username'} iam_organization_field = 'organization' IMPORT_RQ_ID_FACTORY = functools.partial(RQId, - RequestAction.IMPORT, RequestTarget.PROJECT, subresource=RequestSubresource.DATASET + queue=settings.CVAT_QUEUES.IMPORT_DATA.value, + action=RequestAction.IMPORT, + targte=RequestTarget.PROJECT, + extra={ + "subresource": RequestSubresource.DATASET, + } ) def get_serializer_class(self): @@ -939,7 +944,12 @@ class TaskViewSet(viewsets.GenericViewSet, mixins.ListModelMixin, ordering = "-id" iam_organization_field = 'organization' IMPORT_RQ_ID_FACTORY = functools.partial(RQId, - RequestAction.IMPORT, RequestTarget.TASK, subresource=RequestSubresource.ANNOTATIONS, + queue=settings.CVAT_QUEUES.IMPORT_DATA.value, + action=RequestAction.IMPORT, + target=RequestTarget.TASK, + extra={ + "subresource": RequestSubresource.ANNOTATIONS, + } ) def get_serializer_class(self): @@ -1592,7 +1602,12 @@ def status(self, request, pk): task = self.get_object() # force call of check_object_permissions() response = self._get_rq_response( queue=settings.CVAT_QUEUES.IMPORT_DATA.value, - job_id=RQId(RequestAction.CREATE, RequestTarget.TASK, task.id).render() + job_id=RQId( + queue=settings.CVAT_QUEUES.IMPORT_DATA.value, + action=RequestAction.CREATE, + target=RequestTarget.TASK, + id=task.id + ).render() ) serializer = RqStatusSerializer(data=response) @@ -1850,7 +1865,12 @@ class JobViewSet(viewsets.GenericViewSet, mixins.ListModelMixin, mixins.CreateMo 'assignee': 'assignee__username' } IMPORT_RQ_ID_FACTORY = functools.partial(RQId, - RequestAction.IMPORT, RequestTarget.JOB, subresource=RequestSubresource.ANNOTATIONS + queue=settings.CVAT_QUEUES.IMPORT_DATA.value, + action=RequestAction.IMPORT, + target=RequestTarget.JOB, + extra={ + "subresource": RequestSubresource.ANNOTATIONS, + } ) def get_queryset(self): @@ -3231,7 +3251,7 @@ def _import_annotations( rq_id = request.query_params.get('rq_id') rq_id_should_be_checked = bool(rq_id) if not rq_id: - rq_id = rq_id_factory(db_obj.pk).render() + rq_id = rq_id_factory(id=db_obj.pk).render() queue = django_rq.get_queue(settings.CVAT_QUEUES.IMPORT_DATA.value) @@ -3362,7 +3382,7 @@ def _import_project_dataset( elif not format_desc.ENABLED: return Response(status=status.HTTP_405_METHOD_NOT_ALLOWED) - rq_id = rq_id_factory(db_obj.pk).render() + rq_id = rq_id_factory(id=db_obj.pk).render() queue: DjangoRQ = django_rq.get_queue(settings.CVAT_QUEUES.IMPORT_DATA.value) From 95b2b85a7697c5c32b0da2699dac88b0f2a6c8ca Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Wed, 19 Mar 2025 11:47:26 +0100 Subject: [PATCH 006/103] Drop POST /api/consensus/merges?rq_id= support --- cvat/apps/consensus/merging_manager.py | 4 +- cvat/apps/consensus/views.py | 127 +++++++------------------ cvat/schema.yml | 32 +------ 3 files changed, 38 insertions(+), 125 deletions(-) diff --git a/cvat/apps/consensus/merging_manager.py b/cvat/apps/consensus/merging_manager.py index 303297c14d1b..e23e54c13aef 100644 --- a/cvat/apps/consensus/merging_manager.py +++ b/cvat/apps/consensus/merging_manager.py @@ -195,9 +195,7 @@ def setup_background_job(self, queue: RqQueue, rq_id: str) -> None: user_id = self.request.user.id with get_rq_lock_by_user(queue, user_id=user_id): - dependency = define_dependent_job( - queue, user_id=user_id, rq_id=rq_id - ) + dependency = define_dependent_job(queue, user_id=user_id, rq_id=rq_id) queue.enqueue( self._merge, target_type=type(self.db_instance), diff --git a/cvat/apps/consensus/views.py b/cvat/apps/consensus/views.py index bbbf3fc441a6..271a10cecec1 100644 --- a/cvat/apps/consensus/views.py +++ b/cvat/apps/consensus/views.py @@ -3,10 +3,7 @@ # SPDX-License-Identifier: MIT import textwrap -from datetime import datetime -from django.http import HttpResponseNotFound -from django.utils import timezone from drf_spectacular.utils import ( OpenApiParameter, OpenApiResponse, @@ -14,24 +11,21 @@ extend_schema, extend_schema_view, ) -from rest_framework import mixins, status, viewsets -from rest_framework.exceptions import NotFound, ValidationError -from rest_framework.response import Response -from rq.job import JobStatus as RqJobStatus +from rest_framework import mixins, viewsets +from rest_framework.exceptions import NotFound from cvat.apps.consensus import merging_manager as merging from cvat.apps.consensus.models import ConsensusSettings -from cvat.apps.consensus.permissions import ConsensusMergePermission, ConsensusSettingPermission +from cvat.apps.consensus.permissions import ConsensusSettingPermission from cvat.apps.consensus.serializers import ( ConsensusMergeCreateSerializer, ConsensusSettingsSerializer, ) from cvat.apps.engine.mixins import PartialUpdateModelMixin from cvat.apps.engine.models import Job, Task -from cvat.apps.engine.rq import BaseRQMeta from cvat.apps.engine.serializers import RqIdSerializer from cvat.apps.engine.types import ExtendedRequest -from cvat.apps.engine.utils import process_failed_job +from django.http import HttpResponseGone @extend_schema(tags=["consensus"]) @@ -41,28 +35,14 @@ class ConsensusMergesViewSet(viewsets.GenericViewSet): @extend_schema( operation_id="consensus_create_merge", summary="Create a consensus merge", - parameters=[ - OpenApiParameter( - CREATE_MERGE_RQ_ID_PARAMETER, - type=str, - description=textwrap.dedent( - """\ - The consensus merge request id. Can be specified to check operation status. - """ - ), - ) - ], - request=ConsensusMergeCreateSerializer(required=False), + request=ConsensusMergeCreateSerializer, responses={ - "201": None, "202": OpenApiResponse( RqIdSerializer, description=textwrap.dedent( """\ A consensus merge request has been enqueued, the request id is returned. - The request status can be checked at this endpoint by passing the {} - as the query parameter. If the request id is specified, this response - means the consensus merge request is queued or is being processed. + The request status can be checked by using common requests API: GET /api/requests/rq_id """.format( CREATE_MERGE_RQ_ID_PARAMETER ) @@ -76,77 +56,34 @@ class ConsensusMergesViewSet(viewsets.GenericViewSet): def create(self, request: ExtendedRequest, *args, **kwargs): rq_id = request.query_params.get(self.CREATE_MERGE_RQ_ID_PARAMETER, None) - if rq_id is None: - input_serializer = ConsensusMergeCreateSerializer(data=request.data) - input_serializer.is_valid(raise_exception=True) - - task_id = input_serializer.validated_data.get("task_id", None) - job_id = input_serializer.validated_data.get("job_id", None) - if task_id: - try: - instance = Task.objects.get(pk=task_id) - except Task.DoesNotExist as ex: - raise NotFound(f"Task {task_id} does not exist") from ex - elif job_id: - try: - instance = Job.objects.select_related("segment").get(pk=job_id) - except Job.DoesNotExist as ex: - raise NotFound(f"Jobs {job_id} do not exist") from ex - - manager = merging.MergingManager(instance, request) - return manager.process() - else: - deprecation_timestamp = int(datetime(2025, 3, 17, tzinfo=timezone.utc).timestamp()) - response_headers = {"Deprecation": f"@{deprecation_timestamp}"} - - serializer = RqIdSerializer(data={"rq_id": rq_id}) - serializer.is_valid(raise_exception=True) - rq_id = serializer.validated_data["rq_id"] - rq_job = merging.MergingManager.get_job_by_id(rq_id) - - if ( - not rq_job - or not ConsensusMergePermission.create_scope_check_status( - request, rq_job_owner_id=BaseRQMeta.for_job(rq_job).user.id + if rq_id: + return HttpResponseGone( + textwrap.dedent( + f"""\ + This endpoint is no longer handles merge status checking. + The common requests API should be used instead: GET /api/requests/rq_id + """ ) - .check_access() - .allow - ): - # We should not provide job existence information to unauthorized users - return HttpResponseNotFound("Unknown request id", headers=response_headers) - - rq_job_status = rq_job.get_status(refresh=False) - if rq_job_status == RqJobStatus.FAILED: - exc_info = process_failed_job(rq_job) - - exc_name_pattern = f"{merging.MergingNotAvailable.__name__}: " - if (exc_pos := exc_info.find(exc_name_pattern)) != -1: - return Response( - data=exc_info[exc_pos + len(exc_name_pattern) :].strip(), - status=status.HTTP_400_BAD_REQUEST, - headers=response_headers, - ) - - return Response( - data=str(exc_info), - status=status.HTTP_500_INTERNAL_SERVER_ERROR, - headers=response_headers, - ) - elif rq_job_status in ( - RqJobStatus.QUEUED, - RqJobStatus.STARTED, - RqJobStatus.SCHEDULED, - RqJobStatus.DEFERRED, - ): - return Response( - serializer.data, status=status.HTTP_202_ACCEPTED, headers=response_headers - ) - elif rq_job_status == RqJobStatus.FINISHED: - rq_job.delete() - return Response(status=status.HTTP_201_CREATED, headers=response_headers) - - raise AssertionError(f"Unexpected rq job '{rq_id}' status '{rq_job_status}'") + ) + input_serializer = ConsensusMergeCreateSerializer(data=request.data) + input_serializer.is_valid(raise_exception=True) + + task_id = input_serializer.validated_data.get("task_id", None) + job_id = input_serializer.validated_data.get("job_id", None) + if task_id: + try: + instance = Task.objects.get(pk=task_id) + except Task.DoesNotExist as ex: + raise NotFound(f"Task {task_id} does not exist") from ex + elif job_id: + try: + instance = Job.objects.select_related("segment").get(pk=job_id) + except Job.DoesNotExist as ex: + raise NotFound(f"Jobs {job_id} do not exist") from ex + + manager = merging.MergingManager(instance, request) + return manager.process() @extend_schema(tags=["consensus"]) @extend_schema_view( diff --git a/cvat/schema.yml b/cvat/schema.yml index 8ce236334bb8..d6b170865ba5 100644 --- a/cvat/schema.yml +++ b/cvat/schema.yml @@ -964,13 +964,6 @@ paths: post: operationId: consensus_create_merge summary: Create a consensus merge - parameters: - - in: query - name: rq_id - schema: - type: string - description: | - The consensus merge request id. Can be specified to check operation status. tags: - consensus requestBody: @@ -985,8 +978,6 @@ paths: - signatureAuth: [] - basicAuth: [] responses: - '201': - description: No response body '202': content: application/vnd.cvat+json: @@ -994,9 +985,7 @@ paths: $ref: '#/components/schemas/RqId' description: | A consensus merge request has been enqueued, the request id is returned. - The request status can be checked at this endpoint by passing the rq_id - as the query parameter. If the request id is specified, this response - means the consensus merge request is queued or is being processed. + The request status can be checked by using common requests API: GET /api/requests/rq_id '400': description: Invalid or failed request, check the response data for details /api/consensus/settings: @@ -4239,6 +4228,8 @@ paths: description: '' post: operationId: quality_create_report + description: 'Deprecation warning: Do not use this endpoint ot check the report + computation status' summary: Create a quality report parameters: - in: query @@ -4248,6 +4239,7 @@ paths: description: | The report creation request id. Can be specified to check the report creation status. + deprecated: true tags: - quality requestBody: @@ -4487,12 +4479,7 @@ paths: Details about the syntax used can be found at the link: https://jsonlogic.com/ - Available filter_fields: ['status', 'project_id', 'task_id', 'job_id', 'action', 'target', 'subresource', 'format']. - schema: - type: string - - name: format - in: query - description: A simple equality filter for the format field + Available filter_fields: ['status', 'project_id', 'task_id', 'job_id', 'action', 'target']. schema: type: string - name: job_id @@ -4539,15 +4526,6 @@ paths: - started - failed - finished - - name: subresource - in: query - description: A simple equality filter for the subresource field - schema: - type: string - enum: - - annotations - - dataset - - backup - name: target in: query description: A simple equality filter for the target field From 792295fb9f2a8ececde2cae8cd818f7601ac1452 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Wed, 19 Mar 2025 11:48:09 +0100 Subject: [PATCH 007/103] Fix permissions --- cvat/apps/redis_handler/permissions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cvat/apps/redis_handler/permissions.py b/cvat/apps/redis_handler/permissions.py index a717f2a2949f..c164d9ca6a22 100644 --- a/cvat/apps/redis_handler/permissions.py +++ b/cvat/apps/redis_handler/permissions.py @@ -53,7 +53,7 @@ def create( and parsed_rq_id.target == RequestTarget.TASK ): permissions.append( - TaskPermission.create_scope_view(request, parsed_rq_id.id, iam_context) + TaskPermission.create_scope_view(request, parsed_rq_id.id) ) continue @@ -62,7 +62,7 @@ def create( and parsed_rq_id.target == RequestTarget.JOB ): permissions.append( - JobPermission.create_scope_view(request, parsed_rq_id.id, iam_context) + JobPermission.create_scope_view(request, parsed_rq_id.id) ) continue From d11b73c30f1822f11feff54dfacc9d08408d3c45 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Wed, 19 Mar 2025 11:49:23 +0100 Subject: [PATCH 008/103] Define parsed job id class based on queue config --- cvat/apps/redis_handler/rq.py | 37 +++++++++++++++++++++++--------- cvat/apps/redis_handler/views.py | 30 ++++++++++++++++++++------ cvat/settings/base.py | 3 +++ 3 files changed, 54 insertions(+), 16 deletions(-) diff --git a/cvat/apps/redis_handler/rq.py b/cvat/apps/redis_handler/rq.py index 8b8c7e1ab478..b7d09f000fd4 100644 --- a/cvat/apps/redis_handler/rq.py +++ b/cvat/apps/redis_handler/rq.py @@ -1,14 +1,10 @@ from __future__ import annotations -from typing import TYPE_CHECKING, ClassVar +from typing import ClassVar, Any from uuid import UUID import attrs -from cvat.apps.engine.types import ExtendedRequest - -if TYPE_CHECKING: - from django.contrib.auth.models import User import base64 @@ -27,6 +23,15 @@ def convert_id(value: int | str | UUID) -> int | UUID: return UUID(value) +def convert_extra(value: dict) -> dict[str, Any]: + assert isinstance(value, dict), f"Unexpected type: {type(value)}" + for k, v in value.items(): + if not isinstance(v, str): + value[k] = str(v) + + return value + + @attrs.frozen(kw_only=True) class RQId: FIELD_SEP: ClassVar[str] = "&" @@ -41,22 +46,34 @@ class RQId: ) # todo: dot access - extra: dict | None = attrs.field(default=None) + extra: dict[str, Any] = attrs.field(converter=convert_extra, factory=dict) @property def type(self) -> str: return ":".join([self.action, self.target]) + @classmethod + def from_base(cls, parsed_id: RQId, /): + # method is going to be used by child classes + return cls( + queue=parsed_id.queue, + action=parsed_id.action, + target=parsed_id.target, + id=parsed_id.id, + extra=parsed_id.extra, + ) + def render(self) -> str: + # TODO: add queue name indirectly bytes = self.FIELD_SEP.join( [ self.KEY_VAL_SEP.join([k, v]) for k, v in { "queue": self.queue.value, - "action": self.action, - "target": self.target, + "action": str(self.action), + "target": str(self.target), "id": str(self.id), - **(self.extra or {}), + **self.extra, }.items() ] ).encode() @@ -65,7 +82,7 @@ def render(self) -> str: # TODO: handle exceptions @classmethod - def parse(cls, rq_id: str, /) -> RQId: + def parse(cls, rq_id: str, /): decoded_rq_id = base64.b64decode(rq_id).decode() keys = set(attrs.fields_dict(cls).keys()) - {"extra"} diff --git a/cvat/apps/redis_handler/views.py b/cvat/apps/redis_handler/views.py index 1277dd8d392b..83f45b9e05d4 100644 --- a/cvat/apps/redis_handler/views.py +++ b/cvat/apps/redis_handler/views.py @@ -1,7 +1,6 @@ import functools from collections import namedtuple from collections.abc import Iterable -from typing import Optional import django_rq from django.conf import settings @@ -24,11 +23,12 @@ NonModelSimpleFilter, ) from cvat.apps.engine.log import ServerLogManager -from cvat.apps.engine.models import RequestAction, RequestStatus, RequestSubresource, RequestTarget +from cvat.apps.engine.models import RequestAction, RequestStatus, RequestTarget from cvat.apps.engine.rq import is_rq_job_owner from cvat.apps.engine.serializers import RequestSerializer from cvat.apps.engine.types import ExtendedRequest from cvat.apps.redis_handler.rq import RQId +from django.utils.module_loading import import_string slogger = ServerLogManager(__name__) @@ -55,6 +55,11 @@ class RequestViewSet(viewsets.GenericViewSet): for queue_name, queue_conf in settings.RQ_QUEUES.items() if queue_conf.get("VISIBLE_VIA_REQUESTS_API") } + PARSED_JOB_ID_CLASSES = { + queue_name: import_string(settings.RQ_QUEUES[queue_name]["PARSED_JOB_ID_CLASS"]) + for queue_name in SUPPORTED_QUEUES + if "PARSED_JOB_ID_CLASS" in settings.RQ_QUEUES[queue_name] + } serializer_class = RequestSerializer iam_organization_field = None @@ -118,6 +123,10 @@ def get_queryset(self): def queues(self) -> Iterable[DjangoRQ]: return (django_rq.get_queue(queue_name) for queue_name in self.SUPPORTED_QUEUES) + @classmethod + def get_parsed_id_class(cls, queue_name: str) -> type[RQId]: + return cls.PARSED_JOB_ID_CLASSES.get(queue_name, RQId) + def _get_rq_jobs_from_queue(self, queue: DjangoRQ, user_id: int) -> list[RQJob]: job_ids = set( queue.get_job_ids() @@ -127,13 +136,18 @@ def _get_rq_jobs_from_queue(self, queue: DjangoRQ, user_id: int) -> list[RQJob]: + queue.deferred_job_registry.get_job_ids() ) jobs = [] + + ParsedIdClass = self.get_parsed_id_class(queue.name) + for job in queue.job_class.fetch_many(job_ids, queue.connection): # TODO: move filtration by owner? if job and is_rq_job_owner(job, user_id): try: - parsed_rq_id = RQId.parse(job.id) + parsed_rq_id = ParsedIdClass.parse(job.id) except Exception: # nosec B112 continue + + # todo: fix type annotation job.parsed_rq_id = parsed_rq_id jobs.append(job) @@ -157,7 +171,7 @@ def _get_rq_jobs(self, user_id: int) -> list[RQJob]: return all_jobs - def _get_rq_job_by_id(self, rq_id: str) -> Optional[RQJob]: + def _get_rq_job_by_id(self, rq_id: str) -> RQJob | None: """ Get a RQJob by its ID from the queues. @@ -169,10 +183,10 @@ def _get_rq_job_by_id(self, rq_id: str) -> Optional[RQJob]: """ try: parsed_rq_id = RQId.parse(rq_id) - except Exception as ex: + except Exception: return None - job: Optional[RQJob] = None + job: RQJob | None = None if parsed_rq_id.queue.value not in self.SUPPORTED_QUEUES: raise ValidationError("Unsupported queue") @@ -181,6 +195,10 @@ def _get_rq_job_by_id(self, rq_id: str) -> Optional[RQJob]: job = queue.fetch_job(rq_id) if job: + ParsedIdClass = self.get_parsed_id_class(queue.name) + if type(parsed_rq_id) is not ParsedIdClass: + parsed_rq_id = ParsedIdClass.from_base(parsed_rq_id) + job.parsed_rq_id = parsed_rq_id return job diff --git a/cvat/settings/base.py b/cvat/settings/base.py index 35e914523d62..f9a5d2eb3e69 100644 --- a/cvat/settings/base.py +++ b/cvat/settings/base.py @@ -295,12 +295,15 @@ class CVAT_QUEUES(Enum): CVAT_QUEUES.IMPORT_DATA.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "4h", + # custom fields "VISIBLE_VIA_REQUESTS_API": True, + "PARSED_JOB_ID_CLASS": "cvat.apps.engine.rq.ImportRQId", }, CVAT_QUEUES.EXPORT_DATA.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "4h", "VISIBLE_VIA_REQUESTS_API": True, + "PARSED_JOB_ID_CLASS": "cvat.apps.engine.rq.ExportRQId", }, CVAT_QUEUES.AUTO_ANNOTATION.value: { **REDIS_INMEM_SETTINGS, From 6e4583752e2f34dd612f534abcceb182fb087003 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Wed, 19 Mar 2025 11:52:08 +0100 Subject: [PATCH 009/103] Fix && update consensus REST API tests --- tests/python/rest_api/test_consensus.py | 156 +++++++++++++++++------- tests/python/rest_api/utils.py | 32 +++-- tests/python/shared/fixtures/data.py | 2 + 3 files changed, 139 insertions(+), 51 deletions(-) diff --git a/tests/python/rest_api/test_consensus.py b/tests/python/rest_api/test_consensus.py index 2294990ee601..a16f523e13f6 100644 --- a/tests/python/rest_api/test_consensus.py +++ b/tests/python/rest_api/test_consensus.py @@ -6,6 +6,7 @@ from copy import deepcopy from functools import partial from http import HTTPStatus +from itertools import product from typing import Any, Dict, Optional, Tuple import pytest @@ -14,10 +15,9 @@ from cvat_sdk.api_client.api_client import ApiClient, Endpoint from cvat_sdk.core.helpers import get_paginated_collection from deepdiff import DeepDiff +from shared.utils.config import USER_PASS, make_api_client -from shared.utils.config import make_api_client - -from .utils import CollectionSimpleFilterTestBase, compare_annotations +from .utils import CollectionSimpleFilterTestBase, compare_annotations, wait_background_request class _PermissionTestBase: @@ -49,16 +49,13 @@ def merge( return response assert response.status == HTTPStatus.ACCEPTED - rq_id = json.loads(response.data)["rq_id"] - - while wait_result: - (_, response) = api_client.consensus_api.create_merge( - rq_id=rq_id, _parse_response=False + if wait_result: + rq_id = json.loads(response.data)["rq_id"] + background_request, _ = wait_background_request(api_client, rq_id) + assert ( + background_request.status.value + == models.RequestStatus.allowed_values[("value",)]["FINISHED"] ) - assert response.status in [HTTPStatus.CREATED, HTTPStatus.ACCEPTED] - - if response.status == HTTPStatus.CREATED: - break return response @@ -194,14 +191,14 @@ class TestPostConsensusMerge(_PermissionTestBase): def test_can_merge_task_with_consensus_jobs(self, admin_user, tasks): task_id = next(t["id"] for t in tasks if t["consensus_enabled"]) - assert self.merge(user=admin_user, task_id=task_id).status == HTTPStatus.CREATED + assert self.merge(user=admin_user, task_id=task_id) def test_can_merge_consensus_job(self, admin_user, jobs): job_id = next( j["id"] for j in jobs if j["type"] == "annotation" and j["consensus_replicas"] > 0 ) - assert self.merge(user=admin_user, job_id=job_id).status == HTTPStatus.CREATED + assert self.merge(user=admin_user, job_id=job_id) def test_cannot_merge_task_without_consensus_jobs(self, admin_user, tasks): task_id = next(t["id"] for t in tasks if not t["consensus_enabled"]) @@ -280,28 +277,25 @@ def test_user_merge_in_org_task( else: self._test_merge_403(user["username"], task_id=task["id"]) - # only rq job owner or admin now has the right to check status of report creation - def _test_check_merge_status_by_non_rq_job_owner( + # users with task:view rights can check status of report creation + def _test_check_merge_status( self, rq_id: str, *, staff_user: str, other_user: str, + other_user_status: HTTPStatus = HTTPStatus.FORBIDDEN, ): with make_api_client(other_user) as api_client: - (_, response) = api_client.consensus_api.create_merge( - rq_id=rq_id, _parse_response=False, _check_status=False + (_, response) = api_client.requests_api.retrieve( + rq_id, _parse_response=False, _check_status=False ) - assert response.status == HTTPStatus.NOT_FOUND - assert json.loads(response.data)["detail"] == "Unknown request id" + assert response.status == other_user_status with make_api_client(staff_user) as api_client: - (_, response) = api_client.consensus_api.create_merge( - rq_id=rq_id, _parse_response=False, _check_status=False - ) - assert response.status in {HTTPStatus.ACCEPTED, HTTPStatus.CREATED} + wait_background_request(api_client, rq_id) - def test_non_rq_job_owner_cannot_check_status_of_merge_in_sandbox( + def test_user_without_rights_cannot_check_status_of_merge_in_sandbox( self, find_sandbox_task_with_consensus, users, @@ -315,37 +309,120 @@ def test_non_rq_job_owner_cannot_check_status_of_merge_in_sandbox( u["id"] != task_staff["id"] and not u["is_superuser"] and u["id"] != task["owner"]["id"] + and u["id"] != (task["assignee"] or {}).get("id") ) ) rq_id = self.request_merge(task_id=task["id"], user=task_staff["username"]) - self._test_check_merge_status_by_non_rq_job_owner( + self._test_check_merge_status( rq_id, staff_user=task_staff["username"], other_user=other_user["username"] ) - @pytest.mark.parametrize("role", _PermissionTestBase._default_org_roles) - def test_non_rq_job_owner_cannot_check_status_of_merge_in_org( + @pytest.mark.parametrize( + "same_org, role", + [ + pair + for pair in product([True, False], _PermissionTestBase._default_org_roles) + if not (pair[0] and pair[1] in ["owner", "maintainer"]) + ], + ) + def test_user_without_rights_cannot_check_status_of_merge_in_org( self, find_org_task_with_consensus, find_users, + same_org: bool, role: str, + admin_user, + organizations, ): task, task_staff = find_org_task_with_consensus(is_staff=True, user_org_role="supervisor") - other_user = next( - u - for u in find_users(role=role, org=task["organization"]) - if ( - u["id"] != task_staff["id"] - and not u["is_superuser"] - and u["id"] != task["owner"]["id"] + org_filter = "org" + if not same_org: + org_filter = "exclude_" + org_filter + + try: + other_user = next( + u + for u in find_users( + role=role, **{org_filter: task["organization"]}, exclude_is_superuser=True + ) + if ( + u["id"] != task_staff["id"] + and u["id"] != task["owner"]["id"] + and u["id"] != (task["assignee"] or {}).get("id") + ) ) - ) + except StopIteration: + # create a new user that passes the requirements + with make_api_client(admin_user) as api_client: + user_name = f"{same_org}{role}" + other_user, _ = api_client.auth_api.create_register( + models.RegisterSerializerExRequest( + username=user_name, + password1=USER_PASS, + password2=USER_PASS, + email=f"{user_name}@email.com", + ) + ) + + org_id = ( + task["organization"] + if same_org + else next(o for o in organizations if o["id"] != task["organization"])["id"] + ) + + # looks like a bug in SDK, second post request fails with CSRF issue when the same api_client is used + with make_api_client(admin_user) as api_client: + api_client.invitations_api.create( + models.InvitationWriteRequest( + role=role, + email=other_user["email"], + ), + org_id=org_id, + ) + rq_id = self.request_merge(task_id=task["id"], user=task_staff["username"]) - self._test_check_merge_status_by_non_rq_job_owner( + self._test_check_merge_status( rq_id, staff_user=task_staff["username"], other_user=other_user["username"] ) + @pytest.mark.parametrize( + "role", + # owner and maintainer has rights even without being assigned to a task + ("supervisor", "worker"), + ) + def test_task_assignee_can_check_status_of_merge_in_org( + self, + find_org_task_with_consensus, + role: str, + ): + task, other_user = find_org_task_with_consensus(is_staff=False, user_org_role=role) + task_owner = task["owner"] + + rq_id = self.request_merge(task_id=task["id"], user=task_owner["username"]) + self._test_check_merge_status( + rq_id, + staff_user=task_owner["username"], + other_user=other_user["username"], + other_user_status=HTTPStatus.FORBIDDEN, + ) + + with make_api_client(task_owner["username"]) as api_client: + api_client.tasks_api.partial_update( + task["id"], + patched_task_write_request=models.PatchedTaskWriteRequest( + assignee_id=other_user["id"] + ), + ) + + self._test_check_merge_status( + rq_id, + staff_user=task_owner["username"], + other_user=other_user["username"], + other_user_status=HTTPStatus.OK, + ) + @pytest.mark.parametrize("is_sandbox", (True, False)) def test_admin_can_check_status_of_merge( self, @@ -373,10 +450,7 @@ def test_admin_can_check_status_of_merge( rq_id = self.request_merge(task_id=task["id"], user=task_staff["username"]) with make_api_client(admin["username"]) as api_client: - (_, response) = api_client.consensus_api.create_merge( - rq_id=rq_id, _parse_response=False - ) - assert response.status in {HTTPStatus.ACCEPTED, HTTPStatus.CREATED} + wait_background_request(api_client, rq_id) class TestSimpleConsensusSettingsFilters(CollectionSimpleFilterTestBase): diff --git a/tests/python/rest_api/utils.py b/tests/python/rest_api/utils.py index 460695f8a887..e9475bdb9dde 100644 --- a/tests/python/rest_api/utils.py +++ b/tests/python/rest_api/utils.py @@ -19,8 +19,8 @@ from cvat_sdk.api_client.exceptions import ForbiddenException from cvat_sdk.core.helpers import get_paginated_collection from deepdiff import DeepDiff - from shared.utils.config import make_api_client +from urllib3 import HTTPResponse def initialize_export(endpoint: Endpoint, *, expect_forbidden: bool = False, **kwargs) -> str: @@ -41,14 +41,13 @@ def initialize_export(endpoint: Endpoint, *, expect_forbidden: bool = False, **k return rq_id -def wait_and_download_v2( +def wait_background_request( api_client: ApiClient, rq_id: str, *, max_retries: int = 50, interval: float = 0.1, - download_result: bool = True, -) -> Optional[bytes]: +) -> tuple[models.Request, HTTPResponse]: for _ in range(max_retries): (background_request, response) = api_client.requests_api.retrieve(rq_id) assert response.status == HTTPStatus.OK @@ -56,13 +55,26 @@ def wait_and_download_v2( background_request.status.value == models.RequestStatus.allowed_values[("value",)]["FINISHED"] ): - break + return background_request, response sleep(interval) - else: - assert False, ( - f"Export process was not finished within allowed time ({interval * max_retries}, sec). " - + f"Last status was: {background_request.status.value}" - ) + + assert False, ( + f"Export process was not finished within allowed time ({interval * max_retries}, sec). " + + f"Last status was: {background_request.status.value}" + ) + + +def wait_and_download_v2( + api_client: ApiClient, + rq_id: str, + *, + max_retries: int = 50, + interval: float = 0.1, + download_result: bool = True, +) -> Optional[bytes]: + background_request, _ = wait_background_request( + api_client, rq_id, max_retries=max_retries, interval=interval + ) if not download_result: return None diff --git a/tests/python/shared/fixtures/data.py b/tests/python/shared/fixtures/data.py index eb8f7393cd8c..eb4f293abc3f 100644 --- a/tests/python/shared/fixtures/data.py +++ b/tests/python/shared/fixtures/data.py @@ -394,6 +394,7 @@ def add_row(**kwargs): id=user["id"], privilege=group, has_analytics_access=user["has_analytics_access"], + is_superuser=user["is_superuser"] ) for membership in memberships: @@ -407,6 +408,7 @@ def add_row(**kwargs): org=membership["organization"], membership_id=membership["id"], has_analytics_access=users_by_name[username]["has_analytics_access"], + is_superuser=users_by_name[username]["is_superuser"] ) return data From e8d53fabc217deaa3aee63cb1062232bccbc71df Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Wed, 19 Mar 2025 14:23:00 +0100 Subject: [PATCH 010/103] Update quality reports REST API tests --- cvat/apps/engine/serializers.py | 3 + cvat/apps/quality_control/quality_reports.py | 5 +- cvat/apps/quality_control/views.py | 4 +- tests/python/rest_api/test_consensus.py | 31 ++-- tests/python/rest_api/test_quality_control.py | 147 ++++++++++++++---- tests/python/shared/fixtures/data.py | 4 +- 6 files changed, 139 insertions(+), 55 deletions(-) diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 96fbaaa55beb..2fd0ce58ba55 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -3605,12 +3605,15 @@ def to_representation(self, rq_job: RQJob) -> dict[str, Any]: representation["status"] = RQJobStatus.QUEUED if representation["status"] == RQJobStatus.FINISHED: + + # TODO: move into a custom Job class if rq_job.parsed_rq_id.action == models.RequestAction.EXPORT: representation["result_url"] = ExportRQMeta.for_job(rq_job).result_url if ( rq_job.parsed_rq_id.action == models.RequestAction.IMPORT and rq_job.parsed_rq_id.subresource == models.RequestSubresource.BACKUP + or rq_job.parsed_rq_id.queue == settings.CVAT_QUEUES.QUALITY_REPORTS ): representation["result_id"] = rq_job.return_value() diff --git a/cvat/apps/quality_control/quality_reports.py b/cvat/apps/quality_control/quality_reports.py index baaa6ab57054..7858e74e4254 100644 --- a/cvat/apps/quality_control/quality_reports.py +++ b/cvat/apps/quality_control/quality_reports.py @@ -2265,7 +2265,7 @@ def generate_report(self) -> ComparisonReport: ) -class QualityReportUpdateManager(AbstractRQJobManager): +class QualityReportRQJobManager(AbstractRQJobManager): _JOB_RESULT_TTL = 120 _JOB_FAILURE_TTL = _JOB_RESULT_TTL @@ -2300,7 +2300,7 @@ def setup_background_job(self, queue: RqQueue, rq_id: str) -> None: with get_rq_lock_by_user(queue, user_id=user_id): dependency = define_dependent_job(queue, user_id=user_id, rq_id=rq_id) queue.enqueue( - self._check_task_quality, + QualityReportUpdateManager._check_task_quality, task_id=self.db_instance.pk, job_id=rq_id, meta=BaseRQMeta.build(request=self.request, db_obj=self.db_instance), @@ -2309,6 +2309,7 @@ def setup_background_job(self, queue: RqQueue, rq_id: str) -> None: depends_on=dependency, ) +class QualityReportUpdateManager: @classmethod @silk_profile() def _check_task_quality(cls, *, task_id: int) -> int: diff --git a/cvat/apps/quality_control/views.py b/cvat/apps/quality_control/views.py index 71cd22eb14be..e8e5b6172294 100644 --- a/cvat/apps/quality_control/views.py +++ b/cvat/apps/quality_control/views.py @@ -277,7 +277,7 @@ def create(self, request, *args, **kwargs): except Task.DoesNotExist as ex: raise NotFound(f"Task {task_id} does not exist") from ex - manager = qc.QualityReportUpdateManager(task, request) + manager = qc.QualityReportRQJobManager(task, request) return manager.process() else: @@ -286,7 +286,7 @@ def create(self, request, *args, **kwargs): serializer = RqIdSerializer(data={"rq_id": rq_id}) serializer.is_valid(raise_exception=True) rq_id = serializer.validated_data["rq_id"] - rq_job = qc.QualityReportUpdateManager.get_job_by_id(rq_id) + rq_job = qc.QualityReportRQJobManager.get_job_by_id(rq_id) # FUTURE-TODO: move into permissions # and allow not only rq job owner to check the status diff --git a/tests/python/rest_api/test_consensus.py b/tests/python/rest_api/test_consensus.py index a16f523e13f6..9f504e559f90 100644 --- a/tests/python/rest_api/test_consensus.py +++ b/tests/python/rest_api/test_consensus.py @@ -283,14 +283,14 @@ def _test_check_merge_status( rq_id: str, *, staff_user: str, - other_user: str, - other_user_status: HTTPStatus = HTTPStatus.FORBIDDEN, + another_user: str, + another_user_status: HTTPStatus = HTTPStatus.FORBIDDEN, ): - with make_api_client(other_user) as api_client: + with make_api_client(another_user) as api_client: (_, response) = api_client.requests_api.retrieve( rq_id, _parse_response=False, _check_status=False ) - assert response.status == other_user_status + assert response.status == another_user_status with make_api_client(staff_user) as api_client: wait_background_request(api_client, rq_id) @@ -302,7 +302,7 @@ def test_user_without_rights_cannot_check_status_of_merge_in_sandbox( ): task, task_staff = find_sandbox_task_with_consensus(is_staff=True) - other_user = next( + another_user = next( u for u in users if ( @@ -315,7 +315,7 @@ def test_user_without_rights_cannot_check_status_of_merge_in_sandbox( rq_id = self.request_merge(task_id=task["id"], user=task_staff["username"]) self._test_check_merge_status( - rq_id, staff_user=task_staff["username"], other_user=other_user["username"] + rq_id, staff_user=task_staff["username"], another_user=another_user["username"] ) @pytest.mark.parametrize( @@ -342,7 +342,7 @@ def test_user_without_rights_cannot_check_status_of_merge_in_org( org_filter = "exclude_" + org_filter try: - other_user = next( + another_user = next( u for u in find_users( role=role, **{org_filter: task["organization"]}, exclude_is_superuser=True @@ -357,7 +357,7 @@ def test_user_without_rights_cannot_check_status_of_merge_in_org( # create a new user that passes the requirements with make_api_client(admin_user) as api_client: user_name = f"{same_org}{role}" - other_user, _ = api_client.auth_api.create_register( + another_user, _ = api_client.auth_api.create_register( models.RegisterSerializerExRequest( username=user_name, password1=USER_PASS, @@ -377,14 +377,14 @@ def test_user_without_rights_cannot_check_status_of_merge_in_org( api_client.invitations_api.create( models.InvitationWriteRequest( role=role, - email=other_user["email"], + email=another_user["email"], ), org_id=org_id, ) rq_id = self.request_merge(task_id=task["id"], user=task_staff["username"]) self._test_check_merge_status( - rq_id, staff_user=task_staff["username"], other_user=other_user["username"] + rq_id, staff_user=task_staff["username"], another_user=another_user["username"] ) @pytest.mark.parametrize( @@ -397,30 +397,29 @@ def test_task_assignee_can_check_status_of_merge_in_org( find_org_task_with_consensus, role: str, ): - task, other_user = find_org_task_with_consensus(is_staff=False, user_org_role=role) + task, another_user = find_org_task_with_consensus(is_staff=False, user_org_role=role) task_owner = task["owner"] rq_id = self.request_merge(task_id=task["id"], user=task_owner["username"]) self._test_check_merge_status( rq_id, staff_user=task_owner["username"], - other_user=other_user["username"], - other_user_status=HTTPStatus.FORBIDDEN, + another_user=another_user["username"], ) with make_api_client(task_owner["username"]) as api_client: api_client.tasks_api.partial_update( task["id"], patched_task_write_request=models.PatchedTaskWriteRequest( - assignee_id=other_user["id"] + assignee_id=another_user["id"] ), ) self._test_check_merge_status( rq_id, staff_user=task_owner["username"], - other_user=other_user["username"], - other_user_status=HTTPStatus.OK, + another_user=another_user["username"], + another_user_status=HTTPStatus.OK, ) @pytest.mark.parametrize("is_sandbox", (True, False)) diff --git a/tests/python/rest_api/test_quality_control.py b/tests/python/rest_api/test_quality_control.py index aab71846c7f0..04178fdcb2fd 100644 --- a/tests/python/rest_api/test_quality_control.py +++ b/tests/python/rest_api/test_quality_control.py @@ -16,9 +16,10 @@ from cvat_sdk.core.helpers import get_paginated_collection from deepdiff import DeepDiff -from shared.utils.config import make_api_client +from shared.utils.config import make_api_client, USER_PASS +from itertools import product -from .utils import CollectionSimpleFilterTestBase, parse_frame_step +from .utils import CollectionSimpleFilterTestBase, parse_frame_step, wait_background_request class _PermissionTestBase: @@ -31,14 +32,14 @@ def create_quality_report(self, user: str, task_id: int): assert response.status == HTTPStatus.ACCEPTED rq_id = json.loads(response.data)["rq_id"] - while True: - (_, response) = api_client.quality_api.create_report( - rq_id=rq_id, _parse_response=False - ) - assert response.status in [HTTPStatus.CREATED, HTTPStatus.ACCEPTED] + background_request, _ = wait_background_request(api_client, rq_id) + assert ( + background_request.status.value + == models.RequestStatus.allowed_values[("value",)]["FINISHED"] + ) + report_id = background_request.result_id - if response.status == HTTPStatus.CREATED: - break + _, response = api_client.quality_api.retrieve_report(report_id, _parse_response=False) return json.loads(response.data) @@ -169,6 +170,7 @@ def find_org_task_without_gt(self, find_org_task): ("worker", False, False), ], ) + _default_org_roles = ("owner", "maintainer", "supervisor", "worker") @pytest.mark.usefixtures("restore_db_per_class") @@ -581,28 +583,63 @@ def _initialize_report_creation(task_id: int, user: str) -> str: return rq_id - # only rq job owner or admin now has the right to check status of report creation - def _test_check_status_of_report_creation_by_non_rq_job_owner( + # users with task:view rights can check status of report creation + def _test_check_status_of_report_creation( self, rq_id: str, *, task_staff: str, another_user: str, + another_user_status: HTTPStatus = HTTPStatus.FORBIDDEN, ): with make_api_client(another_user) as api_client: - (_, response) = api_client.quality_api.create_report( - rq_id=rq_id, _parse_response=False, _check_status=False + (_, response) = api_client.requests_api.retrieve( + rq_id, _parse_response=False, _check_status=False ) - assert response.status == HTTPStatus.NOT_FOUND - assert json.loads(response.data)["detail"] == "Unknown request id" + assert response.status == another_user_status with make_api_client(task_staff) as api_client: - (_, response) = api_client.quality_api.create_report( - rq_id=rq_id, _parse_response=False, _check_status=False + wait_background_request(api_client, rq_id) + + @pytest.mark.parametrize( + "role", + # owner and maintainer has rights even without being assigned to a task + ("supervisor", "worker"), + ) + def test_task_assignee_can_check_status_of_report_creation_in_org( + self, + find_org_task_without_gt: Callable[[bool, str], tuple[dict[str, Any], dict[str, Any]]], + role: str, + admin_user: str, + ): + task, another_user = find_org_task_without_gt(is_staff=False, user_org_role=role) + self.create_gt_job(admin_user, task["id"]) + + task_owner = task["owner"] + + rq_id = self._initialize_report_creation(task_id=task["id"], user=task_owner["username"]) + self._test_check_status_of_report_creation( + rq_id, + task_staff=task_owner["username"], + another_user=another_user["username"], + ) + + with make_api_client(task_owner["username"]) as api_client: + api_client.tasks_api.partial_update( + task["id"], + patched_task_write_request=models.PatchedTaskWriteRequest( + assignee_id=another_user["id"] + ), ) - assert response.status in {HTTPStatus.ACCEPTED, HTTPStatus.CREATED} - def test_non_rq_job_owner_cannot_check_status_of_report_creation_in_sandbox( + self._test_check_status_of_report_creation( + rq_id, + task_staff=task_owner["username"], + another_user=another_user["username"], + another_user_status=HTTPStatus.OK, + ) + + def test_user_without_rights_cannot_check_status_of_report_creation_in_sandbox( self, find_sandbox_task_without_gt: Callable[[bool], tuple[dict[str, Any], dict[str, Any]]], admin_user: str, @@ -619,36 +656,81 @@ def test_non_rq_job_owner_cannot_check_status_of_report_creation_in_sandbox( u["id"] != task_staff["id"] and not u["is_superuser"] and u["id"] != task["owner"]["id"] + and u["id"] != (task["assignee"] or {}).get("id") ) ) rq_id = self._initialize_report_creation(task["id"], task_staff["username"]) - self._test_check_status_of_report_creation_by_non_rq_job_owner( + self._test_check_status_of_report_creation( rq_id, task_staff=task_staff["username"], another_user=another_user["username"] ) - @pytest.mark.parametrize("role", ("owner", "maintainer", "supervisor", "worker")) - def test_non_rq_job_owner_cannot_check_status_of_report_creation_in_org( + @pytest.mark.parametrize( + "same_org, role", + [ + pair + for pair in product([True, False], _PermissionTestBase._default_org_roles) + if not (pair[0] and pair[1] in ["owner", "maintainer"]) + ], + ) + def test_user_without_rights_cannot_check_status_of_report_creation_in_org( self, + same_org: bool, role: str, admin_user: str, find_org_task_without_gt: Callable[[bool, str], tuple[dict[str, Any], dict[str, Any]]], find_users: Callable[..., list[dict[str, Any]]], + organizations, ): task, task_staff = find_org_task_without_gt(is_staff=True, user_org_role="supervisor") self.create_gt_job(admin_user, task["id"]) - another_user = next( - u - for u in find_users(role=role, org=task["organization"]) - if ( - u["id"] != task_staff["id"] - and not u["is_superuser"] - and u["id"] != task["owner"]["id"] + org_filter = "org" + if not same_org: + org_filter = "exclude_" + org_filter + + try: + another_user = next( + u + for u in find_users( + role=role, exclude_is_superuser=True, **{org_filter: task["organization"]} + ) + if ( + u["id"] != task_staff["id"] + and u["id"] != task["owner"]["id"] + and u["id"] != (task["assignee"] or {}).get("id") + ) ) - ) + except StopIteration: + # create a new user that passes the requirements + with make_api_client(admin_user) as api_client: + user_name = f"{same_org}{role}" + another_user, _ = api_client.auth_api.create_register( + models.RegisterSerializerExRequest( + username=user_name, + password1=USER_PASS, + password2=USER_PASS, + email=f"{user_name}@email.com", + ) + ) + + org_id = ( + task["organization"] + if same_org + else next(o for o in organizations if o["id"] != task["organization"])["id"] + ) + + # looks like a bug in SDK, second post request fails with CSRF issue when the same api_client is used + with make_api_client(admin_user) as api_client: + api_client.invitations_api.create( + models.InvitationWriteRequest( + role=role, + email=another_user["email"], + ), + org_id=org_id, + ) rq_id = self._initialize_report_creation(task["id"], task_staff["username"]) - self._test_check_status_of_report_creation_by_non_rq_job_owner( + self._test_check_status_of_report_creation( rq_id, task_staff=task_staff["username"], another_user=another_user["username"] ) @@ -682,8 +764,7 @@ def test_admin_can_check_status_of_report_creation( rq_id = self._initialize_report_creation(task["id"], task_staff["username"]) with make_api_client(admin["username"]) as api_client: - (_, response) = api_client.quality_api.create_report(rq_id=rq_id, _parse_response=False) - assert response.status in {HTTPStatus.ACCEPTED, HTTPStatus.CREATED} + wait_background_request(api_client, rq_id) class TestSimpleQualityReportsFilters(CollectionSimpleFilterTestBase): diff --git a/tests/python/shared/fixtures/data.py b/tests/python/shared/fixtures/data.py index eb4f293abc3f..544ed74a14ff 100644 --- a/tests/python/shared/fixtures/data.py +++ b/tests/python/shared/fixtures/data.py @@ -394,7 +394,7 @@ def add_row(**kwargs): id=user["id"], privilege=group, has_analytics_access=user["has_analytics_access"], - is_superuser=user["is_superuser"] + is_superuser=user["is_superuser"], ) for membership in memberships: @@ -408,7 +408,7 @@ def add_row(**kwargs): org=membership["organization"], membership_id=membership["id"], has_analytics_access=users_by_name[username]["has_analytics_access"], - is_superuser=users_by_name[username]["is_superuser"] + is_superuser=users_by_name[username]["is_superuser"], ) return data From 50d3df59bf7e8abc5b42683056882ea269bd0887 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Mon, 24 Mar 2025 23:39:04 +0100 Subject: [PATCH 011/103] Refactor && fix some bugs --- cvat/apps/consensus/merging_manager.py | 63 +- cvat/apps/consensus/views.py | 9 +- cvat/apps/dataset_manager/project.py | 3 + cvat/apps/dataset_manager/task.py | 4 +- cvat/apps/dataset_manager/views.py | 5 +- cvat/apps/engine/background.py | 695 ++++++++------- cvat/apps/engine/backup.py | 194 +--- cvat/apps/engine/mixins.py | 74 +- cvat/apps/engine/permissions.py | 6 +- cvat/apps/engine/rq.py | 14 +- cvat/apps/engine/serializers.py | 161 +--- cvat/apps/engine/views.py | 825 +++--------------- cvat/apps/events/export.py | 192 ++-- cvat/apps/lambda_manager/views.py | 6 +- cvat/apps/quality_control/quality_reports.py | 55 +- cvat/apps/quality_control/views.py | 8 +- cvat/apps/redis_handler/background.py | 338 +++++-- cvat/apps/redis_handler/permissions.py | 2 - cvat/apps/redis_handler/rq.py | 33 +- cvat/apps/redis_handler/serializers.py | 176 ++++ cvat/apps/redis_handler/views.py | 20 +- cvat/settings/base.py | 4 +- tests/python/rest_api/test_consensus.py | 2 +- tests/python/rest_api/test_quality_control.py | 2 +- 24 files changed, 1207 insertions(+), 1684 deletions(-) create mode 100644 cvat/apps/redis_handler/serializers.py diff --git a/cvat/apps/consensus/merging_manager.py b/cvat/apps/consensus/merging_manager.py index e23e54c13aef..4bb3371a4a79 100644 --- a/cvat/apps/consensus/merging_manager.py +++ b/cvat/apps/consensus/merging_manager.py @@ -3,13 +3,14 @@ # SPDX-License-Identifier: MIT import math +from functools import cached_property from typing import Type +import attrs import datumaro as dm from django.conf import settings from django.db import transaction -from django.http import HttpResponseBadRequest -from django_rq.queues import DjangoRQ as RqQueue +from rest_framework.serializers import ValidationError from cvat.apps.consensus.intersect_merge import IntersectMerge from cvat.apps.consensus.models import ConsensusSettings @@ -27,11 +28,12 @@ clear_annotations_in_jobs, ) from cvat.apps.engine.rq import BaseRQMeta, define_dependent_job +from cvat.apps.engine.types import ExtendedRequest from cvat.apps.engine.utils import get_rq_lock_by_user from cvat.apps.profiler import silk_profile from cvat.apps.quality_control.quality_reports import ComparisonParameters, JobDataProvider -from cvat.apps.redis_handler.background import AbstractRQJobManager -from cvat.apps.redis_handler.rq import RQId +from cvat.apps.redis_handler.background import AbstractRequestManager +from cvat.apps.redis_handler.rq import RequestId class _TaskMerger: @@ -159,29 +161,36 @@ class MergingNotAvailable(Exception): pass -class MergingManager(AbstractRQJobManager): +@attrs.define(kw_only=True) +class MergingManager(AbstractRequestManager): QUEUE_NAME = settings.CVAT_QUEUES.CONSENSUS.value SUPPORTED_RESOURCES = {RequestTarget.TASK, RequestTarget.JOB} - _JOB_RESULT_TTL = 300 - _JOB_FAILURE_TTL = _JOB_RESULT_TTL + @property + def job_result_ttl(self): + return 300 - def build_rq_id(self) -> str: - # todo: add redis migration - return RQId( + @property + def job_failed_ttl(self): + return self.job_result_ttl + + def build_request_id(self) -> str: + return RequestId( queue=self.QUEUE_NAME, action="merge", target=self.resource, id=self.db_instance.pk, ).render() - def _split_to_task_and_job(self) -> tuple[Task, Job | None]: - if isinstance(self.db_instance, Job): - return self.db_instance.segment.task, self.db_instance - - return self.db_instance, None + def init_callback_with_params(self): + self.callback = self._merge + self.callback_kwargs = { + "target_type": type(self.db_instance), + "target_id": self.db_instance.pk, + } def validate_request(self): + super().validate_request() # FUTURE-FIXME: check that there is no indirectly dependent RQ jobs: # e.g merge whole task and merge a particular job from the task task, job = self._split_to_task_and_job() @@ -189,23 +198,13 @@ def validate_request(self): try: _TaskMerger(task=task).check_merging_available(parent_job_id=job.pk if job else None) except MergingNotAvailable as ex: - return HttpResponseBadRequest(str(ex)) - - def setup_background_job(self, queue: RqQueue, rq_id: str) -> None: - user_id = self.request.user.id - - with get_rq_lock_by_user(queue, user_id=user_id): - dependency = define_dependent_job(queue, user_id=user_id, rq_id=rq_id) - queue.enqueue( - self._merge, - target_type=type(self.db_instance), - target_id=self.db_instance.pk, - job_id=rq_id, - meta=BaseRQMeta.build(request=self.request, db_obj=self.db_instance), - result_ttl=self._JOB_RESULT_TTL, - failure_ttl=self._JOB_FAILURE_TTL, - depends_on=dependency, - ) + raise ValidationError(str(ex)) from ex + + def _split_to_task_and_job(self) -> tuple[Task, Job | None]: + if isinstance(self.db_instance, Job): + return self.db_instance.segment.task, self.db_instance + + return self.db_instance, None @classmethod @silk_profile() diff --git a/cvat/apps/consensus/views.py b/cvat/apps/consensus/views.py index 271a10cecec1..ffcacf3b0201 100644 --- a/cvat/apps/consensus/views.py +++ b/cvat/apps/consensus/views.py @@ -4,6 +4,7 @@ import textwrap +from django.http import HttpResponseGone from drf_spectacular.utils import ( OpenApiParameter, OpenApiResponse, @@ -23,9 +24,8 @@ ) from cvat.apps.engine.mixins import PartialUpdateModelMixin from cvat.apps.engine.models import Job, Task -from cvat.apps.engine.serializers import RqIdSerializer from cvat.apps.engine.types import ExtendedRequest -from django.http import HttpResponseGone +from cvat.apps.redis_handler.serializers import RequestIdSerializer @extend_schema(tags=["consensus"]) @@ -38,7 +38,7 @@ class ConsensusMergesViewSet(viewsets.GenericViewSet): request=ConsensusMergeCreateSerializer, responses={ "202": OpenApiResponse( - RqIdSerializer, + RequestIdSerializer, description=textwrap.dedent( """\ A consensus merge request has been enqueued, the request id is returned. @@ -82,9 +82,10 @@ def create(self, request: ExtendedRequest, *args, **kwargs): except Job.DoesNotExist as ex: raise NotFound(f"Jobs {job_id} do not exist") from ex - manager = merging.MergingManager(instance, request) + manager = merging.MergingManager(request=request, db_instance=instance) return manager.process() + @extend_schema(tags=["consensus"]) @extend_schema_view( list=extend_schema( diff --git a/cvat/apps/dataset_manager/project.py b/cvat/apps/dataset_manager/project.py index 014fdcc27239..7d12ff96830b 100644 --- a/cvat/apps/dataset_manager/project.py +++ b/cvat/apps/dataset_manager/project.py @@ -21,6 +21,7 @@ from cvat.apps.engine.rq import ImportRQMeta from cvat.apps.engine.serializers import DataSerializer, TaskWriteSerializer from cvat.apps.engine.task import _create_thread as create_task +from cvat.apps.engine.utils import av_scan_paths from .annotation import AnnotationIR from .bindings import CvatDatasetNotFoundError, CvatImportError, ProjectData, load_dataset_data @@ -203,6 +204,8 @@ def import_dataset_as_project(src_file, project_id, format_name, conv_mask_to_po rq_job_meta.progress = 0. rq_job_meta.save() + av_scan_paths(src_file) + project = ProjectAnnotationAndData(project_id) project.init_from_db() diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py index 9d9c78a842e1..d23494ff5b3f 100644 --- a/cvat/apps/dataset_manager/task.py +++ b/cvat/apps/dataset_manager/task.py @@ -30,7 +30,7 @@ from cvat.apps.engine.log import DatasetLogManager from cvat.apps.engine.model_utils import add_prefetch_fields, bulk_create, get_cached from cvat.apps.engine.plugins import plugin_decorator -from cvat.apps.engine.utils import take_by +from cvat.apps.engine.utils import av_scan_paths, take_by from cvat.apps.events.handlers import handle_annotations_change from cvat.apps.profiler import silk_profile @@ -1127,6 +1127,7 @@ def export_task( @transaction.atomic def import_task_annotations(src_file, task_id, format_name, conv_mask_to_poly): + av_scan_paths(src_file) task = TaskAnnotation(task_id) importer = make_importer(format_name) @@ -1139,6 +1140,7 @@ def import_task_annotations(src_file, task_id, format_name, conv_mask_to_poly): @transaction.atomic def import_job_annotations(src_file, job_id, format_name, conv_mask_to_poly): + av_scan_paths(src_file) job = JobAnnotation(job_id, prefetch_images=True) importer = make_importer(format_name) diff --git a/cvat/apps/dataset_manager/views.py b/cvat/apps/dataset_manager/views.py index 67ff2143e4e2..c1a80906f40e 100644 --- a/cvat/apps/dataset_manager/views.py +++ b/cvat/apps/dataset_manager/views.py @@ -59,7 +59,10 @@ def log_exception(logger: logging.Logger | None = None, exc_info: bool = True): EXPORT_LOCKED_RETRY_INTERVAL = timedelta(seconds=settings.EXPORT_LOCKED_RETRY_INTERVAL) -def get_export_cache_ttl(db_instance: str | Project | Task | Job) -> timedelta: +def get_export_cache_ttl(db_instance: str | Project | Task | Job | None) -> timedelta: + if not db_instance: + return DEFAULT_CACHE_TTL + if isinstance(db_instance, (Project, Task, Job)): db_instance = db_instance.__class__.__name__ diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index 8e4d8d16b686..be36b4dabfeb 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -2,43 +2,72 @@ # # SPDX-License-Identifier: MIT -import os.path as osp -from abc import ABC, abstractmethod +from abc import abstractmethod +from dataclasses import asdict as dataclass_asdict from dataclasses import dataclass from datetime import datetime -from typing import Any, ClassVar -from urllib.parse import quote - -import django_rq +from functools import cached_property +from pathlib import Path +from tempfile import NamedTemporaryFile +from types import NoneType +from typing import Any, Callable, ClassVar +from uuid import uuid4 + +import attrs +from attrs.converters import to_bool from django.conf import settings -from django.http.response import HttpResponseBadRequest -from django_rq.queues import DjangoRQ -from rest_framework import serializers, status -from rest_framework.response import Response +from rest_framework import serializers +from rest_framework.exceptions import MethodNotAllowed, ValidationError from rest_framework.reverse import reverse from rq.job import Job as RQJob -from rq.job import JobStatus as RQJobStatus import cvat.apps.dataset_manager as dm from cvat.apps.dataset_manager.formats.registry import EXPORT_FORMATS -from cvat.apps.dataset_manager.util import get_export_cache_lock +from cvat.apps.dataset_manager.util import TmpDirManager from cvat.apps.dataset_manager.views import get_export_cache_ttl, get_export_callback -from cvat.apps.engine.backup import ProjectExporter, TaskExporter, create_backup -from cvat.apps.engine.cloud_provider import export_resource_to_cloud_storage +from cvat.apps.engine.backup import ( + ProjectExporter, + TaskExporter, + create_backup, + import_project, + import_task, +) +from cvat.apps.engine.cloud_provider import ( + export_resource_to_cloud_storage, + import_resource_from_cloud_storage, +) from cvat.apps.engine.location import StorageType, get_location_configuration from cvat.apps.engine.log import ServerLogManager -from cvat.apps.engine.models import Location, RequestAction, RequestSubresource, RequestTarget, Task +from cvat.apps.engine.models import ( + Job, + Location, + Project, + RequestAction, + RequestSubresource, + RequestTarget, + Task, +) from cvat.apps.engine.permissions import get_cloud_storage_for_import_or_export -from cvat.apps.engine.rq import ExportRQMeta, ExportRQId, define_dependent_job +from cvat.apps.engine.rq import ( + ExportRequestId, + ExportRQMeta, + ImportRequestId, + ImportRQMeta, + define_dependent_job, +) +from cvat.apps.engine.serializers import UploadedFileSerializer, UploadedZipFileSerializer +from cvat.apps.engine.types import ExtendedRequest from cvat.apps.engine.utils import ( build_annotations_file_name, build_backup_file_name, get_rq_lock_by_user, - get_rq_lock_for_job, is_dataset_export, - sendfile, ) -from cvat.apps.events.handlers import handle_dataset_export +from cvat.apps.events.handlers import handle_dataset_export, handle_dataset_import +from cvat.apps.redis_handler.background import ( + AbstractExportableRequestManager, + AbstractRequestManager, +) slogger = ServerLogManager(__name__) @@ -47,119 +76,6 @@ LOCK_TTL = REQUEST_TIMEOUT - 5 LOCK_ACQUIRE_TIMEOUT = LOCK_TTL - 5 -from cvat.apps.redis_handler.background import AbstractRQJobManager - - -class ResourceExportManager(AbstractRQJobManager): - QUEUE_NAME = settings.CVAT_QUEUES.EXPORT_DATA.value - SUPPORTED_SUBRESOURCES: ClassVar[set[RequestSubresource]] - - ### Initialization logic ### - - @abstractmethod - def initialize_export_args(self) -> None: ... - - @abstractmethod - def validate_export_args(self) -> Response | None: ... - - - @abstractmethod - def get_download_api_endpoint_view_name(self) -> str: ... - - def make_result_url(self, *, rq_id: str) -> str: - view_name = self.get_download_api_endpoint_view_name() - result_url = reverse(view_name, args=[self.db_instance.pk], request=self.request) - - return result_url + f"?rq_id={quote(rq_id)}" - - def get_updated_date_timestamp(self) -> str: - # use only updated_date for the related resource, don't check children objects - # because every child update should touch the updated_date of the parent resource - return datetime.strftime(self.db_instance.updated_date, "%Y_%m_%d_%H_%M_%S") - - @abstractmethod - def get_result_filename(self) -> str: ... - - @abstractmethod - def send_events(self) -> None: ... - - def after_processing(self): - self.send_events() - - def process(self): - self.initialize_export_args() - return super().process() - - ### Logic related to prepared file downloading ### - - def validate_rq_id(self, rq_id: str) -> None: - parsed_rq_id = ExportRQId.parse(rq_id) - - if ( - parsed_rq_id.action != RequestAction.EXPORT - or parsed_rq_id.target != RequestTarget(self.resource) - or parsed_rq_id.id != self.db_instance.pk - or parsed_rq_id.subresource not in self.SUPPORTED_SUBRESOURCES - ): - raise ValueError("The provided request id does not match exported target or resource") - - def download_file(self) -> Response: - queue: DjangoRQ = django_rq.get_queue(self.QUEUE_NAME) - rq_id = self.request.query_params.get("rq_id") - - if not rq_id: - return HttpResponseBadRequest("Missing request id in the query parameters") - - try: - self.validate_rq_id(rq_id) - except ValueError: - return HttpResponseBadRequest("Invalid export request id") - - # ensure that there is no race condition when processing parallel requests - with get_rq_lock_for_job(queue, rq_id): - rq_job = queue.fetch_job(rq_id) - - if not rq_job: - return HttpResponseBadRequest("Unknown export request id") - - # define status once to avoid refreshing it on each check - # FUTURE-TODO: get_status will raise InvalidJobOperation exception instead of returning None in one of the next releases - rq_job_status = rq_job.get_status(refresh=False) - - if rq_job_status != RQJobStatus.FINISHED: - return HttpResponseBadRequest("The export process is not finished") - - rq_job_meta = ExportRQMeta.for_job(rq_job) - file_path = rq_job.return_value() - - if not file_path: - return ( - Response( - "A result for exporting job was not found for finished RQ job", - status=status.HTTP_500_INTERNAL_SERVER_ERROR, - ) - if rq_job_meta.result_url # user tries to download a final file locally while the export is made to cloud storage - else HttpResponseBadRequest( - "The export process has no result file to be downloaded locally" - ) - ) - - with get_export_cache_lock( - file_path, ttl=LOCK_TTL, acquire_timeout=LOCK_ACQUIRE_TIMEOUT - ): - if not osp.exists(file_path): - return Response( - "The exported file has expired, please retry exporting", - status=status.HTTP_404_NOT_FOUND, - ) - - return sendfile( - self.request, - file_path, - attachment=True, - attachment_filename=rq_job_meta.result_filename, - ) - def cancel_and_delete(rq_job: RQJob) -> None: # In the case the server is configured with ONE_RUNNING_JOB_IN_QUEUE_PER_USER @@ -168,67 +84,42 @@ def cancel_and_delete(rq_job: RQJob) -> None: rq_job.delete() -class DatasetExportManager(ResourceExportManager): +class DatasetExportManager(AbstractExportableRequestManager): SUPPORTED_RESOURCES = {RequestTarget.PROJECT, RequestTarget.TASK, RequestTarget.JOB} - SUPPORTED_SUBRESOURCES = {RequestSubresource.DATASET, RequestSubresource.ANNOTATIONS} @dataclass - class ExportArgs: + class ExportArgs(AbstractExportableRequestManager.ExportArgs): format: str - filename: str save_images: bool - location_config: dict[str, Any] - - @property - def location(self) -> Location: - return self.location_config["location"] - def initialize_export_args(self) -> None: + def init_request_args(self) -> None: + super().init_request_args() save_images = is_dataset_export(self.request) - self.export_callback = get_export_callback(self.db_instance, save_images=save_images) - format_name = self.request.query_params.get("format", "") - filename = self.request.query_params.get("filename", "") - - try: - location_config = get_location_configuration( - db_instance=self.db_instance, - query_params=self.request.query_params, - field_name=StorageType.TARGET, - ) - except ValueError as ex: - raise serializers.ValidationError(str(ex)) from ex - - location = location_config["location"] - - if location not in Location.list(): - raise serializers.ValidationError( - f"Unexpected location {location} specified for the request" - ) self.export_args = self.ExportArgs( + **self.export_args.to_dict(), format=format_name, - filename=filename, save_images=save_images, - location_config=location_config, ) - def validate_export_args(self): + def validate_request(self): + super().validate_request() + format_desc = {f.DISPLAY_NAME: f for f in dm.views.get_export_formats()}.get( self.export_args.format ) if format_desc is None: - raise serializers.ValidationError("Unknown format specified for the request") + raise ValidationError("Unknown format specified for the request") elif not format_desc.ENABLED: - return Response(status=status.HTTP_405_METHOD_NOT_ALLOWED) + raise MethodNotAllowed(self.request.method, detail="Format is disabled") - def build_rq_id(self): - return ExportRQId( + def build_request_id(self): + return ExportRequestId( queue=self.QUEUE_NAME, action=RequestAction.EXPORT, target=RequestTarget(self.resource), id=self.db_instance.pk, - # todo: refactor extra={ "subresource": ( RequestSubresource.DATASET @@ -236,23 +127,28 @@ def build_rq_id(self): else RequestSubresource.ANNOTATIONS ), "format": self.export_args.format, - "user_id": self.request.user.id, + "user_id": self.user_id, }, ).render() - def send_events(self): - handle_dataset_export( - self.db_instance, - format_name=self.export_args.format, - cloud_storage_id=self.export_args.location_config.get("storage_id"), - save_images=self.export_args.save_images, + def validate_request_id(self, rq_id: str) -> None: + parsed_rq_id = ExportRequestId.parse(rq_id) + + if ( + parsed_rq_id.action != RequestAction.EXPORT + or parsed_rq_id.target != RequestTarget(self.resource) + or parsed_rq_id.id != self.db_instance.pk + or parsed_rq_id.subresource + not in {RequestSubresource.DATASET, RequestSubresource.ANNOTATIONS} + ): + raise ValueError("The provided request id does not match exported target or resource") + + def init_callback_with_params(self): + self.callback = get_export_callback( + self.db_instance, save_images=self.export_args.save_images ) + self.callback_args = (self.db_instance.pk, self.export_args.format) - def setup_background_job( - self, - queue: DjangoRQ, - rq_id: str, - ) -> None: try: if self.request.scheme: server_address = self.request.scheme + "://" @@ -260,68 +156,38 @@ def setup_background_job( except Exception: server_address = None - cache_ttl = get_export_cache_ttl(self.db_instance) - - user_id = self.request.user.id - - func = self.export_callback - func_args = (self.db_instance.id, self.export_args.format) - result_url = None + self.callback_kwargs = { + "server_url": server_address, + } if self.export_args.location == Location.CLOUD_STORAGE: - try: - storage_id = self.export_args.location_config["storage_id"] - except KeyError: - raise serializers.ValidationError( - "Cloud storage location was selected as the destination," - " but cloud storage id was not specified" - ) - + storage_id = self.export_args.location_config["storage_id"] db_storage = get_cloud_storage_for_import_or_export( storage_id=storage_id, request=self.request, is_default=self.export_args.location_config["is_default"], ) - func = export_resource_to_cloud_storage - func_args = ( - db_storage, - self.export_callback, - ) + func_args - else: - db_storage = None - result_url = self.make_result_url(rq_id=rq_id) + self.callback_args = (db_storage, self.callback) + self.callback_args + self.callback = export_resource_to_cloud_storage - with get_rq_lock_by_user(queue, user_id): - result_filename = self.get_result_filename() - meta = ExportRQMeta.build_for( - request=self.request, - db_obj=self.db_instance, - result_url=result_url, - result_filename=result_filename, - ) - queue.enqueue_call( - func=func, - args=func_args, - kwargs={ - "server_url": server_address, - }, - job_id=rq_id, - meta=meta, - depends_on=define_dependent_job(queue, user_id, rq_id=rq_id), - result_ttl=cache_ttl.total_seconds(), - failure_ttl=cache_ttl.total_seconds(), - ) + def finalize_request(self): + handle_dataset_export( + self.db_instance, + format_name=self.export_args.format, + cloud_storage_id=self.export_args.location_config.get("storage_id"), + save_images=self.export_args.save_images, + ) def get_result_filename(self) -> str: filename = self.export_args.filename if not filename: - instance_timestamp = self.get_updated_date_timestamp() + timestamp = self.get_file_timestamp() filename = build_annotations_file_name( class_name=self.resource, - identifier=self.db_instance.id, - timestamp=instance_timestamp, + identifier=self.db_instance.pk, + timestamp=timestamp, format_name=self.export_args.format, is_annotation_file=not self.export_args.save_images, extension=(EXPORT_FORMATS[self.export_args.format].EXT).lower(), @@ -329,42 +195,63 @@ def get_result_filename(self) -> str: return filename - def get_download_api_endpoint_view_name(self) -> str: - return f"{self.resource}-download-dataset" + def where_to_redirect(self) -> str: + return reverse( + f"{self.resource}-download-dataset", args=[self.db_instance.pk], request=self.request + ) -class BackupExportManager(ResourceExportManager): +class BackupExportManager(AbstractExportableRequestManager): SUPPORTED_RESOURCES = {RequestTarget.PROJECT, RequestTarget.TASK} - SUPPORTED_SUBRESOURCES = {RequestSubresource.BACKUP} - @dataclass - class ExportArgs: - filename: str - location_config: dict[str, Any] + def validate_request_id(self, rq_id: str) -> None: + parsed_rq_id = ExportRequestId.parse(rq_id) - @property - def location(self) -> Location: - return self.location_config["location"] + if ( + parsed_rq_id.action != RequestAction.EXPORT + or parsed_rq_id.target != RequestTarget(self.resource) + or parsed_rq_id.id != self.db_instance.pk + or parsed_rq_id.subresource is not RequestSubresource.BACKUP + ): + raise ValueError("The provided request id does not match exported target or resource") - def initialize_export_args(self) -> None: - self.export_callback = create_backup - filename = self.request.query_params.get("filename", "") + def init_callback_with_params(self): + self.callback = create_backup - location_config = get_location_configuration( - db_instance=self.db_instance, - query_params=self.request.query_params, - field_name=StorageType.TARGET, + if isinstance(self.db_instance, Task): + logger = slogger.task[self.db_instance.pk] + Exporter = TaskExporter + else: + logger = slogger.project[self.db_instance.pk] + Exporter = ProjectExporter + + self.callback_args = ( + self.db_instance.pk, + Exporter, + logger, + self.job_result_ttl, ) - self.export_args = self.ExportArgs(filename, location_config) - def validate_export_args(self): - return + if self.export_args.location == Location.CLOUD_STORAGE: + storage_id = self.export_args.location_config["storage_id"] + # TODO: move into validation? + db_storage = get_cloud_storage_for_import_or_export( + storage_id=storage_id, + request=self.request, + is_default=self.export_args.location_config["is_default"], + ) + + self.callback_args = ( + db_storage, + self.callback, + ) + self.callback_args + self.callback = export_resource_to_cloud_storage - def get_result_filename(self) -> str: + def get_result_filename(self): filename = self.export_args.filename if not filename: - instance_timestamp = self.get_updated_date_timestamp() + instance_timestamp = self.get_file_timestamp() filename = build_backup_file_name( class_name=self.resource, @@ -374,88 +261,270 @@ def get_result_filename(self) -> str: return filename - def build_rq_id(self): - return ExportRQId( + def build_request_id(self): + return ExportRequestId( queue=self.QUEUE_NAME, action=RequestAction.EXPORT, target=RequestTarget(self.resource), id=self.db_instance.pk, extra={ "subresource": RequestSubresource.BACKUP, - "user_id": self.request.user.id, + "user_id": self.user_id, }, ).render() - # FUTURE-TODO: move into ResourceExportManager - def setup_background_job( - self, - queue: DjangoRQ, - rq_id: str, - ) -> None: - cache_ttl = get_export_cache_ttl(self.db_instance) - user_id = self.request.user.id + def where_to_redirect(self) -> str: + return reverse( + f"{self.resource}-download-backup", args=[self.db_instance.pk], request=self.request + ) - if isinstance(self.db_instance, Task): - logger = slogger.task[self.db_instance.pk] - Exporter = TaskExporter + def finalize_request(self): + # FUTURE-TODO: send events to event store + pass + + +@attrs.define(kw_only=True) +class ResourceImportManager(AbstractRequestManager): + QUEUE_NAME = settings.CVAT_QUEUES.IMPORT_DATA.value + # SUPPORTED_SUBRESOURCES: ClassVar[set[RequestSubresource]] + + upload_serializer_class: type[UploadedFileSerializer | UploadedZipFileSerializer] = attrs.field( + init=False + ) + + @property + def job_result_ttl(self): + return int(settings.IMPORT_CACHE_SUCCESS_TTL.total_seconds()) + + @property + def job_failed_ttl(self): + return int(settings.IMPORT_CACHE_FAILED_TTL.total_seconds()) + + @dataclass + class ImportArgs: + location_config: dict[str, Any] + + @property + def location(self) -> Location: + return self.location_config["location"] + + def to_dict(self): + return dataclass_asdict(self) + + def init_request_args(self): + try: + location_config = get_location_configuration( + db_instance=self.db_instance, + query_params=self.request.query_params, + field_name=StorageType.SOURCE, + ) + except ValueError as ex: + raise ValidationError(str(ex)) from ex + + location = location_config["location"] + + if location not in Location.list(): + raise ValidationError(f"Unexpected location {location} specified for the request") + + self.import_args = ResourceImportManager.ImportArgs( + location_config=location_config, + ) + + def _handle_cloud_storage_file_upload(self): + storage_id = self.import_args.location_config["storage_id"] + db_storage = get_cloud_storage_for_import_or_export( + storage_id=storage_id, + request=self.request, + is_default=self.import_args.location_config["is_default"], + ) + + key = self.import_args.file_path + with NamedTemporaryFile(prefix="cvat_", dir=TmpDirManager.TMP_ROOT, delete=False) as tf: + self.import_args.file_path = tf.name + return db_storage, key + + def _handle_non_tus_file_upload(self): + file_serializer = self.upload_serializer_class(data=self.request.data) + file_serializer.is_valid(raise_exception=True) + payload_file = file_serializer.validated_data[file_serializer.file.field_name] + + with NamedTemporaryFile(prefix="cvat_", dir=TmpDirManager.TMP_ROOT, delete=False) as tf: + self.import_args.file_path = tf.name + for chunk in payload_file.chunks(): + tf.write(chunk) + + +@attrs.define(kw_only=True) +class DatasetImporter(ResourceImportManager): + SUPPORTED_RESOURCES = {RequestTarget.PROJECT, RequestTarget.TASK, RequestTarget.JOB} + # SUPPORTED_SUBRESOURCES = {RequestSubresource.DATASET, RequestSubresource.ANNOTATIONS} + + @dataclass + class ImportArgs(ResourceImportManager.ImportArgs): + format: str + file_path: str | None + conv_mask_to_poly: bool + + def __attrs_post_init__(self) -> None: + self.upload_serializer_class = ( + UploadedZipFileSerializer + if isinstance(self.db_instance, Project) + else UploadedFileSerializer + ) + + def init_request_args(self) -> None: + super().init_request_args() + format_name = self.request.query_params.get("format", "") + conv_mask_to_poly = to_bool(self.request.query_params.get("conv_mask_to_poly", True)) + + filename = self.request.query_params.get("filename") + tmp_dir = Path(self.db_instance.get_tmp_dirname()) + + self.import_args = self.ImportArgs( + **self.import_args.to_dict(), + format=format_name, + file_path=str(Path(tmp_dir) / filename) if filename else None, + conv_mask_to_poly=conv_mask_to_poly, + ) + + def init_callback_with_params(self): + if isinstance(self.db_instance, Project): + self.callback = dm.project.import_dataset_as_project + elif isinstance(self.db_instance, Task): + self.callback = dm.task.import_task_annotations else: - logger = slogger.project[self.db_instance.pk] - Exporter = ProjectExporter + assert isinstance(self.db_instance, Job) + self.callback = dm.task.import_job_annotations + + if self.import_args.location == Location.CLOUD_STORAGE: + db_storage, key = self._handle_cloud_storage_file_upload() + elif not self.import_args.file_path: + self._handle_non_tus_file_upload() + + self.callback_args = ( + self.import_args.file_path, + self.db_instance.pk, + self.import_args.format, + self.import_args.conv_mask_to_poly, + ) - func = self.export_callback - func_args = ( - self.db_instance.id, - Exporter, - logger, - cache_ttl, + if self.import_args.location == Location.CLOUD_STORAGE: + self.callback_args = (db_storage, key, self.callback) + self.callback_args + self.callback = import_resource_from_cloud_storage + + def validate_request(self): + super().validate_request() + + format_desc = {f.DISPLAY_NAME: f for f in dm.views.get_import_formats()}.get( + self.import_args.format ) - result_url = None + if format_desc is None: + raise ValidationError(f"Unknown input format {self.import_args.format!r}") + elif not format_desc.ENABLED: + raise MethodNotAllowed(self.request.method, detail="Format is disabled") - if self.export_args.location == Location.CLOUD_STORAGE: - try: - storage_id = self.export_args.location_config["storage_id"] - except KeyError: - raise serializers.ValidationError( - "Cloud storage location was selected as the destination," - " but cloud storage id was not specified" + if self.import_args.location == Location.CLOUD_STORAGE: + if not self.import_args.file_path: + raise ValidationError("The filename was not specified") + + if self.import_args.location_config.get("storage_id") is None: + raise ValidationError( + "Cloud storage location was selected as the source," + + " but cloud storage id was not specified" ) - db_storage = get_cloud_storage_for_import_or_export( - storage_id=storage_id, - request=self.request, - is_default=self.export_args.location_config["is_default"], - ) + def build_request_id(self): + return ExportRequestId( + queue=self.QUEUE_NAME, + action=RequestAction.IMPORT, + target=RequestTarget(self.resource), + id=self.db_instance.pk, + extra={ + "subresource": ( + RequestSubresource.DATASET + if isinstance(self.db_instance, Project) + else RequestSubresource.ANNOTATIONS + ), + }, + ).render() - func = export_resource_to_cloud_storage - func_args = ( - db_storage, - self.export_callback, - ) + func_args - else: - result_url = self.make_result_url(rq_id=rq_id) + def build_meta(self, *, file_path: str): + return ImportRQMeta.build_for( # TODO: looks like tmp_file is not used anywhere + request=self.request, db_obj=self.db_instance, tmp_file=file_path + ) - with get_rq_lock_by_user(queue, user_id): - result_filename = self.get_result_filename() - meta = ExportRQMeta.build_for( - request=self.request, - db_obj=self.db_instance, - result_url=result_url, - result_filename=result_filename, - ) + def finalize_request(self): + handle_dataset_import( + self.db_instance, + format_name=self.import_args.format, + cloud_storage_id=self.import_args.location_config.get("storage_id"), + ) - queue.enqueue_call( - func=func, - args=func_args, - job_id=rq_id, - meta=meta, - depends_on=define_dependent_job(queue, user_id, rq_id=rq_id), - result_ttl=cache_ttl.total_seconds(), - failure_ttl=cache_ttl.total_seconds(), - ) - def get_download_api_endpoint_view_name(self) -> str: - return f"{self.resource}-download-backup" +@attrs.define(kw_only=True) +class BackupImporter(ResourceImportManager): + SUPPORTED_RESOURCES = {RequestTarget.PROJECT, RequestTarget.TASK} + # SUPPORTED_SUBRESOURCES = {RequestSubresource.BACKUP} - def send_events(self): - # FUTURE-TODO: send events to event store + resource: RequestTarget = attrs.field(validator=attrs.validators.in_(SUPPORTED_RESOURCES)) + + @dataclass + class ImportArgs(ResourceImportManager.ImportArgs): + file_path: str | None + org_id: int | None + + def __attrs_post_init__(self) -> None: + self.upload_serializer_class = UploadedZipFileSerializer + + def init_request_args(self) -> None: + super().init_request_args() + filename = self.request.query_params.get("filename") + tmp_dir = Path(TmpDirManager.TMP_ROOT) + + self.import_args = self.ImportArgs( + **self.import_args.to_dict(), + file_path=str(Path(tmp_dir) / filename) if filename else None, + org_id=getattr(self.request.iam_context["organization"], "id", None), + ) + + def validate_request(self): + super().validate_request() + + if self.import_args.location == Location.CLOUD_STORAGE: + if not self.import_args.file_path: + raise ValidationError("The filename was not specified") + + if self.import_args.location_config.get("storage_id") is None: + raise ValidationError( + "Cloud storage location was selected as the source," + + " but cloud storage id was not specified" + ) + + def build_request_id(self): + return ImportRequestId( + queue=self.QUEUE_NAME, + action=RequestAction.IMPORT, + target=self.resource, + id=uuid4(), + extra={ + "subresource": RequestSubresource.BACKUP, + }, + ).render() + + def init_callback_with_params(self): + self.callback = import_project if self.resource == RequestTarget.PROJECT else import_task + + if self.import_args.location == Location.CLOUD_STORAGE: + db_storage, key = self._handle_cloud_storage_file_upload() + elif not self.import_args.file_path: + self._handle_non_tus_file_upload() + + self.callback_args = (self.import_args.file_path, self.user_id, self.import_args.org_id) + + if self.import_args.location == Location.CLOUD_STORAGE: + self.callback_args = (db_storage, key, self.callback) + self.callback_args + self.callback = import_resource_from_cloud_storage + + # FUTURE-TODO: send logs to event store + def finalize_request(self): pass diff --git a/cvat/apps/engine/backup.py b/cvat/apps/engine/backup.py index 8b3ed9a31d1d..9f2de942d0db 100644 --- a/cvat/apps/engine/backup.py +++ b/cvat/apps/engine/backup.py @@ -11,31 +11,25 @@ import re import shutil import tempfile -import uuid from abc import ABCMeta, abstractmethod from collections.abc import Collection, Iterable from copy import deepcopy from datetime import timedelta from enum import Enum from logging import Logger -from tempfile import NamedTemporaryFile from typing import Any, ClassVar, Optional, Type, Union from zipfile import ZipFile -import django_rq import json_stream from django.conf import settings from django.core.exceptions import ObjectDoesNotExist from django.db import transaction from django.utils import timezone -from rest_framework import serializers, status from rest_framework.exceptions import ValidationError from rest_framework.parsers import JSONParser from rest_framework.renderers import JSONRenderer -from rest_framework.response import Response import cvat.apps.dataset_manager as dm -from cvat.apps.dataset_manager.bindings import CvatImportError from cvat.apps.dataset_manager.util import ( ExportCacheManager, TmpDirManager, @@ -51,23 +45,13 @@ retry_current_rq_job, ) from cvat.apps.engine import models -from cvat.apps.engine.cloud_provider import ( - db_storage_to_storage_instance, - import_resource_from_cloud_storage, -) -from cvat.apps.engine.location import StorageType, get_location_configuration +from cvat.apps.engine.cloud_provider import db_storage_to_storage_instance from cvat.apps.engine.log import ServerLogManager from cvat.apps.engine.models import ( DataChoice, - Location, - RequestAction, - RequestSubresource, - RequestTarget, StorageChoice, StorageMethodChoice, ) -from cvat.apps.engine.permissions import get_cloud_storage_for_import_or_export -from cvat.apps.engine.rq import ImportRQMeta, RQId, define_dependent_job from cvat.apps.engine.serializers import ( AnnotationGuideWriteSerializer, AssetWriteSerializer, @@ -76,23 +60,14 @@ JobWriteSerializer, LabeledDataSerializer, LabelSerializer, - ProjectFileSerializer, ProjectReadSerializer, - RqIdSerializer, SegmentSerializer, SimpleJobSerializer, - TaskFileSerializer, TaskReadSerializer, ValidationParamsSerializer, ) from cvat.apps.engine.task import JobFileMapping, _create_thread -from cvat.apps.engine.types import ExtendedRequest -from cvat.apps.engine.utils import ( - av_scan_paths, - get_rq_lock_by_user, - import_resource_with_clean_up_after, - process_failed_job, -) +from cvat.apps.engine.utils import av_scan_paths slogger = ServerLogManager(__name__) @@ -974,7 +949,7 @@ def import_task(self): return self._db_task @transaction.atomic -def _import_task(filename, user, org_id): +def import_task(filename, user, org_id): av_scan_paths(filename) task_importer = TaskImporter(filename, user, org_id) db_task = task_importer.import_task() @@ -1110,7 +1085,7 @@ def import_project(self): return self._db_project @transaction.atomic -def _import_project(filename, user, org_id): +def import_project(filename, user, org_id): av_scan_paths(filename) project_importer = ProjectImporter(filename, user, org_id) db_project = project_importer.import_project() @@ -1178,166 +1153,5 @@ def create_backup( log_exception(logger) raise - -def _import( - importer: TaskImporter | ProjectImporter, - request: ExtendedRequest, - queue: django_rq.queues.DjangoRQ, - rq_id: str, - Serializer: type[TaskFileSerializer] | type[ProjectFileSerializer], - file_field_name: str, - location_conf: dict, - filename: str | None = None, -): - rq_job = queue.fetch_job(rq_id) - - if not rq_job: - org_id = getattr(request.iam_context['organization'], 'id', None) - location = location_conf.get('location') - - if location == Location.LOCAL: - if not filename: - serializer = Serializer(data=request.data) - serializer.is_valid(raise_exception=True) - payload_file = serializer.validated_data[file_field_name] - with NamedTemporaryFile( - prefix='cvat_', - dir=settings.TMP_FILES_ROOT, - delete=False) as tf: - filename = tf.name - for chunk in payload_file.chunks(): - tf.write(chunk) - else: - file_name = request.query_params.get('filename') - assert file_name, "The filename wasn't specified" - try: - storage_id = location_conf['storage_id'] - except KeyError: - raise serializers.ValidationError( - 'Cloud storage location was selected as the source,' - ' but cloud storage id was not specified') - - db_storage = get_cloud_storage_for_import_or_export( - storage_id=storage_id, request=request, - is_default=location_conf['is_default']) - - key = filename - with NamedTemporaryFile(prefix='cvat_', dir=settings.TMP_FILES_ROOT, delete=False) as tf: - filename = tf.name - - func = import_resource_with_clean_up_after - func_args = (importer, filename, request.user.id, org_id) - - if location == Location.CLOUD_STORAGE: - func_args = (db_storage, key, func) + func_args - func = import_resource_from_cloud_storage - - user_id = request.user.id - - with get_rq_lock_by_user(queue, user_id): - meta = ImportRQMeta.build_for( - request=request, - db_obj=None, - tmp_file=filename, - ) - rq_job = queue.enqueue_call( - func=func, - args=func_args, - job_id=rq_id, - meta=meta, - depends_on=define_dependent_job(queue, user_id), - result_ttl=settings.IMPORT_CACHE_SUCCESS_TTL.total_seconds(), - failure_ttl=settings.IMPORT_CACHE_FAILED_TTL.total_seconds() - ) - else: - rq_job_meta = ImportRQMeta.for_job(rq_job) - if rq_job_meta.user.id != request.user.id: - return Response(status=status.HTTP_403_FORBIDDEN) - - if rq_job.is_finished: - project_id = rq_job.return_value() - rq_job.delete() - return Response({'id': project_id}, status=status.HTTP_201_CREATED) - elif rq_job.is_failed: - exc_info = process_failed_job(rq_job) - # RQ adds a prefix with exception class name - import_error_prefix = '{}.{}'.format( - CvatImportError.__module__, CvatImportError.__name__) - if exc_info.startswith(import_error_prefix): - exc_info = exc_info.replace(import_error_prefix + ': ', '') - return Response(data=exc_info, - status=status.HTTP_400_BAD_REQUEST) - else: - return Response(data=exc_info, - status=status.HTTP_500_INTERNAL_SERVER_ERROR) - - serializer = RqIdSerializer(data={'rq_id': rq_id}) - serializer.is_valid(raise_exception=True) - - return Response(serializer.data, status=status.HTTP_202_ACCEPTED) - def get_backup_dirname(): return TmpDirManager.TMP_ROOT - -def import_project(request: ExtendedRequest, queue_name: str, filename: str | None = None): - if 'rq_id' in request.data: - rq_id = request.data['rq_id'] - else: - rq_id = RQId( - queue=settings.CVAT_QUEUES.IMPORT_DATA.value, - action=RequestAction.IMPORT, target=RequestTarget.PROJECT, id=uuid.uuid4(), - extra={ - "subresource": RequestSubresource.BACKUP, - } - ).render() - Serializer = ProjectFileSerializer - file_field_name = 'project_file' - - location_conf = get_location_configuration( - query_params=request.query_params, - field_name=StorageType.SOURCE, - ) - - queue = django_rq.get_queue(queue_name) - - return _import( - importer=_import_project, - request=request, - queue=queue, - rq_id=rq_id, - Serializer=Serializer, - file_field_name=file_field_name, - location_conf=location_conf, - filename=filename - ) - -def import_task(request: ExtendedRequest, queue_name: str, filename: str | None = None): - rq_id = request.data.get('rq_id', RQId( - queue=settings.CVAT_QUEUES.IMPORT_DATA.value, - action=RequestAction.IMPORT, - target=RequestTarget.TASK, - id=uuid.uuid4(), - extra={ - "subresource": RequestSubresource.BACKUP, - } - ).render()) - Serializer = TaskFileSerializer - file_field_name = 'task_file' - - location_conf = get_location_configuration( - query_params=request.query_params, - field_name=StorageType.SOURCE - ) - - queue = django_rq.get_queue(queue_name) - - return _import( - importer=_import_task, - request=request, - queue=queue, - rq_id=rq_id, - Serializer=Serializer, - file_field_name=file_field_name, - location_conf=location_conf, - filename=filename - ) diff --git a/cvat/apps/engine/mixins.py b/cvat/apps/engine/mixins.py index 0917235293f8..6455a3a799b0 100644 --- a/cvat/apps/engine/mixins.py +++ b/cvat/apps/engine/mixins.py @@ -12,12 +12,10 @@ from pathlib import Path from tempfile import NamedTemporaryFile from textwrap import dedent -from typing import Callable from unittest import mock from urllib.parse import urljoin import django_rq -from attr.converters import to_bool from django.conf import settings from drf_spectacular.types import OpenApiTypes from drf_spectacular.utils import OpenApiParameter, OpenApiResponse, extend_schema @@ -27,20 +25,17 @@ from cvat.apps.engine.background import BackupExportManager, DatasetExportManager from cvat.apps.engine.handlers import clear_import_cache -from cvat.apps.engine.location import StorageType, get_location_configuration from cvat.apps.engine.log import ServerLogManager from cvat.apps.engine.models import ( - Job, Location, - Project, RequestAction, RequestSubresource, RequestTarget, - Task, ) -from cvat.apps.engine.rq import RQId -from cvat.apps.engine.serializers import DataSerializer, RqIdSerializer +from cvat.apps.engine.rq import RequestId +from cvat.apps.engine.serializers import DataSerializer from cvat.apps.engine.types import ExtendedRequest +from cvat.apps.redis_handler.serializers import RequestIdSerializer slogger = ServerLogManager(__name__) @@ -279,7 +274,7 @@ def init_tus_upload(self, request: ExtendedRequest): if file_exists: # check whether the rq_job is in progress or has been finished/failed object_class_name = self._object.__class__.__name__.lower() - template = RQId( + template = RequestId( queue=settings.CVAT_QUEUES.IMPORT_DATA.value, action=RequestAction.IMPORT, target=RequestTarget(object_class_name), @@ -449,7 +444,7 @@ class DatasetMixin: ], request=OpenApiTypes.NONE, responses={ - '202': OpenApiResponse(response=RqIdSerializer, description='Exporting has been started'), + '202': OpenApiResponse(response=RequestIdSerializer, description='Exporting has been started'), '405': OpenApiResponse(description='Format is not available'), '409': OpenApiResponse(description='Exporting is already in progress'), }, @@ -458,7 +453,7 @@ class DatasetMixin: def initiate_dataset_export(self, request: ExtendedRequest, pk: int): self._object = self.get_object() # force call of check_object_permissions() - export_manager = DatasetExportManager(self._object, request) + export_manager = DatasetExportManager(request=request, db_instance=self._object) return export_manager.process() @extend_schema(summary='Download a prepared dataset file', @@ -474,60 +469,11 @@ def initiate_dataset_export(self, request: ExtendedRequest, pk: int): @action(methods=['GET'], detail=True, url_path='dataset/download') def download_dataset(self, request: ExtendedRequest, pk: int): obj = self.get_object() # force to call check_object_permissions - export_manager = DatasetExportManager(obj, request) + export_manager = DatasetExportManager(request=request, db_instance=obj) return export_manager.download_file() - # FUTURE-TODO: migrate to new API - def import_annotations( - self, - request: ExtendedRequest, - db_obj: Project | Task | Job, - import_func: Callable[..., None], - rq_func: Callable[..., None], - rq_id_factory: Callable[..., RQId], - ): - is_tus_request = request.headers.get('Upload-Length', None) is not None or \ - request.method == 'OPTIONS' - if is_tus_request: - return self.init_tus_upload(request) - - conv_mask_to_poly = to_bool(request.query_params.get('conv_mask_to_poly', True)) - location_conf = get_location_configuration( - db_instance=db_obj, - query_params=request.query_params, - field_name=StorageType.SOURCE, - ) - - if location_conf['location'] == Location.CLOUD_STORAGE: - format_name = request.query_params.get('format') - file_name = request.query_params.get('filename') - - return import_func( - request=request, - rq_id_factory=rq_id_factory, - rq_func=rq_func, - db_obj=self._object, - format_name=format_name, - location_conf=location_conf, - filename=file_name, - conv_mask_to_poly=conv_mask_to_poly, - ) - - return self.upload_data(request) - class BackupMixin: - # FUTURE-TODO: migrate to new API - def import_backup_v1(self, request: ExtendedRequest, import_func: Callable) -> Response: - location = request.query_params.get("location", Location.LOCAL) - if location == Location.CLOUD_STORAGE: - file_name = request.query_params.get("filename", "") - return import_func( - request, - queue_name=settings.CVAT_QUEUES.IMPORT_DATA.value, - filename=file_name, - ) - return self.upload_data(request) @extend_schema(summary='Initiate process to backup resource', description=dedent("""\ @@ -546,7 +492,7 @@ def import_backup_v1(self, request: ExtendedRequest, import_func: Callable) -> R ], request=OpenApiTypes.NONE, responses={ - '202': OpenApiResponse(response=RqIdSerializer, description='Creating a backup file has been started'), + '202': OpenApiResponse(response=RequestIdSerializer, description='Creating a backup file has been started'), '400': OpenApiResponse(description='Wrong query parameters were passed'), '409': OpenApiResponse(description='The backup process has already been initiated and is not yet finished'), }, @@ -554,7 +500,7 @@ def import_backup_v1(self, request: ExtendedRequest, import_func: Callable) -> R @action(detail=True, methods=['POST'], serializer_class=None, url_path='backup/export') def initiate_backup_export(self, request: ExtendedRequest, pk: int): db_object = self.get_object() # force to call check_object_permissions - export_manager = BackupExportManager(db_object, request) + export_manager = BackupExportManager(request=request, db_instance=db_object) return export_manager.process() @@ -571,5 +517,5 @@ def initiate_backup_export(self, request: ExtendedRequest, pk: int): @action(methods=['GET'], detail=True, url_path='backup/download') def download_backup(self, request: ExtendedRequest, pk: int): obj = self.get_object() # force to call check_object_permissions - export_manager = BackupExportManager(obj, request) + export_manager = BackupExportManager(request=request, db_instance=obj) return export_manager.download_file() diff --git a/cvat/apps/engine/permissions.py b/cvat/apps/engine/permissions.py index 1b76dc8cf78e..e9df673357b9 100644 --- a/cvat/apps/engine/permissions.py +++ b/cvat/apps/engine/permissions.py @@ -13,7 +13,7 @@ from django.shortcuts import get_object_or_404 from rest_framework.exceptions import PermissionDenied, ValidationError -from cvat.apps.engine.rq import ExportRQId +from cvat.apps.engine.rq import ExportRequestId from cvat.apps.engine.types import ExtendedRequest from cvat.apps.engine.utils import is_dataset_export from cvat.apps.iam.permissions import ( @@ -47,7 +47,7 @@ def _get_key(d: dict[str, Any], key_path: Union[str, Sequence[str]]) -> Optional return d class DownloadExportedExtension: - rq_job_id: ExportRQId | None + rq_job_id: ExportRequestId | None class Scopes(StrEnum): DOWNLOAD_EXPORTED_FILE = 'download:exported_file' @@ -56,7 +56,7 @@ class Scopes(StrEnum): def extend_params_with_rq_job_details(*, request: ExtendedRequest, params: dict[str, Any]) -> None: if rq_id := request.query_params.get("rq_id"): try: - params["rq_job_id"] = ExportRQId.parse(rq_id) + params["rq_job_id"] = ExportRequestId.parse(rq_id) return except Exception: raise ValidationError("Unexpected request id format") diff --git a/cvat/apps/engine/rq.py b/cvat/apps/engine/rq.py index 54a2f52e1687..9c3cefee8572 100644 --- a/cvat/apps/engine/rq.py +++ b/cvat/apps/engine/rq.py @@ -5,6 +5,7 @@ from __future__ import annotations from abc import ABCMeta, abstractmethod +from functools import cached_property from typing import TYPE_CHECKING, Any, Callable, Optional, Protocol from django.conf import settings @@ -16,10 +17,9 @@ from rq.registry import BaseRegistry as RQBaseRegistry from cvat.apps.engine.types import ExtendedRequest +from cvat.apps.redis_handler.rq import RequestId from .models import RequestSubresource -from cvat.apps.redis_handler.rq import RQId -from functools import cached_property if TYPE_CHECKING: from django.contrib.auth.models import User @@ -53,7 +53,7 @@ class RequestField: HIDDEN = "hidden" # import specific fields - TMP_FILE = "tmp_file" + TMP_FILE = "tmp_file" # TODO: unused field TASK_PROGRESS = "task_progress" # export specific fields @@ -334,7 +334,7 @@ def is_rq_job_owner(rq_job: RQJob, user_id: int) -> bool: return BaseRQMeta.for_job(rq_job).user.id == user_id -class ExportRQId(RQId): +class ExportRequestId(RequestId): @cached_property def user_id(self) -> int: return int(self.extra["user_id"]) @@ -348,8 +348,12 @@ def format(self) -> str | None: # TODO: quote/unquote return self.extra.get("format") + @property + def type(self) -> str: + return self.TYPE_SEP.join([self.action, self.subresource]) + -class ImportRQId(RQId): +class ImportRequestId(RequestId): @cached_property def subresource(self) -> RequestSubresource | None: if subresource := self.extra.get("subresource"): diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 2fd0ce58ba55..8559e8f00159 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -15,14 +15,11 @@ from collections.abc import Iterable, Sequence from contextlib import closing from copy import copy -from datetime import timedelta -from decimal import Decimal from inspect import isclass from tempfile import NamedTemporaryFile from typing import Any, Optional, Union import django_rq -import rq.defaults as rq_defaults from django.conf import settings from django.contrib.auth.models import Group, User from django.db import transaction @@ -31,8 +28,6 @@ from drf_spectacular.utils import OpenApiExample, extend_schema_field, extend_schema_serializer from numpy import random from rest_framework import exceptions, serializers -from rq.job import Job as RQJob -from rq.job import JobStatus as RQJobStatus from cvat.apps.dataset_manager.formats.utils import get_label_color from cvat.apps.engine import field_validation, models @@ -40,9 +35,7 @@ from cvat.apps.engine.frame_provider import FrameQuality, TaskFrameProvider from cvat.apps.engine.log import ServerLogManager from cvat.apps.engine.model_utils import bulk_create -from cvat.apps.engine.models import RequestAction, RequestSubresource from cvat.apps.engine.permissions import TaskPermission -from cvat.apps.engine.rq import BaseRQMeta, ExportRQMeta, ImportRQMeta from cvat.apps.engine.task_validation import HoneypotFrameSelector from cvat.apps.engine.utils import ( CvatChunkTimestampMismatchError, @@ -50,13 +43,10 @@ format_list, get_list_view_name, grouped, - parse_exception_message, parse_specific_attributes, reverse, take_by, ) -from cvat.apps.lambda_manager.rq import LambdaRQMeta -from cvat.apps.redis_handler.rq import RQId from utils.dataset_manifest import ImageManifestManager slogger = ServerLogManager(__name__) @@ -1816,12 +1806,9 @@ class RqStatusSerializer(serializers.Serializer): def __init__(self, instance=None, data=..., **kwargs): warnings.warn("RqStatusSerializer is deprecated, " - "use cvat.apps.engine.serializers.RequestSerializer instead", DeprecationWarning) + "use cvat.apps.redis_handler.serializers.RequestSerializer instead", DeprecationWarning) super().__init__(instance, data, **kwargs) -class RqIdSerializer(serializers.Serializer): - rq_id = serializers.CharField(help_text="Request id") - class JobFiles(serializers.ListField): """ @@ -2947,24 +2934,16 @@ class FileInfoSerializer(serializers.Serializer): type = serializers.ChoiceField(choices=["REG", "DIR"]) mime_type = serializers.CharField(max_length=255) -class AnnotationFileSerializer(serializers.Serializer): - annotation_file = serializers.FileField() -class DatasetFileSerializer(serializers.Serializer): - dataset_file = serializers.FileField() +class UploadedFileSerializer(serializers.Serializer): + file = serializers.FileField() - @staticmethod - def validate_dataset_file(value): +class UploadedZipFileSerializer(UploadedFileSerializer): + def validate_file(self, value): if os.path.splitext(value.name)[1] != '.zip': - raise serializers.ValidationError('Dataset file should be zip archive') + raise serializers.ValidationError('A file should be zip archive') return value -class TaskFileSerializer(serializers.Serializer): - task_file = serializers.FileField() - -class ProjectFileSerializer(serializers.Serializer): - project_file = serializers.FileField() - class CommentReadSerializer(serializers.ModelSerializer): owner = BasicUserSerializer(allow_null=True, required=False) @@ -3490,131 +3469,3 @@ def create(self, validated_data): class Meta: model = models.AnnotationGuide fields = ('id', 'task_id', 'project_id', 'markdown', ) - -class UserIdentifiersSerializer(BasicUserSerializer): - class Meta(BasicUserSerializer.Meta): - fields = ( - "id", - "username", - ) - - -class RequestDataOperationSerializer(serializers.Serializer): - type = serializers.CharField() - target = serializers.ChoiceField(choices=models.RequestTarget.choices) - project_id = serializers.IntegerField(required=False, allow_null=True) - task_id = serializers.IntegerField(required=False, allow_null=True) - job_id = serializers.IntegerField(required=False, allow_null=True) - format = serializers.CharField(required=False, allow_null=True) - function_id = serializers.CharField(required=False, allow_null=True) - - def to_representation(self, rq_job: RQJob) -> dict[str, Any]: - parsed_rq_id: RQId = rq_job.parsed_rq_id - - base_rq_job_meta = BaseRQMeta.for_job(rq_job) - representation = { - # TODO: update to action + subresource in export jobs - "type": parsed_rq_id.type, - "target": parsed_rq_id.target, - "project_id": base_rq_job_meta.project_id, - "task_id": base_rq_job_meta.task_id, - "job_id": base_rq_job_meta.job_id, - } - if parsed_rq_id.action == RequestAction.AUTOANNOTATE: - representation["function_id"] = LambdaRQMeta.for_job(rq_job).function_id - elif parsed_rq_id.action in ( - RequestAction.IMPORT, RequestAction.EXPORT - ) and parsed_rq_id.subresource in (RequestSubresource.ANNOTATIONS, RequestSubresource.DATASET): - representation["format"] = parsed_rq_id.format - - return representation - -class RequestSerializer(serializers.Serializer): - # SerializerMethodField is not used here to mark "status" field as required and fix schema generation. - # Marking them as read_only leads to generating type as allOf with one reference to RequestStatus component. - # The client generated using openapi-generator from such a schema contains wrong type like: - # status (bool, date, datetime, dict, float, int, list, str, none_type): [optional] - status = serializers.ChoiceField(source="get_status", choices=models.RequestStatus.choices) - message = serializers.SerializerMethodField() - id = serializers.CharField() - operation = RequestDataOperationSerializer(source="*") - progress = serializers.SerializerMethodField() - created_date = serializers.DateTimeField(source="created_at") - started_date = serializers.DateTimeField( - required=False, allow_null=True, source="started_at", - ) - finished_date = serializers.DateTimeField( - required=False, allow_null=True, source="ended_at", - ) - expiry_date = serializers.SerializerMethodField() - owner = serializers.SerializerMethodField() - result_url = serializers.URLField(required=False, allow_null=True) - result_id = serializers.IntegerField(required=False, allow_null=True) - - def __init__(self, *args, **kwargs): - self._base_rq_job_meta: BaseRQMeta | None = None - super().__init__(*args, **kwargs) - - @extend_schema_field(UserIdentifiersSerializer()) - def get_owner(self, rq_job: RQJob) -> dict[str, Any]: - assert self._base_rq_job_meta - return UserIdentifiersSerializer(self._base_rq_job_meta.user).data - - @extend_schema_field( - serializers.FloatField(min_value=0, max_value=1, required=False, allow_null=True) - ) - def get_progress(self, rq_job: RQJob) -> Decimal: - rq_job_meta = ImportRQMeta.for_job(rq_job) - # progress of task creation is stored in "task_progress" field - # progress of project import is stored in "progress" field - return Decimal(rq_job_meta.progress or rq_job_meta.task_progress or 0.) - - @extend_schema_field(serializers.DateTimeField(required=False, allow_null=True)) - def get_expiry_date(self, rq_job: RQJob) -> Optional[str]: - delta = None - if rq_job.is_finished: - delta = rq_job.result_ttl or rq_defaults.DEFAULT_RESULT_TTL - elif rq_job.is_failed: - delta = rq_job.failure_ttl or rq_defaults.DEFAULT_FAILURE_TTL - - if rq_job.ended_at and delta: - expiry_date = rq_job.ended_at + timedelta(seconds=delta) - return expiry_date.replace(tzinfo=timezone.utc) - - return None - - @extend_schema_field(serializers.CharField(allow_blank=True)) - def get_message(self, rq_job: RQJob) -> str: - assert self._base_rq_job_meta - rq_job_status = rq_job.get_status() - message = '' - - if RQJobStatus.STARTED == rq_job_status: - message = self._base_rq_job_meta.status or message - elif RQJobStatus.FAILED == rq_job_status: - message = self._base_rq_job_meta.formatted_exception or parse_exception_message(str(rq_job.exc_info or "Unknown error")) - - return message - - def to_representation(self, rq_job: RQJob) -> dict[str, Any]: - self._base_rq_job_meta = BaseRQMeta.for_job(rq_job) - representation = super().to_representation(rq_job) - - # FUTURE-TODO: support such statuses on UI - if representation["status"] in (RQJobStatus.DEFERRED, RQJobStatus.SCHEDULED): - representation["status"] = RQJobStatus.QUEUED - - if representation["status"] == RQJobStatus.FINISHED: - - # TODO: move into a custom Job class - if rq_job.parsed_rq_id.action == models.RequestAction.EXPORT: - representation["result_url"] = ExportRQMeta.for_job(rq_job).result_url - - if ( - rq_job.parsed_rq_id.action == models.RequestAction.IMPORT - and rq_job.parsed_rq_id.subresource == models.RequestSubresource.BACKUP - or rq_job.parsed_rq_id.queue == settings.CVAT_QUEUES.QUALITY_REPORTS - ): - representation["result_id"] = rq_job.return_value() - - return representation diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index de4ffbb85482..186e2f68bec3 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -3,7 +3,6 @@ # # SPDX-License-Identifier: MIT -import functools import itertools import os import os.path as osp @@ -13,15 +12,12 @@ import traceback import zlib from abc import ABCMeta, abstractmethod -from collections import namedtuple -from collections.abc import Iterable from contextlib import suppress from copy import copy from datetime import datetime from pathlib import Path -from tempfile import NamedTemporaryFile from types import SimpleNamespace -from typing import Any, Callable, Optional, Union, cast +from typing import Any, Optional, Union, cast import django_rq from attr.converters import to_bool @@ -34,9 +30,6 @@ from django.db.models.query import Prefetch from django.http import HttpResponse, HttpResponseBadRequest, HttpResponseGone, HttpResponseNotFound from django.utils import timezone -from django.utils.decorators import method_decorator -from django.views.decorators.cache import never_cache -from django_rq.queues import DjangoRQ from drf_spectacular.types import OpenApiTypes from drf_spectacular.utils import ( OpenApiExample, @@ -47,7 +40,6 @@ extend_schema_view, ) from PIL import Image -from redis.exceptions import ConnectionError as RedisConnectionError from rest_framework import mixins, serializers, status, viewsets from rest_framework.decorators import action from rest_framework.exceptions import APIException, NotFound, PermissionDenied, ValidationError @@ -56,23 +48,15 @@ from rest_framework.response import Response from rest_framework.settings import api_settings from rq.job import Job as RQJob -from rq.job import JobStatus as RQJobStatus import cvat.apps.dataset_manager as dm import cvat.apps.dataset_manager.views # pylint: disable=unused-import -from cvat.apps.dataset_manager.bindings import CvatImportError from cvat.apps.dataset_manager.serializers import DatasetFormatsSerializer from cvat.apps.engine import backup +from cvat.apps.engine.background import BackupImporter, DatasetImporter +from cvat.apps.engine.backup import import_project, import_task from cvat.apps.engine.cache import CvatChunkTimestampMismatchError, LockError, MediaCache -from cvat.apps.engine.cloud_provider import ( - db_storage_to_storage_instance, - import_resource_from_cloud_storage, -) -from cvat.apps.engine.filters import ( - NonModelJsonLogicFilter, - NonModelOrderingFilter, - NonModelSimpleFilter, -) +from cvat.apps.engine.cloud_provider import db_storage_to_storage_instance from cvat.apps.engine.frame_provider import ( DataWithMeta, FrameQuality, @@ -80,11 +64,15 @@ JobFrameProvider, TaskFrameProvider, ) -from cvat.apps.engine.location import StorageType, get_location_configuration from cvat.apps.engine.media_extractors import get_mime from cvat.apps.engine.mixins import BackupMixin, DatasetMixin, PartialUpdateModelMixin, UploadMixin from cvat.apps.engine.model_utils import bulk_create -from cvat.apps.engine.models import AnnotationGuide, Asset, ClientFile, CloudProviderChoice +from cvat.apps.engine.models import ( + AnnotationGuide, + Asset, + ClientFile, + CloudProviderChoice, +) from cvat.apps.engine.models import CloudStorage as CloudStorageModel from cvat.apps.engine.models import ( Comment, @@ -96,8 +84,6 @@ Location, Project, RequestAction, - RequestStatus, - RequestSubresource, RequestTarget, StorageChoice, StorageMethodChoice, @@ -113,19 +99,15 @@ ProjectPermission, TaskPermission, UserPermission, - get_cloud_storage_for_import_or_export, get_iam_context, ) from cvat.apps.engine.rq import ( ImportRQMeta, - RQId, + RequestId, RQMetaWithFailureInfo, - define_dependent_job, - is_rq_job_owner, ) from cvat.apps.engine.serializers import ( AboutSerializer, - AnnotationFileSerializer, AnnotationGuideReadSerializer, AnnotationGuideWriteSerializer, AssetReadSerializer, @@ -139,7 +121,6 @@ DataMetaReadSerializer, DataMetaWriteSerializer, DataSerializer, - DatasetFileSerializer, FileInfoSerializer, IssueReadSerializer, IssueWriteSerializer, @@ -151,33 +132,26 @@ LabeledDataSerializer, LabelSerializer, PluginsSerializer, - ProjectFileSerializer, ProjectReadSerializer, ProjectWriteSerializer, - RequestSerializer, - RqIdSerializer, RqStatusSerializer, - TaskFileSerializer, TaskReadSerializer, TaskValidationLayoutReadSerializer, TaskValidationLayoutWriteSerializer, TaskWriteSerializer, + UploadedFileSerializer, + UploadedZipFileSerializer, UserSerializer, ) from cvat.apps.engine.types import ExtendedRequest from cvat.apps.engine.utils import ( - av_scan_paths, - get_rq_lock_by_user, - get_rq_lock_for_job, - import_resource_with_clean_up_after, parse_exception_message, - process_failed_job, sendfile, ) from cvat.apps.engine.view_utils import tus_chunk_action -from cvat.apps.events.handlers import handle_dataset_import from cvat.apps.iam.filters import ORGANIZATION_OPEN_API_PARAMETERS from cvat.apps.iam.permissions import IsAuthenticatedOrReadPublicResource, PolicyEnforcer +from cvat.apps.redis_handler.serializers import RequestIdSerializer from utils.dataset_manifest import ImageManifestManager from . import models, task @@ -200,6 +174,13 @@ def get_410_response_for_export_api(path: str) -> HttpResponseGone: To download the prepared file, use the result_url obtained from the response of the previous request. """)) +def get_410_response_for_import_api(path: str) -> HttpResponseGone: + return HttpResponseGone(textwrap.dedent(f"""\ + This endpoint is no longer supported. + To check the status of the import process, use GET /api/requests/rq_id, + where rq_id is obtained from the response of the previous request. + """)) + @extend_schema(tags=['server']) class ServerViewSet(viewsets.ViewSet): serializer_class = None @@ -364,14 +345,6 @@ class ProjectViewSet(viewsets.GenericViewSet, mixins.ListModelMixin, ordering = "-id" lookup_fields = {'owner': 'owner__username', 'assignee': 'assignee__username'} iam_organization_field = 'organization' - IMPORT_RQ_ID_FACTORY = functools.partial(RQId, - queue=settings.CVAT_QUEUES.IMPORT_DATA.value, - action=RequestAction.IMPORT, - targte=RequestTarget.PROJECT, - extra={ - "subresource": RequestSubresource.DATASET, - } - ) def get_serializer_class(self): if self.request.method in SAFE_METHODS: @@ -400,50 +373,6 @@ def perform_create(self, serializer, **kwargs): # Required for the extra summary information added in the queryset serializer.instance = self.get_queryset().get(pk=serializer.instance.pk) - @extend_schema(methods=['GET'], summary='Check dataset import status', - description=textwrap.dedent(""" - Utilizing this endpoint to check the status of the process - of importing a project dataset from a file is deprecated. - In addition, this endpoint no longer handles the project dataset export process. - - Consider using new API: - - `POST /api/projects//dataset/export/?save_images=True` to initiate export process - - `GET /api/requests/` to check process status - - `GET result_url` to download a prepared file - - Where: - - `rq_id` can be found in the response on initializing request - - `result_url` can be found in the response on checking status request - """), - parameters=[ - OpenApiParameter('format', description='This parameter is no longer supported', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, - deprecated=True - ), - OpenApiParameter('filename', description='This parameter is no longer supported', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, - deprecated=True - ), - OpenApiParameter('action', description='Used to check the import status', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, enum=['import_status'], - deprecated=True - ), - OpenApiParameter('location', description='This parameter is no longer supported', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, - enum=Location.list(), - deprecated=True - ), - OpenApiParameter('cloud_storage_id', description='This parameter is no longer supported', - location=OpenApiParameter.QUERY, type=OpenApiTypes.INT, required=False, - deprecated=True - ), - OpenApiParameter('rq_id', description='This parameter is no longer supported', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=True), - ], - deprecated=True, - responses={ - '410': OpenApiResponse(description='API endpoint no longer supports exporting datasets'), - }) @extend_schema(methods=['POST'], summary='Import a dataset into a project', description=textwrap.dedent(""" @@ -460,19 +389,17 @@ def perform_create(self, serializer, **kwargs): enum=Location.list()), OpenApiParameter('cloud_storage_id', description='Storage id', location=OpenApiParameter.QUERY, type=OpenApiTypes.INT, required=False), - OpenApiParameter('use_default_location', description='Use the location that was configured in the project to import annotations', - location=OpenApiParameter.QUERY, type=OpenApiTypes.BOOL, required=False, - default=True, deprecated=True), OpenApiParameter('filename', description='Dataset file name', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), ], - request=PolymorphicProxySerializer('DatasetWrite', - # TODO: refactor to use required=False when possible - serializers=[DatasetFileSerializer, OpenApiTypes.NONE], - resource_type_field_name=None - ), - responses={ - '202': OpenApiResponse(RqIdSerializer, description='Importing has been started'), + request=UploadedZipFileSerializer(required=False), + # request=PolymorphicProxySerializer('DatasetWrite', + # # TODO: refactor to use required=False when possible + # serializers=[UploadedZipFileSerializer, OpenApiTypes.NONE], + # resource_type_field_name=None + # ), + responses={ + '202': OpenApiResponse(RequestIdSerializer, description='Importing has been started'), '400': OpenApiResponse(description='Failed to import dataset'), '405': OpenApiResponse(description='Format is not available'), }) @@ -490,61 +417,13 @@ def dataset(self, request: ExtendedRequest, pk: int): # depends on rq job status (like 201 - finished), # while GET /api/requests/rq_id returns a 200 status code # if such a request exists regardless of job status. - - deprecation_timestamp = int(datetime(2025, 2, 27, tzinfo=timezone.utc).timestamp()) - response_headers = { - "Deprecation": f"@{deprecation_timestamp}" - } - - queue = django_rq.get_queue(settings.CVAT_QUEUES.IMPORT_DATA.value) - rq_id = request.query_params.get('rq_id') - if not rq_id: - return Response( - 'The rq_id param should be specified in the query parameters', - status=status.HTTP_400_BAD_REQUEST, - headers=response_headers, - ) - - rq_job = queue.fetch_job(rq_id) - - if rq_job is None: - return Response(status=status.HTTP_404_NOT_FOUND, headers=response_headers) - # check that the user has access to the current rq_job - elif not is_rq_job_owner(rq_job, request.user.id): - return Response(status=status.HTTP_403_FORBIDDEN, headers=response_headers) - - if rq_job.is_finished: - rq_job.delete() - return Response(status=status.HTTP_201_CREATED, headers=response_headers) - elif rq_job.is_failed: - exc_info = process_failed_job(rq_job) - - return Response( - data=str(exc_info), - status=status.HTTP_500_INTERNAL_SERVER_ERROR, - headers=response_headers - ) - else: - return Response( - data=self._get_rq_response( - settings.CVAT_QUEUES.IMPORT_DATA.value, - rq_id, - ), - status=status.HTTP_202_ACCEPTED, - headers=response_headers - ) + return get_410_response_for_import_api() # we cannot redirect to the new API here since this endpoint used not only to check the status # of exporting process|download a result file, but also to initiate export process return get_410_response_for_export_api("/api/projects/id/dataset/export?save_images=True") - return self.import_annotations( - request=request, - db_obj=self._object, - import_func=_import_project_dataset, - rq_func=dm.project.import_dataset_as_project, - rq_id_factory=self.IMPORT_RQ_ID_FACTORY, - ) + return self.upload_data(request) @tus_chunk_action(detail=True, suffix_base="dataset") @@ -561,37 +440,13 @@ def get_upload_dir(self): def upload_finished(self, request: ExtendedRequest): if self.action == 'dataset': - format_name = request.query_params.get("format", "") - filename = request.query_params.get("filename", "") - conv_mask_to_poly = to_bool(request.query_params.get('conv_mask_to_poly', True)) - tmp_dir = self._object.get_tmp_dirname() - uploaded_file = os.path.join(tmp_dir, filename) - if not os.path.isfile(uploaded_file): - uploaded_file = None - - return _import_project_dataset( - request=request, - filename=uploaded_file, - rq_id_factory=self.IMPORT_RQ_ID_FACTORY, - rq_func=dm.project.import_dataset_as_project, - db_obj=self._object, - format_name=format_name, - conv_mask_to_poly=conv_mask_to_poly - ) + importer = DatasetImporter(request=request, db_instance=self._object) + return importer.process() + elif self.action == 'import_backup': - filename = request.query_params.get("filename", "") - if filename: - tmp_dir = backup.get_backup_dirname() - backup_file = os.path.join(tmp_dir, filename) - if os.path.isfile(backup_file): - return backup.import_project( - request, - settings.CVAT_QUEUES.IMPORT_DATA.value, - filename=backup_file, - ) - return Response(data='No such file were uploaded', - status=status.HTTP_400_BAD_REQUEST) - return backup.import_project(request, settings.CVAT_QUEUES.IMPORT_DATA.value) + importer = BackupImporter(request=request, resource=RequestTarget.PROJECT) + return importer.process() + return Response(data='Unknown upload was finished', status=status.HTTP_400_BAD_REQUEST) @@ -609,14 +464,14 @@ def export_backup(self, request: ExtendedRequest, pk: int): description=textwrap.dedent(""" The backup import process is as follows: - The first request POST /api/projects/backup will initiate file upload and will create - the rq job on the server in which the process of a project creating from an uploaded backup - will be carried out. + The first request POST /api/projects/backup schedules a background job on the server + in which the process of a project creating from an uploaded backup is carried out. + + To check the status of the import process, use GET /api/requests/rq_id, + where rq_id is request ID obtained from the response of the previous request. - After initiating the backup upload, you will receive an rq_id parameter. - Make sure to include this parameter as a query parameter in your subsequent requests - to track the status of the project creation. - Once the project has been successfully created, the server will return the id of the newly created project. + Once the import completes successfully, the response will contain the ID + of the newly created project in the result_id field. """), parameters=[ *ORGANIZATION_OPEN_API_PARAMETERS, @@ -627,27 +482,21 @@ def export_backup(self, request: ExtendedRequest, pk: int): location=OpenApiParameter.QUERY, type=OpenApiTypes.INT, required=False), OpenApiParameter('filename', description='Backup file name', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), - OpenApiParameter('rq_id', description='rq id', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), ], - request=PolymorphicProxySerializer('BackupWrite', - # TODO: refactor to use required=False when possible - serializers=[ProjectFileSerializer, OpenApiTypes.NONE], - resource_type_field_name=None - ), - # TODO: for some reason the code generated by the openapi generator from schema with different serializers - # contains only one serializer, need to fix that. - # https://github.com/OpenAPITools/openapi-generator/issues/6126 - responses={ - # 201: OpenApiResponse(inline_serializer("ImportedProjectIdSerializer", fields={"id": serializers.IntegerField(required=True)}) - '201': OpenApiResponse(description='The project has been imported'), - '202': OpenApiResponse(RqIdSerializer, description='Importing a backup file has been started'), + request=UploadedZipFileSerializer(required=False), + # request=PolymorphicProxySerializer('BackupWrite', + # # TODO: refactor to use required=False when possible + # serializers=[UploadedZipFileSerializer, OpenApiTypes.NONE], + # resource_type_field_name=None + # ), + responses={ + '202': OpenApiResponse(RequestIdSerializer, description='Import of a backup file has started'), }) @action(detail=False, methods=['OPTIONS', 'POST'], url_path=r'backup/?$', serializer_class=None, parser_classes=_UPLOAD_PARSER_CLASSES) def import_backup(self, request: ExtendedRequest): - return self.import_backup_v1(request, backup.import_project) + return self.upload_data(request) @tus_chunk_action(detail=False, suffix_base="backup") def append_backup_chunk(self, request: ExtendedRequest, file_id: str): @@ -943,14 +792,6 @@ class TaskViewSet(viewsets.GenericViewSet, mixins.ListModelMixin, ordering_fields = list(filter_fields) ordering = "-id" iam_organization_field = 'organization' - IMPORT_RQ_ID_FACTORY = functools.partial(RQId, - queue=settings.CVAT_QUEUES.IMPORT_DATA.value, - action=RequestAction.IMPORT, - target=RequestTarget.TASK, - extra={ - "subresource": RequestSubresource.ANNOTATIONS, - } - ) def get_serializer_class(self): if self.request.method in SAFE_METHODS: @@ -973,14 +814,14 @@ def get_queryset(self): description=textwrap.dedent(""" The backup import process is as follows: - The first request POST /api/tasks/backup will initiate file upload and will create - the rq job on the server in which the process of a task creating from an uploaded backup - will be carried out. + The first request POST /api/tasks/backup creates a background job on the server + in which the process of a task creating from an uploaded backup is carried out. - After initiating the backup upload, you will receive an rq_id parameter. - Make sure to include this parameter as a query parameter in your subsequent requests - to track the status of the task creation. - Once the task has been successfully created, the server will return the id of the newly created task. + To check the status of the import process, use GET /api/requests/rq_id, + where rq_id is request ID obtained from the response of the previous request. + + Once the import completes successfully, the response will contain the ID + of the newly created task in the result_id field. """), parameters=[ *ORGANIZATION_OPEN_API_PARAMETERS, @@ -991,24 +832,17 @@ def get_queryset(self): location=OpenApiParameter.QUERY, type=OpenApiTypes.INT, required=False), OpenApiParameter('filename', description='Backup file name', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), - OpenApiParameter('rq_id', description='rq id', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), ], - request=TaskFileSerializer(required=False), - # TODO: for some reason the code generated by the openapi generator from schema with different serializers - # contains only one serializer, need to fix that. - # https://github.com/OpenAPITools/openapi-generator/issues/6126 - responses={ - # 201: OpenApiResponse(inline_serializer("ImportedTaskIdSerializer", fields={"id": serializers.IntegerField(required=True)}) - '201': OpenApiResponse(description='The task has been imported'), - '202': OpenApiResponse(RqIdSerializer, description='Importing a backup file has been started'), + request=UploadedZipFileSerializer(required=False), + responses={ + '202': OpenApiResponse(RequestIdSerializer, description='Import of a backup file has started'), }) @action(detail=False, methods=['OPTIONS', 'POST'], url_path=r'backup/?$', serializer_class=None, parser_classes=_UPLOAD_PARSER_CLASSES) def import_backup(self, request: ExtendedRequest): - return self.import_backup_v1(request, backup.import_task) + return self.upload_data(request) @tus_chunk_action(detail=False, suffix_base="backup") def append_backup_chunk(self, request: ExtendedRequest, file_id: str): @@ -1135,23 +969,8 @@ def append_files(self, request): def upload_finished(self, request: ExtendedRequest): @transaction.atomic def _handle_upload_annotations(request: ExtendedRequest): - format_name = request.query_params.get("format", "") - filename = request.query_params.get("filename", "") - conv_mask_to_poly = to_bool(request.query_params.get('conv_mask_to_poly', True)) - tmp_dir = self._object.get_tmp_dirname() - annotation_file = os.path.join(tmp_dir, filename) - if os.path.isfile(annotation_file): - return _import_annotations( - request=request, - filename=annotation_file, - rq_id_factory=self.IMPORT_RQ_ID_FACTORY, - rq_func=dm.task.import_task_annotations, - db_obj=self._object, - format_name=format_name, - conv_mask_to_poly=conv_mask_to_poly, - ) - return Response(data='No such file were uploaded', - status=status.HTTP_400_BAD_REQUEST) + importer = DatasetImporter(request=request, db_instance=self._object) + return importer.process() def _handle_upload_data(request: ExtendedRequest): with transaction.atomic(): @@ -1215,26 +1034,13 @@ def _handle_upload_data(request: ExtendedRequest): # Need to process task data when the transaction is committed rq_id = task.create(self._object, data, request) - rq_id_serializer = RqIdSerializer(data={'rq_id': rq_id}) - rq_id_serializer.is_valid(raise_exception=True) - + rq_id_serializer = RequestIdSerializer({'rq_id': rq_id}) return Response(rq_id_serializer.data, status=status.HTTP_202_ACCEPTED) @transaction.atomic def _handle_upload_backup(request: ExtendedRequest): - filename = request.query_params.get("filename", "") - if filename: - tmp_dir = backup.get_backup_dirname() - backup_file = os.path.join(tmp_dir, filename) - if os.path.isfile(backup_file): - return backup.import_task( - request, - settings.CVAT_QUEUES.IMPORT_DATA.value, - filename=backup_file, - ) - return Response(data='No such file were uploaded', - status=status.HTTP_400_BAD_REQUEST) - return backup.import_task(request, settings.CVAT_QUEUES.IMPORT_DATA.value) + importer = BackupImporter(request=request, resource=RequestTarget.TASK) + return importer.process() if self.action == 'annotations': return _handle_upload_annotations(request) @@ -1320,10 +1126,10 @@ def _handle_upload_backup(request: ExtendedRequest): '202': OpenApiResponse( response=PolymorphicProxySerializer( component_name='DataResponse', - # FUTURE-FIXME: endpoint should return RqIdSerializer or OpenApiTypes.NONE - # but SDK generated from a schema with nullable RqIdSerializer + # FUTURE-FIXME: endpoint should return RequestIdSerializer or OpenApiTypes.NONE + # but SDK generated from a schema with nullable RequestIdSerializer # throws an error when tried to convert empty response to a specific type - serializers=[RqIdSerializer, OpenApiTypes.BINARY], + serializers=[RequestIdSerializer, OpenApiTypes.BINARY], resource_type_field_name=None ), @@ -1397,80 +1203,12 @@ def append_data_chunk(self, request: ExtendedRequest, pk: int, file_id: str): return self.append_tus_chunk(request, file_id) @extend_schema(methods=['GET'], summary='Get task annotations', - description=textwrap.dedent("""\ - Deprecation warning: - - Utilizing this endpoint to export annotations as a dataset in - a specific format is no longer possible. - - Consider using new API: - - `POST /api/tasks//dataset/export?save_images=False` to initiate export process - - `GET /api/requests/` to check process status, - where `rq_id` is request id returned on initializing request - - `GET result_url` to download a prepared file, - where `result_url` can be found in the response on checking status request - """), - parameters=[ - # FUTURE-TODO: the following parameters should be removed after a few releases - OpenApiParameter('format', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, - description="This parameter is no longer supported", - deprecated=True - ), - OpenApiParameter('filename', description='This parameter is no longer supported', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, - deprecated=True - ), - OpenApiParameter('action', location=OpenApiParameter.QUERY, - description='This parameter is no longer supported', - type=OpenApiTypes.STR, required=False, enum=['download'], - deprecated=True - ), - OpenApiParameter('location', description='This parameter is no longer supported', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, - enum=Location.list(), - deprecated=True - ), - OpenApiParameter('cloud_storage_id', description='This parameter is no longer supported', - location=OpenApiParameter.QUERY, type=OpenApiTypes.INT, required=False, - deprecated=True - ), - ], responses={ '200': OpenApiResponse(LabeledDataSerializer), '400': OpenApiResponse(description="Exporting without data is not allowed"), '410': OpenApiResponse(description="API endpoint no longer handles exporting process"), }) - @extend_schema(methods=['PUT'], summary='Replace task annotations / Get annotation import status', - description=textwrap.dedent(""" - Utilizing this endpoint to check status of the import process is deprecated - in favor of the new requests API: - GET /api/requests/, where `rq_id` parameter is returned in the response - on initializing request. - """), - parameters=[ - # deprecated parameters - OpenApiParameter( - 'format', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, - description='Input format name\nYou can get the list of supported formats at:\n/server/annotation/formats', - deprecated=True, - ), - OpenApiParameter( - 'rq_id', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, - description='rq id', - deprecated=True, - ), - ], - request=PolymorphicProxySerializer('TaskAnnotationsUpdate', - # TODO: refactor to use required=False when possible - serializers=[LabeledDataSerializer, AnnotationFileSerializer, OpenApiTypes.NONE], - resource_type_field_name=None - ), - responses={ - '201': OpenApiResponse(description='Import has finished'), - '202': OpenApiResponse(description='Import is in progress'), - '405': OpenApiResponse(description='Format is not available'), - }) @extend_schema(methods=['POST'], summary="Import annotations into a task", description=textwrap.dedent(""" @@ -1492,16 +1230,22 @@ def append_data_chunk(self, request: ExtendedRequest, pk: int, file_id: str): OpenApiParameter('filename', description='Annotation file name', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), ], - request=PolymorphicProxySerializer('TaskAnnotationsWrite', - # TODO: refactor to use required=False when possible - serializers=[AnnotationFileSerializer, OpenApiTypes.NONE], - resource_type_field_name=None - ), + request=UploadedFileSerializer(required=False), + # request=PolymorphicProxySerializer('TaskAnnotationsWrite', + # # TODO: refactor to use required=False when possible + # serializers=[UploadedFileSerializer, OpenApiTypes.NONE], + # resource_type_field_name=None + # ), responses={ '201': OpenApiResponse(description='Uploading has finished'), - '202': OpenApiResponse(RqIdSerializer, description='Uploading has been started'), + '202': OpenApiResponse(RequestIdSerializer, description='Uploading has been started'), '405': OpenApiResponse(description='Format is not available'), }) + @extend_schema(methods=['PUT'], summary='Replace task annotations', + request=LabeledDataSerializer, + responses={ + '200': OpenApiResponse(description='Annotations have been replaced'), + }) @extend_schema(methods=['PATCH'], summary='Update task annotations', parameters=[ OpenApiParameter('action', location=OpenApiParameter.QUERY, required=True, @@ -1533,38 +1277,17 @@ def annotations(self, request: ExtendedRequest, pk: int): return Response(data) elif request.method == 'POST' or request.method == 'OPTIONS': - # NOTE: initialization process of annotations import - format_name = request.query_params.get('format', '') - return self.import_annotations( - request=request, - db_obj=self._object, - import_func=_import_annotations, - rq_func=dm.task.import_task_annotations, - rq_id_factory=self.IMPORT_RQ_ID_FACTORY, - ) + return self.upload_data(request) + elif request.method == 'PUT': - format_name = request.query_params.get('format', '') - # deprecated logic, will be removed in one of the next releases - if format_name: - # NOTE: continue process of import annotations - conv_mask_to_poly = to_bool(request.query_params.get('conv_mask_to_poly', True)) - location_conf = get_location_configuration( - db_instance=self._object, query_params=request.query_params, field_name=StorageType.SOURCE - ) - return _import_annotations( - request=request, - rq_id_factory=self.IMPORT_RQ_ID_FACTORY, - rq_func=dm.task.import_task_annotations, - db_obj=self._object, - format_name=format_name, - location_conf=location_conf, - conv_mask_to_poly=conv_mask_to_poly - ) - else: - serializer = LabeledDataSerializer(data=request.data) - if serializer.is_valid(raise_exception=True): - data = dm.task.put_task_data(pk, serializer.validated_data) - return Response(data) + if "format" in request.query_params.keys(): + return get_410_response_for_import_api() + + serializer = LabeledDataSerializer(data=request.data) + if serializer.is_valid(raise_exception=True): + data = dm.task.put_task_data(pk, serializer.validated_data) + return Response(data) + elif request.method == 'DELETE': dm.task.delete_task_data(pk) return Response(status=status.HTTP_204_NO_CONTENT) @@ -1602,7 +1325,7 @@ def status(self, request, pk): task = self.get_object() # force call of check_object_permissions() response = self._get_rq_response( queue=settings.CVAT_QUEUES.IMPORT_DATA.value, - job_id=RQId( + job_id=RequestId( queue=settings.CVAT_QUEUES.IMPORT_DATA.value, action=RequestAction.CREATE, target=RequestTarget.TASK, @@ -1864,14 +1587,6 @@ class JobViewSet(viewsets.GenericViewSet, mixins.ListModelMixin, mixins.CreateMo 'project_name': 'segment__task__project__name', 'assignee': 'assignee__username' } - IMPORT_RQ_ID_FACTORY = functools.partial(RQId, - queue=settings.CVAT_QUEUES.IMPORT_DATA.value, - action=RequestAction.IMPORT, - target=RequestTarget.JOB, - extra={ - "subresource": RequestSubresource.ANNOTATIONS, - } - ) def get_queryset(self): queryset = super().get_queryset() @@ -1922,24 +1637,9 @@ def get_upload_dir(self): # UploadMixin method def upload_finished(self, request: ExtendedRequest): if self.action == 'annotations': - format_name = request.query_params.get("format", "") - filename = request.query_params.get("filename", "") - conv_mask_to_poly = to_bool(request.query_params.get('conv_mask_to_poly', True)) - tmp_dir = self.get_upload_dir() - annotation_file = os.path.join(tmp_dir, filename) - if os.path.isfile(annotation_file): - return _import_annotations( - request=request, - filename=annotation_file, - rq_id_factory=self.IMPORT_RQ_ID_FACTORY, - rq_func=dm.task.import_job_annotations, - db_obj=self._object, - format_name=format_name, - conv_mask_to_poly=conv_mask_to_poly, - ) - else: - return Response(data='No such file were uploaded', - status=status.HTTP_400_BAD_REQUEST) + importer = DatasetImporter(request=request, db_instance=self._object) + return importer.process() + return Response(data='Unknown upload was finished', status=status.HTTP_400_BAD_REQUEST) @@ -2008,53 +1708,18 @@ def upload_finished(self, request: ExtendedRequest): OpenApiParameter('filename', description='Annotation file name', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), ], - request=AnnotationFileSerializer(required=False), + request=UploadedFileSerializer(required=False), responses={ '201': OpenApiResponse(description='Uploading has finished'), - '202': OpenApiResponse(RqIdSerializer, description='Uploading has been started'), + '202': OpenApiResponse(RequestIdSerializer, description='Uploading has been started'), '405': OpenApiResponse(description='Format is not available'), }) - @extend_schema(methods=['PUT'], - summary='Replace job annotations / Get annotation import status', - description=textwrap.dedent(""" - Utilizing this endpoint to check status of the import process is deprecated - in favor of the new requests API: - GET /api/requests/, where `rq_id` parameter is returned in the response - on initializing request. - """), - parameters=[ - - OpenApiParameter('format', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, - description='Input format name\nYou can get the list of supported formats at:\n/server/annotation/formats', - deprecated=True, - ), - OpenApiParameter('location', description='where to import the annotation from', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, - enum=Location.list(), - deprecated=True, - ), - OpenApiParameter('cloud_storage_id', description='Storage id', - location=OpenApiParameter.QUERY, type=OpenApiTypes.INT, required=False, - deprecated=True, - ), - OpenApiParameter('filename', description='Annotation file name', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, - deprecated=True, - ), - OpenApiParameter('rq_id', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, - description='rq id', - deprecated=True, - ), - ], - request=PolymorphicProxySerializer( - component_name='JobAnnotationsUpdate', - serializers=[LabeledDataSerializer, AnnotationFileSerializer(required=False)], - resource_type_field_name=None - ), - responses={ - '201': OpenApiResponse(description='Import has finished'), - '202': OpenApiResponse(description='Import is in progress'), - '405': OpenApiResponse(description='Format is not available'), + @extend_schema( + methods=['PUT'], + summary='Replace job annotations', + request=LabeledDataSerializer, + responses={ + '200': OpenApiResponse(description='Annotations have been replaced'), }) @extend_schema(methods=['PATCH'], summary='Update job annotations', parameters=[ @@ -2085,40 +1750,19 @@ def annotations(self, request: ExtendedRequest, pk: int): return Response(annotations) elif request.method == 'POST' or request.method == 'OPTIONS': - format_name = request.query_params.get('format', '') - return self.import_annotations( - request=request, - db_obj=self._object, - import_func=_import_annotations, - rq_func=dm.task.import_job_annotations, - rq_id_factory=self.IMPORT_RQ_ID_FACTORY, - ) + return self.upload_data(request) elif request.method == 'PUT': - format_name = request.query_params.get('format', '') - if format_name: - # deprecated logic, will be removed in one of the next releases - conv_mask_to_poly = to_bool(request.query_params.get('conv_mask_to_poly', True)) - location_conf = get_location_configuration( - db_instance=self._object, query_params=request.query_params, field_name=StorageType.SOURCE - ) - return _import_annotations( - request=request, - rq_id_factory=self.IMPORT_RQ_ID_FACTORY, - rq_func=dm.task.import_job_annotations, - db_obj=self._object, - format_name=format_name, - location_conf=location_conf, - conv_mask_to_poly=conv_mask_to_poly - ) - else: - serializer = LabeledDataSerializer(data=request.data) - if serializer.is_valid(raise_exception=True): - try: - data = dm.task.put_job_data(pk, serializer.validated_data) - except (AttributeError, IntegrityError) as e: - return Response(data=str(e), status=status.HTTP_400_BAD_REQUEST) - return Response(data) + if "format" in request.query_params.keys(): + return get_410_response_for_import_api() + + serializer = LabeledDataSerializer(data=request.data) + if serializer.is_valid(raise_exception=True): + try: + data = dm.task.put_job_data(pk, serializer.validated_data) + except (AttributeError, IntegrityError) as e: + return Response(data=str(e), status=status.HTTP_400_BAD_REQUEST) + return Response(data) elif request.method == 'DELETE': dm.task.delete_job_data(pk) return Response(status=status.HTTP_204_NO_CONTENT) @@ -3228,238 +2872,3 @@ def rq_exception_handler(rq_job: RQJob, exc_type: type[Exception], exc_value: Ex rq_job_meta.save() return True - -def _import_annotations( - request: ExtendedRequest, - rq_id_factory: Callable[..., RQId], - rq_func: Callable[..., None], - db_obj: Task | Job, - format_name: str, - filename: str = None, - location_conf: dict[str, Any] | None = None, - conv_mask_to_poly: bool = True, -): - - format_desc = {f.DISPLAY_NAME: f - for f in dm.views.get_import_formats()}.get(format_name) - if format_desc is None: - raise serializers.ValidationError( - "Unknown input format '{}'".format(format_name)) - elif not format_desc.ENABLED: - return Response(status=status.HTTP_405_METHOD_NOT_ALLOWED) - - rq_id = request.query_params.get('rq_id') - rq_id_should_be_checked = bool(rq_id) - if not rq_id: - rq_id = rq_id_factory(id=db_obj.pk).render() - - queue = django_rq.get_queue(settings.CVAT_QUEUES.IMPORT_DATA.value) - - # ensure that there is no race condition when processing parallel requests - with get_rq_lock_for_job(queue, rq_id): - rq_job = queue.fetch_job(rq_id) - - if rq_job: - if rq_id_should_be_checked and not is_rq_job_owner(rq_job, request.user.id): - return Response(status=status.HTTP_403_FORBIDDEN) - - if request.method == 'POST': - if rq_job.get_status(refresh=False) not in (RQJobStatus.FINISHED, RQJobStatus.FAILED): - return Response(status=status.HTTP_409_CONFLICT, data='Import job already exists') - - rq_job.delete() - rq_job = None - - if not rq_job: - # If filename is specified we consider that file was uploaded via TUS, so it exists in filesystem - # Then we dont need to create temporary file - # Or filename specify key in cloud storage so we need to download file - location = location_conf.get('location') if location_conf else Location.LOCAL - db_storage = None - - if not filename or location == Location.CLOUD_STORAGE: - if location != Location.CLOUD_STORAGE: - serializer = AnnotationFileSerializer(data=request.data) - if serializer.is_valid(raise_exception=True): - anno_file = serializer.validated_data['annotation_file'] - with NamedTemporaryFile( - prefix='cvat_{}'.format(db_obj.pk), - dir=settings.TMP_FILES_ROOT, - delete=False) as tf: - filename = tf.name - for chunk in anno_file.chunks(): - tf.write(chunk) - else: - assert filename, 'The filename was not specified' - - try: - storage_id = location_conf['storage_id'] - except KeyError: - raise serializers.ValidationError( - 'Cloud storage location was selected as the source,' - ' but cloud storage id was not specified') - db_storage = get_cloud_storage_for_import_or_export( - storage_id=storage_id, request=request, - is_default=location_conf['is_default']) - - key = filename - with NamedTemporaryFile( - prefix='cvat_{}'.format(db_obj.pk), - dir=settings.TMP_FILES_ROOT, - delete=False) as tf: - filename = tf.name - - func = import_resource_with_clean_up_after - func_args = (rq_func, filename, db_obj.pk, format_name, conv_mask_to_poly) - - if location == Location.CLOUD_STORAGE: - func_args = (db_storage, key, func) + func_args - func = import_resource_from_cloud_storage - - av_scan_paths(filename) - user_id = request.user.id - - with get_rq_lock_by_user(queue, user_id): - meta = ImportRQMeta.build_for(request=request, db_obj=db_obj, tmp_file=filename) - queue.enqueue_call( - func=func, - args=func_args, - job_id=rq_id, - depends_on=define_dependent_job(queue, user_id, rq_id=rq_id), - meta=meta, - result_ttl=settings.IMPORT_CACHE_SUCCESS_TTL.total_seconds(), - failure_ttl=settings.IMPORT_CACHE_FAILED_TTL.total_seconds() - ) - - # log events after releasing Redis lock - if not rq_job: - handle_dataset_import(db_obj, format_name=format_name, cloud_storage_id=db_storage.id if db_storage else None) - - serializer = RqIdSerializer(data={'rq_id': rq_id}) - serializer.is_valid(raise_exception=True) - - return Response(serializer.data, status=status.HTTP_202_ACCEPTED) - - # Deprecated logic, /api/requests API should be used instead - # https://greenbytes.de/tech/webdav/draft-ietf-httpapi-deprecation-header-latest.html#the-deprecation-http-response-header-field - deprecation_timestamp = int(datetime(2025, 2, 14, tzinfo=timezone.utc).timestamp()) - response_headers = { - "Deprecation": f"@{deprecation_timestamp}" - } - - rq_job_status = rq_job.get_status(refresh=False) - if RQJobStatus.FINISHED == rq_job_status: - rq_job.delete() - return Response(status=status.HTTP_201_CREATED, headers=response_headers) - elif RQJobStatus.FAILED == rq_job_status: - exc_info = process_failed_job(rq_job) - - import_error_prefix = f'{CvatImportError.__module__}.{CvatImportError.__name__}:' - if exc_info.startswith("Traceback") and import_error_prefix in exc_info: - exc_message = exc_info.split(import_error_prefix)[-1].strip() - return Response(data=exc_message, status=status.HTTP_400_BAD_REQUEST, headers=response_headers) - else: - return Response(data=exc_info, - status=status.HTTP_500_INTERNAL_SERVER_ERROR, headers=response_headers) - - return Response(status=status.HTTP_202_ACCEPTED, headers=response_headers) - -def _import_project_dataset( - request: ExtendedRequest, - rq_id_factory: Callable[..., RQId], - rq_func: Callable[..., None], - db_obj: Project, - format_name: str, - filename: str | None = None, - conv_mask_to_poly: bool = True, - location_conf: dict[str, Any] | None = None -): - format_desc = {f.DISPLAY_NAME: f - for f in dm.views.get_import_formats()}.get(format_name) - if format_desc is None: - raise serializers.ValidationError( - "Unknown input format '{}'".format(format_name)) - elif not format_desc.ENABLED: - return Response(status=status.HTTP_405_METHOD_NOT_ALLOWED) - - rq_id = rq_id_factory(id=db_obj.pk).render() - - queue: DjangoRQ = django_rq.get_queue(settings.CVAT_QUEUES.IMPORT_DATA.value) - - # ensure that there is no race condition when processing parallel requests - with get_rq_lock_for_job(queue, rq_id): - rq_job = queue.fetch_job(rq_id) - - if rq_job: - rq_job_status = rq_job.get_status(refresh=False) - if rq_job_status not in (RQJobStatus.FINISHED, RQJobStatus.FAILED): - return Response(status=status.HTTP_409_CONFLICT, data='Import job already exists') - - # for some reason the previous job has not been deleted - # (e.g the user closed the browser tab when job has been created - # but no one requests for checking status were not made) - rq_job.delete() - rq_job = None - - location = location_conf.get('location') if location_conf else None - db_storage = None - - if not filename and location != Location.CLOUD_STORAGE: - serializer = DatasetFileSerializer(data=request.data) - if serializer.is_valid(raise_exception=True): - dataset_file = serializer.validated_data['dataset_file'] - with NamedTemporaryFile( - prefix='cvat_{}'.format(db_obj.pk), - dir=settings.TMP_FILES_ROOT, - delete=False) as tf: - filename = tf.name - for chunk in dataset_file.chunks(): - tf.write(chunk) - - elif location == Location.CLOUD_STORAGE: - assert filename, 'The filename was not specified' - try: - storage_id = location_conf['storage_id'] - except KeyError: - raise serializers.ValidationError( - 'Cloud storage location was selected as the source,' - ' but cloud storage id was not specified') - db_storage = get_cloud_storage_for_import_or_export( - storage_id=storage_id, request=request, - is_default=location_conf['is_default']) - - key = filename - with NamedTemporaryFile( - prefix='cvat_{}'.format(db_obj.pk), - dir=settings.TMP_FILES_ROOT, - delete=False) as tf: - filename = tf.name - - func = import_resource_with_clean_up_after - func_args = (rq_func, filename, db_obj.pk, format_name, conv_mask_to_poly) - - if location == Location.CLOUD_STORAGE: - func_args = (db_storage, key, func) + func_args - func = import_resource_from_cloud_storage - - user_id = request.user.id - - with get_rq_lock_by_user(queue, user_id): - meta = ImportRQMeta.build_for(request=request, db_obj=db_obj, tmp_file=filename) - queue.enqueue_call( - func=func, - args=func_args, - job_id=rq_id, - meta=meta, - depends_on=define_dependent_job(queue, user_id, rq_id=rq_id), - result_ttl=settings.IMPORT_CACHE_SUCCESS_TTL.total_seconds(), - failure_ttl=settings.IMPORT_CACHE_FAILED_TTL.total_seconds() - ) - - - handle_dataset_import(db_obj, format_name=format_name, cloud_storage_id=db_storage.id if db_storage else None) - - serializer = RqIdSerializer(data={'rq_id': rq_id}) - serializer.is_valid(raise_exception=True) - - return Response(serializer.data, status=status.HTTP_202_ACCEPTED) diff --git a/cvat/apps/events/export.py b/cvat/apps/events/export.py index b7500571ceb9..b436b02c4e61 100644 --- a/cvat/apps/events/export.py +++ b/cvat/apps/events/export.py @@ -5,27 +5,36 @@ import csv import os import uuid +from dataclasses import dataclass from datetime import datetime, timedelta, timezone -from logging import Logger +from functools import cached_property +from pathlib import Path +import attrs import clickhouse_connect -import django_rq from dateutil import parser from django.conf import settings +from django.utils import timezone from rest_framework import serializers, status from rest_framework.response import Response +from rest_framework.reverse import reverse +from rq import get_current_job +from cvat.apps.dataset_manager.util import TmpDirManager from cvat.apps.dataset_manager.views import log_exception from cvat.apps.engine.log import ServerLogManager -from cvat.apps.engine.rq import RQMetaWithFailureInfo -from cvat.apps.engine.utils import sendfile +from cvat.apps.engine.rq import ExportRQMeta, RQMetaWithFailureInfo, define_dependent_job +from cvat.apps.engine.types import ExtendedRequest +from cvat.apps.engine.utils import get_rq_lock_by_user, sendfile +from cvat.apps.redis_handler.background import AbstractExportableRequestManager +from cvat.apps.redis_handler.rq import RequestId slogger = ServerLogManager(__name__) DEFAULT_CACHE_TTL = timedelta(hours=1) -def _create_csv(query_params, output_filename, cache_ttl): +def _create_csv(query_params: dict): try: clickhouse_settings = settings.CLICKHOUSE["events"] @@ -65,79 +74,117 @@ def _create_csv(query_params, output_filename, cache_ttl): ) as client: result = client.query(query, parameters=parameters) + current_job = get_current_job() + output_filename = Path(TmpDirManager.TMP_ROOT) / current_job.id + with open(output_filename, "w", encoding="UTF8") as f: writer = csv.writer(f) writer.writerow(result.column_names) writer.writerows(result.result_rows) - archive_ctime = os.path.getctime(output_filename) - scheduler = django_rq.get_scheduler(settings.CVAT_QUEUES.EXPORT_DATA.value) - cleaning_job = scheduler.enqueue_in( - time_delta=cache_ttl, - func=_clear_export_cache, - file_path=output_filename, - file_ctime=archive_ctime, - logger=slogger.glob, - ) - slogger.glob.info( - f"The {output_filename} is created " - f"and available for downloading for the next {cache_ttl}. " - f"Export cache cleaning job is enqueued, id '{cleaning_job.id}'" - ) return output_filename except Exception: log_exception(slogger.glob) raise -def export(request, filter_query, queue_name): - action = request.query_params.get("action", None) - filename = request.query_params.get("filename", None) - - query_params = { - "org_id": filter_query.get("org_id", None), - "project_id": filter_query.get("project_id", None), - "task_id": filter_query.get("task_id", None), - "job_id": filter_query.get("job_id", None), - "user_id": filter_query.get("user_id", None), - "from": filter_query.get("from", None), - "to": filter_query.get("to", None), - } - - try: - if query_params["from"]: - query_params["from"] = parser.parse(query_params["from"]).timestamp() - except parser.ParserError: - raise serializers.ValidationError( - f"Cannot parse 'from' datetime parameter: {query_params['from']}" - ) - try: - if query_params["to"]: - query_params["to"] = parser.parse(query_params["to"]).timestamp() - except parser.ParserError: - raise serializers.ValidationError( - f"Cannot parse 'to' datetime parameter: {query_params['to']}" - ) - - if query_params["from"] and query_params["to"] and query_params["from"] > query_params["to"]: - raise serializers.ValidationError("'from' must be before than 'to'") - - # Set the default time interval to last 30 days - if not query_params["from"] and not query_params["to"]: - query_params["to"] = datetime.now(timezone.utc) - query_params["from"] = query_params["to"] - timedelta(days=30) +class EventsRequestId(RequestId): + @property + def user_id(self) -> int: + return self.extra["user_id"] + + +@attrs.define(kw_only=True) +class EventsRqJobManager(AbstractExportableRequestManager): + + filter_query: dict = attrs.field() + query_id: uuid.UUID = attrs.field(factory=uuid.uuid4) # temporary arg + + def build_request_id(self): + return EventsRequestId( + queue=self.QUEUE_NAME, + action="export", + target="events", + # id=uuid.uuid4(), + id=self.query_id, + ).render() + + def define_query_params(self) -> dict: + query_params = { + "org_id": self.filter_query.get("org_id", None), + "project_id": self.filter_query.get("project_id", None), + "task_id": self.filter_query.get("task_id", None), + "job_id": self.filter_query.get("job_id", None), + "user_id": self.filter_query.get("user_id", None), + "from": self.filter_query.get("from", None), + "to": self.filter_query.get("to", None), + } + try: + if query_params["from"]: + query_params["from"] = parser.parse(query_params["from"]).timestamp() + except parser.ParserError: + raise serializers.ValidationError( + f"Cannot parse 'from' datetime parameter: {query_params['from']}" + ) + try: + if query_params["to"]: + query_params["to"] = parser.parse(query_params["to"]).timestamp() + except parser.ParserError: + raise serializers.ValidationError( + f"Cannot parse 'to' datetime parameter: {query_params['to']}" + ) + + if ( + query_params["from"] + and query_params["to"] + and query_params["from"] > query_params["to"] + ): + raise serializers.ValidationError("'from' must be before than 'to'") + + # Set the default time interval to last 30 days + if not query_params["from"] and not query_params["to"]: + query_params["to"] = datetime.now(timezone.utc) + query_params["from"] = query_params["to"] - timedelta(days=30) + + return query_params + + def init_callback_with_params(self): + self.callback = _create_csv + query_params = self.define_query_params() + self.callback_args = (query_params,) + + def where_to_redirect(self) -> str: + return reverse("events-download-file", request=self.request) + + def get_result_filename(self): + if self.export_args.filename: + return self.export_args.filename + + timestamp = self.get_file_timestamp() + return f"logs_{timestamp}.csv" + + +# FUTURE-TODO: delete deprecated function +def export(request: ExtendedRequest, filter_query: dict): + action = request.query_params.get("action", None) if action not in (None, "download"): raise serializers.ValidationError("Unexpected action specified for the request") - query_id = request.query_params.get("query_id", None) or uuid.uuid4() - rq_id = f"export:csv-logs-{query_id}-by-{request.user}" + filename = request.query_params.get("filename", None) + + query_id = request.query_params.get("query_id") + manager = EventsRqJobManager( + request=request, filter_query=filter_query, **({"query_id": query_id} if query_id else {}) + ) + request_id = manager.build_request_id() + queue = manager.get_queue() + response_data = { - "query_id": query_id, + "query_id": manager.query_id, } - queue: django_rq.queues.DjangoRQ = django_rq.get_queue(queue_name) - rq_job = queue.fetch_job(rq_id) + rq_job = queue.fetch_job(request_id) if rq_job: if rq_job.is_finished: @@ -159,26 +206,9 @@ def export(request, filter_query, queue_name): else: return Response(data=response_data, status=status.HTTP_202_ACCEPTED) - ttl = DEFAULT_CACHE_TTL.total_seconds() - output_filename = os.path.join(settings.TMP_FILES_ROOT, f"{query_id}.csv") - queue.enqueue_call( - func=_create_csv, - args=(query_params, output_filename, DEFAULT_CACHE_TTL), - job_id=rq_id, - meta={}, - result_ttl=ttl, - failure_ttl=ttl, - ) + manager.init_request_args() + manager.validate_request() + manager.init_callback_with_params() + manager.setup_new_job(queue, request_id) return Response(data=response_data, status=status.HTTP_202_ACCEPTED) - - -def _clear_export_cache(file_path: str, file_ctime: float, logger: Logger) -> None: - try: - if os.path.exists(file_path) and os.path.getctime(file_path) == file_ctime: - os.remove(file_path) - - logger.info("Export cache file '{}' successfully removed".format(file_path)) - except Exception: - log_exception(logger) - raise diff --git a/cvat/apps/lambda_manager/views.py b/cvat/apps/lambda_manager/views.py index dcd62a531886..2e144a3cca87 100644 --- a/cvat/apps/lambda_manager/views.py +++ b/cvat/apps/lambda_manager/views.py @@ -45,7 +45,7 @@ SourceType, Task, ) -from cvat.apps.engine.rq import RQId, define_dependent_job +from cvat.apps.engine.rq import RequestId, define_dependent_job from cvat.apps.engine.serializers import LabeledDataSerializer from cvat.apps.engine.types import ExtendedRequest from cvat.apps.engine.utils import get_rq_lock_by_user, get_rq_lock_for_job @@ -607,7 +607,9 @@ def enqueue( job: Optional[int] = None, ) -> LambdaJob: queue = self._get_queue() - rq_id = RQId(RequestAction.AUTOANNOTATE, RequestTarget.TASK, task).render() + rq_id = RequestId( + queue=queue.name, action=RequestAction.AUTOANNOTATE, target=RequestTarget.TASK, id=task + ).render() # Ensure that there is no race condition when processing parallel requests. # Enqueuing an RQ job with (queue, user) lock but without (queue, rq_id) lock diff --git a/cvat/apps/quality_control/quality_reports.py b/cvat/apps/quality_control/quality_reports.py index 7858e74e4254..a3e1afdbcbd4 100644 --- a/cvat/apps/quality_control/quality_reports.py +++ b/cvat/apps/quality_control/quality_reports.py @@ -18,12 +18,12 @@ import datumaro.util.annotation_util import datumaro.util.mask_tools import numpy as np -from attrs import asdict, define, fields_dict +from attrs import asdict, define, fields_dict, frozen from datumaro.util import dump_json, parse_json from django.conf import settings from django.db import transaction -from django.http import HttpResponseBadRequest from django_rq.queues import DjangoRQ as RqQueue +from rest_framework.serializers import ValidationError from scipy.optimize import linear_sum_assignment from cvat.apps.dataset_manager.bindings import ( @@ -43,6 +43,7 @@ Image, Job, JobType, + Project, RequestTarget, ShapeType, StageChoice, @@ -53,7 +54,7 @@ ) from cvat.apps.engine.rq import BaseRQMeta, define_dependent_job from cvat.apps.engine.types import ExtendedRequest -from cvat.apps.engine.utils import get_rq_lock_by_user, get_rq_lock_for_job +from cvat.apps.engine.utils import get_rq_lock_by_user from cvat.apps.profiler import silk_profile from cvat.apps.quality_control import models from cvat.apps.quality_control.models import ( @@ -61,8 +62,8 @@ AnnotationConflictType, AnnotationType, ) -from cvat.apps.redis_handler.background import AbstractRQJobManager -from cvat.apps.redis_handler.rq import RQId +from cvat.apps.redis_handler.background import AbstractRequestManager +from cvat.apps.redis_handler.rq import RequestId class Serializable: @@ -2265,15 +2266,21 @@ def generate_report(self) -> ComparisonReport: ) -class QualityReportRQJobManager(AbstractRQJobManager): - _JOB_RESULT_TTL = 120 - _JOB_FAILURE_TTL = _JOB_RESULT_TTL - +@define(kw_only=True) +class QualityReportRQJobManager(AbstractRequestManager): QUEUE_NAME = settings.CVAT_QUEUES.QUALITY_REPORTS.value SUPPORTED_RESOURCES: ClassVar[set[RequestTarget]] = {RequestTarget.TASK} - def build_rq_id(self): - return RQId( + @property + def job_result_ttl(self): + return 120 + + @property + def job_failed_ttl(self): + return self.job_result_ttl + + def build_request_id(self): + return RequestId( queue=self.QUEUE_NAME, action="compute", target=self.resource, @@ -2281,33 +2288,27 @@ def build_rq_id(self): ).render() def validate_request(self): + super().validate_request() + if self.db_instance.dimension != DimensionType.DIM_2D: - return HttpResponseBadRequest("Quality reports are only supported in 2d tasks") + raise ValidationError("Quality reports are only supported in 2d tasks") gt_job = self.db_instance.gt_job if gt_job is None or not ( gt_job.stage == StageChoice.ACCEPTANCE and gt_job.state == StatusChoice.COMPLETED ): - return HttpResponseBadRequest( + raise ValidationError( "Quality reports require a Ground Truth job in the task " f"at the {StageChoice.ACCEPTANCE} stage " f"and in the {StatusChoice.COMPLETED} state" ) - def setup_background_job(self, queue: RqQueue, rq_id: str) -> None: - user_id = self.request.user.id - - with get_rq_lock_by_user(queue, user_id=user_id): - dependency = define_dependent_job(queue, user_id=user_id, rq_id=rq_id) - queue.enqueue( - QualityReportUpdateManager._check_task_quality, - task_id=self.db_instance.pk, - job_id=rq_id, - meta=BaseRQMeta.build(request=self.request, db_obj=self.db_instance), - result_ttl=self._JOB_RESULT_TTL, - failure_ttl=self._JOB_FAILURE_TTL, - depends_on=dependency, - ) + def init_callback_with_params(self): + self.callback = QualityReportUpdateManager._check_task_quality + self.callback_kwargs = { + "task_id": self.db_instance.pk, + } + class QualityReportUpdateManager: @classmethod diff --git a/cvat/apps/quality_control/views.py b/cvat/apps/quality_control/views.py index e8e5b6172294..45394f5ab861 100644 --- a/cvat/apps/quality_control/views.py +++ b/cvat/apps/quality_control/views.py @@ -24,7 +24,6 @@ from cvat.apps.engine.mixins import PartialUpdateModelMixin from cvat.apps.engine.models import Task from cvat.apps.engine.rq import BaseRQMeta -from cvat.apps.engine.serializers import RqIdSerializer from cvat.apps.engine.utils import get_server_url from cvat.apps.quality_control import quality_reports as qc from cvat.apps.quality_control.models import ( @@ -44,6 +43,7 @@ QualityReportSerializer, QualitySettingsSerializer, ) +from cvat.apps.redis_handler.serializers import RequestIdSerializer @extend_schema(tags=["quality"]) @@ -244,7 +244,7 @@ def get_queryset(self): responses={ "201": QualityReportSerializer, "202": OpenApiResponse( - RqIdSerializer, + RequestIdSerializer, description=textwrap.dedent( """\ A quality report request has been enqueued, the request id is returned. @@ -277,13 +277,13 @@ def create(self, request, *args, **kwargs): except Task.DoesNotExist as ex: raise NotFound(f"Task {task_id} does not exist") from ex - manager = qc.QualityReportRQJobManager(task, request) + manager = qc.QualityReportRQJobManager(request=request, db_instance=task) return manager.process() else: deprecation_timestamp = int(datetime(2025, 3, 17, tzinfo=timezone.utc).timestamp()) response_headers = {"Deprecation": f"@{deprecation_timestamp}"} - serializer = RqIdSerializer(data={"rq_id": rq_id}) + serializer = RequestIdSerializer(data={"rq_id": rq_id}) serializer.is_valid(raise_exception=True) rq_id = serializer.validated_data["rq_id"] rq_job = qc.QualityReportRQJobManager.get_job_by_id(rq_id) diff --git a/cvat/apps/redis_handler/background.py b/cvat/apps/redis_handler/background.py index e90e4bb1e559..b996a2d170b5 100644 --- a/cvat/apps/redis_handler/background.py +++ b/cvat/apps/redis_handler/background.py @@ -2,125 +2,337 @@ # # SPDX-License-Identifier: MIT +import os.path as osp from abc import ABCMeta, abstractmethod -from typing import ClassVar, Optional +from dataclasses import asdict as dataclass_asdict +from dataclasses import dataclass +from datetime import datetime +from functools import cached_property +from types import NoneType +from typing import Any, Callable, ClassVar +from urllib.parse import quote +import attrs import django_rq +from django.conf import settings +from django.db.models import Model +from django.http.response import HttpResponseBadRequest +from django.utils import timezone from django_rq.queues import DjangoRQ, DjangoScheduler from rest_framework import status +from rest_framework.exceptions import MethodNotAllowed from rest_framework.response import Response +from rest_framework.serializers import ValidationError from rq.job import Job as RQJob from rq.job import JobStatus as RQJobStatus +from cvat.apps.dataset_manager.util import get_export_cache_lock + +# from cvat.apps.dataset_manager.views import get_export_cache_ttl +from cvat.apps.engine.location import Location, StorageType, get_location_configuration from cvat.apps.engine.log import ServerLogManager -from cvat.apps.engine.serializers import RqIdSerializer +from cvat.apps.engine.models import RequestTarget +from cvat.apps.engine.rq import BaseRQMeta, ExportRQMeta, define_dependent_job from cvat.apps.engine.types import ExtendedRequest -from cvat.apps.engine.utils import get_rq_lock_for_job +from cvat.apps.engine.utils import ( + get_rq_lock_by_user, + get_rq_lock_for_job, + sendfile, +) +from cvat.apps.redis_handler.serializers import RequestIdSerializer slogger = ServerLogManager(__name__) -from django.conf import settings -from cvat.apps.engine.models import Job, Project, RequestSubresource, RequestTarget, Task +REQUEST_TIMEOUT = 60 +# it's better to return LockNotAvailableError instead of response with 504 status +LOCK_TTL = REQUEST_TIMEOUT - 5 +LOCK_ACQUIRE_TIMEOUT = LOCK_TTL - 5 -# TODO: describe here protocol - -class AbstractRQJobManager(metaclass=ABCMeta): +@attrs.define(kw_only=True) +class AbstractRequestManager(metaclass=ABCMeta): + SUPPORTED_RESOURCES: ClassVar[set[RequestTarget]] QUEUE_NAME: ClassVar[str] - SUPPORTED_RESOURCES: ClassVar[set[RequestSubresource]] + REQUEST_ID_KEY = "rq_id" + + # todo: frozen + request: ExtendedRequest = attrs.field() + user_id: int = attrs.field(init=False) + + callback: Callable = attrs.field(init=False, validator=attrs.validators.instance_of(Callable)) + callback_args: tuple | None = attrs.field(init=False, default=None) + callback_kwargs: dict[str, Any] | None = attrs.field(init=False, default=None) + + db_instance: Model | None = attrs.field(default=None) + resource: RequestTarget | None = attrs.field( + init=False, + default=None, + # validator=attrs.validators.in_({NoneType} | SUPPORTED_RESOURCES), + on_setattr=attrs.setters.validate, + ) + + @resource.validator + def validate_resource(self, attribute: attrs.Attribute, value: Any): + if value and value not in self.SUPPORTED_RESOURCES: + raise ValidationError(f"Unsupported resource: {self.resource}") + + def __attrs_post_init__(self): + self.user_id = self.request.user.id + + if self.db_instance is not None: + self.resource = RequestTarget(self.db_instance.__class__.__name__.lower()) @classmethod def get_queue(cls) -> DjangoRQ: return django_rq.get_queue(cls.QUEUE_NAME) + @property + @abstractmethod + def job_result_ttl(self) -> int: ... + + @property + @abstractmethod + def job_failed_ttl(self) -> int: ... + + @abstractmethod + def build_request_id(self): ... + @classmethod - # @abstractmethod - def validate_rq_id(rq_id: str, /) -> None: ... + def validate_request_id(rq_id: str, /) -> None: ... @classmethod - def get_job_by_id(cls, rq_id: str, /, *, validate: bool = True) -> RQJob | None: + def get_job_by_id(cls, id_: str, /, *, validate: bool = True) -> RQJob | None: if validate: try: - cls.validate_rq_id(rq_id) + cls.validate_request_id(id_) except Exception: return None queue = cls.get_queue() - return queue.fetch_job(rq_id) - - def __init__( - self, - db_instance: Project | Task | Job, - request: ExtendedRequest, - ) -> None: - """ - Args: - db_instance (Union[models.Project, models.Task, models.Job]): Model instance - request (ExtendedRequest): Incoming HTTP request - """ - self.db_instance = db_instance - self.request = request - self.resource = db_instance.__class__.__name__.lower() - if self.resource not in self.SUPPORTED_RESOURCES: - raise ValueError("Unexpected type of db_instance: {}".format(type(db_instance))) - - def handle_existing_rq_job( - self, rq_job: Optional[RQJob], queue: DjangoRQ - ) -> Optional[Response]: - if not rq_job: + return queue.fetch_job(id_) + + def init_request_args(self): + pass + + @abstractmethod + def init_callback_with_params(self) -> None: ... + + def validate_request(self) -> Response | None: + """Hook to run some validations before processing a request""" + + # TODO: uncomment + # if self.request.method != "POST": + # raise MethodNotAllowed( + # self.request.method, + # detail="Only POST requests can be used to initiate a background process" + # ) + + def handle_existing_job(self, job: RQJob | None, queue: DjangoRQ) -> Response | None: + if not job: return None - rq_job_status = rq_job.get_status(refresh=False) + job_status = job.get_status(refresh=False) - if rq_job_status in {RQJobStatus.STARTED, RQJobStatus.QUEUED}: + if job_status in {RQJobStatus.STARTED, RQJobStatus.QUEUED}: return Response( data="Request is being processed", status=status.HTTP_409_CONFLICT, ) - if rq_job_status == RQJobStatus.DEFERRED: - rq_job.cancel(enqueue_dependents=settings.ONE_RUNNING_JOB_IN_QUEUE_PER_USER) + if job_status == RQJobStatus.DEFERRED: + job.cancel(enqueue_dependents=settings.ONE_RUNNING_JOB_IN_QUEUE_PER_USER) - if rq_job_status == RQJobStatus.SCHEDULED: + if job_status == RQJobStatus.SCHEDULED: scheduler: DjangoScheduler = django_rq.get_scheduler(queue.name, queue=queue) # remove the job id from the set with scheduled keys - scheduler.cancel(rq_job) - rq_job.cancel(enqueue_dependents=settings.ONE_RUNNING_JOB_IN_QUEUE_PER_USER) + scheduler.cancel(job) + job.cancel(enqueue_dependents=settings.ONE_RUNNING_JOB_IN_QUEUE_PER_USER) - rq_job.delete() + job.delete() return None - def validate_request(self) -> Response | None: - """Hook to run some validations before processing a request""" - - def after_processing(self) -> None: - """Hook to run some actions (e.g. collect events) after processing a request""" + def build_meta(self): + return BaseRQMeta.build(request=self.request, db_obj=self.db_instance) - @abstractmethod - def setup_background_job(self, queue: DjangoRQ, rq_id: str) -> None: ... + def setup_new_job(self, queue: DjangoRQ, id_: str, /): + with get_rq_lock_by_user(queue, self.user_id): + queue.enqueue_call( + func=self.callback, + args=self.callback_args, + kwargs=self.callback_kwargs, + job_id=id_, + meta=self.build_meta(), + depends_on=define_dependent_job(queue, self.user_id, rq_id=id_), + result_ttl=self.job_result_ttl, + failure_ttl=self.job_failed_ttl, + ) - @abstractmethod - def build_rq_id(self): ... + def finalize_request(self) -> None: + """Hook to run some actions (e.g. collect events) after processing a request""" - def get_response(self, rq_id: str) -> Response: - serializer = RqIdSerializer({"rq_id": rq_id}) + def get_response(self, id_: str) -> Response: + serializer = RequestIdSerializer({"rq_id": id_}) return Response(serializer.data, status=status.HTTP_202_ACCEPTED) def process(self) -> Response: - if invalid_response := self.validate_request(): - return invalid_response + self.init_request_args() + self.validate_request() + self.init_callback_with_params() queue: DjangoRQ = django_rq.get_queue(self.QUEUE_NAME) - rq_id = self.build_rq_id() + request_id = self.build_request_id() # ensure that there is no race condition when processing parallel requests - with get_rq_lock_for_job(queue, rq_id): - rq_job = queue.fetch_job(rq_id) + with get_rq_lock_for_job(queue, request_id): + job = queue.fetch_job(request_id) - if response := self.handle_existing_rq_job(rq_job, queue): + if response := self.handle_existing_job(job, queue): return response - self.setup_background_job(queue, rq_id) + self.setup_new_job(queue, request_id) + + self.finalize_request() + return self.get_response(request_id) + + +class AbstractExportableRequestManager(AbstractRequestManager): + QUEUE_NAME = settings.CVAT_QUEUES.EXPORT_DATA.value + + @property + def job_result_ttl(self): + from cvat.apps.dataset_manager.views import get_export_cache_ttl + + return int(get_export_cache_ttl(self.db_instance).total_seconds()) + + @property + def job_failed_ttl(self): + return self.job_result_ttl + + @dataclass + class ExportArgs: + filename: str | None + location_config: dict[str, Any] + + @property + def location(self) -> Location: + return self.location_config["location"] + + def to_dict(self): + return dataclass_asdict(self) + + @abstractmethod + def get_result_filename(self) -> str: ... + + @abstractmethod + def where_to_redirect(self) -> str: ... + + def make_result_url(self, *, request_id: str) -> str: + return self.where_to_redirect() + f"?{self.REQUEST_ID_KEY}={quote(request_id)}" + + def get_file_timestamp(self) -> str: + # use only updated_date for the related resource, don't check children objects + # because every child update should touch the updated_date of the parent resource + date = self.db_instance.updated_date if self.db_instance else timezone.now() + return datetime.strftime(date, "%Y_%m_%d_%H_%M_%S") + + def init_request_args(self) -> None: + try: + location_config = get_location_configuration( + db_instance=self.db_instance, + query_params=self.request.query_params, + field_name=StorageType.TARGET, + ) + except ValueError as ex: + raise ValidationError(str(ex)) from ex + + location = location_config["location"] + + if location not in Location.list(): + raise ValidationError(f"Unexpected location {location} specified for the request") + + self.export_args = AbstractExportableRequestManager.ExportArgs( + location_config=location_config, filename=self.request.query_params.get("filename") + ) + + def build_meta(self, *, request_id: str): + return ExportRQMeta.build_for( + request=self.request, + db_obj=self.db_instance, + result_url=( + self.make_result_url(request_id=request_id) + if self.export_args.location != Location.CLOUD_STORAGE + else None + ), + result_filename=self.get_result_filename(), + ) + + # TODO:refactor and fix for import too + def setup_new_job(self, queue: DjangoRQ, id_: str, /): + with get_rq_lock_by_user(queue, self.user_id): + queue.enqueue_call( + func=self.callback, + args=self.callback_args, + kwargs=self.callback_kwargs, + job_id=id_, + meta=self.build_meta(request_id=id_), + depends_on=define_dependent_job(queue, self.user_id, rq_id=id_), + result_ttl=self.job_result_ttl, + failure_ttl=self.job_failed_ttl, + ) + + def download_file(self) -> Response: + queue = self.get_queue() + request_id = self.request.query_params.get(self.REQUEST_ID_KEY) + + if not request_id: + return HttpResponseBadRequest("Missing request id in the query parameters") + + try: + self.validate_request_id(request_id) + except ValueError: + return HttpResponseBadRequest("Invalid export request id") + + # ensure that there is no race condition when processing parallel requests + with get_rq_lock_for_job(queue, request_id): + job = queue.fetch_job(request_id) + + if not job: + return HttpResponseBadRequest("Unknown export request id") + + # define status once to avoid refreshing it on each check + # FUTURE-TODO: get_status will raise InvalidJobOperation exception instead of returning None in one of the next releases + job_status = job.get_status(refresh=False) + + if job_status != RQJobStatus.FINISHED: + return HttpResponseBadRequest("The export process is not finished") + + job_meta = ExportRQMeta.for_job(job) + file_path = job.return_value() + + if not file_path: + return ( + Response( + "A result for exporting job was not found for finished RQ job", + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + if job_meta.result_url # user tries to download a final file locally while the export is made to cloud storage + else HttpResponseBadRequest( + "The export process has no result file to be downloaded locally" + ) + ) + + with get_export_cache_lock( + file_path, ttl=LOCK_TTL, acquire_timeout=LOCK_ACQUIRE_TIMEOUT + ): + if not osp.exists(file_path): + return Response( + "The exported file has expired, please retry exporting", + status=status.HTTP_404_NOT_FOUND, + ) - self.after_processing() - return self.get_response(rq_id) + return sendfile( + self.request, + file_path, + attachment=True, + attachment_filename=job_meta.result_filename, + ) diff --git a/cvat/apps/redis_handler/permissions.py b/cvat/apps/redis_handler/permissions.py index c164d9ca6a22..9067876f69ad 100644 --- a/cvat/apps/redis_handler/permissions.py +++ b/cvat/apps/redis_handler/permissions.py @@ -8,7 +8,6 @@ from django.conf import settings from rest_framework.exceptions import PermissionDenied -from rest_framework.serializers import ValidationError from rq.job import Job as RQJob from cvat.apps.engine.rq import is_rq_job_owner @@ -20,7 +19,6 @@ from cvat.apps.engine.models import RequestTarget from cvat.apps.engine.permissions import JobPermission, TaskPermission -from cvat.apps.redis_handler.rq import RQId class RequestPermission(OpenPolicyAgentPermission): diff --git a/cvat/apps/redis_handler/rq.py b/cvat/apps/redis_handler/rq.py index b7d09f000fd4..d705477fa807 100644 --- a/cvat/apps/redis_handler/rq.py +++ b/cvat/apps/redis_handler/rq.py @@ -1,13 +1,10 @@ from __future__ import annotations -from typing import ClassVar, Any +import base64 +from typing import Any, ClassVar from uuid import UUID import attrs - - -import base64 - from django.conf import settings @@ -33,9 +30,10 @@ def convert_extra(value: dict) -> dict[str, Any]: @attrs.frozen(kw_only=True) -class RQId: +class RequestId: FIELD_SEP: ClassVar[str] = "&" KEY_VAL_SEP: ClassVar[str] = "=" + TYPE_SEP: ClassVar[str] = ":" queue: settings.CVAT_QUEUES = attrs.field(converter=settings.CVAT_QUEUES) action: str = attrs.field(validator=attrs.validators.instance_of(str)) @@ -50,17 +48,18 @@ class RQId: @property def type(self) -> str: - return ":".join([self.action, self.target]) + return self.TYPE_SEP.join([self.action, self.target]) - @classmethod - def from_base(cls, parsed_id: RQId, /): + # @classmethod + # def from_base(cls, parsed_id: RequestId, /): + def convert_to(self, child_class: type[RequestId], /): # method is going to be used by child classes - return cls( - queue=parsed_id.queue, - action=parsed_id.action, - target=parsed_id.target, - id=parsed_id.id, - extra=parsed_id.extra, + return child_class( + queue=self.queue, + action=self.action, + target=self.target, + id=self.id, + extra=self.extra, ) def render(self) -> str: @@ -88,8 +87,8 @@ def parse(cls, rq_id: str, /): keys = set(attrs.fields_dict(cls).keys()) - {"extra"} params = {} - for pair in decoded_rq_id.split(RQId.FIELD_SEP): - key, value = pair.split(RQId.KEY_VAL_SEP, maxsplit=1) + for pair in decoded_rq_id.split(RequestId.FIELD_SEP): + key, value = pair.split(RequestId.KEY_VAL_SEP, maxsplit=1) if key in keys: params[key] = value else: diff --git a/cvat/apps/redis_handler/serializers.py b/cvat/apps/redis_handler/serializers.py new file mode 100644 index 000000000000..591ff24a6728 --- /dev/null +++ b/cvat/apps/redis_handler/serializers.py @@ -0,0 +1,176 @@ +# Copyright (C) CVAT.ai Corporation +# +# SPDX-License-Identifier: MIT + +from __future__ import annotations + +from datetime import timedelta +from decimal import Decimal +from typing import Any + +import rq.defaults as rq_defaults +from django.conf import settings +from django.utils import timezone +from drf_spectacular.utils import extend_schema_field +from rest_framework import serializers +from rq.job import Job as RQJob +from rq.job import JobStatus as RQJobStatus + +from cvat.apps.engine import models +from cvat.apps.engine.log import ServerLogManager +from cvat.apps.engine.models import RequestAction, RequestSubresource +from cvat.apps.engine.rq import BaseRQMeta, ExportRQMeta, ImportRQMeta +from cvat.apps.engine.serializers import BasicUserSerializer +from cvat.apps.engine.utils import ( + parse_exception_message, +) +from cvat.apps.lambda_manager.rq import LambdaRQMeta +from cvat.apps.redis_handler.rq import RequestId + +slogger = ServerLogManager(__name__) + + +class RequestIdSerializer(serializers.Serializer): + rq_id = serializers.CharField(help_text="Request id") + + +class UserIdentifiersSerializer(BasicUserSerializer): + class Meta(BasicUserSerializer.Meta): + fields = ( + "id", + "username", + ) + + +class RequestDataOperationSerializer(serializers.Serializer): + type = serializers.CharField() + target = serializers.ChoiceField(choices=models.RequestTarget.choices) + project_id = serializers.IntegerField(required=False, allow_null=True) + task_id = serializers.IntegerField(required=False, allow_null=True) + job_id = serializers.IntegerField(required=False, allow_null=True) + format = serializers.CharField(required=False, allow_null=True) + function_id = serializers.CharField(required=False, allow_null=True) + + def to_representation(self, rq_job: RQJob) -> dict[str, Any]: + parsed_rq_id: RequestId = rq_job.parsed_rq_id + + base_rq_job_meta = BaseRQMeta.for_job(rq_job) + representation = { + "type": parsed_rq_id.type, + "target": parsed_rq_id.target, + "project_id": base_rq_job_meta.project_id, + "task_id": base_rq_job_meta.task_id, + "job_id": base_rq_job_meta.job_id, + } + if parsed_rq_id.action == RequestAction.AUTOANNOTATE: + representation["function_id"] = LambdaRQMeta.for_job(rq_job).function_id + elif parsed_rq_id.action in ( + RequestAction.IMPORT, + RequestAction.EXPORT, + ) and parsed_rq_id.subresource in ( + RequestSubresource.ANNOTATIONS, + RequestSubresource.DATASET, + ): + representation["format"] = parsed_rq_id.format + + return representation + + +class RequestSerializer(serializers.Serializer): + # SerializerMethodField is not used here to mark "status" field as required and fix schema generation. + # Marking them as read_only leads to generating type as allOf with one reference to RequestStatus component. + # The client generated using openapi-generator from such a schema contains wrong type like: + # status (bool, date, datetime, dict, float, int, list, str, none_type): [optional] + status = serializers.ChoiceField(source="get_status", choices=models.RequestStatus.choices) + message = serializers.SerializerMethodField() + id = serializers.CharField() + operation = RequestDataOperationSerializer(source="*") + progress = serializers.SerializerMethodField() + created_date = serializers.DateTimeField(source="created_at") + started_date = serializers.DateTimeField( + required=False, + allow_null=True, + source="started_at", + ) + finished_date = serializers.DateTimeField( + required=False, + allow_null=True, + source="ended_at", + ) + expiry_date = serializers.SerializerMethodField() + owner = serializers.SerializerMethodField() + result_url = serializers.URLField(required=False, allow_null=True) + result_id = serializers.IntegerField(required=False, allow_null=True) + + def __init__(self, *args, **kwargs): + self._base_rq_job_meta: BaseRQMeta | None = None + super().__init__(*args, **kwargs) + + @extend_schema_field(UserIdentifiersSerializer()) + def get_owner(self, rq_job: RQJob) -> dict[str, Any]: + assert self._base_rq_job_meta + return UserIdentifiersSerializer(self._base_rq_job_meta.user).data + + @extend_schema_field( + serializers.FloatField(min_value=0, max_value=1, required=False, allow_null=True) + ) + def get_progress(self, rq_job: RQJob) -> Decimal: + rq_job_meta = ImportRQMeta.for_job(rq_job) + # progress of task creation is stored in "task_progress" field + # progress of project import is stored in "progress" field + return Decimal(rq_job_meta.progress or rq_job_meta.task_progress or 0.0) + + @extend_schema_field(serializers.DateTimeField(required=False, allow_null=True)) + def get_expiry_date(self, rq_job: RQJob) -> str | None: + delta = None + if rq_job.is_finished: + delta = rq_job.result_ttl or rq_defaults.DEFAULT_RESULT_TTL + elif rq_job.is_failed: + delta = rq_job.failure_ttl or rq_defaults.DEFAULT_FAILURE_TTL + + if rq_job.ended_at and delta: + expiry_date = rq_job.ended_at + timedelta(seconds=delta) + return expiry_date.replace(tzinfo=timezone.utc) + + return None + + @extend_schema_field(serializers.CharField(allow_blank=True)) + def get_message(self, rq_job: RQJob) -> str: + # TODO: from cvat.apps.engine.utils import parse_exception_message + + assert self._base_rq_job_meta + rq_job_status = rq_job.get_status() + message = "" + + if RQJobStatus.STARTED == rq_job_status: + message = self._base_rq_job_meta.status or message + elif RQJobStatus.FAILED == rq_job_status: + + message = self._base_rq_job_meta.formatted_exception or parse_exception_message( + str(rq_job.exc_info or "Unknown error") + ) + + return message + + def to_representation(self, rq_job: RQJob) -> dict[str, Any]: + self._base_rq_job_meta = BaseRQMeta.for_job(rq_job) + representation = super().to_representation(rq_job) + + # FUTURE-TODO: support such statuses on UI + if representation["status"] in (RQJobStatus.DEFERRED, RQJobStatus.SCHEDULED): + representation["status"] = RQJobStatus.QUEUED + + if representation["status"] == RQJobStatus.FINISHED: + + # TODO: move into a custom Job class + if rq_job.parsed_rq_id.action == models.RequestAction.EXPORT: + representation["result_url"] = ExportRQMeta.for_job(rq_job).result_url + + if ( + rq_job.parsed_rq_id.action == models.RequestAction.IMPORT + and rq_job.parsed_rq_id.subresource == models.RequestSubresource.BACKUP + or rq_job.parsed_rq_id.queue == settings.CVAT_QUEUES.QUALITY_REPORTS + ): + representation["result_id"] = rq_job.return_value() + + return representation diff --git a/cvat/apps/redis_handler/views.py b/cvat/apps/redis_handler/views.py index 83f45b9e05d4..7644ea612dba 100644 --- a/cvat/apps/redis_handler/views.py +++ b/cvat/apps/redis_handler/views.py @@ -6,6 +6,7 @@ from django.conf import settings from django.http import HttpResponseBadRequest, HttpResponseNotFound from django.utils.decorators import method_decorator +from django.utils.module_loading import import_string from django.views.decorators.cache import never_cache from django_rq.queues import DjangoRQ from drf_spectacular.utils import OpenApiResponse, extend_schema, extend_schema_view @@ -23,12 +24,15 @@ NonModelSimpleFilter, ) from cvat.apps.engine.log import ServerLogManager -from cvat.apps.engine.models import RequestAction, RequestStatus, RequestTarget +from cvat.apps.engine.models import ( # todo: move to the app + RequestAction, + RequestStatus, + RequestTarget, +) from cvat.apps.engine.rq import is_rq_job_owner -from cvat.apps.engine.serializers import RequestSerializer from cvat.apps.engine.types import ExtendedRequest -from cvat.apps.redis_handler.rq import RQId -from django.utils.module_loading import import_string +from cvat.apps.redis_handler.rq import RequestId +from cvat.apps.redis_handler.serializers import RequestSerializer slogger = ServerLogManager(__name__) @@ -124,8 +128,8 @@ def queues(self) -> Iterable[DjangoRQ]: return (django_rq.get_queue(queue_name) for queue_name in self.SUPPORTED_QUEUES) @classmethod - def get_parsed_id_class(cls, queue_name: str) -> type[RQId]: - return cls.PARSED_JOB_ID_CLASSES.get(queue_name, RQId) + def get_parsed_id_class(cls, queue_name: str) -> type[RequestId]: + return cls.PARSED_JOB_ID_CLASSES.get(queue_name, RequestId) def _get_rq_jobs_from_queue(self, queue: DjangoRQ, user_id: int) -> list[RQJob]: job_ids = set( @@ -182,7 +186,7 @@ def _get_rq_job_by_id(self, rq_id: str) -> RQJob | None: Optional[RQJob]: The retrieved RQJob, or None if not found. """ try: - parsed_rq_id = RQId.parse(rq_id) + parsed_rq_id = RequestId.parse(rq_id) except Exception: return None @@ -197,7 +201,7 @@ def _get_rq_job_by_id(self, rq_id: str) -> RQJob | None: if job: ParsedIdClass = self.get_parsed_id_class(queue.name) if type(parsed_rq_id) is not ParsedIdClass: - parsed_rq_id = ParsedIdClass.from_base(parsed_rq_id) + parsed_rq_id = parsed_rq_id.convert_to(ParsedIdClass) job.parsed_rq_id = parsed_rq_id diff --git a/cvat/settings/base.py b/cvat/settings/base.py index f9a5d2eb3e69..23cbc7c9a551 100644 --- a/cvat/settings/base.py +++ b/cvat/settings/base.py @@ -297,13 +297,13 @@ class CVAT_QUEUES(Enum): "DEFAULT_TIMEOUT": "4h", # custom fields "VISIBLE_VIA_REQUESTS_API": True, - "PARSED_JOB_ID_CLASS": "cvat.apps.engine.rq.ImportRQId", + "PARSED_JOB_ID_CLASS": "cvat.apps.engine.rq.ImportRequestId", }, CVAT_QUEUES.EXPORT_DATA.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "4h", "VISIBLE_VIA_REQUESTS_API": True, - "PARSED_JOB_ID_CLASS": "cvat.apps.engine.rq.ExportRQId", + "PARSED_JOB_ID_CLASS": "cvat.apps.engine.rq.ExportRequestId", }, CVAT_QUEUES.AUTO_ANNOTATION.value: { **REDIS_INMEM_SETTINGS, diff --git a/tests/python/rest_api/test_consensus.py b/tests/python/rest_api/test_consensus.py index 9f504e559f90..22ad4d45253c 100644 --- a/tests/python/rest_api/test_consensus.py +++ b/tests/python/rest_api/test_consensus.py @@ -389,7 +389,7 @@ def test_user_without_rights_cannot_check_status_of_merge_in_org( @pytest.mark.parametrize( "role", - # owner and maintainer has rights even without being assigned to a task + # owner and maintainer have rights even without being assigned to a task ("supervisor", "worker"), ) def test_task_assignee_can_check_status_of_merge_in_org( diff --git a/tests/python/rest_api/test_quality_control.py b/tests/python/rest_api/test_quality_control.py index 04178fdcb2fd..39d6eecba8ae 100644 --- a/tests/python/rest_api/test_quality_control.py +++ b/tests/python/rest_api/test_quality_control.py @@ -603,7 +603,7 @@ def _test_check_status_of_report_creation( @pytest.mark.parametrize( "role", - # owner and maintainer has rights even without being assigned to a task + # owner and maintainer have rights even without being assigned to a task ("supervisor", "worker"), ) def test_task_assignee_can_check_status_of_report_creation_in_org( From 7b75bdc33c0873ba8a76790c1c15d13bb18c9f2e Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 25 Mar 2025 13:50:36 +0100 Subject: [PATCH 012/103] Update task creation flow && add request validation when exporting something --- cvat/apps/dataset_manager/project.py | 2 +- cvat/apps/engine/background.py | 126 +++++++++++++++----------- cvat/apps/engine/backup.py | 4 +- cvat/apps/engine/mixins.py | 10 +- cvat/apps/engine/task.py | 37 +------- cvat/apps/engine/views.py | 9 +- cvat/apps/redis_handler/background.py | 51 +++++++---- 7 files changed, 122 insertions(+), 117 deletions(-) diff --git a/cvat/apps/dataset_manager/project.py b/cvat/apps/dataset_manager/project.py index 7d12ff96830b..e81f81bcf7e4 100644 --- a/cvat/apps/dataset_manager/project.py +++ b/cvat/apps/dataset_manager/project.py @@ -20,7 +20,7 @@ from cvat.apps.engine.model_utils import bulk_create from cvat.apps.engine.rq import ImportRQMeta from cvat.apps.engine.serializers import DataSerializer, TaskWriteSerializer -from cvat.apps.engine.task import _create_thread as create_task +from cvat.apps.engine.task import create_thread as create_task from cvat.apps.engine.utils import av_scan_paths from .annotation import AnnotationIR diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index be36b4dabfeb..52c3c624faa3 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -39,6 +39,7 @@ from cvat.apps.engine.location import StorageType, get_location_configuration from cvat.apps.engine.log import ServerLogManager from cvat.apps.engine.models import ( + Data, Job, Location, Project, @@ -56,6 +57,7 @@ define_dependent_job, ) from cvat.apps.engine.serializers import UploadedFileSerializer, UploadedZipFileSerializer +from cvat.apps.engine.task import create_thread as create_task from cvat.apps.engine.types import ExtendedRequest from cvat.apps.engine.utils import ( build_annotations_file_name, @@ -68,6 +70,7 @@ AbstractExportableRequestManager, AbstractRequestManager, ) +from cvat.apps.redis_handler.rq import RequestId slogger = ServerLogManager(__name__) @@ -84,7 +87,7 @@ def cancel_and_delete(rq_job: RQJob) -> None: rq_job.delete() -class DatasetExportManager(AbstractExportableRequestManager): +class DatasetExporter(AbstractExportableRequestManager): SUPPORTED_RESOURCES = {RequestTarget.PROJECT, RequestTarget.TASK, RequestTarget.JOB} @dataclass @@ -131,8 +134,8 @@ def build_request_id(self): }, ).render() - def validate_request_id(self, rq_id: str) -> None: - parsed_rq_id = ExportRequestId.parse(rq_id) + def validate_request_id(self, request_id, /) -> None: + parsed_rq_id = ExportRequestId.parse(request_id) if ( parsed_rq_id.action != RequestAction.EXPORT @@ -201,11 +204,11 @@ def where_to_redirect(self) -> str: ) -class BackupExportManager(AbstractExportableRequestManager): +class BackupExporter(AbstractExportableRequestManager): SUPPORTED_RESOURCES = {RequestTarget.PROJECT, RequestTarget.TASK} - def validate_request_id(self, rq_id: str) -> None: - parsed_rq_id = ExportRequestId.parse(rq_id) + def validate_request_id(self, request_id, /) -> None: + parsed_rq_id = ExportRequestId.parse(request_id) if ( parsed_rq_id.action != RequestAction.EXPORT @@ -234,7 +237,6 @@ def init_callback_with_params(self): if self.export_args.location == Location.CLOUD_STORAGE: storage_id = self.export_args.location_config["storage_id"] - # TODO: move into validation? db_storage = get_cloud_storage_for_import_or_export( storage_id=storage_id, request=self.request, @@ -284,14 +286,15 @@ def finalize_request(self): @attrs.define(kw_only=True) -class ResourceImportManager(AbstractRequestManager): +class ResourceImporter(AbstractRequestManager): QUEUE_NAME = settings.CVAT_QUEUES.IMPORT_DATA.value - # SUPPORTED_SUBRESOURCES: ClassVar[set[RequestSubresource]] upload_serializer_class: type[UploadedFileSerializer | UploadedZipFileSerializer] = attrs.field( init=False ) + tmp_dir: Path = attrs.field(init=False) + @property def job_result_ttl(self): return int(settings.IMPORT_CACHE_SUCCESS_TTL.total_seconds()) @@ -303,6 +306,7 @@ def job_failed_ttl(self): @dataclass class ImportArgs: location_config: dict[str, Any] + file_path: str | None @property def location(self) -> Location: @@ -312,6 +316,10 @@ def to_dict(self): return dataclass_asdict(self) def init_request_args(self): + super().init_request_args() + filename = self.request.query_params.get("filename") + file_path = (self.tmp_dir / filename) if filename else None + try: location_config = get_location_configuration( db_instance=self.db_instance, @@ -321,15 +329,29 @@ def init_request_args(self): except ValueError as ex: raise ValidationError(str(ex)) from ex - location = location_config["location"] - - if location not in Location.list(): - raise ValidationError(f"Unexpected location {location} specified for the request") - - self.import_args = ResourceImportManager.ImportArgs( + self.import_args = self.ImportArgs( location_config=location_config, + file_path=file_path, ) + def validate_request(self): + super().validate_request() + + if self.import_args.location not in Location.list(): + raise ValidationError( + f"Unexpected location {self.import_args.location} specified for the request" + ) + + if self.import_args.location == Location.CLOUD_STORAGE: + if not self.import_args.file_path: + raise ValidationError("The filename was not specified") + + if self.import_args.location_config.get("storage_id") is None: + raise ValidationError( + "Cloud storage location was selected as the source," + + " but cloud storage id was not specified" + ) + def _handle_cloud_storage_file_upload(self): storage_id = self.import_args.location_config["storage_id"] db_storage = get_cloud_storage_for_import_or_export( @@ -355,35 +377,31 @@ def _handle_non_tus_file_upload(self): @attrs.define(kw_only=True) -class DatasetImporter(ResourceImportManager): +class DatasetImporter(ResourceImporter): SUPPORTED_RESOURCES = {RequestTarget.PROJECT, RequestTarget.TASK, RequestTarget.JOB} - # SUPPORTED_SUBRESOURCES = {RequestSubresource.DATASET, RequestSubresource.ANNOTATIONS} @dataclass - class ImportArgs(ResourceImportManager.ImportArgs): + class ImportArgs(ResourceImporter.ImportArgs): format: str - file_path: str | None conv_mask_to_poly: bool def __attrs_post_init__(self) -> None: + super().__attrs_post_init__() self.upload_serializer_class = ( UploadedZipFileSerializer if isinstance(self.db_instance, Project) else UploadedFileSerializer ) + self.tmp_dir = Path(self.db_instance.get_tmp_dirname()) def init_request_args(self) -> None: super().init_request_args() format_name = self.request.query_params.get("format", "") conv_mask_to_poly = to_bool(self.request.query_params.get("conv_mask_to_poly", True)) - filename = self.request.query_params.get("filename") - tmp_dir = Path(self.db_instance.get_tmp_dirname()) - self.import_args = self.ImportArgs( **self.import_args.to_dict(), format=format_name, - file_path=str(Path(tmp_dir) / filename) if filename else None, conv_mask_to_poly=conv_mask_to_poly, ) @@ -423,16 +441,6 @@ def validate_request(self): elif not format_desc.ENABLED: raise MethodNotAllowed(self.request.method, detail="Format is disabled") - if self.import_args.location == Location.CLOUD_STORAGE: - if not self.import_args.file_path: - raise ValidationError("The filename was not specified") - - if self.import_args.location_config.get("storage_id") is None: - raise ValidationError( - "Cloud storage location was selected as the source," - + " but cloud storage id was not specified" - ) - def build_request_id(self): return ExportRequestId( queue=self.QUEUE_NAME, @@ -462,44 +470,30 @@ def finalize_request(self): @attrs.define(kw_only=True) -class BackupImporter(ResourceImportManager): +class BackupImporter(ResourceImporter): SUPPORTED_RESOURCES = {RequestTarget.PROJECT, RequestTarget.TASK} - # SUPPORTED_SUBRESOURCES = {RequestSubresource.BACKUP} resource: RequestTarget = attrs.field(validator=attrs.validators.in_(SUPPORTED_RESOURCES)) + upload_serializer_class: type[UploadedZipFileSerializer] = attrs.field( + init=False, default=UploadedZipFileSerializer + ) @dataclass - class ImportArgs(ResourceImportManager.ImportArgs): - file_path: str | None + class ImportArgs(ResourceImporter.ImportArgs): org_id: int | None def __attrs_post_init__(self) -> None: - self.upload_serializer_class = UploadedZipFileSerializer + super().__attrs_post_init__() + self.tmp_dir = Path(TmpDirManager.TMP_ROOT) def init_request_args(self) -> None: super().init_request_args() - filename = self.request.query_params.get("filename") - tmp_dir = Path(TmpDirManager.TMP_ROOT) self.import_args = self.ImportArgs( **self.import_args.to_dict(), - file_path=str(Path(tmp_dir) / filename) if filename else None, org_id=getattr(self.request.iam_context["organization"], "id", None), ) - def validate_request(self): - super().validate_request() - - if self.import_args.location == Location.CLOUD_STORAGE: - if not self.import_args.file_path: - raise ValidationError("The filename was not specified") - - if self.import_args.location_config.get("storage_id") is None: - raise ValidationError( - "Cloud storage location was selected as the source," - + " but cloud storage id was not specified" - ) - def build_request_id(self): return ImportRequestId( queue=self.QUEUE_NAME, @@ -528,3 +522,27 @@ def init_callback_with_params(self): # FUTURE-TODO: send logs to event store def finalize_request(self): pass + + +@attrs.define(kw_only=True) +class TaskCreator(AbstractRequestManager): + QUEUE_NAME = settings.CVAT_QUEUES.IMPORT_DATA.value + SUPPORTED_RESOURCES = {RequestTarget.TASK} + + db_data: Data = attrs.field() + + @property + def job_failure_ttl(self): + return int(settings.IMPORT_CACHE_FAILED_TTL.total_seconds()) + + def build_request_id(self): + return RequestId( + queue=self.QUEUE_NAME, + action=RequestAction.CREATE, + target=RequestTarget.TASK, + id=self.db_instance.pk, + ).render() + + def init_callback_with_params(self): + self.callback = create_task + self.callback_args = (self.db_instance.pk, self.db_data) diff --git a/cvat/apps/engine/backup.py b/cvat/apps/engine/backup.py index 9f2de942d0db..949918ba9678 100644 --- a/cvat/apps/engine/backup.py +++ b/cvat/apps/engine/backup.py @@ -66,7 +66,7 @@ TaskReadSerializer, ValidationParamsSerializer, ) -from cvat.apps.engine.task import JobFileMapping, _create_thread +from cvat.apps.engine.task import JobFileMapping, create_thread as create_task from cvat.apps.engine.utils import av_scan_paths slogger = ServerLogManager(__name__) @@ -892,7 +892,7 @@ def _import_task(self): if validation_params: data['validation_params'] = validation_params - _create_thread(self._db_task.pk, data.copy(), is_backup_restore=True) + create_task(self._db_task.pk, data.copy(), is_backup_restore=True) self._db_task.refresh_from_db() db_data.refresh_from_db() diff --git a/cvat/apps/engine/mixins.py b/cvat/apps/engine/mixins.py index 6455a3a799b0..94f2b0bd99ed 100644 --- a/cvat/apps/engine/mixins.py +++ b/cvat/apps/engine/mixins.py @@ -23,7 +23,7 @@ from rest_framework.decorators import action from rest_framework.response import Response -from cvat.apps.engine.background import BackupExportManager, DatasetExportManager +from cvat.apps.engine.background import BackupExporter, DatasetExporter from cvat.apps.engine.handlers import clear_import_cache from cvat.apps.engine.log import ServerLogManager from cvat.apps.engine.models import ( @@ -453,7 +453,7 @@ class DatasetMixin: def initiate_dataset_export(self, request: ExtendedRequest, pk: int): self._object = self.get_object() # force call of check_object_permissions() - export_manager = DatasetExportManager(request=request, db_instance=self._object) + export_manager = DatasetExporter(request=request, db_instance=self._object) return export_manager.process() @extend_schema(summary='Download a prepared dataset file', @@ -469,7 +469,7 @@ def initiate_dataset_export(self, request: ExtendedRequest, pk: int): @action(methods=['GET'], detail=True, url_path='dataset/download') def download_dataset(self, request: ExtendedRequest, pk: int): obj = self.get_object() # force to call check_object_permissions - export_manager = DatasetExportManager(request=request, db_instance=obj) + export_manager = DatasetExporter(request=request, db_instance=obj) return export_manager.download_file() @@ -500,7 +500,7 @@ class BackupMixin: @action(detail=True, methods=['POST'], serializer_class=None, url_path='backup/export') def initiate_backup_export(self, request: ExtendedRequest, pk: int): db_object = self.get_object() # force to call check_object_permissions - export_manager = BackupExportManager(request=request, db_instance=db_object) + export_manager = BackupExporter(request=request, db_instance=db_object) return export_manager.process() @@ -517,5 +517,5 @@ def initiate_backup_export(self, request: ExtendedRequest, pk: int): @action(methods=['GET'], detail=True, url_path='backup/download') def download_backup(self, request: ExtendedRequest, pk: int): obj = self.get_object() # force to call check_object_permissions - export_manager = BackupExportManager(request=request, db_instance=obj) + export_manager = BackupExporter(request=request, db_instance=obj) return export_manager.download_file() diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index dab13088bf1f..020ef1229a51 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -20,7 +20,6 @@ import attrs import av -import django_rq import rq from django.conf import settings from django.db import transaction @@ -46,11 +45,9 @@ sort, ) from cvat.apps.engine.model_utils import bulk_create -from cvat.apps.engine.models import RequestAction, RequestTarget -from cvat.apps.engine.rq import ImportRQMeta, RQId, define_dependent_job +from cvat.apps.engine.rq import ImportRQMeta from cvat.apps.engine.task_validation import HoneypotFrameSelector -from cvat.apps.engine.types import ExtendedRequest -from cvat.apps.engine.utils import av_scan_paths, format_list, get_rq_lock_by_user, take_by +from cvat.apps.engine.utils import av_scan_paths, format_list, take_by from cvat.utils.http import PROXIES_FOR_UNTRUSTED_URLS, make_requests_session from utils.dataset_manifest import ImageManifestManager, VideoManifestManager, is_manifest from utils.dataset_manifest.core import VideoManifestValidator, is_dataset_manifest @@ -60,34 +57,6 @@ slogger = ServerLogManager(__name__) -############################# Low Level server API - -def create( - db_task: models.Task, - data: models.Data, - request: ExtendedRequest, -) -> str: - """Schedule a background job to create a task and return that job's identifier""" - q = django_rq.get_queue(settings.CVAT_QUEUES.IMPORT_DATA.value) - user_id = request.user.id - rq_id = RQId( - queue=settings.CVAT_QUEUES.IMPORT_DATA.value, - action=RequestAction.CREATE, - target=RequestTarget.TASK, - id=db_task.pk - ).render() - - with get_rq_lock_by_user(q, user_id): - q.enqueue_call( - func=_create_thread, - args=(db_task.pk, data), - job_id=rq_id, - meta=ImportRQMeta.build_for(request=request, db_obj=db_task), - depends_on=define_dependent_job(q, user_id), - failure_ttl=settings.IMPORT_CACHE_FAILED_TTL.total_seconds(), - ) - - return rq_id ############################# Internal implementation for server API @@ -579,7 +548,7 @@ def _create_task_manifest_from_cloud_data( manifest.create() @transaction.atomic -def _create_thread( +def create_thread( db_task: Union[int, models.Task], data: dict[str, Any], *, diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 186e2f68bec3..92c412f0a1f4 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -53,7 +53,7 @@ import cvat.apps.dataset_manager.views # pylint: disable=unused-import from cvat.apps.dataset_manager.serializers import DatasetFormatsSerializer from cvat.apps.engine import backup -from cvat.apps.engine.background import BackupImporter, DatasetImporter +from cvat.apps.engine.background import BackupImporter, DatasetImporter, TaskCreator from cvat.apps.engine.backup import import_project, import_task from cvat.apps.engine.cache import CvatChunkTimestampMismatchError, LockError, MediaCache from cvat.apps.engine.cloud_provider import db_storage_to_storage_instance @@ -154,7 +154,7 @@ from cvat.apps.redis_handler.serializers import RequestIdSerializer from utils.dataset_manifest import ImageManifestManager -from . import models, task +from . import models from .log import ServerLogManager slogger = ServerLogManager(__name__) @@ -1033,9 +1033,8 @@ def _handle_upload_data(request: ExtendedRequest): data['stop_frame'] = None # Need to process task data when the transaction is committed - rq_id = task.create(self._object, data, request) - rq_id_serializer = RequestIdSerializer({'rq_id': rq_id}) - return Response(rq_id_serializer.data, status=status.HTTP_202_ACCEPTED) + creator = TaskCreator(request=request, db_instance=self._object, db_data=data) + return creator.process() @transaction.atomic def _handle_upload_backup(request: ExtendedRequest): diff --git a/cvat/apps/redis_handler/background.py b/cvat/apps/redis_handler/background.py index b996a2d170b5..f43b46fafd9e 100644 --- a/cvat/apps/redis_handler/background.py +++ b/cvat/apps/redis_handler/background.py @@ -67,7 +67,6 @@ class AbstractRequestManager(metaclass=ABCMeta): resource: RequestTarget | None = attrs.field( init=False, default=None, - # validator=attrs.validators.in_({NoneType} | SUPPORTED_RESOURCES), on_setattr=attrs.setters.validate, ) @@ -87,28 +86,34 @@ def get_queue(cls) -> DjangoRQ: return django_rq.get_queue(cls.QUEUE_NAME) @property - @abstractmethod - def job_result_ttl(self) -> int: ... + def job_result_ttl(self) -> int | None: + """ + Time to live for successful job result in seconds, + if not set, the default result TTL will be used + """ + return None @property - @abstractmethod - def job_failed_ttl(self) -> int: ... + def job_failed_ttl(self) -> int | None: + """ + Time to live for failures in seconds, + if not set, the default failure TTL will be used + """ + return None @abstractmethod def build_request_id(self): ... - @classmethod - def validate_request_id(rq_id: str, /) -> None: ... + def validate_request_id(self, request_id: str, /) -> None: ... - @classmethod - def get_job_by_id(cls, id_: str, /, *, validate: bool = True) -> RQJob | None: + def get_job_by_id(self, id_: str, /, *, validate: bool = True) -> RQJob | None: if validate: try: - cls.validate_request_id(id_) + self.validate_request_id(id_) except Exception: return None - queue = cls.get_queue() + queue = self.get_queue() return queue.fetch_job(id_) def init_request_args(self): @@ -236,6 +241,7 @@ def get_file_timestamp(self) -> str: return datetime.strftime(date, "%Y_%m_%d_%H_%M_%S") def init_request_args(self) -> None: + super().init_request_args() try: location_config = get_location_configuration( db_instance=self.db_instance, @@ -245,15 +251,28 @@ def init_request_args(self) -> None: except ValueError as ex: raise ValidationError(str(ex)) from ex - location = location_config["location"] - - if location not in Location.list(): - raise ValidationError(f"Unexpected location {location} specified for the request") - self.export_args = AbstractExportableRequestManager.ExportArgs( location_config=location_config, filename=self.request.query_params.get("filename") ) + def validate_request(self): + super().validate_request() + + if self.export_args.location not in Location.list(): + raise ValidationError( + f"Unexpected location {self.export_args.location} specified for the request" + ) + + if self.export_args.location == Location.CLOUD_STORAGE: + if not self.export_args.filename: + raise ValidationError("The filename was not specified") + + if self.export_args.location_config.get("storage_id") is None: + raise ValidationError( + "Cloud storage location was selected as the source," + + " but cloud storage id was not specified" + ) + def build_meta(self, *, request_id: str): return ExportRQMeta.build_for( request=self.request, From 8111cabb628f35c1facc2674fd1b6942527e0e3f Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 25 Mar 2025 13:51:25 +0100 Subject: [PATCH 013/103] Update events API --- cvat/apps/engine/rq.py | 9 +- cvat/apps/events/export.py | 32 ++++-- cvat/apps/events/permissions.py | 31 +++++- cvat/apps/events/rules/events.rego | 11 +- cvat/apps/events/views.py | 169 ++++++++++++++++++----------- 5 files changed, 165 insertions(+), 87 deletions(-) diff --git a/cvat/apps/engine/rq.py b/cvat/apps/engine/rq.py index 9c3cefee8572..0ed0cd2bdf54 100644 --- a/cvat/apps/engine/rq.py +++ b/cvat/apps/engine/rq.py @@ -340,17 +340,18 @@ def user_id(self) -> int: return int(self.extra["user_id"]) @cached_property - def subresource(self) -> RequestSubresource: - return RequestSubresource(self.extra["subresource"]) + def subresource(self) -> RequestSubresource | None: + if subresource := self.extra.get("subresource"): + return RequestSubresource(subresource) + return None @cached_property def format(self) -> str | None: - # TODO: quote/unquote return self.extra.get("format") @property def type(self) -> str: - return self.TYPE_SEP.join([self.action, self.subresource]) + return self.TYPE_SEP.join([self.action, self.subresource or self.target]) class ImportRequestId(RequestId): diff --git a/cvat/apps/events/export.py b/cvat/apps/events/export.py index b436b02c4e61..c6a9e6574b19 100644 --- a/cvat/apps/events/export.py +++ b/cvat/apps/events/export.py @@ -26,6 +26,7 @@ from cvat.apps.engine.rq import ExportRQMeta, RQMetaWithFailureInfo, define_dependent_job from cvat.apps.engine.types import ExtendedRequest from cvat.apps.engine.utils import get_rq_lock_by_user, sendfile +from cvat.apps.events.permissions import EventsPermission from cvat.apps.redis_handler.background import AbstractExportableRequestManager from cvat.apps.redis_handler.rq import RequestId @@ -95,20 +96,31 @@ def user_id(self) -> int: @attrs.define(kw_only=True) -class EventsRqJobManager(AbstractExportableRequestManager): +class EventsExporter(AbstractExportableRequestManager): - filter_query: dict = attrs.field() - query_id: uuid.UUID = attrs.field(factory=uuid.uuid4) # temporary arg + filter_query: dict = attrs.field(init=False) + query_id: uuid.UUID = attrs.field(init=False) # temporary arg + + def __attrs_post_init__(self): + super().__attrs_post_init__() + self.query_id = self.request.query_params.get("query_id") or uuid.uuid4() def build_request_id(self): return EventsRequestId( queue=self.QUEUE_NAME, action="export", target="events", - # id=uuid.uuid4(), id=self.query_id, + extra={ + "user_id": self.user_id, + } ).render() + def init_request_args(self): + super().init_request_args() + perm = EventsPermission.create_scope_list(self.request) + self.filter_query = perm.filter(self.request.query_params) + def define_query_params(self) -> dict: query_params = { "org_id": self.filter_query.get("org_id", None), @@ -166,17 +178,13 @@ def get_result_filename(self): # FUTURE-TODO: delete deprecated function -def export(request: ExtendedRequest, filter_query: dict): - action = request.query_params.get("action", None) +def export(request: ExtendedRequest): + action = request.query_params.get("action") if action not in (None, "download"): raise serializers.ValidationError("Unexpected action specified for the request") - filename = request.query_params.get("filename", None) - - query_id = request.query_params.get("query_id") - manager = EventsRqJobManager( - request=request, filter_query=filter_query, **({"query_id": query_id} if query_id else {}) - ) + filename = request.query_params.get("filename") + manager = EventsExporter(request=request) request_id = manager.build_request_id() queue = manager.get_queue() diff --git a/cvat/apps/events/permissions.py b/cvat/apps/events/permissions.py index c5fa706e7f56..e23f35224470 100644 --- a/cvat/apps/events/permissions.py +++ b/cvat/apps/events/permissions.py @@ -3,24 +3,34 @@ # # SPDX-License-Identifier: MIT +from typing import Any + from django.conf import settings from rest_framework.exceptions import PermissionDenied +from cvat.apps.engine.permissions import DownloadExportedExtension +from cvat.apps.engine.types import ExtendedRequest from cvat.apps.iam.permissions import OpenPolicyAgentPermission, StrEnum from cvat.utils.http import make_requests_session -class EventsPermission(OpenPolicyAgentPermission): +class EventsPermission(OpenPolicyAgentPermission, DownloadExportedExtension): class Scopes(StrEnum): SEND_EVENTS = "send:events" DUMP_EVENTS = "dump:events" @classmethod - def create(cls, request, view, obj, iam_context): + def create( + cls, request: ExtendedRequest, view, obj: None, iam_context: dict[str, Any] + ) -> list[OpenPolicyAgentPermission]: permissions = [] if view.basename == "events": for scope in cls.get_scopes(request, view, obj): - self = cls.create_base_perm(request, view, scope, iam_context, obj) + scope_params = {} + if DownloadExportedExtension.Scopes.DOWNLOAD_EXPORTED_FILE == scope: + cls.extend_params_with_rq_job_details(request=request, params=scope_params) + + self = cls.create_base_perm(request, view, scope, iam_context, obj, **scope_params) permissions.append(self) return permissions @@ -29,7 +39,7 @@ def __init__(self, **kwargs): super().__init__(**kwargs) self.url = settings.IAM_OPA_DATA_URL + "/events/allow" - def filter(self, query_params): + def filter(self, query_params: dict[str, Any]): url = self.url.replace("/allow", "/filter") with make_requests_session() as session: @@ -47,14 +57,23 @@ def filter(self, query_params): return filter_params @staticmethod - def get_scopes(request, view, obj): + def get_scopes(request: ExtendedRequest, view, obj: None): Scopes = __class__.Scopes return [ { ("create", "POST"): Scopes.SEND_EVENTS, + ("initiate_export", "POST"): Scopes.DUMP_EVENTS, + ("download_file", "GET"): DownloadExportedExtension.Scopes.DOWNLOAD_EXPORTED_FILE, + # deprecated permissions: ("list", "GET"): Scopes.DUMP_EVENTS, }[(view.action, request.method)] ] def get_resource(self): - return None + data = None + + if DownloadExportedExtension.Scopes.DOWNLOAD_EXPORTED_FILE == self.scope: + data = {} + self.extend_resource_with_rq_job_details(data) + + return data diff --git a/cvat/apps/events/rules/events.rego b/cvat/apps/events/rules/events.rego index 58ec43763b2f..dcef46c7c251 100644 --- a/cvat/apps/events/rules/events.rego +++ b/cvat/apps/events/rules/events.rego @@ -6,7 +6,7 @@ import data.utils import data.organizations # input: { -# "scope": <"send:events","dump:events"> or null, +# "scope": <"send:events","dump:events","download:exported_file"> or null, # "auth": { # "user": { # "id": , @@ -22,6 +22,9 @@ import data.organizations # } # } or null, # } +# "resource": { +# "rq_job": { "owner": { "id": } } or null, +# } or null, # } default allow := false @@ -46,6 +49,12 @@ allow if { organizations.has_perm(organizations.WORKER) } +allow if { + input.scope == utils.DOWNLOAD_EXPORTED_FILE + input.auth.user.id == input.resource.rq_job.owner.id +} + + filter := [] if { utils.is_admin utils.is_sandbox diff --git a/cvat/apps/events/views.py b/cvat/apps/events/views.py index ead94b12a68f..06bc61e57068 100644 --- a/cvat/apps/events/views.py +++ b/cvat/apps/events/views.py @@ -6,17 +6,79 @@ from drf_spectacular.types import OpenApiTypes from drf_spectacular.utils import OpenApiParameter, OpenApiResponse, extend_schema from rest_framework import status, viewsets +from rest_framework.decorators import action from rest_framework.renderers import JSONRenderer from rest_framework.response import Response from cvat.apps.engine.log import vlogger -from cvat.apps.events.permissions import EventsPermission +from cvat.apps.engine.types import ExtendedRequest +from cvat.apps.events.export import EventsExporter + from cvat.apps.events.serializers import ClientEventsSerializer from cvat.apps.iam.filters import ORGANIZATION_OPEN_API_PARAMETERS +from cvat.apps.redis_handler.serializers import RequestIdSerializer from .export import export from .handlers import handle_client_events_push +api_filter_parameters = ( + OpenApiParameter( + "org_id", + location=OpenApiParameter.QUERY, + type=OpenApiTypes.INT, + required=False, + description="Filter events by organization ID", + ), + OpenApiParameter( + "project_id", + location=OpenApiParameter.QUERY, + type=OpenApiTypes.INT, + required=False, + description="Filter events by project ID", + ), + OpenApiParameter( + "task_id", + location=OpenApiParameter.QUERY, + type=OpenApiTypes.INT, + required=False, + description="Filter events by task ID", + ), + OpenApiParameter( + "job_id", + location=OpenApiParameter.QUERY, + type=OpenApiTypes.INT, + required=False, + description="Filter events by job ID", + ), + OpenApiParameter( + "user_id", + location=OpenApiParameter.QUERY, + type=OpenApiTypes.INT, + required=False, + description="Filter events by user ID", + ), + OpenApiParameter( + "from", + location=OpenApiParameter.QUERY, + type=OpenApiTypes.DATETIME, + required=False, + description="Filter events after the datetime. If no 'from' or 'to' parameters are passed, the last 30 days will be set.", + ), + OpenApiParameter( + "to", + location=OpenApiParameter.QUERY, + type=OpenApiTypes.DATETIME, + required=False, + description="Filter events before the datetime. If no 'from' or 'to' parameters are passed, the last 30 days will be set.", + ), + OpenApiParameter( + "filename", + description="Desired output file name", + location=OpenApiParameter.QUERY, + type=OpenApiTypes.STR, + required=False, + ) +) class EventsViewSet(viewsets.ViewSet): serializer_class = None @@ -51,62 +113,7 @@ def create(self, request): methods=["GET"], description="The log is returned in the CSV format.", parameters=[ - OpenApiParameter( - "org_id", - location=OpenApiParameter.QUERY, - type=OpenApiTypes.INT, - required=False, - description="Filter events by organization ID", - ), - OpenApiParameter( - "project_id", - location=OpenApiParameter.QUERY, - type=OpenApiTypes.INT, - required=False, - description="Filter events by project ID", - ), - OpenApiParameter( - "task_id", - location=OpenApiParameter.QUERY, - type=OpenApiTypes.INT, - required=False, - description="Filter events by task ID", - ), - OpenApiParameter( - "job_id", - location=OpenApiParameter.QUERY, - type=OpenApiTypes.INT, - required=False, - description="Filter events by job ID", - ), - OpenApiParameter( - "user_id", - location=OpenApiParameter.QUERY, - type=OpenApiTypes.INT, - required=False, - description="Filter events by user ID", - ), - OpenApiParameter( - "from", - location=OpenApiParameter.QUERY, - type=OpenApiTypes.DATETIME, - required=False, - description="Filter events after the datetime. If no 'from' or 'to' parameters are passed, the last 30 days will be set.", - ), - OpenApiParameter( - "to", - location=OpenApiParameter.QUERY, - type=OpenApiTypes.DATETIME, - required=False, - description="Filter events before the datetime. If no 'from' or 'to' parameters are passed, the last 30 days will be set.", - ), - OpenApiParameter( - "filename", - description="Desired output file name", - location=OpenApiParameter.QUERY, - type=OpenApiTypes.STR, - required=False, - ), + *api_filter_parameters, OpenApiParameter( "action", location=OpenApiParameter.QUERY, @@ -128,12 +135,46 @@ def create(self, request): "201": OpenApiResponse(description="CSV log file is ready for downloading"), "202": OpenApiResponse(description="Creating a CSV log file has been started"), }, + deprecated=True, ) def list(self, request): - perm = EventsPermission.create_scope_list(request) - filter_query = perm.filter(request.query_params) - return export( - request=request, - filter_query=filter_query, - queue_name=settings.CVAT_QUEUES.EXPORT_DATA.value, - ) + self.check_permissions(request) + return export(request=request) + + @extend_schema( + summary="Initiate a process to export events", + request=None, + parameters=[*api_filter_parameters], + responses={ + "202": OpenApiResponse(RequestIdSerializer), + }, + ) + @action(detail=False, methods=["POST"], url_path="file/export") + def initiate_export(self, request: ExtendedRequest): + self.check_permissions(request) + exporter = EventsExporter(request=request) + return exporter.process() + + @extend_schema( + summary="Download a prepared file with events", + request=None, + parameters=[ + OpenApiParameter( + "rq_id", + description="Request ID", + location=OpenApiParameter.QUERY, + type=OpenApiTypes.STR, + required=True, + ), + ], + responses={ + "200": OpenApiResponse(description="Download of file started"), + }, + exclude=True, # private API endpoint that should be used only as result_url + ) + @action(detail=False, methods=["GET"], url_path="file/download") + def download_file(self, request: ExtendedRequest): + self.check_permissions(request) + exporter = EventsExporter(request=request) + return exporter.download_file() + From 0adada65bb58007c1d2d9d85c3eef3b7a892a205 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 25 Mar 2025 16:06:38 +0100 Subject: [PATCH 014/103] [events] Update query params supported by API endpoint --- cvat/apps/events/export.py | 10 ++++------ cvat/apps/events/views.py | 39 ++++++++++++++++++++++++++++++++------ 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/cvat/apps/events/export.py b/cvat/apps/events/export.py index c6a9e6574b19..a892602bde2f 100644 --- a/cvat/apps/events/export.py +++ b/cvat/apps/events/export.py @@ -5,9 +5,7 @@ import csv import os import uuid -from dataclasses import dataclass from datetime import datetime, timedelta, timezone -from functools import cached_property from pathlib import Path import attrs @@ -23,9 +21,9 @@ from cvat.apps.dataset_manager.util import TmpDirManager from cvat.apps.dataset_manager.views import log_exception from cvat.apps.engine.log import ServerLogManager -from cvat.apps.engine.rq import ExportRQMeta, RQMetaWithFailureInfo, define_dependent_job +from cvat.apps.engine.rq import RQMetaWithFailureInfo from cvat.apps.engine.types import ExtendedRequest -from cvat.apps.engine.utils import get_rq_lock_by_user, sendfile +from cvat.apps.engine.utils import sendfile from cvat.apps.events.permissions import EventsPermission from cvat.apps.redis_handler.background import AbstractExportableRequestManager from cvat.apps.redis_handler.rq import RequestId @@ -113,7 +111,7 @@ def build_request_id(self): id=self.query_id, extra={ "user_id": self.user_id, - } + }, ).render() def init_request_args(self): @@ -177,7 +175,7 @@ def get_result_filename(self): return f"logs_{timestamp}.csv" -# FUTURE-TODO: delete deprecated function +# FUTURE-TODO: delete deprecated function after several releases def export(request: ExtendedRequest): action = request.query_params.get("action") if action not in (None, "download"): diff --git a/cvat/apps/events/views.py b/cvat/apps/events/views.py index 06bc61e57068..b90c50d4a1d5 100644 --- a/cvat/apps/events/views.py +++ b/cvat/apps/events/views.py @@ -2,18 +2,18 @@ # # SPDX-License-Identifier: MIT -from django.conf import settings from drf_spectacular.types import OpenApiTypes from drf_spectacular.utils import OpenApiParameter, OpenApiResponse, extend_schema from rest_framework import status, viewsets from rest_framework.decorators import action +from rest_framework.exceptions import ValidationError from rest_framework.renderers import JSONRenderer from rest_framework.response import Response +from cvat.apps.engine.location import Location from cvat.apps.engine.log import vlogger from cvat.apps.engine.types import ExtendedRequest from cvat.apps.events.export import EventsExporter - from cvat.apps.events.serializers import ClientEventsSerializer from cvat.apps.iam.filters import ORGANIZATION_OPEN_API_PARAMETERS from cvat.apps.redis_handler.serializers import RequestIdSerializer @@ -77,9 +77,10 @@ location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, - ) + ), ) + class EventsViewSet(viewsets.ViewSet): serializer_class = None @@ -108,6 +109,7 @@ def create(self, request): return Response(serializer.validated_data, status=status.HTTP_201_CREATED) + # FUTURE-TODO: remove deprecated API endpoint after several releases @extend_schema( summary="Get an event log", methods=["GET"], @@ -137,14 +139,40 @@ def create(self, request): }, deprecated=True, ) - def list(self, request): + def list(self, request: ExtendedRequest): self.check_permissions(request) + + if ( + request.query_params.get("cloud_storage_id") + or request.query_params.get("location") == Location.CLOUD_STORAGE + ): + raise ValidationError( + "This endpoint does not support exporting events to cloud storage" + ) + return export(request=request) @extend_schema( summary="Initiate a process to export events", request=None, - parameters=[*api_filter_parameters], + parameters=[ + *api_filter_parameters, + OpenApiParameter( + "location", + description="Where need to save events file", + location=OpenApiParameter.QUERY, + type=OpenApiTypes.STR, + required=False, + enum=Location.list(), + ), + OpenApiParameter( + "cloud_storage_id", + description="Storage id", + location=OpenApiParameter.QUERY, + type=OpenApiTypes.INT, + required=False, + ), + ], responses={ "202": OpenApiResponse(RequestIdSerializer), }, @@ -177,4 +205,3 @@ def download_file(self, request: ExtendedRequest): self.check_permissions(request) exporter = EventsExporter(request=request) return exporter.download_file() - From b5b313e77584b07135561f58b017168d81345b4a Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 25 Mar 2025 17:34:46 +0100 Subject: [PATCH 015/103] Small code cleanup --- cvat/apps/consensus/merging_manager.py | 4 --- cvat/apps/consensus/views.py | 8 ++--- cvat/apps/engine/background.py | 32 ++---------------- cvat/apps/engine/backup.py | 3 +- cvat/apps/engine/views.py | 19 ++--------- cvat/apps/events/export.py | 7 ++-- cvat/apps/quality_control/quality_reports.py | 4 --- cvat/apps/redis_handler/background.py | 35 +++++++++++++++----- cvat/apps/redis_handler/views.py | 17 +++++----- 9 files changed, 50 insertions(+), 79 deletions(-) diff --git a/cvat/apps/consensus/merging_manager.py b/cvat/apps/consensus/merging_manager.py index 4bb3371a4a79..b115cb983839 100644 --- a/cvat/apps/consensus/merging_manager.py +++ b/cvat/apps/consensus/merging_manager.py @@ -170,10 +170,6 @@ class MergingManager(AbstractRequestManager): def job_result_ttl(self): return 300 - @property - def job_failed_ttl(self): - return self.job_result_ttl - def build_request_id(self) -> str: return RequestId( queue=self.QUEUE_NAME, diff --git a/cvat/apps/consensus/views.py b/cvat/apps/consensus/views.py index ffcacf3b0201..6738304f668a 100644 --- a/cvat/apps/consensus/views.py +++ b/cvat/apps/consensus/views.py @@ -42,10 +42,8 @@ class ConsensusMergesViewSet(viewsets.GenericViewSet): description=textwrap.dedent( """\ A consensus merge request has been enqueued, the request id is returned. - The request status can be checked by using common requests API: GET /api/requests/rq_id - """.format( - CREATE_MERGE_RQ_ID_PARAMETER - ) + The request status can be checked by using common requests API: GET /api/requests/ + """ ), ), "400": OpenApiResponse( @@ -59,7 +57,7 @@ def create(self, request: ExtendedRequest, *args, **kwargs): if rq_id: return HttpResponseGone( textwrap.dedent( - f"""\ + """\ This endpoint is no longer handles merge status checking. The common requests API should be used instead: GET /api/requests/rq_id """ diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index 52c3c624faa3..d56b809c6907 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -146,7 +146,7 @@ def validate_request_id(self, request_id, /) -> None: ): raise ValueError("The provided request id does not match exported target or resource") - def init_callback_with_params(self): + def _init_callback_with_params(self): self.callback = get_export_callback( self.db_instance, save_images=self.export_args.save_images ) @@ -163,17 +163,6 @@ def init_callback_with_params(self): "server_url": server_address, } - if self.export_args.location == Location.CLOUD_STORAGE: - storage_id = self.export_args.location_config["storage_id"] - db_storage = get_cloud_storage_for_import_or_export( - storage_id=storage_id, - request=self.request, - is_default=self.export_args.location_config["is_default"], - ) - - self.callback_args = (db_storage, self.callback) + self.callback_args - self.callback = export_resource_to_cloud_storage - def finalize_request(self): handle_dataset_export( self.db_instance, @@ -218,7 +207,7 @@ def validate_request_id(self, request_id, /) -> None: ): raise ValueError("The provided request id does not match exported target or resource") - def init_callback_with_params(self): + def _init_callback_with_params(self): self.callback = create_backup if isinstance(self.db_instance, Task): @@ -235,20 +224,6 @@ def init_callback_with_params(self): self.job_result_ttl, ) - if self.export_args.location == Location.CLOUD_STORAGE: - storage_id = self.export_args.location_config["storage_id"] - db_storage = get_cloud_storage_for_import_or_export( - storage_id=storage_id, - request=self.request, - is_default=self.export_args.location_config["is_default"], - ) - - self.callback_args = ( - db_storage, - self.callback, - ) + self.callback_args - self.callback = export_resource_to_cloud_storage - def get_result_filename(self): filename = self.export_args.filename @@ -316,7 +291,6 @@ def to_dict(self): return dataclass_asdict(self) def init_request_args(self): - super().init_request_args() filename = self.request.query_params.get("filename") file_path = (self.tmp_dir / filename) if filename else None @@ -519,8 +493,8 @@ def init_callback_with_params(self): self.callback_args = (db_storage, key, self.callback) + self.callback_args self.callback = import_resource_from_cloud_storage - # FUTURE-TODO: send logs to event store def finalize_request(self): + # FUTURE-TODO: send logs to event store pass diff --git a/cvat/apps/engine/backup.py b/cvat/apps/engine/backup.py index 949918ba9678..6adc0d7449f3 100644 --- a/cvat/apps/engine/backup.py +++ b/cvat/apps/engine/backup.py @@ -66,7 +66,8 @@ TaskReadSerializer, ValidationParamsSerializer, ) -from cvat.apps.engine.task import JobFileMapping, create_thread as create_task +from cvat.apps.engine.task import JobFileMapping +from cvat.apps.engine.task import create_thread as create_task from cvat.apps.engine.utils import av_scan_paths slogger = ServerLogManager(__name__) diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 92c412f0a1f4..d47ee69381d1 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -174,8 +174,8 @@ def get_410_response_for_export_api(path: str) -> HttpResponseGone: To download the prepared file, use the result_url obtained from the response of the previous request. """)) -def get_410_response_for_import_api(path: str) -> HttpResponseGone: - return HttpResponseGone(textwrap.dedent(f"""\ +def get_410_response_for_import_api() -> HttpResponseGone: + return HttpResponseGone(textwrap.dedent("""\ This endpoint is no longer supported. To check the status of the import process, use GET /api/requests/rq_id, where rq_id is obtained from the response of the previous request. @@ -393,11 +393,6 @@ def perform_create(self, serializer, **kwargs): location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), ], request=UploadedZipFileSerializer(required=False), - # request=PolymorphicProxySerializer('DatasetWrite', - # # TODO: refactor to use required=False when possible - # serializers=[UploadedZipFileSerializer, OpenApiTypes.NONE], - # resource_type_field_name=None - # ), responses={ '202': OpenApiResponse(RequestIdSerializer, description='Importing has been started'), '400': OpenApiResponse(description='Failed to import dataset'), @@ -484,11 +479,6 @@ def export_backup(self, request: ExtendedRequest, pk: int): location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), ], request=UploadedZipFileSerializer(required=False), - # request=PolymorphicProxySerializer('BackupWrite', - # # TODO: refactor to use required=False when possible - # serializers=[UploadedZipFileSerializer, OpenApiTypes.NONE], - # resource_type_field_name=None - # ), responses={ '202': OpenApiResponse(RequestIdSerializer, description='Import of a backup file has started'), }) @@ -1230,11 +1220,6 @@ def append_data_chunk(self, request: ExtendedRequest, pk: int, file_id: str): location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), ], request=UploadedFileSerializer(required=False), - # request=PolymorphicProxySerializer('TaskAnnotationsWrite', - # # TODO: refactor to use required=False when possible - # serializers=[UploadedFileSerializer, OpenApiTypes.NONE], - # resource_type_field_name=None - # ), responses={ '201': OpenApiResponse(description='Uploading has finished'), '202': OpenApiResponse(RequestIdSerializer, description='Uploading has been started'), diff --git a/cvat/apps/events/export.py b/cvat/apps/events/export.py index a892602bde2f..acb7ea0d16eb 100644 --- a/cvat/apps/events/export.py +++ b/cvat/apps/events/export.py @@ -16,7 +16,9 @@ from rest_framework import serializers, status from rest_framework.response import Response from rest_framework.reverse import reverse +from rest_framework.exceptions import MethodNotAllowed from rq import get_current_job +from contextlib import suppress from cvat.apps.dataset_manager.util import TmpDirManager from cvat.apps.dataset_manager.views import log_exception @@ -159,7 +161,7 @@ def define_query_params(self) -> dict: return query_params - def init_callback_with_params(self): + def _init_callback_with_params(self): self.callback = _create_csv query_params = self.define_query_params() self.callback_args = (query_params,) @@ -213,7 +215,8 @@ def export(request: ExtendedRequest): return Response(data=response_data, status=status.HTTP_202_ACCEPTED) manager.init_request_args() - manager.validate_request() + with suppress(MethodNotAllowed): + manager.validate_request() manager.init_callback_with_params() manager.setup_new_job(queue, request_id) diff --git a/cvat/apps/quality_control/quality_reports.py b/cvat/apps/quality_control/quality_reports.py index a3e1afdbcbd4..94e96042ccab 100644 --- a/cvat/apps/quality_control/quality_reports.py +++ b/cvat/apps/quality_control/quality_reports.py @@ -2275,10 +2275,6 @@ class QualityReportRQJobManager(AbstractRequestManager): def job_result_ttl(self): return 120 - @property - def job_failed_ttl(self): - return self.job_result_ttl - def build_request_id(self): return RequestId( queue=self.QUEUE_NAME, diff --git a/cvat/apps/redis_handler/background.py b/cvat/apps/redis_handler/background.py index f43b46fafd9e..523e35deedc8 100644 --- a/cvat/apps/redis_handler/background.py +++ b/cvat/apps/redis_handler/background.py @@ -25,6 +25,8 @@ from rest_framework.serializers import ValidationError from rq.job import Job as RQJob from rq.job import JobStatus as RQJobStatus +from cvat.apps.engine.permissions import get_cloud_storage_for_import_or_export +from cvat.apps.engine.cloud_provider import export_resource_to_cloud_storage from cvat.apps.dataset_manager.util import get_export_cache_lock @@ -117,7 +119,9 @@ def get_job_by_id(self, id_: str, /, *, validate: bool = True) -> RQJob | None: return queue.fetch_job(id_) def init_request_args(self): - pass + """ + Hook to initialize operation args based on the request + """ @abstractmethod def init_callback_with_params(self) -> None: ... @@ -125,12 +129,11 @@ def init_callback_with_params(self) -> None: ... def validate_request(self) -> Response | None: """Hook to run some validations before processing a request""" - # TODO: uncomment - # if self.request.method != "POST": - # raise MethodNotAllowed( - # self.request.method, - # detail="Only POST requests can be used to initiate a background process" - # ) + if self.request.method != "POST": + raise MethodNotAllowed( + self.request.method, + detail="Only POST requests can be used to initiate a background process", + ) def handle_existing_job(self, job: RQJob | None, queue: DjangoRQ) -> Response | None: if not job: @@ -241,7 +244,6 @@ def get_file_timestamp(self) -> str: return datetime.strftime(date, "%Y_%m_%d_%H_%M_%S") def init_request_args(self) -> None: - super().init_request_args() try: location_config = get_location_configuration( db_instance=self.db_instance, @@ -255,6 +257,23 @@ def init_request_args(self) -> None: location_config=location_config, filename=self.request.query_params.get("filename") ) + @abstractmethod + def _init_callback_with_params(self): ... + + def init_callback_with_params(self): + self._init_callback_with_params() + + if self.export_args.location == Location.CLOUD_STORAGE: + storage_id = self.export_args.location_config["storage_id"] + db_storage = get_cloud_storage_for_import_or_export( + storage_id=storage_id, + request=self.request, + is_default=self.export_args.location_config["is_default"], + ) + + self.callback_args = (db_storage, self.callback) + self.callback_args + self.callback = export_resource_to_cloud_storage + def validate_request(self): super().validate_request() diff --git a/cvat/apps/redis_handler/views.py b/cvat/apps/redis_handler/views.py index 7644ea612dba..853630ffb2b7 100644 --- a/cvat/apps/redis_handler/views.py +++ b/cvat/apps/redis_handler/views.py @@ -76,7 +76,6 @@ class RequestViewSet(viewsets.GenericViewSet): ordering_fields = ["created_date", "status", "action"] ordering = "-created_date" - # TODO: fix filters filter_fields = [ # RQ job fields "status", @@ -87,8 +86,8 @@ class RequestViewSet(viewsets.GenericViewSet): # derivatives fields (from parsed rq_id) "action", "target", - # "subresource", - # "format", + "subresource", + "format", ] simple_filters = filter_fields + ["org"] @@ -97,8 +96,8 @@ class RequestViewSet(viewsets.GenericViewSet): "created_date": "created_at", "action": "parsed_rq_id.action", "target": "parsed_rq_id.target", - # "subresource": "parsed_rq_id.subresource", - # "format": "parsed_rq_id.format", + "subresource": "parsed_rq_id.subresource", + "format": "parsed_rq_id.format", "status": "get_status", "project_id": "meta.project_id", "task_id": "meta.task_id", @@ -113,10 +112,10 @@ class RequestViewSet(viewsets.GenericViewSet): "project_id": SchemaField("integer"), "task_id": SchemaField("integer"), "job_id": SchemaField("integer"), - "action": SchemaField("string", RequestAction.choices), - "target": SchemaField("string", RequestTarget.choices), - # "subresource": SchemaField("string", RequestSubresource.choices), - # "format": SchemaField("string"), + "action": SchemaField("string"), + "target": SchemaField("string"), + "subresource": SchemaField("string"), + "format": SchemaField("string"), "org": SchemaField("string"), } From 9e53f02b9d90b1c232031cde8f5f97ec01b4ae24 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 25 Mar 2025 17:48:16 +0100 Subject: [PATCH 016/103] Remove unused imports --- cvat/apps/consensus/merging_manager.py | 4 ---- cvat/apps/engine/background.py | 14 ++------------ cvat/apps/engine/views.py | 1 - cvat/apps/events/export.py | 2 +- cvat/apps/quality_control/quality_reports.py | 7 +------ cvat/apps/quality_control/views.py | 2 +- cvat/apps/redis_handler/background.py | 2 -- cvat/apps/redis_handler/views.py | 2 -- 8 files changed, 5 insertions(+), 29 deletions(-) diff --git a/cvat/apps/consensus/merging_manager.py b/cvat/apps/consensus/merging_manager.py index b115cb983839..e39bcb3215de 100644 --- a/cvat/apps/consensus/merging_manager.py +++ b/cvat/apps/consensus/merging_manager.py @@ -3,7 +3,6 @@ # SPDX-License-Identifier: MIT import math -from functools import cached_property from typing import Type import attrs @@ -27,9 +26,6 @@ User, clear_annotations_in_jobs, ) -from cvat.apps.engine.rq import BaseRQMeta, define_dependent_job -from cvat.apps.engine.types import ExtendedRequest -from cvat.apps.engine.utils import get_rq_lock_by_user from cvat.apps.profiler import silk_profile from cvat.apps.quality_control.quality_reports import ComparisonParameters, JobDataProvider from cvat.apps.redis_handler.background import AbstractRequestManager diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index d56b809c6907..e90f068aae6e 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -2,21 +2,16 @@ # # SPDX-License-Identifier: MIT -from abc import abstractmethod from dataclasses import asdict as dataclass_asdict from dataclasses import dataclass -from datetime import datetime -from functools import cached_property from pathlib import Path from tempfile import NamedTemporaryFile -from types import NoneType -from typing import Any, Callable, ClassVar +from typing import Any from uuid import uuid4 import attrs from attrs.converters import to_bool from django.conf import settings -from rest_framework import serializers from rest_framework.exceptions import MethodNotAllowed, ValidationError from rest_framework.reverse import reverse from rq.job import Job as RQJob @@ -24,7 +19,7 @@ import cvat.apps.dataset_manager as dm from cvat.apps.dataset_manager.formats.registry import EXPORT_FORMATS from cvat.apps.dataset_manager.util import TmpDirManager -from cvat.apps.dataset_manager.views import get_export_cache_ttl, get_export_callback +from cvat.apps.dataset_manager.views import get_export_callback from cvat.apps.engine.backup import ( ProjectExporter, TaskExporter, @@ -33,7 +28,6 @@ import_task, ) from cvat.apps.engine.cloud_provider import ( - export_resource_to_cloud_storage, import_resource_from_cloud_storage, ) from cvat.apps.engine.location import StorageType, get_location_configuration @@ -51,18 +45,14 @@ from cvat.apps.engine.permissions import get_cloud_storage_for_import_or_export from cvat.apps.engine.rq import ( ExportRequestId, - ExportRQMeta, ImportRequestId, ImportRQMeta, - define_dependent_job, ) from cvat.apps.engine.serializers import UploadedFileSerializer, UploadedZipFileSerializer from cvat.apps.engine.task import create_thread as create_task -from cvat.apps.engine.types import ExtendedRequest from cvat.apps.engine.utils import ( build_annotations_file_name, build_backup_file_name, - get_rq_lock_by_user, is_dataset_export, ) from cvat.apps.events.handlers import handle_dataset_export, handle_dataset_import diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index d47ee69381d1..ce2fa62e2dbd 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -54,7 +54,6 @@ from cvat.apps.dataset_manager.serializers import DatasetFormatsSerializer from cvat.apps.engine import backup from cvat.apps.engine.background import BackupImporter, DatasetImporter, TaskCreator -from cvat.apps.engine.backup import import_project, import_task from cvat.apps.engine.cache import CvatChunkTimestampMismatchError, LockError, MediaCache from cvat.apps.engine.cloud_provider import db_storage_to_storage_instance from cvat.apps.engine.frame_provider import ( diff --git a/cvat/apps/events/export.py b/cvat/apps/events/export.py index d149d76896b9..76f57b5059eb 100644 --- a/cvat/apps/events/export.py +++ b/cvat/apps/events/export.py @@ -6,7 +6,7 @@ import os import uuid from contextlib import suppress -from datetime import datetime, timedelta, timezone +from datetime import datetime, timedelta from pathlib import Path import attrs diff --git a/cvat/apps/quality_control/quality_reports.py b/cvat/apps/quality_control/quality_reports.py index 94e96042ccab..32b90fa6541a 100644 --- a/cvat/apps/quality_control/quality_reports.py +++ b/cvat/apps/quality_control/quality_reports.py @@ -18,11 +18,10 @@ import datumaro.util.annotation_util import datumaro.util.mask_tools import numpy as np -from attrs import asdict, define, fields_dict, frozen +from attrs import asdict, define, fields_dict from datumaro.util import dump_json, parse_json from django.conf import settings from django.db import transaction -from django_rq.queues import DjangoRQ as RqQueue from rest_framework.serializers import ValidationError from scipy.optimize import linear_sum_assignment @@ -43,7 +42,6 @@ Image, Job, JobType, - Project, RequestTarget, ShapeType, StageChoice, @@ -52,9 +50,6 @@ User, ValidationMode, ) -from cvat.apps.engine.rq import BaseRQMeta, define_dependent_job -from cvat.apps.engine.types import ExtendedRequest -from cvat.apps.engine.utils import get_rq_lock_by_user from cvat.apps.profiler import silk_profile from cvat.apps.quality_control import models from cvat.apps.quality_control.models import ( diff --git a/cvat/apps/quality_control/views.py b/cvat/apps/quality_control/views.py index 45394f5ab861..055501ff8e21 100644 --- a/cvat/apps/quality_control/views.py +++ b/cvat/apps/quality_control/views.py @@ -286,7 +286,7 @@ def create(self, request, *args, **kwargs): serializer = RequestIdSerializer(data={"rq_id": rq_id}) serializer.is_valid(raise_exception=True) rq_id = serializer.validated_data["rq_id"] - rq_job = qc.QualityReportRQJobManager.get_job_by_id(rq_id) + rq_job = qc.QualityReportRQJobManager(request=request).get_job_by_id(rq_id) # FUTURE-TODO: move into permissions # and allow not only rq job owner to check the status diff --git a/cvat/apps/redis_handler/background.py b/cvat/apps/redis_handler/background.py index f6eda220eb67..880dcf1c2d5f 100644 --- a/cvat/apps/redis_handler/background.py +++ b/cvat/apps/redis_handler/background.py @@ -7,8 +7,6 @@ from dataclasses import asdict as dataclass_asdict from dataclasses import dataclass from datetime import datetime -from functools import cached_property -from types import NoneType from typing import Any, Callable, ClassVar from urllib.parse import quote diff --git a/cvat/apps/redis_handler/views.py b/cvat/apps/redis_handler/views.py index 853630ffb2b7..238a5bed90e6 100644 --- a/cvat/apps/redis_handler/views.py +++ b/cvat/apps/redis_handler/views.py @@ -25,9 +25,7 @@ ) from cvat.apps.engine.log import ServerLogManager from cvat.apps.engine.models import ( # todo: move to the app - RequestAction, RequestStatus, - RequestTarget, ) from cvat.apps.engine.rq import is_rq_job_owner from cvat.apps.engine.types import ExtendedRequest From 5358448be1ab74aa16b157d76e3c37ee6dc52628 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Wed, 26 Mar 2025 10:53:27 +0100 Subject: [PATCH 017/103] Drop tmp_file from import-specific metadata --- cvat/apps/engine/background.py | 5 ----- cvat/apps/engine/rq.py | 18 ------------------ 2 files changed, 23 deletions(-) diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index e90f068aae6e..a20cccabcffc 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -420,11 +420,6 @@ def build_request_id(self): }, ).render() - def build_meta(self, *, file_path: str): - return ImportRQMeta.build_for( # TODO: looks like tmp_file is not used anywhere - request=self.request, db_obj=self.db_instance, tmp_file=file_path - ) - def finalize_request(self): handle_dataset_import( self.db_instance, diff --git a/cvat/apps/engine/rq.py b/cvat/apps/engine/rq.py index 287490502d8d..0a40add05913 100644 --- a/cvat/apps/engine/rq.py +++ b/cvat/apps/engine/rq.py @@ -53,7 +53,6 @@ class RequestField: HIDDEN = "hidden" # import specific fields - TMP_FILE = "tmp_file" # TODO: unused field TASK_PROGRESS = "task_progress" # export specific fields @@ -316,11 +315,6 @@ def build_for( class ImportRQMeta(BaseRQMeta): - # immutable && optional fields - tmp_file: str | None = ImmutableRQMetaAttribute( - RQJobMetaField.TMP_FILE, optional=True - ) # used only when importing annotations|datasets|backups - # mutable fields task_progress: float | None = MutableRQMetaAttribute( RQJobMetaField.TASK_PROGRESS, validator=lambda x: isinstance(x, float), optional=True @@ -332,18 +326,6 @@ def _get_resettable_fields() -> list[str]: return base_fields + [RQJobMetaField.TASK_PROGRESS] - @classmethod - def build_for( - cls, - *, - request: ExtendedRequest, - db_obj: Model | None, - tmp_file: str | None = None, - ): - base_meta = BaseRQMeta.build(request=request, db_obj=db_obj) - - return {**base_meta, RQJobMetaField.TMP_FILE: tmp_file} - def is_rq_job_owner(rq_job: RQJob, user_id: int) -> bool: if user := BaseRQMeta.for_job(rq_job).user: From 7cb500686f3068031b9b1c036d0f966c51efe1ea Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Wed, 26 Mar 2025 11:44:44 +0100 Subject: [PATCH 018/103] Fix AbstractExporter --- cvat/apps/engine/background.py | 8 +++---- cvat/apps/events/export.py | 4 ++-- cvat/apps/redis_handler/background.py | 34 ++++++++------------------- 3 files changed, 16 insertions(+), 30 deletions(-) diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index a20cccabcffc..1cbcdd763cd7 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -57,7 +57,7 @@ ) from cvat.apps.events.handlers import handle_dataset_export, handle_dataset_import from cvat.apps.redis_handler.background import ( - AbstractExportableRequestManager, + AbstractExporter, AbstractRequestManager, ) from cvat.apps.redis_handler.rq import RequestId @@ -77,11 +77,11 @@ def cancel_and_delete(rq_job: RQJob) -> None: rq_job.delete() -class DatasetExporter(AbstractExportableRequestManager): +class DatasetExporter(AbstractExporter): SUPPORTED_RESOURCES = {RequestTarget.PROJECT, RequestTarget.TASK, RequestTarget.JOB} @dataclass - class ExportArgs(AbstractExportableRequestManager.ExportArgs): + class ExportArgs(AbstractExporter.ExportArgs): format: str save_images: bool @@ -183,7 +183,7 @@ def where_to_redirect(self) -> str: ) -class BackupExporter(AbstractExportableRequestManager): +class BackupExporter(AbstractExporter): SUPPORTED_RESOURCES = {RequestTarget.PROJECT, RequestTarget.TASK} def validate_request_id(self, request_id, /) -> None: diff --git a/cvat/apps/events/export.py b/cvat/apps/events/export.py index 76f57b5059eb..8239dddbfc18 100644 --- a/cvat/apps/events/export.py +++ b/cvat/apps/events/export.py @@ -27,7 +27,7 @@ from cvat.apps.engine.types import ExtendedRequest from cvat.apps.engine.utils import sendfile from cvat.apps.events.permissions import EventsPermission -from cvat.apps.redis_handler.background import AbstractExportableRequestManager +from cvat.apps.redis_handler.background import AbstractExporter from cvat.apps.redis_handler.rq import RequestId slogger = ServerLogManager(__name__) @@ -96,7 +96,7 @@ def user_id(self) -> int: @attrs.define(kw_only=True) -class EventsExporter(AbstractExportableRequestManager): +class EventsExporter(AbstractExporter): filter_query: dict = attrs.field(init=False) query_id: uuid.UUID = attrs.field(init=False) # temporary arg diff --git a/cvat/apps/redis_handler/background.py b/cvat/apps/redis_handler/background.py index 880dcf1c2d5f..c86d432149b9 100644 --- a/cvat/apps/redis_handler/background.py +++ b/cvat/apps/redis_handler/background.py @@ -157,18 +157,18 @@ def handle_existing_job(self, job: RQJob | None, queue: DjangoRQ) -> Response | job.delete() return None - def build_meta(self): + def build_meta(self, *, request_id: str): return BaseRQMeta.build(request=self.request, db_obj=self.db_instance) - def setup_new_job(self, queue: DjangoRQ, id_: str, /): + def setup_new_job(self, queue: DjangoRQ, request_id: str, /): with get_rq_lock_by_user(queue, self.user_id): queue.enqueue_call( func=self.callback, args=self.callback_args, kwargs=self.callback_kwargs, - job_id=id_, - meta=self.build_meta(), - depends_on=define_dependent_job(queue, self.user_id, rq_id=id_), + job_id=request_id, + meta=self.build_meta(request_id=request_id), + depends_on=define_dependent_job(queue, self.user_id, rq_id=request_id), result_ttl=self.job_result_ttl, failure_ttl=self.job_failed_ttl, ) @@ -176,8 +176,8 @@ def setup_new_job(self, queue: DjangoRQ, id_: str, /): def finalize_request(self) -> None: """Hook to run some actions (e.g. collect events) after processing a request""" - def get_response(self, id_: str) -> Response: - serializer = RequestIdSerializer({"rq_id": id_}) + def get_response(self, request_id: str) -> Response: + serializer = RequestIdSerializer({"rq_id": request_id}) return Response(serializer.data, status=status.HTTP_202_ACCEPTED) def process(self) -> Response: @@ -201,7 +201,7 @@ def process(self) -> Response: return self.get_response(request_id) -class AbstractExportableRequestManager(AbstractRequestManager): +class AbstractExporter(AbstractRequestManager): QUEUE_NAME = settings.CVAT_QUEUES.EXPORT_DATA.value @property @@ -251,7 +251,7 @@ def init_request_args(self) -> None: except ValueError as ex: raise ValidationError(str(ex)) from ex - self.export_args = AbstractExportableRequestManager.ExportArgs( + self.export_args = AbstractExporter.ExportArgs( location_config=location_config, filename=self.request.query_params.get("filename") ) @@ -290,7 +290,7 @@ def validate_request(self): + " but cloud storage id was not specified" ) - def build_meta(self, *, request_id: str): + def build_meta(self, *, request_id): return ExportRQMeta.build_for( request=self.request, db_obj=self.db_instance, @@ -302,20 +302,6 @@ def build_meta(self, *, request_id: str): result_filename=self.get_result_filename(), ) - # TODO:refactor and fix for import too - def setup_new_job(self, queue: DjangoRQ, id_: str, /): - with get_rq_lock_by_user(queue, self.user_id): - queue.enqueue_call( - func=self.callback, - args=self.callback_args, - kwargs=self.callback_kwargs, - job_id=id_, - meta=self.build_meta(request_id=id_), - depends_on=define_dependent_job(queue, self.user_id, rq_id=id_), - result_ttl=self.job_result_ttl, - failure_ttl=self.job_failed_ttl, - ) - def download_file(self) -> Response: queue = self.get_queue() request_id = self.request.query_params.get(self.REQUEST_ID_KEY) From 37f6d2584707205f4523a0608caf70d310c4d8bd Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Wed, 26 Mar 2025 17:04:53 +0100 Subject: [PATCH 019/103] Move rq job owner check into rego --- cvat/apps/quality_control/permissions.py | 2 +- cvat/apps/redis_handler/permissions.py | 16 +++++---- cvat/apps/redis_handler/rules/requests.rego | 38 +++++++++++++++++++++ 3 files changed, 48 insertions(+), 8 deletions(-) create mode 100644 cvat/apps/redis_handler/rules/requests.rego diff --git a/cvat/apps/quality_control/permissions.py b/cvat/apps/quality_control/permissions.py index 6e7c38944a88..39af9fe3649c 100644 --- a/cvat/apps/quality_control/permissions.py +++ b/cvat/apps/quality_control/permissions.py @@ -24,6 +24,7 @@ class Scopes(StrEnum): LIST = "list" CREATE = "create" VIEW = "view" + # FUTURE-TODO: deprecated scope, should be removed when related API is removed VIEW_STATUS = "view:status" @classmethod @@ -102,7 +103,6 @@ def create(cls, request, view, obj, iam_context): return permissions def __init__(self, **kwargs): - # TODO: refactor if "rq_job_owner_id" in kwargs: self.rq_job_owner_id = int(kwargs.pop("rq_job_owner_id")) diff --git a/cvat/apps/redis_handler/permissions.py b/cvat/apps/redis_handler/permissions.py index 9067876f69ad..1099f94a3180 100644 --- a/cvat/apps/redis_handler/permissions.py +++ b/cvat/apps/redis_handler/permissions.py @@ -7,10 +7,8 @@ from typing import TYPE_CHECKING from django.conf import settings -from rest_framework.exceptions import PermissionDenied from rq.job import Job as RQJob -from cvat.apps.engine.rq import is_rq_job_owner from cvat.apps.engine.types import ExtendedRequest from cvat.apps.iam.permissions import OpenPolicyAgentPermission, StrEnum @@ -19,6 +17,7 @@ from cvat.apps.engine.models import RequestTarget from cvat.apps.engine.permissions import JobPermission, TaskPermission +from cvat.apps.engine.rq import BaseRQMeta class RequestPermission(OpenPolicyAgentPermission): @@ -34,8 +33,6 @@ def create( ) -> list[OpenPolicyAgentPermission]: permissions = [] if view.basename == "request": - user_id = request.user.id - for scope in cls.get_scopes(request, view, obj): if scope == cls.Scopes.LIST: continue @@ -64,9 +61,8 @@ def create( ) continue - # TODO: move into OPA - if not is_rq_job_owner(obj, user_id): - raise PermissionDenied("You don't have permission to perform this action") + self = cls.create_base_perm(request, view, scope, iam_context, obj) + permissions.append(self) return permissions @@ -85,4 +81,10 @@ def get_scopes(request: ExtendedRequest, view: ViewSet, obj: RQJob | None) -> li ] def get_resource(self): + if owner := BaseRQMeta.for_job(self.obj).user: + return { + "owner": { + "id": owner.id, + }, + } return None diff --git a/cvat/apps/redis_handler/rules/requests.rego b/cvat/apps/redis_handler/rules/requests.rego new file mode 100644 index 000000000000..0c6e7321516f --- /dev/null +++ b/cvat/apps/redis_handler/rules/requests.rego @@ -0,0 +1,38 @@ +package requests + +import rego.v1 + +import data.utils +import data.organizations + +# input: { +# "scope": <"view"|"cancel"> or null, +# "auth": { +# "user": { +# "id": , +# "privilege": <"admin"|"user"|"worker"> or null +# }, +# "organization": { +# "id": , +# "owner": { +# "id": +# }, +# "user": { +# "role": <"owner"|"maintainer"|"supervisor"|"worker"> or null +# } +# } or null, +# }, +# "resource": { +# "owner": { "id": } or null, +# } +# } + +default allow := false + +allow if { + utils.is_admin +} + +allow if { + input.auth.user.id == input.resource.owner.id +} From 9c4c5952adca552571be5521a4fb9aae162760e1 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Wed, 26 Mar 2025 17:06:24 +0100 Subject: [PATCH 020/103] Update events REST API tests --- cvat/apps/redis_handler/serializers.py | 2 +- tests/python/rest_api/test_analytics.py | 130 +++++++++++++----- tests/python/rest_api/test_consensus.py | 1 + tests/python/rest_api/test_quality_control.py | 5 +- tests/python/rest_api/utils.py | 3 +- 5 files changed, 103 insertions(+), 38 deletions(-) diff --git a/cvat/apps/redis_handler/serializers.py b/cvat/apps/redis_handler/serializers.py index 591ff24a6728..d928cc23bdab 100644 --- a/cvat/apps/redis_handler/serializers.py +++ b/cvat/apps/redis_handler/serializers.py @@ -44,7 +44,7 @@ class Meta(BasicUserSerializer.Meta): class RequestDataOperationSerializer(serializers.Serializer): type = serializers.CharField() - target = serializers.ChoiceField(choices=models.RequestTarget.choices) + target = serializers.CharField() project_id = serializers.IntegerField(required=False, allow_null=True) task_id = serializers.IntegerField(required=False, allow_null=True) job_id = serializers.IntegerField(required=False, allow_null=True) diff --git a/tests/python/rest_api/test_analytics.py b/tests/python/rest_api/test_analytics.py index 22d3520362e0..efaa85d786c9 100644 --- a/tests/python/rest_api/test_analytics.py +++ b/tests/python/rest_api/test_analytics.py @@ -13,12 +13,14 @@ from time import sleep import pytest +from cvat_sdk.api_client import ApiClient from dateutil import parser as datetime_parser -from shared.utils.config import delete_method, make_api_client, server_get +import shared.utils.s3 as s3 +from shared.utils.config import delete_method, get_method, make_api_client, server_get from shared.utils.helpers import generate_image_files -from .utils import create_task +from .utils import create_task, wait_and_download_v2, wait_background_request class TestGetAnalytics: @@ -148,27 +150,55 @@ def _wait_for_request_ids(self, event_filters): assert False, "Could not wait for expected request IDs" @staticmethod - def _export_events(endpoint, *, max_retries: int = 20, interval: float = 0.1, **kwargs): - query_id = "" - for _ in range(max_retries): + def _export_events( + api_client: ApiClient, + *, + api_version: int, + max_retries: int = 20, + interval: float = 0.1, + **kwargs, + ) -> bytes | None: + if api_version == 1: + endpoint = api_client.events_api.list_endpoint + query_id = "" + for _ in range(max_retries): + (_, response) = endpoint.call_with_http_info( + **kwargs, query_id=query_id, _parse_response=False + ) + if response.status == HTTPStatus.CREATED: + break + assert response.status == HTTPStatus.ACCEPTED + if not query_id: + response_json = json.loads(response.data) + query_id = response_json["query_id"] + sleep(interval) + + assert response.status == HTTPStatus.CREATED + (_, response) = endpoint.call_with_http_info( - **kwargs, query_id=query_id, _parse_response=False + **kwargs, query_id=query_id, action="download", _parse_response=False ) - if response.status == HTTPStatus.CREATED: - break - assert response.status == HTTPStatus.ACCEPTED - if not query_id: - response_json = json.loads(response.data) - query_id = response_json["query_id"] - sleep(interval) - assert response.status == HTTPStatus.CREATED - - (_, response) = endpoint.call_with_http_info( - **kwargs, query_id=query_id, action="download", _parse_response=False + assert response.status == HTTPStatus.OK + + return response.data + + assert api_version == 2 + + request_id, response = api_client.events_api.create_file_export( + **kwargs, _check_status=False ) - assert response.status == HTTPStatus.OK + assert response.status == HTTPStatus.ACCEPTED - return response.data + if "location" in kwargs and "cloud_storage_id" in kwargs: + background_request, response = wait_background_request( + api_client, rq_id=request_id.rq_id, max_retries=max_retries, interval=interval + ) + assert background_request.result_url is None + return None + + return wait_and_download_v2( + api_client, rq_id=request_id.rq_id, max_retries=max_retries, interval=interval + ) @staticmethod def _csv_to_dict(csv_data): @@ -190,11 +220,12 @@ def _filter_events(events, filters): return res - def _test_get_audit_logs_as_csv(self, **kwargs): + def _test_get_audit_logs_as_csv(self, *, api_version: int = 2, **kwargs): with make_api_client(self._USERNAME) as api_client: - return self._export_events(api_client.events_api.list_endpoint, **kwargs) + return self._export_events(api_client, api_version=api_version, **kwargs) - def test_entry_to_time_interval(self): + @pytest.mark.parametrize("api_version", [1, 2]) + def test_entry_to_time_interval(self, api_version: int): now = datetime.now(timezone.utc) to_datetime = now from_datetime = now - timedelta(minutes=3) @@ -204,7 +235,7 @@ def test_entry_to_time_interval(self): "to": to_datetime.isoformat(), } - data = self._test_get_audit_logs_as_csv(**query_params) + data = self._test_get_audit_logs_as_csv(api_version=api_version, **query_params) events = self._csv_to_dict(data) assert len(events) @@ -212,12 +243,13 @@ def test_entry_to_time_interval(self): event_timestamp = datetime_parser.isoparse(event["timestamp"]) assert from_datetime <= event_timestamp <= to_datetime - def test_filter_by_project(self): + @pytest.mark.parametrize("api_version", [1, 2]) + def test_filter_by_project(self, api_version: int): query_params = { "project_id": self.project_id, } - data = self._test_get_audit_logs_as_csv(**query_params) + data = self._test_get_audit_logs_as_csv(api_version=api_version, **query_params) events = self._csv_to_dict(data) filtered_events = self._filter_events(events, [("project_id", [str(self.project_id)])]) @@ -229,13 +261,14 @@ def test_filter_by_project(self): assert event_count["create:task"] == 2 assert event_count["create:job"] == 4 - def test_filter_by_task(self): + @pytest.mark.parametrize("api_version", [1, 2]) + def test_filter_by_task(self, api_version: int): for task_id in self.task_ids: query_params = { "task_id": task_id, } - data = self._test_get_audit_logs_as_csv(**query_params) + data = self._test_get_audit_logs_as_csv(api_version=api_version, **query_params) events = self._csv_to_dict(data) filtered_events = self._filter_events(events, [("task_id", [str(task_id)])]) @@ -246,20 +279,22 @@ def test_filter_by_task(self): assert event_count["create:task"] == 1 assert event_count["create:job"] == 2 - def test_filter_by_non_existent_project(self): + @pytest.mark.parametrize("api_version", [1, 2]) + def test_filter_by_non_existent_project(self, api_version: int): query_params = { "project_id": self.project_id + 100, } - data = self._test_get_audit_logs_as_csv(**query_params) + data = self._test_get_audit_logs_as_csv(api_version=api_version, **query_params) events = self._csv_to_dict(data) assert len(events) == 0 - def test_user_and_request_id_not_empty(self): + @pytest.mark.parametrize("api_version", [1, 2]) + def test_user_and_request_id_not_empty(self, api_version: int): query_params = { "project_id": self.project_id, } - data = self._test_get_audit_logs_as_csv(**query_params) + data = self._test_get_audit_logs_as_csv(api_version=api_version, **query_params) events = self._csv_to_dict(data) for event in events: @@ -272,7 +307,8 @@ def test_user_and_request_id_not_empty(self): assert request_id uuid.UUID(request_id) - def test_delete_project(self): + @pytest.mark.parametrize("api_version", [1, 2]) + def test_delete_project(self, api_version: int): response = delete_method("admin1", f"projects/{self.project_id}") assert response.status_code == HTTPStatus.NO_CONTENT @@ -299,7 +335,7 @@ def test_delete_project(self): "project_id": self.project_id, } - data = self._test_get_audit_logs_as_csv(**query_params) + data = self._test_get_audit_logs_as_csv(api_version=api_version, **query_params) events = self._csv_to_dict(data) filtered_events = self._filter_events(events, [("project_id", [str(self.project_id)])]) @@ -310,3 +346,31 @@ def test_delete_project(self): assert event_count["delete:project"] == 1 assert event_count["delete:task"] == 2 assert event_count["delete:job"] == 4 + + @pytest.mark.with_external_services + @pytest.mark.parametrize("api_version, allowed", [(1, False), (2, True)]) + @pytest.mark.parametrize("cloud_storage_id", [3]) # import/export bucket + def test_export_to_cloud( + self, api_version: int, allowed: bool, cloud_storage_id: int, cloud_storages + ): + query_params = { + "api_version": api_version, + "location": "cloud_storage", + "cloud_storage_id": cloud_storage_id, + "filename": "test.csv", + "task_id": self.task_ids[0], + } + if allowed: + data = self._test_get_audit_logs_as_csv(**query_params) + assert data is None + s3_client = s3.make_client(bucket=cloud_storages[cloud_storage_id]["resource"]) + data = s3_client.download_fileobj(query_params["filename"]) + events = self._csv_to_dict(data) + assert len(events) + else: + response = get_method(self._USERNAME, "events", **query_params) + assert response.status_code == HTTPStatus.BAD_REQUEST + assert ( + response.json()[0] + == "This endpoint does not support exporting events to cloud storage" + ) diff --git a/tests/python/rest_api/test_consensus.py b/tests/python/rest_api/test_consensus.py index 22ad4d45253c..33fa04dd3f7c 100644 --- a/tests/python/rest_api/test_consensus.py +++ b/tests/python/rest_api/test_consensus.py @@ -15,6 +15,7 @@ from cvat_sdk.api_client.api_client import ApiClient, Endpoint from cvat_sdk.core.helpers import get_paginated_collection from deepdiff import DeepDiff + from shared.utils.config import USER_PASS, make_api_client from .utils import CollectionSimpleFilterTestBase, compare_annotations, wait_background_request diff --git a/tests/python/rest_api/test_quality_control.py b/tests/python/rest_api/test_quality_control.py index 39d6eecba8ae..062d21554c0b 100644 --- a/tests/python/rest_api/test_quality_control.py +++ b/tests/python/rest_api/test_quality_control.py @@ -7,7 +7,7 @@ from copy import deepcopy from functools import partial from http import HTTPStatus -from itertools import groupby +from itertools import groupby, product from typing import Any, Callable, Optional import pytest @@ -16,8 +16,7 @@ from cvat_sdk.core.helpers import get_paginated_collection from deepdiff import DeepDiff -from shared.utils.config import make_api_client, USER_PASS -from itertools import product +from shared.utils.config import USER_PASS, make_api_client from .utils import CollectionSimpleFilterTestBase, parse_frame_step, wait_background_request diff --git a/tests/python/rest_api/utils.py b/tests/python/rest_api/utils.py index e9475bdb9dde..5335e3535a5d 100644 --- a/tests/python/rest_api/utils.py +++ b/tests/python/rest_api/utils.py @@ -19,9 +19,10 @@ from cvat_sdk.api_client.exceptions import ForbiddenException from cvat_sdk.core.helpers import get_paginated_collection from deepdiff import DeepDiff -from shared.utils.config import make_api_client from urllib3 import HTTPResponse +from shared.utils.config import make_api_client + def initialize_export(endpoint: Endpoint, *, expect_forbidden: bool = False, **kwargs) -> str: (_, response) = endpoint.call_with_http_info( From 06951a3ba4609908207eaebf6f28cd706ef61c19 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Wed, 26 Mar 2025 17:57:20 +0100 Subject: [PATCH 021/103] Update server schema --- cvat/schema.yml | 502 ++++++++++++++++-------------------------------- 1 file changed, 170 insertions(+), 332 deletions(-) diff --git a/cvat/schema.yml b/cvat/schema.yml index 148f2054ef79..5b8c4ca9717b 100644 --- a/cvat/schema.yml +++ b/cvat/schema.yml @@ -982,10 +982,10 @@ paths: content: application/vnd.cvat+json: schema: - $ref: '#/components/schemas/RqId' + $ref: '#/components/schemas/RequestId' description: | A consensus merge request has been enqueued, the request id is returned. - The request status can be checked by using common requests API: GET /api/requests/rq_id + The request status can be checked by using common requests API: GET /api/requests/ '400': description: Invalid or failed request, check the response data for details /api/consensus/settings: @@ -1190,6 +1190,7 @@ paths: tokenAuth: [] - signatureAuth: [] - basicAuth: [] + deprecated: true responses: '200': description: Download of file started @@ -1238,6 +1239,83 @@ paths: schema: $ref: '#/components/schemas/ClientEvents' description: '' + /api/events/file/export: + post: + operationId: events_create_file_export + summary: Initiate a process to export events + parameters: + - in: query + name: cloud_storage_id + schema: + type: integer + description: Storage id + - in: query + name: filename + schema: + type: string + description: Desired output file name + - in: query + name: from + schema: + type: string + format: date-time + description: Filter events after the datetime. If no 'from' or 'to' parameters + are passed, the last 30 days will be set. + - in: query + name: job_id + schema: + type: integer + description: Filter events by job ID + - in: query + name: location + schema: + type: string + enum: + - cloud_storage + - local + description: Where need to save events file + - in: query + name: org_id + schema: + type: integer + description: Filter events by organization ID + - in: query + name: project_id + schema: + type: integer + description: Filter events by project ID + - in: query + name: task_id + schema: + type: integer + description: Filter events by task ID + - in: query + name: to + schema: + type: string + format: date-time + description: Filter events before the datetime. If no 'from' or 'to' parameters + are passed, the last 30 days will be set. + - in: query + name: user_id + schema: + type: integer + description: Filter events by user ID + tags: + - events + security: + - sessionAuth: [] + csrfAuth: [] + tokenAuth: [] + - signatureAuth: [] + - basicAuth: [] + responses: + '202': + content: + application/vnd.cvat+json: + schema: + $ref: '#/components/schemas/RequestId' + description: '' /api/guides: post: operationId: guides_create @@ -2260,10 +2338,10 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/AnnotationFileRequest' + $ref: '#/components/schemas/UploadedFileRequest' multipart/form-data: schema: - $ref: '#/components/schemas/AnnotationFileRequest' + $ref: '#/components/schemas/UploadedFileRequest' security: - sessionAuth: [] csrfAuth: [] @@ -2277,72 +2355,30 @@ paths: content: application/vnd.cvat+json: schema: - $ref: '#/components/schemas/RqId' + $ref: '#/components/schemas/RequestId' description: Uploading has been started '405': description: Format is not available put: operationId: jobs_update_annotations - description: |2 - - Utilizing this endpoint to check status of the import process is deprecated - in favor of the new requests API: - GET /api/requests/, where `rq_id` parameter is returned in the response - on initializing request. - summary: Replace job annotations / Get annotation import status + summary: Replace job annotations parameters: - - in: query - name: cloud_storage_id - schema: - type: integer - description: Storage id - deprecated: true - - in: query - name: filename - schema: - type: string - description: Annotation file name - deprecated: true - - in: query - name: format - schema: - type: string - description: |- - Input format name - You can get the list of supported formats at: - /server/annotation/formats - deprecated: true - in: path name: id schema: type: integer description: A unique integer value identifying this job. required: true - - in: query - name: location - schema: - type: string - enum: - - cloud_storage - - local - description: where to import the annotation from - deprecated: true - - in: query - name: rq_id - schema: - type: string - description: rq id - deprecated: true tags: - jobs requestBody: content: application/json: schema: - $ref: '#/components/schemas/JobAnnotationsUpdateRequest' + $ref: '#/components/schemas/LabeledDataRequest' multipart/form-data: schema: - $ref: '#/components/schemas/JobAnnotationsUpdateRequest' + $ref: '#/components/schemas/LabeledDataRequest' security: - sessionAuth: [] csrfAuth: [] @@ -2350,12 +2386,8 @@ paths: - signatureAuth: [] - basicAuth: [] responses: - '201': - description: Import has finished - '202': - description: Import is in progress - '405': - description: Format is not available + '200': + description: Annotations have been replaced patch: operationId: jobs_partial_update_annotations summary: Update job annotations @@ -2588,7 +2620,7 @@ paths: content: application/vnd.cvat+json: schema: - $ref: '#/components/schemas/RqId' + $ref: '#/components/schemas/RequestId' description: Exporting has been started '405': description: Format is not available @@ -3690,7 +3722,7 @@ paths: content: application/vnd.cvat+json: schema: - $ref: '#/components/schemas/RqId' + $ref: '#/components/schemas/RequestId' description: Creating a backup file has been started '400': description: Wrong query parameters were passed @@ -3700,69 +3732,13 @@ paths: /api/projects/{id}/dataset/: get: operationId: projects_retrieve_dataset - description: |2 - - Utilizing this endpoint to check the status of the process - of importing a project dataset from a file is deprecated. - In addition, this endpoint no longer handles the project dataset export process. - - Consider using new API: - - `POST /api/projects//dataset/export/?save_images=True` to initiate export process - - `GET /api/requests/` to check process status - - `GET result_url` to download a prepared file - - Where: - - `rq_id` can be found in the response on initializing request - - `result_url` can be found in the response on checking status request - summary: Check dataset import status parameters: - - in: query - name: action - schema: - type: string - enum: - - import_status - description: Used to check the import status - deprecated: true - - in: query - name: cloud_storage_id - schema: - type: integer - description: This parameter is no longer supported - deprecated: true - - in: query - name: filename - schema: - type: string - description: This parameter is no longer supported - deprecated: true - - in: query - name: format - schema: - type: string - description: This parameter is no longer supported - deprecated: true - in: path name: id schema: type: integer description: A unique integer value identifying this project. required: true - - in: query - name: location - schema: - type: string - enum: - - cloud_storage - - local - description: This parameter is no longer supported - deprecated: true - - in: query - name: rq_id - schema: - type: string - description: This parameter is no longer supported - required: true tags: - projects security: @@ -3771,10 +3747,13 @@ paths: tokenAuth: [] - signatureAuth: [] - basicAuth: [] - deprecated: true responses: - '410': - description: API endpoint no longer supports exporting datasets + '200': + content: + application/vnd.cvat+json: + schema: + $ref: '#/components/schemas/ProjectRead' + description: '' post: operationId: projects_create_dataset description: |2 @@ -3816,24 +3795,16 @@ paths: - cloud_storage - local description: Where to import the dataset from - - in: query - name: use_default_location - schema: - type: boolean - default: true - description: Use the location that was configured in the project to import - annotations - deprecated: true tags: - projects requestBody: content: application/json: schema: - $ref: '#/components/schemas/DatasetWriteRequest' + $ref: '#/components/schemas/UploadedZipFileRequest' multipart/form-data: schema: - $ref: '#/components/schemas/DatasetWriteRequest' + $ref: '#/components/schemas/UploadedZipFileRequest' security: - sessionAuth: [] csrfAuth: [] @@ -3845,7 +3816,7 @@ paths: content: application/vnd.cvat+json: schema: - $ref: '#/components/schemas/RqId' + $ref: '#/components/schemas/RequestId' description: Importing has been started '400': description: Failed to import dataset @@ -3912,7 +3883,7 @@ paths: content: application/vnd.cvat+json: schema: - $ref: '#/components/schemas/RqId' + $ref: '#/components/schemas/RequestId' description: Exporting has been started '405': description: Format is not available @@ -3949,14 +3920,14 @@ paths: The backup import process is as follows: - The first request POST /api/projects/backup will initiate file upload and will create - the rq job on the server in which the process of a project creating from an uploaded backup - will be carried out. + The first request POST /api/projects/backup schedules a background job on the server + in which the process of a project creating from an uploaded backup is carried out. + + To check the status of the import process, use GET /api/requests/rq_id, + where rq_id is request ID obtained from the response of the previous request. - After initiating the backup upload, you will receive an rq_id parameter. - Make sure to include this parameter as a query parameter in your subsequent requests - to track the status of the project creation. - Once the project has been successfully created, the server will return the id of the newly created project. + Once the import completes successfully, the response will contain the ID + of the newly created project in the result_id field. summary: Recreate a project from a backup parameters: - in: header @@ -3993,21 +3964,16 @@ paths: schema: type: integer description: Organization identifier - - in: query - name: rq_id - schema: - type: string - description: rq id tags: - projects requestBody: content: application/json: schema: - $ref: '#/components/schemas/BackupWriteRequest' + $ref: '#/components/schemas/UploadedZipFileRequest' multipart/form-data: schema: - $ref: '#/components/schemas/BackupWriteRequest' + $ref: '#/components/schemas/UploadedZipFileRequest' security: - sessionAuth: [] csrfAuth: [] @@ -4015,14 +3981,12 @@ paths: - signatureAuth: [] - basicAuth: [] responses: - '201': - description: The project has been imported '202': content: application/vnd.cvat+json: schema: - $ref: '#/components/schemas/RqId' - description: Importing a backup file has been started + $ref: '#/components/schemas/RequestId' + description: Import of a backup file has started /api/quality/conflicts: get: operationId: quality_list_conflicts @@ -4264,7 +4228,7 @@ paths: content: application/vnd.cvat+json: schema: - $ref: '#/components/schemas/RqId' + $ref: '#/components/schemas/RequestId' description: | A quality report request has been enqueued, the request id is returned. The request status can be checked at this endpoint by passing the rq_id @@ -4465,11 +4429,6 @@ paths: description: A simple equality filter for the action field schema: type: string - enum: - - autoannotate - - create - - import - - export - name: filter required: false in: query @@ -4479,7 +4438,12 @@ paths: Details about the syntax used can be found at the link: https://jsonlogic.com/ - Available filter_fields: ['status', 'project_id', 'task_id', 'job_id', 'action', 'target']. + Available filter_fields: ['status', 'project_id', 'task_id', 'job_id', 'action', 'target', 'subresource', 'format']. + schema: + type: string + - name: format + in: query + description: A simple equality filter for the format field schema: type: string - name: job_id @@ -4526,15 +4490,16 @@ paths: - started - failed - finished + - name: subresource + in: query + description: A simple equality filter for the subresource field + schema: + type: string - name: target in: query description: A simple equality filter for the target field schema: type: string - enum: - - project - - task - - job - name: task_id in: query description: A simple equality filter for the task_id field @@ -5109,61 +5074,14 @@ paths: /api/tasks/{id}/annotations/: get: operationId: tasks_retrieve_annotations - description: | - Deprecation warning: - - Utilizing this endpoint to export annotations as a dataset in - a specific format is no longer possible. - - Consider using new API: - - `POST /api/tasks//dataset/export?save_images=False` to initiate export process - - `GET /api/requests/` to check process status, - where `rq_id` is request id returned on initializing request - - `GET result_url` to download a prepared file, - where `result_url` can be found in the response on checking status request summary: Get task annotations parameters: - - in: query - name: action - schema: - type: string - enum: - - download - description: This parameter is no longer supported - deprecated: true - - in: query - name: cloud_storage_id - schema: - type: integer - description: This parameter is no longer supported - deprecated: true - - in: query - name: filename - schema: - type: string - description: This parameter is no longer supported - deprecated: true - - in: query - name: format - schema: - type: string - description: This parameter is no longer supported - deprecated: true - in: path name: id schema: type: integer description: A unique integer value identifying this task. required: true - - in: query - name: location - schema: - type: string - enum: - - cloud_storage - - local - description: This parameter is no longer supported - deprecated: true tags: - tasks security: @@ -5237,10 +5155,10 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/TaskAnnotationsWriteRequest' + $ref: '#/components/schemas/UploadedFileRequest' multipart/form-data: schema: - $ref: '#/components/schemas/TaskAnnotationsWriteRequest' + $ref: '#/components/schemas/UploadedFileRequest' security: - sessionAuth: [] csrfAuth: [] @@ -5254,52 +5172,30 @@ paths: content: application/vnd.cvat+json: schema: - $ref: '#/components/schemas/RqId' + $ref: '#/components/schemas/RequestId' description: Uploading has been started '405': description: Format is not available put: operationId: tasks_update_annotations - description: |2 - - Utilizing this endpoint to check status of the import process is deprecated - in favor of the new requests API: - - GET /api/requests/, where `rq_id` parameter is returned in the response - on initializing request. - summary: Replace task annotations / Get annotation import status + summary: Replace task annotations parameters: - - in: query - name: format - schema: - type: string - description: |- - Input format name - You can get the list of supported formats at: - /server/annotation/formats - deprecated: true - in: path name: id schema: type: integer description: A unique integer value identifying this task. required: true - - in: query - name: rq_id - schema: - type: string - description: rq id - deprecated: true tags: - tasks requestBody: content: application/json: schema: - $ref: '#/components/schemas/TaskAnnotationsUpdateRequest' + $ref: '#/components/schemas/LabeledDataRequest' multipart/form-data: schema: - $ref: '#/components/schemas/TaskAnnotationsUpdateRequest' + $ref: '#/components/schemas/LabeledDataRequest' security: - sessionAuth: [] csrfAuth: [] @@ -5307,12 +5203,8 @@ paths: - signatureAuth: [] - basicAuth: [] responses: - '201': - description: Import has finished - '202': - description: Import is in progress - '405': - description: Format is not available + '200': + description: Annotations have been replaced patch: operationId: tasks_partial_update_annotations summary: Update task annotations @@ -5422,7 +5314,7 @@ paths: content: application/vnd.cvat+json: schema: - $ref: '#/components/schemas/RqId' + $ref: '#/components/schemas/RequestId' description: Creating a backup file has been started '400': description: Wrong query parameters were passed @@ -5703,7 +5595,7 @@ paths: content: application/vnd.cvat+json: schema: - $ref: '#/components/schemas/RqId' + $ref: '#/components/schemas/RequestId' description: Exporting has been started '405': description: Format is not available @@ -5855,14 +5747,14 @@ paths: The backup import process is as follows: - The first request POST /api/tasks/backup will initiate file upload and will create - the rq job on the server in which the process of a task creating from an uploaded backup - will be carried out. + The first request POST /api/tasks/backup creates a background job on the server + in which the process of a task creating from an uploaded backup is carried out. + + To check the status of the import process, use GET /api/requests/rq_id, + where rq_id is request ID obtained from the response of the previous request. - After initiating the backup upload, you will receive an rq_id parameter. - Make sure to include this parameter as a query parameter in your subsequent requests - to track the status of the task creation. - Once the task has been successfully created, the server will return the id of the newly created task. + Once the import completes successfully, the response will contain the ID + of the newly created task in the result_id field. summary: Recreate a task from a backup parameters: - in: header @@ -5899,21 +5791,16 @@ paths: schema: type: integer description: Organization identifier - - in: query - name: rq_id - schema: - type: string - description: rq id tags: - tasks requestBody: content: application/json: schema: - $ref: '#/components/schemas/TaskFileRequest' + $ref: '#/components/schemas/UploadedZipFileRequest' multipart/form-data: schema: - $ref: '#/components/schemas/TaskFileRequest' + $ref: '#/components/schemas/UploadedZipFileRequest' security: - sessionAuth: [] csrfAuth: [] @@ -5921,14 +5808,12 @@ paths: - signatureAuth: [] - basicAuth: [] responses: - '201': - description: The task has been imported '202': content: application/vnd.cvat+json: schema: - $ref: '#/components/schemas/RqId' - description: Importing a backup file has been started + $ref: '#/components/schemas/RequestId' + description: Import of a backup file has started /api/users: get: operationId: users_list @@ -6599,14 +6484,6 @@ components: * `mismatching_attributes` - MISMATCHING_ATTRIBUTES * `mismatching_groups` - MISMATCHING_GROUPS * `covered_annotation` - COVERED_ANNOTATION - AnnotationFileRequest: - type: object - properties: - annotation_file: - type: string - format: binary - required: - - annotation_file AnnotationGuideRead: type: object properties: @@ -6766,10 +6643,6 @@ components: required: - spec_id - value - BackupWriteRequest: - oneOf: - - $ref: '#/components/schemas/ProjectFileRequest' - nullable: true BasicOrganization: type: object properties: @@ -7294,17 +7167,9 @@ components: - image_quality DataResponse: oneOf: - - $ref: '#/components/schemas/RqId' + - $ref: '#/components/schemas/RequestId' - type: string format: binary - DatasetFileRequest: - type: object - properties: - dataset_file: - type: string - format: binary - required: - - dataset_file DatasetFormat: type: object properties: @@ -7342,10 +7207,6 @@ components: required: - exporters - importers - DatasetWriteRequest: - oneOf: - - $ref: '#/components/schemas/DatasetFileRequest' - nullable: true Event: type: object properties: @@ -7813,10 +7674,6 @@ components: count: type: integer readOnly: true - JobAnnotationsUpdateRequest: - oneOf: - - $ref: '#/components/schemas/LabeledDataRequest' - - $ref: '#/components/schemas/AnnotationFileRequest' JobRead: type: object properties: @@ -9596,14 +9453,6 @@ components: description: |- * `image_size` - IMAGE_SIZE * `group_bbox_size` - GROUP_BBOX_SIZE - ProjectFileRequest: - type: object - properties: - project_file: - type: string - format: binary - required: - - project_file ProjectRead: type: object properties: @@ -10076,7 +9925,7 @@ components: type: type: string target: - $ref: '#/components/schemas/RequestDataOperationTargetEnum' + type: string project_id: type: integer nullable: true @@ -10095,16 +9944,14 @@ components: required: - target - type - RequestDataOperationTargetEnum: - enum: - - project - - task - - job - type: string - description: |- - * `project` - Project - * `task` - Task - * `job` - Job + RequestId: + type: object + properties: + rq_id: + type: string + description: Request id + required: + - rq_id RequestStatus: enum: - queued @@ -10135,14 +9982,6 @@ components: * `supervisor` - Supervisor * `maintainer` - Maintainer * `owner` - Owner - RqId: - type: object - properties: - rq_id: - type: string - description: Request id - required: - - rq_id RqStatus: type: object properties: @@ -10523,23 +10362,6 @@ components: * `accuracy` - ACCURACY * `precision` - PRECISION * `recall` - RECALL - TaskAnnotationsUpdateRequest: - oneOf: - - $ref: '#/components/schemas/LabeledDataRequest' - - $ref: '#/components/schemas/AnnotationFileRequest' - nullable: true - TaskAnnotationsWriteRequest: - oneOf: - - $ref: '#/components/schemas/AnnotationFileRequest' - nullable: true - TaskFileRequest: - type: object - properties: - task_file: - type: string - format: binary - required: - - task_file TaskRead: type: object properties: @@ -10851,6 +10673,22 @@ components: required: - frame - type + UploadedFileRequest: + type: object + properties: + file: + type: string + format: binary + required: + - file + UploadedZipFileRequest: + type: object + properties: + file: + type: string + format: binary + required: + - file User: type: object properties: From 69b9d30373c931c475ad76f3d480cab89e6ecbc1 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Thu, 27 Mar 2025 13:11:10 +0100 Subject: [PATCH 022/103] Cleanup unit tests --- .../tests/test_rest_api_formats.py | 241 +++++++++--------- cvat/apps/engine/tests/test_rest_api.py | 108 ++------ cvat/apps/engine/tests/test_rest_api_3D.py | 42 ++- cvat/apps/engine/tests/utils.py | 83 +++++- 4 files changed, 245 insertions(+), 229 deletions(-) diff --git a/cvat/apps/dataset_manager/tests/test_rest_api_formats.py b/cvat/apps/dataset_manager/tests/test_rest_api_formats.py index 989541d0b27a..c71456021188 100644 --- a/cvat/apps/dataset_manager/tests/test_rest_api_formats.py +++ b/cvat/apps/dataset_manager/tests/test_rest_api_formats.py @@ -40,7 +40,12 @@ from cvat.apps.dataset_manager.util import get_export_cache_lock from cvat.apps.dataset_manager.views import export from cvat.apps.engine.models import Task -from cvat.apps.engine.tests.utils import ExportApiTestBase, ForceLogin, get_paginated_collection +from cvat.apps.engine.tests.utils import ( + ExportApiTestBase, + ForceLogin, + ImportApiTestBase, + get_paginated_collection, +) projects_path = osp.join(osp.dirname(__file__), 'assets', 'projects.json') with open(projects_path) as file: @@ -139,7 +144,7 @@ def compare_datasets(expected: Dataset, actual: Dataset): ) -class _DbTestBase(ExportApiTestBase): +class _DbTestBase(ExportApiTestBase, ImportApiTestBase): @classmethod def setUpTestData(cls): cls.create_db_users() @@ -305,12 +310,6 @@ def _create_annotations_in_job(self, task, job_id, name_ann, key_get_values): response = self._put_api_v2_job_id_annotations(job_id, tmp_annotations) self.assertEqual(response.status_code, status.HTTP_200_OK, msg=response.json()) - def _upload_file(self, url, data, user): - response = self._put_request(url, user, data={"annotation_file": data}) - self.assertEqual(response.status_code, status.HTTP_202_ACCEPTED) - response = self._put_request(url, user) - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - def _check_downloaded_file(self, file_name): if not osp.exists(file_name): raise FileNotFoundError(f"File '{file_name}' was not downloaded") @@ -318,15 +317,6 @@ def _check_downloaded_file(self, file_name): def _generate_url_remove_tasks_annotations(self, task_id): return f"/api/tasks/{task_id}/annotations" - def _generate_url_upload_tasks_annotations(self, task_id, upload_format_name): - return f"/api/tasks/{task_id}/annotations?format={upload_format_name}" - - def _generate_url_upload_job_annotations(self, job_id, upload_format_name): - return f"/api/jobs/{job_id}/annotations?format={upload_format_name}" - - def _generate_url_upload_project_dataset(self, project_id, format_name): - return f"/api/projects/{project_id}/dataset?format={format_name}" - def _remove_annotations(self, url, user): response = self._delete_request(url, user) self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) @@ -351,12 +341,9 @@ def test_api_v2_dump_and_upload_annotations_with_objects_type_is_shape(self): dump_formats = dm.views.get_export_formats() upload_formats = dm.views.get_import_formats() expected = { - self.admin: {'name': 'admin', 'code': status.HTTP_200_OK, 'create code': status.HTTP_201_CREATED, - 'accept code': status.HTTP_202_ACCEPTED,'file_exists': True, 'annotation_loaded': True}, - self.user: {'name': 'user', 'code': status.HTTP_200_OK, 'create code': status.HTTP_201_CREATED, - 'accept code': status.HTTP_202_ACCEPTED, 'file_exists': True, 'annotation_loaded': True}, - None: {'name': 'none', 'code': status.HTTP_401_UNAUTHORIZED, 'create code': status.HTTP_401_UNAUTHORIZED, - 'accept code': status.HTTP_401_UNAUTHORIZED, 'file_exists': False, 'annotation_loaded': False}, + self.admin: {'name': 'admin', 'file_exists': True, 'annotation_loaded': True}, + self.user: {'name': 'user', 'file_exists': True, 'annotation_loaded': True}, + None: {'name': 'none', 'file_exists': False, 'annotation_loaded': False}, } with TestDir() as test_dir: @@ -432,13 +419,15 @@ def test_api_v2_dump_and_upload_annotations_with_objects_type_is_shape(self): else: task = self._create_task(tasks["main"], images) task_id = task["id"] - url = self._generate_url_upload_tasks_annotations(task_id, upload_format_name) + + expected_4xx_status_code = None if user else status.HTTP_401_UNAUTHORIZED with open(file_zip_name, 'rb') as binary_file: - response = self._put_request(url, user, data={"annotation_file": binary_file}) - self.assertEqual(response.status_code, edata['accept code']) - response = self._put_request(url, user) - self.assertEqual(response.status_code, edata['create code']) + self._import_task_annotations( + user, task_id, binary_file, + query_params={"format": upload_format_name}, + expected_4xx_status_code=expected_4xx_status_code + ) def test_api_v2_dump_annotations_with_objects_type_is_track(self): test_name = self._testMethodName @@ -446,12 +435,9 @@ def test_api_v2_dump_annotations_with_objects_type_is_track(self): dump_formats = dm.views.get_export_formats() upload_formats = dm.views.get_import_formats() expected = { - self.admin: {'name': 'admin', 'code': status.HTTP_200_OK, 'create code': status.HTTP_201_CREATED, - 'accept code': status.HTTP_202_ACCEPTED, 'file_exists': True, 'annotation_loaded': True}, - self.user: {'name': 'user', 'code': status.HTTP_200_OK, 'create code': status.HTTP_201_CREATED, - 'accept code': status.HTTP_202_ACCEPTED, 'file_exists': True, 'annotation_loaded': True}, - None: {'name': 'none', 'code': status.HTTP_401_UNAUTHORIZED, 'create code': status.HTTP_401_UNAUTHORIZED, - 'accept code': status.HTTP_401_UNAUTHORIZED, 'file_exists': False, 'annotation_loaded': False}, + self.admin: {'name': 'admin', 'file_exists': True, 'annotation_loaded': True}, + self.user: {'name': 'user', 'file_exists': True, 'annotation_loaded': True}, + None: {'name': 'none', 'file_exists': False, 'annotation_loaded': False}, } with TestDir() as test_dir: @@ -524,24 +510,21 @@ def test_api_v2_dump_annotations_with_objects_type_is_track(self): else: task = self._create_task(tasks["main"], video) task_id = task["id"] - url = self._generate_url_upload_tasks_annotations(task_id, upload_format_name) with open(file_zip_name, 'rb') as binary_file: - response = self._put_request(url, user, data={"annotation_file": binary_file}) - self.assertEqual(response.status_code, edata['accept code']) - response = self._put_request(url, user) - self.assertEqual(response.status_code, edata['create code']) + self._import_task_annotations( + user, task_id, binary_file, + query_params={"format": upload_format_name}, + expected_4xx_status_code=None if user else status.HTTP_401_UNAUTHORIZED + ) def test_api_v2_dump_tag_annotations(self): dump_format_name = "CVAT for images 1.1" test_cases = ['all', 'first'] expected = { - self.admin: {'name': 'admin', 'code': status.HTTP_200_OK, 'create code': status.HTTP_201_CREATED, - 'accept code': status.HTTP_202_ACCEPTED, 'file_exists': True}, - self.user: {'name': 'user', 'code': status.HTTP_200_OK, 'create code': status.HTTP_201_CREATED, - 'accept code': status.HTTP_202_ACCEPTED, 'file_exists': True}, - None: {'name': 'none', 'code': status.HTTP_401_UNAUTHORIZED, 'create code': status.HTTP_401_UNAUTHORIZED, - 'accept code': status.HTTP_401_UNAUTHORIZED, 'file_exists': False}, + self.admin: {'name': 'admin', 'file_exists': True}, + self.user: {'name': 'user', 'file_exists': True}, + None: {'name': 'none', 'file_exists': False}, } export_params = { "format": dump_format_name, @@ -599,18 +582,24 @@ def test_api_v2_dump_and_upload_annotations_with_objects_are_different_images(se url = self._generate_url_remove_tasks_annotations(task_id) self._remove_annotations(url, self.admin) + if upload_type == "task": - url_upload = self._generate_url_upload_tasks_annotations(task_id, "CVAT 1.1") + with open(file_zip_name, 'rb') as binary_file: + self._import_task_annotations( + self.admin, task_id, binary_file, + query_params={"format": "CVAT 1.1"}, + ) else: jobs = self._get_jobs(task_id) - url_upload = self._generate_url_upload_job_annotations(jobs[0]["id"], "CVAT 1.1") - - with open(file_zip_name, 'rb') as binary_file: - self._upload_file(url_upload, binary_file, self.admin) + with open(file_zip_name, 'rb') as binary_file: + self._import_job_annotations( + self.admin, jobs[0]["id"], binary_file, + query_params={"format": "CVAT 1.1"}, + ) - response = self._get_request(f"/api/tasks/{task_id}/annotations", self.admin) - self.assertEqual(len(response.data["shapes"]), 2) - self.assertEqual(len(response.data["tracks"]), 0) + response = self._get_request(f"/api/tasks/{task_id}/annotations", self.admin) + self.assertEqual(len(response.data["shapes"]), 2) + self.assertEqual(len(response.data["tracks"]), 0) def test_api_v2_dump_and_upload_annotations_with_objects_are_different_video(self): test_name = self._testMethodName @@ -641,18 +630,23 @@ def test_api_v2_dump_and_upload_annotations_with_objects_are_different_video(sel url = self._generate_url_remove_tasks_annotations(task_id) self._remove_annotations(url, self.admin) if upload_type == "task": - url_upload = self._generate_url_upload_tasks_annotations(task_id, "CVAT 1.1") + with open(file_zip_name, 'rb') as binary_file: + self._import_task_annotations( + self.admin, task_id, binary_file, + query_params={"format": "CVAT 1.1"}, + ) else: jobs = self._get_jobs(task_id) - url_upload = self._generate_url_upload_job_annotations(jobs[0]["id"], "CVAT 1.1") - - with open(file_zip_name, 'rb') as binary_file: - self._upload_file(url_upload, binary_file, self.admin) - self.assertEqual(osp.exists(file_zip_name), True) + with open(file_zip_name, 'rb') as binary_file: + self._import_job_annotations( + self.admin, jobs[0]["id"], binary_file, + query_params={"format": "CVAT 1.1"}, + ) - response = self._get_request(f"/api/tasks/{task_id}/annotations", self.admin) - self.assertEqual(len(response.data["shapes"]), 0) - self.assertEqual(len(response.data["tracks"]), 2) + self.assertEqual(osp.exists(file_zip_name), True) + response = self._get_request(f"/api/tasks/{task_id}/annotations", self.admin) + self.assertEqual(len(response.data["shapes"]), 0) + self.assertEqual(len(response.data["tracks"]), 2) def test_api_v2_dump_and_upload_with_objects_type_is_track_and_outside_property(self): test_name = self._testMethodName @@ -671,8 +665,10 @@ def test_api_v2_dump_and_upload_with_objects_type_is_track_and_outside_property( self.assertEqual(osp.exists(file_zip_name), True) with open(file_zip_name, 'rb') as binary_file: - url = self._generate_url_upload_tasks_annotations(task_id, "CVAT 1.1") - self._upload_file(url, binary_file, self.admin) + self._import_task_annotations( + self.admin, task_id, binary_file, + query_params={"format": "CVAT 1.1"}, + ) def test_api_v2_dump_and_upload_with_objects_type_is_track_and_keyframe_property(self): test_name = self._testMethodName @@ -693,8 +689,10 @@ def test_api_v2_dump_and_upload_with_objects_type_is_track_and_keyframe_property self.assertEqual(osp.exists(file_zip_name), True) with open(file_zip_name, 'rb') as binary_file: - url = self._generate_url_upload_tasks_annotations(task_id, "CVAT 1.1") - self._upload_file(url, binary_file, self.admin) + self._import_task_annotations( + self.admin, task_id, binary_file, + query_params={"format": "CVAT 1.1"}, + ) def test_api_v2_dump_upload_annotations_from_several_jobs(self): test_name = self._testMethodName @@ -718,9 +716,11 @@ def test_api_v2_dump_upload_annotations_from_several_jobs(self): # remove annotations url = self._generate_url_remove_tasks_annotations(task_id) self._remove_annotations(url, self.admin) - url = self._generate_url_upload_tasks_annotations(task_id, "CVAT 1.1") with open(file_zip_name, 'rb') as binary_file: - self._upload_file(url, binary_file, self.admin) + self._import_task_annotations( + self.admin, task_id, binary_file, + query_params={"format": "CVAT 1.1"}, + ) def test_api_v2_dump_annotations_from_several_jobs(self): test_name = self._testMethodName @@ -752,21 +752,20 @@ def test_api_v2_dump_annotations_from_several_jobs(self): # remove annotations url = self._generate_url_remove_tasks_annotations(task_id) self._remove_annotations(url, self.admin) - url = self._generate_url_upload_tasks_annotations(task_id, "CVAT 1.1") with open(file_zip_name, 'rb') as binary_file: - self._upload_file(url, binary_file, self.admin) + self._import_task_annotations( + self.admin, task_id, binary_file, + query_params={"format": "CVAT 1.1"}, + ) def test_api_v2_export_dataset(self): test_name = self._testMethodName dump_formats = dm.views.get_export_formats() expected = { - self.admin: {'name': 'admin', 'code': status.HTTP_200_OK, 'create code': status.HTTP_201_CREATED, - 'accept code': status.HTTP_202_ACCEPTED, 'file_exists': True}, - self.user: {'name': 'user', 'code': status.HTTP_200_OK, 'create code': status.HTTP_201_CREATED, - 'accept code': status.HTTP_202_ACCEPTED, 'file_exists': True}, - None: {'name': 'none', 'code': status.HTTP_401_UNAUTHORIZED, 'create code': status.HTTP_401_UNAUTHORIZED, - 'accept code': status.HTTP_401_UNAUTHORIZED, 'file_exists': False}, + self.admin: {'name': 'admin', 'file_exists': True}, + self.user: {'name': 'user', 'file_exists': True}, + None: {'name': 'none', 'file_exists': False}, } with TestDir() as test_dir: @@ -842,14 +841,11 @@ def test_api_v2_dump_empty_frames(self): task = self._create_task(tasks["no attributes"], images) task_id = task["id"] - url = self._generate_url_upload_tasks_annotations(task_id, upload_format_name) - with open(file_zip_name, 'rb') as binary_file: - response = self._put_request(url, self.admin, data={"annotation_file": binary_file}) - self.assertEqual(response.status_code, status.HTTP_202_ACCEPTED) - response = self._put_request(url, self.admin) - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertIsNone(response.data) + self._import_task_annotations( + self.admin, task_id, binary_file, + query_params={"format": upload_format_name}, + ) def test_api_v2_rewriting_annotations(self): test_name = self._testMethodName @@ -905,10 +901,12 @@ def test_api_v2_rewriting_annotations(self): dump_format_name = "CVAT 1.1" elif dump_format_name == "Ultralytics YOLO Detection Track 1.0": dump_format_name = "Ultralytics YOLO Detection 1.0" - url = self._generate_url_upload_tasks_annotations(task_id, dump_format_name) with open(file_zip_name, 'rb') as binary_file: - self._upload_file(url, binary_file, self.admin) + self._import_task_annotations( + self.admin, task_id, binary_file, + query_params={"format": dump_format_name}, + ) task_ann = TaskAnnotation(task_id) task_ann.init_from_db() @@ -946,9 +944,11 @@ def test_api_v2_tasks_annotations_dump_and_upload_many_jobs_with_datumaro(self): self._remove_annotations(url, self.admin) # upload annotations - url = self._generate_url_upload_tasks_annotations(task_id, upload_format_name) with open(file_zip_name, 'rb') as binary_file: - self._upload_file(url, binary_file, self.admin) + self._import_task_annotations( + self.admin, task_id, binary_file, + query_params={"format": upload_format_name}, + ) # equals annotations data_from_task_after_upload = self._get_data_from_task(task_id, include_images) @@ -1022,9 +1022,12 @@ def test_api_v2_tasks_annotations_dump_and_upload_with_datumaro(self): upload_format_name = 'Ultralytics YOLO Detection 1.0' else: upload_format_name = dump_format_name - url = self._generate_url_upload_tasks_annotations(task_id, upload_format_name) + with open(file_zip_name, 'rb') as binary_file: - self._upload_file(url, binary_file, self.admin) + self._import_task_annotations( + self.admin, task_id, binary_file, + query_params={"format": upload_format_name}, + ) # equals annotations data_from_task_after_upload = self._get_data_from_task(task_id, include_images) @@ -1087,9 +1090,11 @@ def test_api_v2_check_widerface_with_all_attributes(self): self._remove_annotations(url, self.admin) # upload annotations - url = self._generate_url_upload_tasks_annotations(task_id, upload_format_name) with open(file_zip_name, 'rb') as binary_file: - self._upload_file(url, binary_file, self.admin) + self._import_task_annotations( + self.admin, task_id, binary_file, + query_params={"format": upload_format_name}, + ) # equals annotations data_from_task_after_upload = self._get_data_from_task(task_id, include_images) @@ -1123,9 +1128,11 @@ def test_api_v2_check_mot_with_shapes_only(self): self._remove_annotations(url, self.admin) # upload annotations - url = self._generate_url_upload_tasks_annotations(task_id, format_name) with open(file_zip_name, 'rb') as binary_file: - self._upload_file(url, binary_file, self.admin) + self._import_task_annotations( + self.admin, task_id, binary_file, + query_params={"format": format_name}, + ) # equals annotations data_from_task_after_upload = self._get_data_from_task(task_id, include_images) @@ -1160,9 +1167,11 @@ def test_api_v2_check_attribute_import_in_tracks(self): self._remove_annotations(url, self.admin) # upload annotations - url = self._generate_url_upload_tasks_annotations(task_id, upload_format_name) with open(file_zip_name, 'rb') as binary_file: - self._upload_file(url, binary_file, self.admin) + self._import_task_annotations( + self.admin, task_id, binary_file, + query_params={"format": upload_format_name}, + ) # equals annotations data_from_task_after_upload = self._get_data_from_task(task_id, include_images) @@ -1206,9 +1215,11 @@ def test_api_v2_check_skeleton_tracks_with_missing_shapes(self): self._remove_annotations(url, self.admin) # upload annotations - url = self._generate_url_upload_tasks_annotations(task_id, format_name) with open(file_zip_name, 'rb') as binary_file: - self._upload_file(url, binary_file, self.admin) + self._import_task_annotations( + self.admin, task_id, binary_file, + query_params={"format": format_name}, + ) class ExportBehaviorTest(_DbTestBase): @@ -1584,10 +1595,10 @@ def patched_osp_exists(path: str): with ( patch( - "cvat.apps.engine.background.get_export_cache_lock", + "cvat.apps.redis_handler.background.get_export_cache_lock", new=self.patched_get_export_cache_lock, ), - patch("cvat.apps.engine.background.osp.exists") as mock_osp_exists, + patch("cvat.apps.redis_handler.background.osp.exists") as mock_osp_exists, TemporaryDirectory() as temp_dir, ): mock_osp_exists.side_effect = patched_osp_exists @@ -2030,12 +2041,9 @@ def test_api_v2_export_import_dataset(self): upload_formats = dm.views.get_import_formats() expected = { - self.admin: {'name': 'admin', 'code': status.HTTP_200_OK, 'create code': status.HTTP_201_CREATED, - 'accept code': status.HTTP_202_ACCEPTED, 'file_exists': True}, - self.user: {'name': 'user', 'code': status.HTTP_200_OK, 'create code': status.HTTP_201_CREATED, - 'accept code': status.HTTP_202_ACCEPTED, 'file_exists': True}, - None: {'name': 'none', 'code': status.HTTP_401_UNAUTHORIZED, 'create code': status.HTTP_401_UNAUTHORIZED, - 'accept code': status.HTTP_401_UNAUTHORIZED, 'file_exists': False}, + self.admin: {'name': 'admin', 'file_exists': True}, + self.user: {'name': 'user', 'file_exists': True}, + None: {'name': 'none', 'file_exists': False}, } with TestDir() as test_dir: @@ -2095,24 +2103,22 @@ def test_api_v2_export_import_dataset(self): project['labels'] = tasks[upload_format_name]['labels'] project = self._create_project(project) file_zip_name = osp.join(test_dir, f"{test_name}_{edata['name']}_{upload_format_name}.zip") - url = self._generate_url_upload_project_dataset(project['id'], upload_format_name) if osp.exists(file_zip_name): with open(file_zip_name, 'rb') as binary_file: - response = self._post_request(url, user, data={"dataset_file": binary_file}) - self.assertEqual(response.status_code, edata['accept code']) + self._import_project_dataset( + self.admin, project['id'], binary_file, + query_params={"format": upload_format_name}, + ) def test_api_v2_export_annotations(self): test_name = self._testMethodName dump_formats = dm.views.get_export_formats() expected = { - self.admin: {'name': 'admin', 'code': status.HTTP_200_OK, 'create code': status.HTTP_201_CREATED, - 'accept code': status.HTTP_202_ACCEPTED, 'file_exists': True}, - self.user: {'name': 'user', 'code': status.HTTP_200_OK, 'create code': status.HTTP_201_CREATED, - 'accept code': status.HTTP_202_ACCEPTED, 'file_exists': True}, - None: {'name': 'none', 'code': status.HTTP_401_UNAUTHORIZED, 'create code': status.HTTP_401_UNAUTHORIZED, - 'accept code': status.HTTP_401_UNAUTHORIZED, 'file_exists': False}, + self.admin: {'name': 'admin', 'file_exists': True}, + self.user: {'name': 'user', 'file_exists': True}, + None: {'name': 'none', 'file_exists': False}, } with TestDir() as test_dir: @@ -2183,11 +2189,12 @@ def test_api_v2_dump_upload_annotations_with_objects_type_is_track(self): # Upload annotations with objects type is track project = self._create_project(project_dict) - url = self._generate_url_upload_project_dataset(project["id"], upload_format_name) with open(file_zip_name, 'rb') as binary_file: - response = self._post_request(url, user, data={"dataset_file": binary_file}) - self.assertEqual(response.status_code, status.HTTP_202_ACCEPTED) + self._import_project_dataset( + user, project["id"], binary_file, + query_params={"format": upload_format_name}, + ) # equals annotations new_task = self._get_tasks(project["id"])[0] diff --git a/cvat/apps/engine/tests/test_rest_api.py b/cvat/apps/engine/tests/test_rest_api.py index 5b9e6fa6691c..e0224b2fa9a3 100644 --- a/cvat/apps/engine/tests/test_rest_api.py +++ b/cvat/apps/engine/tests/test_rest_api.py @@ -66,6 +66,7 @@ ApiTestBase, ExportApiTestBase, ForceLogin, + ImportApiTestBase, generate_image_file, generate_video_file, get_paginated_collection, @@ -1322,7 +1323,7 @@ def test_api_v2_projects_id_tasks_no_auth(self): self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED) -class ProjectBackupAPITestCase(ExportApiTestBase): +class ProjectBackupAPITestCase(ExportApiTestBase, ImportApiTestBase): @classmethod def setUpTestData(cls): create_db_users(cls) @@ -1620,12 +1621,6 @@ def _create_projects(cls): cls._create_tasks(db_project) cls.projects.append(db_project) - def _run_api_v2_projects_import(self, user, data): - with ForceLogin(user, self.client): - response = self.client.post('/api/projects/backup', data=data, format="multipart") - - return response - def _run_api_v2_projects_id(self, pid, user): with ForceLogin(user, self.client): response = self.client.get('/api/projects/{}'.format(pid), format="json") @@ -1654,18 +1649,13 @@ def _run_api_v2_projects_id_export_import(self, user): self.assertTrue(response.streaming) content = io.BytesIO(b"".join(response.streaming_content)) content.seek(0) + content.name = "file.zip" - uploaded_data = { - "project_file": content, - } - response = self._run_api_v2_projects_import(user, uploaded_data) - self.assertEqual(response.status_code, expected_4xx_status_code or status.HTTP_202_ACCEPTED) - if response.status_code == status.HTTP_202_ACCEPTED: - rq_id = response.data["rq_id"] - response = self._run_api_v2_projects_import(user, {"rq_id": rq_id}) - self.assertEqual(response.status_code, expected_4xx_status_code or status.HTTP_201_CREATED) + created_project_id = self._import_project_backup(user, content, expected_4xx_status_code=expected_4xx_status_code) + + if not expected_4xx_status_code: original_project = self._run_api_v2_projects_id(pid, user) - imported_project = self._run_api_v2_projects_id(response.data["id"], user) + imported_project = self._run_api_v2_projects_id(created_project_id, user) compare_objects( self=self, obj1=original_project, @@ -1882,7 +1872,7 @@ def test_api_v2_projects_remove_task_export(self): self._check_xml(pid, user, 3) -class ProjectImportExportAPITestCase(ExportApiTestBase): +class ProjectImportExportAPITestCase(ExportApiTestBase, ImportApiTestBase): def setUp(self) -> None: super().setUp() self.tasks = [] @@ -1999,16 +1989,6 @@ def _create_project(project_data): for data in project_data: _create_project(data) - def _run_api_v2_projects_id_dataset_import(self, pid, user, data, f): - with ForceLogin(user, self.client): - response = self.client.post("/api/projects/{}/dataset?format={}".format(pid, f), data=data, format="multipart") - return response - - def _run_api_v2_projects_id_dataset_import_status(self, pid, user, rq_id): - with ForceLogin(user, self.client): - response = self.client.get("/api/projects/{}/dataset?action=import_status&rq_id={}".format(pid, rq_id), format="json") - return response - def test_api_v2_projects_id_export_import(self): self._create_projects() self._create_tasks() @@ -2025,16 +2005,8 @@ def test_api_v2_projects_id_export_import(self): tmp_file.write(b"".join(response.streaming_content)) tmp_file.seek(0) - import_data = { - "dataset_file": tmp_file, - } - - response = self._run_api_v2_projects_id_dataset_import(pid_import, self.owner, import_data, "CVAT 1.1") - self.assertEqual(response.status_code, status.HTTP_202_ACCEPTED) + self._import_project_dataset(self.owner, pid_import, tmp_file, query_params={"format": "CVAT 1.1"}) - rq_id = response.data.get('rq_id') - response = self._run_api_v2_projects_id_dataset_import_status(pid_import, self.owner, rq_id) - self.assertEqual(response.status_code, status.HTTP_201_CREATED) def tearDown(self): for task in self.tasks: @@ -2797,7 +2769,7 @@ def test_api_v2_tasks_no_auth(self): } self._check_api_v2_tasks(None, data) -class TaskImportExportAPITestCase(ExportApiTestBase): +class TaskImportExportAPITestCase(ExportApiTestBase, ImportApiTestBase): def setUp(self): super().setUp() self.tasks = [] @@ -3110,11 +3082,6 @@ def _create_task(task_data, media_data): for media in self.media_data: _create_task(data, media) - def _run_api_v2_tasks_id_import(self, user, data): - with ForceLogin(user, self.client): - response = self.client.post('/api/tasks/backup', data=data, format="multipart") - - return response def _run_api_v2_tasks_id(self, tid, user): with ForceLogin(user, self.client): @@ -3140,18 +3107,13 @@ def _run_api_v2_tasks_id_export_import(self, user): self.assertTrue(response.streaming) content = io.BytesIO(b"".join(response.streaming_content)) content.seek(0) + content.name = "file.zip" + + created_task_id = self._import_task_backup(user, content, expected_4xx_status_code=expected_4xx_status_code) - uploaded_data = { - "task_file": content, - } - response = self._run_api_v2_tasks_id_import(user, uploaded_data) - self.assertEqual(response.status_code, expected_4xx_status_code or status.HTTP_202_ACCEPTED) if user is not self.somebody and user is not self.user and user is not self.annotator: - rq_id = response.data["rq_id"] - response = self._run_api_v2_tasks_id_import(user, {"rq_id": rq_id}) - self.assertEqual(response.status_code, expected_4xx_status_code or status.HTTP_201_CREATED) original_task = self._run_api_v2_tasks_id(tid, user) - imported_task = self._run_api_v2_tasks_id(response.data["id"], user) + imported_task = self._run_api_v2_tasks_id(created_task_id, user) compare_objects( self=self, obj1=original_task, @@ -5495,7 +5457,7 @@ def test_api_v2_jobs_id_annotations_somebody(self): def test_api_v2_jobs_id_annotations_no_auth(self): self._run_api_v2_jobs_id_annotations(self.user, self.user, None) -class TaskAnnotationAPITestCase(ExportApiTestBase, JobAnnotationAPITestCase): +class TaskAnnotationAPITestCase(ExportApiTestBase, ImportApiTestBase, JobAnnotationAPITestCase): def _put_api_v2_tasks_id_annotations(self, pk, user, data): with ForceLogin(user, self.client): response = self.client.put("/api/tasks/{}/annotations".format(pk), @@ -5524,16 +5486,6 @@ def _patch_api_v2_tasks_id_annotations(self, pk, user, action, data): return response - def _upload_api_v2_tasks_id_annotations(self, pk, user, data, query_params=""): - with ForceLogin(user, self.client): - response = self.client.put( - path="/api/tasks/{0}/annotations?{1}".format(pk, query_params), - data=data, - format="multipart", - ) - - return response - def _get_formats(self, user): with ForceLogin(user, self.client): response = self.client.get( @@ -6539,18 +6491,9 @@ def _get_initial_annotation(annotation_format): if not import_format: continue - uploaded_data = { - "annotation_file": content, - } - response = self._upload_api_v2_tasks_id_annotations( - task["id"], owner, uploaded_data, - "format={}".format(import_format)) - self.assertEqual(response.status_code, HTTP_202_ACCEPTED) - - response = self._upload_api_v2_tasks_id_annotations( - task["id"], owner, {}, - "format={}".format(import_format)) - self.assertEqual(response.status_code, HTTP_201_CREATED) + self._import_task_annotations( + owner, task["id"], content, query_params={"format": import_format} + ) # 7. check annotation if export_format in {"Segmentation mask 1.1", "MOTS PNG 1.0", @@ -6667,18 +6610,9 @@ def generate_coco_anno(): content = io.BytesIO(generate_coco_anno()) content.seek(0) - format_name = "COCO 1.0" - uploaded_data = { - "annotation_file": content, - } - response = self._upload_api_v2_tasks_id_annotations( - task["id"], user, uploaded_data, - "format={}".format(format_name)) - self.assertEqual(response.status_code, status.HTTP_202_ACCEPTED) - - response = self._upload_api_v2_tasks_id_annotations( - task["id"], user, {}, "format={}".format(format_name)) - self.assertEqual(response.status_code, status.HTTP_201_CREATED) + self._import_task_annotations( + user, task["id"], content, query_params={"format": "COCO 1.0"} + ) response = self._get_api_v2_tasks_id_annotations(task["id"], user) self.assertEqual(response.status_code, status.HTTP_200_OK) diff --git a/cvat/apps/engine/tests/test_rest_api_3D.py b/cvat/apps/engine/tests/test_rest_api_3D.py index 87333293dce9..fa6b2a66c16b 100644 --- a/cvat/apps/engine/tests/test_rest_api_3D.py +++ b/cvat/apps/engine/tests/test_rest_api_3D.py @@ -21,14 +21,19 @@ from cvat.apps.dataset_manager.task import TaskAnnotation from cvat.apps.dataset_manager.tests.utils import TestDir from cvat.apps.engine.media_extractors import ValidateDimension -from cvat.apps.engine.tests.utils import ExportApiTestBase, ForceLogin, get_paginated_collection +from cvat.apps.engine.tests.utils import ( + ExportApiTestBase, + ForceLogin, + ImportApiTestBase, + get_paginated_collection, +) CREATE_ACTION = "create" UPDATE_ACTION = "update" DELETE_ACTION = "delete" -class _DbTestBase(ExportApiTestBase): +class _DbTestBase(ExportApiTestBase, ImportApiTestBase): @classmethod def setUpTestData(cls): cls.create_db_users() @@ -136,18 +141,6 @@ def _get_jobs(self, task_id): ) return values - def _upload_file(self, url, data, user): - response = self._put_request(url, user, data={"annotation_file": data}) - self.assertEqual(response.status_code, status.HTTP_202_ACCEPTED) - response = self._put_request(url, user) - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - - def _generate_url_upload_tasks_annotations(self, task_id, upload_format_name): - return f"/api/tasks/{task_id}/annotations?format={upload_format_name}" - - def _generate_url_upload_job_annotations(self, job_id, upload_format_name): - return f"/api/jobs/{job_id}/annotations?format={upload_format_name}" - def _remove_annotations(self, tid): response = self._delete_request(f"/api/tasks/{tid}/annotations", self.admin) self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) @@ -499,10 +492,11 @@ def test_api_v2_dump_and_upload_annotation(self): self._remove_annotations(task_id) with self.subTest(format=f"{format_name}_upload"): file_name = osp.join(test_dir, f"{format_name}_admin.zip") - url = self._generate_url_upload_tasks_annotations(task_id, format_name) with open(file_name, 'rb') as binary_file: - self._upload_file(url, binary_file, self.admin) + self._import_task_annotations( + self.admin, task["id"], binary_file, query_params={"format": format_name} + ) task_ann = TaskAnnotation(task_id) task_ann.init_from_db() @@ -538,10 +532,10 @@ def test_api_v2_rewrite_annotation(self): self.assertEqual(response.status_code, status.HTTP_200_OK) file_name = osp.join(test_dir, f"{format_name}.zip") - url = self._generate_url_upload_tasks_annotations(task_id, format_name) - with open(file_name, 'rb') as binary_file: - self._upload_file(url, binary_file, self.admin) + self._import_task_annotations( + self.admin, task["id"], binary_file, query_params={"format": format_name} + ) task_ann = TaskAnnotation(task_id) task_ann.init_from_db() @@ -568,10 +562,11 @@ def test_api_v2_dump_and_upload_empty_annotation(self): self.assertTrue(osp.exists(file_name)) file_name = osp.join(test_dir, f"{format_name}.zip") - url = self._generate_url_upload_tasks_annotations(task_id, format_name) with open(file_name, 'rb') as binary_file: - self._upload_file(url, binary_file, self.admin) + self._import_task_annotations( + self.admin, task_id, binary_file, query_params={"format": format_name} + ) task_ann = TaskAnnotation(task_id) task_ann.init_from_db() @@ -631,10 +626,11 @@ def test_api_v2_upload_annotation_with_attributes(self): self._remove_annotations(task_id) with self.subTest(format=f"{format_name}_upload"): file_name = osp.join(test_dir, f"{format_name}.zip") - url = self._generate_url_upload_tasks_annotations(task_id, format_name) with open(file_name, 'rb') as binary_file: - self._upload_file(url, binary_file, self.admin) + self._import_task_annotations( + self.admin, task_id, binary_file, query_params={"format": format_name} + ) task_ann = TaskAnnotation(task_id) task_ann.init_from_db() diff --git a/cvat/apps/engine/tests/utils.py b/cvat/apps/engine/tests/utils.py index ba3b3e1f495c..e3400ef0ae4f 100644 --- a/cvat/apps/engine/tests/utils.py +++ b/cvat/apps/engine/tests/utils.py @@ -138,9 +138,9 @@ def _delete_request(self, path: str, user: str): response = self.client.delete(path) return response - def _post_request(self, path: str, user: str, *, data: dict[str, Any] | None = None): + def _post_request(self, path: str, user: str, *, data: dict[str, Any] | None = None, **kwargs): with ForceLogin(user, self.client): - response = self.client.post(path, data=data) + response = self.client.post(path, data=data, **kwargs) return response def _put_request(self, url: str, user: str, *, data: dict[str, Any] | None = None): @@ -167,6 +167,85 @@ def _check_request_status( def _query_params_to_str(self, **params: dict[str, Any]) -> str: return "?" + "&".join([f"{k}={quote(str(v))}"for k, v in params.items()]) +class ImportApiTestBase(ApiTestBase): + def _import( + self, + user: str, + api_path: str, + file_content: BytesIO, + *, + query_params: dict[str, Any] | None = None, + expected_4xx_status_code: int | None = None, + ): + if query_params: + api_path += self._query_params_to_str(**query_params) + + response = self._post_request( + api_path, user, + data={"file": file_content}, + format="multipart", + ) + self.assertEqual(response.status_code, expected_4xx_status_code or status.HTTP_202_ACCEPTED) + + if not expected_4xx_status_code: + rq_id = response.json().get("rq_id") + assert rq_id, "The rq_id param was not found in the server response" + response = self._check_request_status(user, rq_id) + + return response + + def _import_project_dataset( + self, user: str, projetc_id: int, file_content: BytesIO, query_params: str = None, + expected_4xx_status_code: int | None = None + ): + return self._import( + user, f"/api/projects/{projetc_id}/dataset", file_content, + query_params=query_params, expected_4xx_status_code=expected_4xx_status_code + ) + + def _import_task_annotations( + self, user: str, task_id: int, file_content: BytesIO, query_params: str = None, + expected_4xx_status_code: int | None = None + ): + return self._import( + user, f"/api/tasks/{task_id}/annotations", file_content, + query_params=query_params, expected_4xx_status_code=expected_4xx_status_code + ) + + def _import_job_annotations( + self, user: str, job_id: int, file_content: BytesIO, query_params: str = None, + expected_4xx_status_code: int | None = None + ): + return self._import( + user, f"/api/jobs/{job_id}/annotations", file_content, + query_params=query_params, expected_4xx_status_code=expected_4xx_status_code + ) + + def _import_project_backup( + self, user: str, file_content: BytesIO, query_params: str = None, + expected_4xx_status_code: int | None = None + ) -> int | None: + response = self._import( + user, f"/api/projects/backup", file_content, + query_params=query_params, expected_4xx_status_code=expected_4xx_status_code + ) + if expected_4xx_status_code: + return None + + return response.json()["result_id"] + + def _import_task_backup( + self, user: str, file_content: BytesIO, query_params: str = None, + expected_4xx_status_code: int | None = None + ) -> int | None: + response = self._import( + user, f"/api/tasks/backup", file_content, + query_params=query_params, expected_4xx_status_code=expected_4xx_status_code + ) + if expected_4xx_status_code: + return None + + return response.json()["result_id"] class ExportApiTestBase(ApiTestBase): def _export( From d97a677b4288b97df8b1e5c73719ef1db10c5065 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Thu, 27 Mar 2025 13:30:28 +0100 Subject: [PATCH 023/103] [events] Add deprecation response header --- cvat/apps/engine/background.py | 6 ++++-- cvat/apps/engine/serializers.py | 1 + cvat/apps/events/export.py | 17 ++++++++++------- cvat/apps/redis_handler/background.py | 3 +++ tests/python/rest_api/test_projects.py | 14 +++++++------- 5 files changed, 25 insertions(+), 16 deletions(-) diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index 1cbcdd763cd7..9731e35fe568 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -280,6 +280,8 @@ def location(self) -> Location: def to_dict(self): return dataclass_asdict(self) + import_args: ImportArgs | None = attrs.field(init=False) + def init_request_args(self): filename = self.request.query_params.get("filename") file_path = (self.tmp_dir / filename) if filename else None @@ -293,7 +295,7 @@ def init_request_args(self): except ValueError as ex: raise ValidationError(str(ex)) from ex - self.import_args = self.ImportArgs( + self.import_args = ResourceImporter.ImportArgs( location_config=location_config, file_path=file_path, ) @@ -332,7 +334,7 @@ def _handle_cloud_storage_file_upload(self): def _handle_non_tus_file_upload(self): file_serializer = self.upload_serializer_class(data=self.request.data) file_serializer.is_valid(raise_exception=True) - payload_file = file_serializer.validated_data[file_serializer.file.field_name] + payload_file = file_serializer.validated_data["file"] with NamedTemporaryFile(prefix="cvat_", dir=TmpDirManager.TMP_ROOT, delete=False) as tf: self.import_args.file_path = tf.name diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 8559e8f00159..8218bc955bb9 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -2939,6 +2939,7 @@ class UploadedFileSerializer(serializers.Serializer): file = serializers.FileField() class UploadedZipFileSerializer(UploadedFileSerializer): + # probably there is no need in such validation def validate_file(self, value): if os.path.splitext(value.name)[1] != '.zip': raise serializers.ValidationError('A file should be zip archive') diff --git a/cvat/apps/events/export.py b/cvat/apps/events/export.py index 8239dddbfc18..f822d23fc3d4 100644 --- a/cvat/apps/events/export.py +++ b/cvat/apps/events/export.py @@ -5,7 +5,6 @@ import csv import os import uuid -from contextlib import suppress from datetime import datetime, timedelta from pathlib import Path @@ -15,7 +14,6 @@ from django.conf import settings from django.utils import timezone from rest_framework import serializers, status -from rest_framework.exceptions import MethodNotAllowed from rest_framework.response import Response from rest_framework.reverse import reverse from rq import get_current_job @@ -191,6 +189,8 @@ def export(request: ExtendedRequest): response_data = { "query_id": manager.query_id, } + deprecation_timestamp = int(datetime(2025, 3, 17, tzinfo=timezone.utc).timestamp()) + response_headers = {"Deprecation": f"@{deprecation_timestamp}"} rq_job = queue.fetch_job(request_id) @@ -205,18 +205,21 @@ def export(request: ExtendedRequest): return sendfile(request, file_path, attachment=True, attachment_filename=filename) else: if os.path.exists(file_path): - return Response(status=status.HTTP_201_CREATED) + return Response(status=status.HTTP_201_CREATED, headers=response_headers) elif rq_job.is_failed: rq_job_meta = RQMetaWithFailureInfo.for_job(rq_job) exc_info = rq_job_meta.formatted_exception or str(rq_job.exc_info) rq_job.delete() - return Response(exc_info, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + return Response( + exc_info, status=status.HTTP_500_INTERNAL_SERVER_ERROR, headers=response_headers + ) else: - return Response(data=response_data, status=status.HTTP_202_ACCEPTED) + return Response( + data=response_data, status=status.HTTP_202_ACCEPTED, headers=response_headers + ) manager.init_request_args() - with suppress(MethodNotAllowed): - manager.validate_request() + # request validation is missed here since exporting to a cloud_storage is disabled manager.init_callback_with_params() manager.setup_new_job(queue, request_id) diff --git a/cvat/apps/redis_handler/background.py b/cvat/apps/redis_handler/background.py index c86d432149b9..6dfcf3653eb1 100644 --- a/cvat/apps/redis_handler/background.py +++ b/cvat/apps/redis_handler/background.py @@ -201,6 +201,7 @@ def process(self) -> Response: return self.get_response(request_id) +@attrs.define(kw_only=True) class AbstractExporter(AbstractRequestManager): QUEUE_NAME = settings.CVAT_QUEUES.EXPORT_DATA.value @@ -226,6 +227,8 @@ def location(self) -> Location: def to_dict(self): return dataclass_asdict(self) + export_args: ExportArgs | None = attrs.field(init=False) + @abstractmethod def get_result_filename(self) -> str: ... diff --git a/tests/python/rest_api/test_projects.py b/tests/python/rest_api/test_projects.py index d049f46a8ef1..7245fee8e449 100644 --- a/tests/python/rest_api/test_projects.py +++ b/tests/python/rest_api/test_projects.py @@ -390,7 +390,7 @@ def test_admin_can_get_project_backup_and_create_project_by_backup(self, admin_u tmp_file.name = "dataset.zip" import_data = { - "project_file": tmp_file, + "file": tmp_file, } with make_api_client(admin_user) as api_client: @@ -671,7 +671,7 @@ def test_can_import_dataset_in_org(self, admin_user: str): tmp_file.name = "dataset.zip" import_data = { - "dataset_file": tmp_file, + "file": tmp_file, } self._test_import_project(admin_user, project_id, "CVAT 1.1", import_data) @@ -713,7 +713,7 @@ def test_can_export_and_import_dataset_with_skeletons( tmp_file = io.BytesIO(dataset) tmp_file.name = "dataset.zip" import_data = { - "dataset_file": tmp_file, + "file": tmp_file, } self._test_import_project(admin_user, project_id, import_format, import_data) @@ -736,7 +736,7 @@ def test_can_import_export_dataset_with_some_format(self, format_name: str): tmp_file.name = "dataset.zip" import_data = { - "dataset_file": tmp_file, + "file": tmp_file, } self._test_import_project(username, project_id, format_name, import_data) @@ -802,7 +802,7 @@ def test_can_import_export_annotations_with_rotation(self): tmp_file.name = "dataset.zip" import_data = { - "dataset_file": tmp_file, + "file": tmp_file, } self._test_import_project(username, project_id, "CVAT 1.1", import_data) @@ -916,7 +916,7 @@ def test_can_export_and_import_dataset_after_deleting_related_storage( with io.BytesIO(dataset) as tmp_file: tmp_file.name = "dataset.zip" import_data = { - "dataset_file": tmp_file, + "file": tmp_file, } self._test_import_project(admin_user, project_id, "CVAT 1.1", import_data) @@ -972,7 +972,7 @@ def _export_task(task_id: int, format_name: str) -> io.BytesIO: ) ) - import_data = {"dataset_file": dataset_file} + import_data = {"file": dataset_file} with pytest.raises(exceptions.ApiException, match="Dataset file should be zip archive"): self._test_import_project( From be5348bbeae193491d124b6d4d6063da89953b86 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Thu, 27 Mar 2025 13:31:29 +0100 Subject: [PATCH 024/103] Update some rest api tests --- tests/python/rest_api/test_requests.py | 23 +++++++++-------------- tests/python/rest_api/utils.py | 21 ++++++++++++++------- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/tests/python/rest_api/test_requests.py b/tests/python/rest_api/test_requests.py index efadb141b8b5..7476b6c69ef2 100644 --- a/tests/python/rest_api/test_requests.py +++ b/tests/python/rest_api/test_requests.py @@ -109,9 +109,7 @@ def _make_requests(project_ids: list[int], task_ids: list[int], job_ids: list[in if resource_type == "task" and subresource == "backup": import_task_backup( self.user, - data={ - "task_file": tmp_file, - }, + file_content=tmp_file, ) empty_file = io.BytesIO(b"empty_file") @@ -120,9 +118,7 @@ def _make_requests(project_ids: list[int], task_ids: list[int], job_ids: list[in # import corrupted backup import_task_backup( self.user, - data={ - "task_file": empty_file, - }, + file_content=empty_file, ) return _make_requests @@ -273,16 +269,16 @@ def test_owner_can_retrieve_request(self, format_name: str, save_images: bool, p owner = project["owner"] subresource = "dataset" if save_images else "annotations" - export_project_dataset( + request_id = export_project_dataset( owner["username"], save_images=save_images, id=project["id"], download_result=False, + return_request_id=True, ) - rq_id = f'export:project-{project["id"]}-{subresource}-in-{format_name.replace(" ", "_").replace(".", "@")}-format-by-{owner["id"]}' with make_api_client(owner["username"]) as owner_client: - bg_request = self._test_get_request_200(owner_client, rq_id) + bg_request = self._test_get_request_200(owner_client, request_id) assert ( bg_request.created_date @@ -292,7 +288,7 @@ def test_owner_can_retrieve_request(self, format_name: str, save_images: bool, p ) assert bg_request.operation.format == format_name assert bg_request.operation.project_id == project["id"] - assert bg_request.operation.target.value == "project" + assert bg_request.operation.target == "project" assert bg_request.operation.task_id is None assert bg_request.operation.job_id is None assert bg_request.operation.type == f"export:{subresource}" @@ -314,13 +310,12 @@ def test_non_owner_cannot_retrieve_request(self, find_users, projects, format_na owner = project["owner"] malefactor = find_users(exclude_username=owner["username"])[0] - export_project_dataset( + request_id = export_project_dataset( owner["username"], save_images=True, id=project["id"], download_result=False, + return_request_id=True, ) - rq_id = f'export:project-{project["id"]}-dataset-in-{format_name.replace(" ", "_").replace(".", "@")}-format-by-{owner["id"]}' - with make_api_client(malefactor["username"]) as malefactor_client: - self._test_get_request_403(malefactor_client, rq_id) + self._test_get_request_403(malefactor_client, request_id) diff --git a/tests/python/rest_api/utils.py b/tests/python/rest_api/utils.py index 5335e3535a5d..4ba6aed54c5a 100644 --- a/tests/python/rest_api/utils.py +++ b/tests/python/rest_api/utils.py @@ -9,6 +9,7 @@ from http import HTTPStatus from time import sleep from typing import Any, Callable, Iterable, Optional, TypeVar, Union +from io import BytesIO import requests from cvat_sdk.api_client import apis, models @@ -81,7 +82,7 @@ def wait_and_download_v2( return None # return downloaded file in case of local downloading or None otherwise - if background_request.result_url: + if download_result and background_request.result_url: response = requests.get( background_request.result_url, auth=(api_client.configuration.username, api_client.configuration.password), @@ -101,8 +102,9 @@ def export_v2( expect_forbidden: bool = False, wait_result: bool = True, download_result: bool = True, + return_request_id: bool = False, **kwargs, -) -> Optional[bytes]: +) -> bytes | str | None: """Export datasets|annotations|backups using the second version of export API Args: @@ -115,6 +117,7 @@ def export_v2( Returns: bytes: The content of the file if downloaded locally. None: If `wait_result` or `download_result` were False or the file is downloaded to cloud storage. + str: If `download_result` was False and `return_request_id` was True. """ # initialize background process and ensure that the first request returns 403 code if request should be forbidden rq_id = initialize_export(endpoint, expect_forbidden=expect_forbidden, **kwargs) @@ -123,13 +126,17 @@ def export_v2( return None # check status of background process - return wait_and_download_v2( + result = wait_and_download_v2( endpoint.api_client, rq_id, max_retries=max_retries, interval=interval, download_result=download_result, ) + if not download_result and return_request_id: + return rq_id + + return result def export_dataset( @@ -246,14 +253,14 @@ def import_backup( return import_resource(endpoint, max_retries=max_retries, interval=interval, **kwargs) -def import_project_backup(username: str, data: dict, **kwargs) -> None: +def import_project_backup(username: str, file_content: BytesIO, **kwargs) -> None: with make_api_client(username) as api_client: - return import_backup(api_client.projects_api, project_file_request=deepcopy(data), **kwargs) + return import_backup(api_client.projects_api, uploaded_zip_file_request={"file": file_content}, **kwargs) -def import_task_backup(username: str, data: dict, **kwargs) -> None: +def import_task_backup(username: str, file_content: BytesIO, **kwargs) -> None: with make_api_client(username) as api_client: - return import_backup(api_client.tasks_api, task_file_request=deepcopy(data), **kwargs) + return import_backup(api_client.tasks_api, uploaded_zip_file_request={"file": file_content}, **kwargs) FieldPath = Sequence[Union[str, Callable]] From 5ccee8f19575546148d76e5e749c74d5e1f3d875 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Thu, 27 Mar 2025 13:45:16 +0100 Subject: [PATCH 025/103] isort --- cvat/apps/engine/background.py | 15 +++------------ cvat/apps/engine/backup.py | 6 +----- cvat/apps/engine/mixins.py | 7 +------ cvat/apps/engine/views.py | 18 +++--------------- cvat/apps/redis_handler/background.py | 6 +----- cvat/apps/redis_handler/serializers.py | 4 +--- cvat/apps/redis_handler/views.py | 6 +----- 7 files changed, 11 insertions(+), 51 deletions(-) diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index 9731e35fe568..d968b1906318 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -27,9 +27,7 @@ import_project, import_task, ) -from cvat.apps.engine.cloud_provider import ( - import_resource_from_cloud_storage, -) +from cvat.apps.engine.cloud_provider import import_resource_from_cloud_storage from cvat.apps.engine.location import StorageType, get_location_configuration from cvat.apps.engine.log import ServerLogManager from cvat.apps.engine.models import ( @@ -43,11 +41,7 @@ Task, ) from cvat.apps.engine.permissions import get_cloud_storage_for_import_or_export -from cvat.apps.engine.rq import ( - ExportRequestId, - ImportRequestId, - ImportRQMeta, -) +from cvat.apps.engine.rq import ExportRequestId, ImportRequestId, ImportRQMeta from cvat.apps.engine.serializers import UploadedFileSerializer, UploadedZipFileSerializer from cvat.apps.engine.task import create_thread as create_task from cvat.apps.engine.utils import ( @@ -56,10 +50,7 @@ is_dataset_export, ) from cvat.apps.events.handlers import handle_dataset_export, handle_dataset_import -from cvat.apps.redis_handler.background import ( - AbstractExporter, - AbstractRequestManager, -) +from cvat.apps.redis_handler.background import AbstractExporter, AbstractRequestManager from cvat.apps.redis_handler.rq import RequestId slogger = ServerLogManager(__name__) diff --git a/cvat/apps/engine/backup.py b/cvat/apps/engine/backup.py index 989d620265d9..1746c18ae5f7 100644 --- a/cvat/apps/engine/backup.py +++ b/cvat/apps/engine/backup.py @@ -45,11 +45,7 @@ from cvat.apps.engine import models from cvat.apps.engine.cloud_provider import db_storage_to_storage_instance from cvat.apps.engine.log import ServerLogManager -from cvat.apps.engine.models import ( - DataChoice, - StorageChoice, - StorageMethodChoice, -) +from cvat.apps.engine.models import DataChoice, StorageChoice, StorageMethodChoice from cvat.apps.engine.serializers import ( AnnotationGuideWriteSerializer, AssetWriteSerializer, diff --git a/cvat/apps/engine/mixins.py b/cvat/apps/engine/mixins.py index 94f2b0bd99ed..55d90a8a55b2 100644 --- a/cvat/apps/engine/mixins.py +++ b/cvat/apps/engine/mixins.py @@ -26,12 +26,7 @@ from cvat.apps.engine.background import BackupExporter, DatasetExporter from cvat.apps.engine.handlers import clear_import_cache from cvat.apps.engine.log import ServerLogManager -from cvat.apps.engine.models import ( - Location, - RequestAction, - RequestSubresource, - RequestTarget, -) +from cvat.apps.engine.models import Location, RequestAction, RequestSubresource, RequestTarget from cvat.apps.engine.rq import RequestId from cvat.apps.engine.serializers import DataSerializer from cvat.apps.engine.types import ExtendedRequest diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index ce2fa62e2dbd..a413d25d272d 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -66,12 +66,7 @@ from cvat.apps.engine.media_extractors import get_mime from cvat.apps.engine.mixins import BackupMixin, DatasetMixin, PartialUpdateModelMixin, UploadMixin from cvat.apps.engine.model_utils import bulk_create -from cvat.apps.engine.models import ( - AnnotationGuide, - Asset, - ClientFile, - CloudProviderChoice, -) +from cvat.apps.engine.models import AnnotationGuide, Asset, ClientFile, CloudProviderChoice from cvat.apps.engine.models import CloudStorage as CloudStorageModel from cvat.apps.engine.models import ( Comment, @@ -100,11 +95,7 @@ UserPermission, get_iam_context, ) -from cvat.apps.engine.rq import ( - ImportRQMeta, - RequestId, - RQMetaWithFailureInfo, -) +from cvat.apps.engine.rq import ImportRQMeta, RequestId, RQMetaWithFailureInfo from cvat.apps.engine.serializers import ( AboutSerializer, AnnotationGuideReadSerializer, @@ -143,10 +134,7 @@ UserSerializer, ) from cvat.apps.engine.types import ExtendedRequest -from cvat.apps.engine.utils import ( - parse_exception_message, - sendfile, -) +from cvat.apps.engine.utils import parse_exception_message, sendfile from cvat.apps.engine.view_utils import tus_chunk_action from cvat.apps.iam.filters import ORGANIZATION_OPEN_API_PARAMETERS from cvat.apps.iam.permissions import IsAuthenticatedOrReadPublicResource, PolicyEnforcer diff --git a/cvat/apps/redis_handler/background.py b/cvat/apps/redis_handler/background.py index 6dfcf3653eb1..45a9416ab4fc 100644 --- a/cvat/apps/redis_handler/background.py +++ b/cvat/apps/redis_handler/background.py @@ -34,11 +34,7 @@ from cvat.apps.engine.permissions import get_cloud_storage_for_import_or_export from cvat.apps.engine.rq import BaseRQMeta, ExportRQMeta, define_dependent_job from cvat.apps.engine.types import ExtendedRequest -from cvat.apps.engine.utils import ( - get_rq_lock_by_user, - get_rq_lock_for_job, - sendfile, -) +from cvat.apps.engine.utils import get_rq_lock_by_user, get_rq_lock_for_job, sendfile from cvat.apps.redis_handler.serializers import RequestIdSerializer slogger = ServerLogManager(__name__) diff --git a/cvat/apps/redis_handler/serializers.py b/cvat/apps/redis_handler/serializers.py index d928cc23bdab..01cd47677795 100644 --- a/cvat/apps/redis_handler/serializers.py +++ b/cvat/apps/redis_handler/serializers.py @@ -21,9 +21,7 @@ from cvat.apps.engine.models import RequestAction, RequestSubresource from cvat.apps.engine.rq import BaseRQMeta, ExportRQMeta, ImportRQMeta from cvat.apps.engine.serializers import BasicUserSerializer -from cvat.apps.engine.utils import ( - parse_exception_message, -) +from cvat.apps.engine.utils import parse_exception_message from cvat.apps.lambda_manager.rq import LambdaRQMeta from cvat.apps.redis_handler.rq import RequestId diff --git a/cvat/apps/redis_handler/views.py b/cvat/apps/redis_handler/views.py index 238a5bed90e6..4ab45abc3570 100644 --- a/cvat/apps/redis_handler/views.py +++ b/cvat/apps/redis_handler/views.py @@ -24,9 +24,7 @@ NonModelSimpleFilter, ) from cvat.apps.engine.log import ServerLogManager -from cvat.apps.engine.models import ( # todo: move to the app - RequestStatus, -) +from cvat.apps.engine.models import RequestStatus # todo: move to the app from cvat.apps.engine.rq import is_rq_job_owner from cvat.apps.engine.types import ExtendedRequest from cvat.apps.redis_handler.rq import RequestId @@ -141,7 +139,6 @@ def _get_rq_jobs_from_queue(self, queue: DjangoRQ, user_id: int) -> list[RQJob]: ParsedIdClass = self.get_parsed_id_class(queue.name) for job in queue.job_class.fetch_many(job_ids, queue.connection): - # TODO: move filtration by owner? if job and is_rq_job_owner(job, user_id): try: parsed_rq_id = ParsedIdClass.parse(job.id) @@ -165,7 +162,6 @@ def _get_rq_jobs(self, user_id: int) -> list[RQJob]: List[RQJob]: A list of RQJob objects representing all jobs for the specified user. """ all_jobs = [] - # TODO: optimize filtration here for queue in self.queues: jobs = self._get_rq_jobs_from_queue(queue, user_id) all_jobs.extend(jobs) From 03f445e2d17e6fd1cfbbfcb829c67dc1aa8587f8 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Thu, 27 Mar 2025 13:45:45 +0100 Subject: [PATCH 026/103] Resolve linter warnings --- cvat/apps/engine/background.py | 2 +- cvat/apps/engine/tests/test_rest_api.py | 4 --- cvat/apps/redis_handler/rq.py | 45 +++++++++++++------------ cvat/apps/redis_handler/views.py | 2 +- 4 files changed, 25 insertions(+), 28 deletions(-) diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index d968b1906318..283edb80ddd5 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -41,7 +41,7 @@ Task, ) from cvat.apps.engine.permissions import get_cloud_storage_for_import_or_export -from cvat.apps.engine.rq import ExportRequestId, ImportRequestId, ImportRQMeta +from cvat.apps.engine.rq import ExportRequestId, ImportRequestId from cvat.apps.engine.serializers import UploadedFileSerializer, UploadedZipFileSerializer from cvat.apps.engine.task import create_thread as create_task from cvat.apps.engine.utils import ( diff --git a/cvat/apps/engine/tests/test_rest_api.py b/cvat/apps/engine/tests/test_rest_api.py index e0224b2fa9a3..01d49dc7999c 100644 --- a/cvat/apps/engine/tests/test_rest_api.py +++ b/cvat/apps/engine/tests/test_rest_api.py @@ -5895,13 +5895,9 @@ def _run_api_v2_tasks_id_annotations_dump_load(self, owner): if owner: HTTP_200_OK = status.HTTP_200_OK HTTP_204_NO_CONTENT = status.HTTP_204_NO_CONTENT - HTTP_202_ACCEPTED = status.HTTP_202_ACCEPTED - HTTP_201_CREATED = status.HTTP_201_CREATED else: HTTP_200_OK = status.HTTP_401_UNAUTHORIZED HTTP_204_NO_CONTENT = status.HTTP_401_UNAUTHORIZED - HTTP_202_ACCEPTED = status.HTTP_401_UNAUTHORIZED - HTTP_201_CREATED = status.HTTP_401_UNAUTHORIZED def _get_initial_annotation(annotation_format): if annotation_format not in ["Market-1501 1.0", "ICDAR Recognition 1.0", diff --git a/cvat/apps/redis_handler/rq.py b/cvat/apps/redis_handler/rq.py index d705477fa807..d0ec05d6c19c 100644 --- a/cvat/apps/redis_handler/rq.py +++ b/cvat/apps/redis_handler/rq.py @@ -29,6 +29,10 @@ def convert_extra(value: dict) -> dict[str, Any]: return value +class IncorrectRequestIdError(ValueError): + pass + + @attrs.frozen(kw_only=True) class RequestId: FIELD_SEP: ClassVar[str] = "&" @@ -42,16 +46,12 @@ class RequestId: validator=attrs.validators.instance_of((int, UUID)), converter=convert_id, ) - - # todo: dot access extra: dict[str, Any] = attrs.field(converter=convert_extra, factory=dict) @property def type(self) -> str: return self.TYPE_SEP.join([self.action, self.target]) - # @classmethod - # def from_base(cls, parsed_id: RequestId, /): def convert_to(self, child_class: type[RequestId], /): # method is going to be used by child classes return child_class( @@ -63,8 +63,7 @@ def convert_to(self, child_class: type[RequestId], /): ) def render(self) -> str: - # TODO: add queue name indirectly - bytes = self.FIELD_SEP.join( + data = self.FIELD_SEP.join( [ self.KEY_VAL_SEP.join([k, v]) for k, v in { @@ -77,21 +76,23 @@ def render(self) -> str: ] ).encode() - return base64.b64encode(bytes).decode() + return base64.b64encode(data).decode() - # TODO: handle exceptions @classmethod - def parse(cls, rq_id: str, /): - decoded_rq_id = base64.b64decode(rq_id).decode() - - keys = set(attrs.fields_dict(cls).keys()) - {"extra"} - params = {} - - for pair in decoded_rq_id.split(RequestId.FIELD_SEP): - key, value = pair.split(RequestId.KEY_VAL_SEP, maxsplit=1) - if key in keys: - params[key] = value - else: - params.setdefault("extra", {})[key] = value - - return cls(**params) + def parse(cls, request_id: str, /): + try: + decoded_rq_id = base64.b64decode(request_id).decode() + + keys = set(attrs.fields_dict(cls).keys()) - {"extra"} + params = {} + + for pair in decoded_rq_id.split(RequestId.FIELD_SEP): + key, value = pair.split(RequestId.KEY_VAL_SEP, maxsplit=1) + if key in keys: + params[key] = value + else: + params.setdefault("extra", {})[key] = value + + return cls(**params) + except Exception as ex: + raise IncorrectRequestIdError from ex diff --git a/cvat/apps/redis_handler/views.py b/cvat/apps/redis_handler/views.py index 4ab45abc3570..f8686539909a 100644 --- a/cvat/apps/redis_handler/views.py +++ b/cvat/apps/redis_handler/views.py @@ -193,7 +193,7 @@ def _get_rq_job_by_id(self, rq_id: str) -> RQJob | None: job = queue.fetch_job(rq_id) if job: ParsedIdClass = self.get_parsed_id_class(queue.name) - if type(parsed_rq_id) is not ParsedIdClass: + if type(parsed_rq_id) is not ParsedIdClass: # pylint: disable=unidiomatic-typecheck parsed_rq_id = parsed_rq_id.convert_to(ParsedIdClass) job.parsed_rq_id = parsed_rq_id From 9ded53c89200af2c1ef8309f6fd7b9dcc8d315e6 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Thu, 27 Mar 2025 13:49:32 +0100 Subject: [PATCH 027/103] black tests --- tests/python/rest_api/utils.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/python/rest_api/utils.py b/tests/python/rest_api/utils.py index 4ba6aed54c5a..75ffd066290e 100644 --- a/tests/python/rest_api/utils.py +++ b/tests/python/rest_api/utils.py @@ -7,9 +7,9 @@ from collections.abc import Hashable, Iterator, Sequence from copy import deepcopy from http import HTTPStatus +from io import BytesIO from time import sleep from typing import Any, Callable, Iterable, Optional, TypeVar, Union -from io import BytesIO import requests from cvat_sdk.api_client import apis, models @@ -255,12 +255,16 @@ def import_backup( def import_project_backup(username: str, file_content: BytesIO, **kwargs) -> None: with make_api_client(username) as api_client: - return import_backup(api_client.projects_api, uploaded_zip_file_request={"file": file_content}, **kwargs) + return import_backup( + api_client.projects_api, uploaded_zip_file_request={"file": file_content}, **kwargs + ) def import_task_backup(username: str, file_content: BytesIO, **kwargs) -> None: with make_api_client(username) as api_client: - return import_backup(api_client.tasks_api, uploaded_zip_file_request={"file": file_content}, **kwargs) + return import_backup( + api_client.tasks_api, uploaded_zip_file_request={"file": file_content}, **kwargs + ) FieldPath = Sequence[Union[str, Callable]] From 0440ddc6e5887b139353d66a0e1b3440b742e0eb Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Thu, 27 Mar 2025 14:15:31 +0100 Subject: [PATCH 028/103] fix type in tests --- tests/python/rest_api/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/rest_api/utils.py b/tests/python/rest_api/utils.py index 75ffd066290e..e9ccd988afc3 100644 --- a/tests/python/rest_api/utils.py +++ b/tests/python/rest_api/utils.py @@ -104,7 +104,7 @@ def export_v2( download_result: bool = True, return_request_id: bool = False, **kwargs, -) -> bytes | str | None: +) -> Optional[Union[bytes, str]]: """Export datasets|annotations|backups using the second version of export API Args: From 3415c543ccd1d1de34a75016bb62569619f987b6 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Thu, 27 Mar 2025 23:40:20 +0100 Subject: [PATCH 029/103] API fixes --- cvat-sdk/cvat_sdk/auto_annotation/driver.py | 2 +- cvat-sdk/cvat_sdk/core/proxies/annotations.py | 4 +- cvat-sdk/cvat_sdk/core/proxies/jobs.py | 1 - cvat-sdk/cvat_sdk/core/proxies/tasks.py | 1 - cvat/apps/engine/background.py | 2 +- cvat/apps/engine/serializers.py | 17 +++++-- cvat/schema.yml | 20 +++----- tests/python/rest_api/test_consensus.py | 4 +- tests/python/rest_api/test_projects.py | 48 ++++--------------- tests/python/rest_api/test_quality_control.py | 8 ++-- tests/python/rest_api/utils.py | 4 +- 11 files changed, 41 insertions(+), 70 deletions(-) diff --git a/cvat-sdk/cvat_sdk/auto_annotation/driver.py b/cvat-sdk/cvat_sdk/auto_annotation/driver.py index d4928b2c4781..8af3df28a3ec 100644 --- a/cvat-sdk/cvat_sdk/auto_annotation/driver.py +++ b/cvat-sdk/cvat_sdk/auto_annotation/driver.py @@ -574,7 +574,7 @@ def annotate_task( if clear_existing: client.tasks.api.update_annotations( - task_id, task_annotations_update_request=models.LabeledDataRequest(shapes=shapes) + task_id, labeled_data_request=models.LabeledDataRequest(shapes=shapes) ) else: client.tasks.api.partial_update_annotations( diff --git a/cvat-sdk/cvat_sdk/core/proxies/annotations.py b/cvat-sdk/cvat_sdk/core/proxies/annotations.py index 6ec7434b2c99..03d03b5f0fea 100644 --- a/cvat-sdk/cvat_sdk/core/proxies/annotations.py +++ b/cvat-sdk/cvat_sdk/core/proxies/annotations.py @@ -20,8 +20,6 @@ class AnnotationUpdateAction(Enum): class AnnotationCrudMixin(ABC): # TODO: refactor - @property - def _put_annotations_data_param(self) -> str: ... def get_annotations(self: _EntityT) -> models.ILabeledData: (annotations, _) = self.api.retrieve_annotations(getattr(self, self._model_id_field)) @@ -29,7 +27,7 @@ def get_annotations(self: _EntityT) -> models.ILabeledData: def set_annotations(self: _EntityT, data: models.ILabeledDataRequest): self.api.update_annotations( - getattr(self, self._model_id_field), **{self._put_annotations_data_param: data} + getattr(self, self._model_id_field), labeled_data_request=data ) def update_annotations( diff --git a/cvat-sdk/cvat_sdk/core/proxies/jobs.py b/cvat-sdk/cvat_sdk/core/proxies/jobs.py index fbf02c168b24..ad21a41de60c 100644 --- a/cvat-sdk/cvat_sdk/core/proxies/jobs.py +++ b/cvat-sdk/cvat_sdk/core/proxies/jobs.py @@ -42,7 +42,6 @@ class Job( ExportDatasetMixin, ): _model_partial_update_arg = "patched_job_write_request" - _put_annotations_data_param = "job_annotations_update_request" def import_annotations( self, diff --git a/cvat-sdk/cvat_sdk/core/proxies/tasks.py b/cvat-sdk/cvat_sdk/core/proxies/tasks.py index 7502612342e6..4714910787a4 100644 --- a/cvat-sdk/cvat_sdk/core/proxies/tasks.py +++ b/cvat-sdk/cvat_sdk/core/proxies/tasks.py @@ -67,7 +67,6 @@ class Task( DownloadBackupMixin, ): _model_partial_update_arg = "patched_task_write_request" - _put_annotations_data_param = "task_annotations_update_request" def upload_data( self, diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index 283edb80ddd5..328096bbb3f0 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -275,7 +275,7 @@ def to_dict(self): def init_request_args(self): filename = self.request.query_params.get("filename") - file_path = (self.tmp_dir / filename) if filename else None + file_path = str(self.tmp_dir / filename) if filename else None try: location_config = get_location_configuration( diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 8218bc955bb9..172421b3fdc9 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -2938,13 +2938,24 @@ class FileInfoSerializer(serializers.Serializer): class UploadedFileSerializer(serializers.Serializer): file = serializers.FileField() -class UploadedZipFileSerializer(UploadedFileSerializer): + def __init__(self, *args, only_zip: bool = False, **kwargs): + super().__init__(*args, **kwargs) + self._only_zip = only_zip + # probably there is no need in such validation def validate_file(self, value): - if os.path.splitext(value.name)[1] != '.zip': - raise serializers.ValidationError('A file should be zip archive') + if self._only_zip and os.path.splitext(value.name)[1] != '.zip': + raise serializers.ValidationError('A file should be a zip archive') return value + +@extend_schema_serializer( + component_name="UploadedFile", +) +class UploadedZipFileSerializer(UploadedFileSerializer): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs, only_zip=True) + class CommentReadSerializer(serializers.ModelSerializer): owner = BasicUserSerializer(allow_null=True, required=False) diff --git a/cvat/schema.yml b/cvat/schema.yml index 5b8c4ca9717b..7681c96d920c 100644 --- a/cvat/schema.yml +++ b/cvat/schema.yml @@ -3801,10 +3801,10 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/UploadedZipFileRequest' + $ref: '#/components/schemas/UploadedFileRequest' multipart/form-data: schema: - $ref: '#/components/schemas/UploadedZipFileRequest' + $ref: '#/components/schemas/UploadedFileRequest' security: - sessionAuth: [] csrfAuth: [] @@ -3970,10 +3970,10 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/UploadedZipFileRequest' + $ref: '#/components/schemas/UploadedFileRequest' multipart/form-data: schema: - $ref: '#/components/schemas/UploadedZipFileRequest' + $ref: '#/components/schemas/UploadedFileRequest' security: - sessionAuth: [] csrfAuth: [] @@ -5797,10 +5797,10 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/UploadedZipFileRequest' + $ref: '#/components/schemas/UploadedFileRequest' multipart/form-data: schema: - $ref: '#/components/schemas/UploadedZipFileRequest' + $ref: '#/components/schemas/UploadedFileRequest' security: - sessionAuth: [] csrfAuth: [] @@ -10681,14 +10681,6 @@ components: format: binary required: - file - UploadedZipFileRequest: - type: object - properties: - file: - type: string - format: binary - required: - - file User: type: object properties: diff --git a/tests/python/rest_api/test_consensus.py b/tests/python/rest_api/test_consensus.py index 33fa04dd3f7c..e0e02260ec30 100644 --- a/tests/python/rest_api/test_consensus.py +++ b/tests/python/rest_api/test_consensus.py @@ -741,11 +741,11 @@ def test_quorum_is_applied(self, admin_user, jobs, labels, consensus_settings, t api_client.jobs_api.update_annotations( replicas[0]["id"], - job_annotations_update_request=models.JobAnnotationsUpdateRequest(shapes=[bbox1]), + labeled_data_request=models.LabeledDataRequest(shapes=[bbox1]), ) api_client.jobs_api.update_annotations( replicas[1]["id"], - job_annotations_update_request=models.JobAnnotationsUpdateRequest( + labeled_data_request=models.LabeledDataRequest( shapes=[bbox1, bbox2] ), ) diff --git a/tests/python/rest_api/test_projects.py b/tests/python/rest_api/test_projects.py index 7245fee8e449..95961bd45fea 100644 --- a/tests/python/rest_api/test_projects.py +++ b/tests/python/rest_api/test_projects.py @@ -43,6 +43,7 @@ export_dataset, export_project_backup, export_project_dataset, + import_project_backup, ) @@ -389,15 +390,7 @@ def test_admin_can_get_project_backup_and_create_project_by_backup(self, admin_u tmp_file = io.BytesIO(backup) tmp_file.name = "dataset.zip" - import_data = { - "file": tmp_file, - } - - with make_api_client(admin_user) as api_client: - (_, response) = api_client.projects_api.create_backup( - backup_write_request=deepcopy(import_data), _content_type="multipart/form-data" - ) - assert response.status == HTTPStatus.ACCEPTED + import_project_backup(admin_user, tmp_file) @pytest.mark.usefixtures("restore_db_per_function") @@ -630,7 +623,7 @@ def _test_import_project(self, username, project_id, format_name, data): (_, response) = api_client.projects_api.create_dataset( id=project_id, format=format_name, - dataset_write_request=deepcopy(data), + uploaded_file_request={"file": data}, _content_type="multipart/form-data", ) assert response.status == HTTPStatus.ACCEPTED @@ -670,11 +663,7 @@ def test_can_import_dataset_in_org(self, admin_user: str): tmp_file = io.BytesIO(dataset) tmp_file.name = "dataset.zip" - import_data = { - "file": tmp_file, - } - - self._test_import_project(admin_user, project_id, "CVAT 1.1", import_data) + self._test_import_project(admin_user, project_id, "CVAT 1.1", tmp_file) @pytest.mark.parametrize( "export_format, import_format", @@ -712,11 +701,8 @@ def test_can_export_and_import_dataset_with_skeletons( tmp_file = io.BytesIO(dataset) tmp_file.name = "dataset.zip" - import_data = { - "file": tmp_file, - } - self._test_import_project(admin_user, project_id, import_format, import_data) + self._test_import_project(admin_user, project_id, import_format, tmp_file) @pytest.mark.parametrize("format_name", ("Datumaro 1.0", "ImageNet 1.0", "PASCAL VOC 1.1")) def test_can_import_export_dataset_with_some_format(self, format_name: str): @@ -735,11 +721,7 @@ def test_can_import_export_dataset_with_some_format(self, format_name: str): tmp_file = io.BytesIO(dataset) tmp_file.name = "dataset.zip" - import_data = { - "file": tmp_file, - } - - self._test_import_project(username, project_id, format_name, import_data) + self._test_import_project(username, project_id, format_name, tmp_file) @pytest.mark.parametrize("username, pid", [("admin1", 8)]) @pytest.mark.parametrize( @@ -801,11 +783,7 @@ def test_can_import_export_annotations_with_rotation(self): tmp_file = io.BytesIO(dataset) tmp_file.name = "dataset.zip" - import_data = { - "file": tmp_file, - } - - self._test_import_project(username, project_id, "CVAT 1.1", import_data) + self._test_import_project(username, project_id, "CVAT 1.1", tmp_file) response = get_method(username, f"tasks", project_id=project_id) assert response.status_code == HTTPStatus.OK @@ -915,11 +893,7 @@ def test_can_export_and_import_dataset_after_deleting_related_storage( with io.BytesIO(dataset) as tmp_file: tmp_file.name = "dataset.zip" - import_data = { - "file": tmp_file, - } - - self._test_import_project(admin_user, project_id, "CVAT 1.1", import_data) + self._test_import_project(admin_user, project_id, "CVAT 1.1", tmp_file) @pytest.mark.parametrize( "dimension, format_name", @@ -972,14 +946,12 @@ def _export_task(task_id: int, format_name: str) -> io.BytesIO: ) ) - import_data = {"file": dataset_file} - - with pytest.raises(exceptions.ApiException, match="Dataset file should be zip archive"): + with pytest.raises(exceptions.ApiException, match="A file should be a zip archive"): self._test_import_project( admin_user, project.id, format_name=format_name, - data=import_data, + data=dataset_file, ) @pytest.mark.parametrize( diff --git a/tests/python/rest_api/test_quality_control.py b/tests/python/rest_api/test_quality_control.py index 062d21554c0b..da615c435142 100644 --- a/tests/python/rest_api/test_quality_control.py +++ b/tests/python/rest_api/test_quality_control.py @@ -59,7 +59,7 @@ def create_gt_job(self, user, task_id): (labels, _) = api_client.labels_api.list(task_id=task_id) api_client.jobs_api.update_annotations( job.id, - job_annotations_update_request=dict( + labeled_data_request=dict( shapes=[ dict( frame=start_frame, @@ -1388,7 +1388,7 @@ def test_can_compute_quality_if_non_skeleton_label_follows_skeleton_label( new_label_id = new_label_obj.results[0].id api_client.tasks_api.update_annotations( task_id, - task_annotations_update_request={ + labeled_data_request={ "shapes": [ models.LabeledShapeRequest( type="rectangle", @@ -1599,11 +1599,11 @@ def test_quality_metrics_in_task_with_gt_and_tracks( } api_client.jobs_api.update_annotations( - gt_job.id, job_annotations_update_request=gt_annotations + gt_job.id, labeled_data_request=gt_annotations ) api_client.tasks_api.update_annotations( - task_id, task_annotations_update_request=normal_annotations + task_id, labeled_data_request=normal_annotations ) api_client.jobs_api.partial_update( diff --git a/tests/python/rest_api/utils.py b/tests/python/rest_api/utils.py index e9ccd988afc3..4531b09d899a 100644 --- a/tests/python/rest_api/utils.py +++ b/tests/python/rest_api/utils.py @@ -256,14 +256,14 @@ def import_backup( def import_project_backup(username: str, file_content: BytesIO, **kwargs) -> None: with make_api_client(username) as api_client: return import_backup( - api_client.projects_api, uploaded_zip_file_request={"file": file_content}, **kwargs + api_client.projects_api, uploaded_file_request={"file": file_content}, **kwargs ) def import_task_backup(username: str, file_content: BytesIO, **kwargs) -> None: with make_api_client(username) as api_client: return import_backup( - api_client.tasks_api, uploaded_zip_file_request={"file": file_content}, **kwargs + api_client.tasks_api, uploaded_file_request={"file": file_content}, **kwargs ) From f2e5cca791a9db95ec43c6faee8be6399db0026b Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Fri, 28 Mar 2025 00:02:49 +0100 Subject: [PATCH 030/103] t --- tests/python/rest_api/test_analytics.py | 3 ++- tests/python/rest_api/test_consensus.py | 4 +--- tests/python/rest_api/test_quality_control.py | 4 +--- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/tests/python/rest_api/test_analytics.py b/tests/python/rest_api/test_analytics.py index efaa85d786c9..97fd3ff7abb7 100644 --- a/tests/python/rest_api/test_analytics.py +++ b/tests/python/rest_api/test_analytics.py @@ -11,6 +11,7 @@ from http import HTTPStatus from io import StringIO from time import sleep +from typing import Optional import pytest from cvat_sdk.api_client import ApiClient @@ -157,7 +158,7 @@ def _export_events( max_retries: int = 20, interval: float = 0.1, **kwargs, - ) -> bytes | None: + ) -> Optional[bytes]: if api_version == 1: endpoint = api_client.events_api.list_endpoint query_id = "" diff --git a/tests/python/rest_api/test_consensus.py b/tests/python/rest_api/test_consensus.py index e0e02260ec30..ec86180300fc 100644 --- a/tests/python/rest_api/test_consensus.py +++ b/tests/python/rest_api/test_consensus.py @@ -745,9 +745,7 @@ def test_quorum_is_applied(self, admin_user, jobs, labels, consensus_settings, t ) api_client.jobs_api.update_annotations( replicas[1]["id"], - labeled_data_request=models.LabeledDataRequest( - shapes=[bbox1, bbox2] - ), + labeled_data_request=models.LabeledDataRequest(shapes=[bbox1, bbox2]), ) self.merge(job_id=parent_job["id"], user=admin_user) diff --git a/tests/python/rest_api/test_quality_control.py b/tests/python/rest_api/test_quality_control.py index da615c435142..001776cf564c 100644 --- a/tests/python/rest_api/test_quality_control.py +++ b/tests/python/rest_api/test_quality_control.py @@ -1598,9 +1598,7 @@ def test_quality_metrics_in_task_with_gt_and_tracks( ] } - api_client.jobs_api.update_annotations( - gt_job.id, labeled_data_request=gt_annotations - ) + api_client.jobs_api.update_annotations(gt_job.id, labeled_data_request=gt_annotations) api_client.tasks_api.update_annotations( task_id, labeled_data_request=normal_annotations From 07b2a0d6cef1f70946bfa6265bf40a49961188e5 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Mon, 31 Mar 2025 18:29:21 +0200 Subject: [PATCH 031/103] Fix import from cloud --- cvat/apps/engine/background.py | 86 +++++++++++++++++++----------- cvat/apps/engine/cloud_provider.py | 5 +- 2 files changed, 56 insertions(+), 35 deletions(-) diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index 328096bbb3f0..effab2b70c78 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -2,11 +2,12 @@ # # SPDX-License-Identifier: MIT +from abc import abstractmethod from dataclasses import asdict as dataclass_asdict from dataclasses import dataclass from pathlib import Path from tempfile import NamedTemporaryFile -from typing import Any +from typing import Any, Callable from uuid import uuid4 import attrs @@ -47,6 +48,7 @@ from cvat.apps.engine.utils import ( build_annotations_file_name, build_backup_file_name, + import_resource_with_clean_up_after, is_dataset_export, ) from cvat.apps.events.handlers import handle_dataset_export, handle_dataset_import @@ -274,8 +276,7 @@ def to_dict(self): import_args: ImportArgs | None = attrs.field(init=False) def init_request_args(self): - filename = self.request.query_params.get("filename") - file_path = str(self.tmp_dir / filename) if filename else None + file_path: str | None = None try: location_config = get_location_configuration( @@ -286,6 +287,13 @@ def init_request_args(self): except ValueError as ex: raise ValidationError(str(ex)) from ex + if filename := self.request.query_params.get("filename"): + file_path = ( + str(self.tmp_dir / filename) + if location_config["location"] != Location.CLOUD_STORAGE + else filename + ) + self.import_args = ResourceImporter.ImportArgs( location_config=location_config, file_path=file_path, @@ -332,6 +340,37 @@ def _handle_non_tus_file_upload(self): for chunk in payload_file.chunks(): tf.write(chunk) + @abstractmethod + def _init_callback_with_params(self) -> tuple[Callable, tuple]: ... + + def init_callback_with_params(self): + # Note: self.import_args are changed here + if self.import_args.location == Location.CLOUD_STORAGE: + db_storage, key = self._handle_cloud_storage_file_upload() + elif not self.import_args.file_path: + self._handle_non_tus_file_upload() + + # redefine here callback and callback args in order to: + # - (optional) download file from cloud storage + # - remove uploaded file at the end + self.callback = import_resource_with_clean_up_after + import_func, import_func_args = self._init_callback_with_params() + + if self.import_args.location == Location.LOCAL: + self.callback_args = ( + import_func, + *import_func_args, + ) + else: + self.callback_args = ( + import_resource_from_cloud_storage, + import_func_args[0], + db_storage, + key, + import_func, + *import_func_args[1:], + ) + @attrs.define(kw_only=True) class DatasetImporter(ResourceImporter): @@ -356,36 +395,28 @@ def init_request_args(self) -> None: format_name = self.request.query_params.get("format", "") conv_mask_to_poly = to_bool(self.request.query_params.get("conv_mask_to_poly", True)) - self.import_args = self.ImportArgs( + self.import_args: DatasetImporter.ImportArgs = self.ImportArgs( **self.import_args.to_dict(), format=format_name, conv_mask_to_poly=conv_mask_to_poly, ) - def init_callback_with_params(self): + def _init_callback_with_params(self): if isinstance(self.db_instance, Project): - self.callback = dm.project.import_dataset_as_project + callback = dm.project.import_dataset_as_project elif isinstance(self.db_instance, Task): - self.callback = dm.task.import_task_annotations + callback = dm.task.import_task_annotations else: assert isinstance(self.db_instance, Job) - self.callback = dm.task.import_job_annotations - - if self.import_args.location == Location.CLOUD_STORAGE: - db_storage, key = self._handle_cloud_storage_file_upload() - elif not self.import_args.file_path: - self._handle_non_tus_file_upload() + callback = dm.task.import_job_annotations - self.callback_args = ( + callback_args = ( self.import_args.file_path, self.db_instance.pk, self.import_args.format, self.import_args.conv_mask_to_poly, ) - - if self.import_args.location == Location.CLOUD_STORAGE: - self.callback_args = (db_storage, key, self.callback) + self.callback_args - self.callback = import_resource_from_cloud_storage + return callback, callback_args def validate_request(self): super().validate_request() @@ -441,7 +472,7 @@ def __attrs_post_init__(self) -> None: def init_request_args(self) -> None: super().init_request_args() - self.import_args = self.ImportArgs( + self.import_args: BackupImporter.ImportArgs = self.ImportArgs( **self.import_args.to_dict(), org_id=getattr(self.request.iam_context["organization"], "id", None), ) @@ -457,19 +488,10 @@ def build_request_id(self): }, ).render() - def init_callback_with_params(self): - self.callback = import_project if self.resource == RequestTarget.PROJECT else import_task - - if self.import_args.location == Location.CLOUD_STORAGE: - db_storage, key = self._handle_cloud_storage_file_upload() - elif not self.import_args.file_path: - self._handle_non_tus_file_upload() - - self.callback_args = (self.import_args.file_path, self.user_id, self.import_args.org_id) - - if self.import_args.location == Location.CLOUD_STORAGE: - self.callback_args = (db_storage, key, self.callback) + self.callback_args - self.callback = import_resource_from_cloud_storage + def _init_callback_with_params(self): + callback = import_project if self.resource == RequestTarget.PROJECT else import_task + callback_args = (self.import_args.file_path, self.user_id, self.import_args.org_id) + return callback, callback_args def finalize_request(self): # FUTURE-TODO: send logs to event store diff --git a/cvat/apps/engine/cloud_provider.py b/cvat/apps/engine/cloud_provider.py index 56fb43e573da..e55ff3b8786c 100644 --- a/cvat/apps/engine/cloud_provider.py +++ b/cvat/apps/engine/cloud_provider.py @@ -1005,18 +1005,17 @@ def db_storage_to_storage_instance(db_storage): T = TypeVar('T', Callable[[str, int, int], int], Callable[[str, int, str, bool], None]) def import_resource_from_cloud_storage( + filename: str, db_storage: Any, key: str, - cleanup_func: Callable[[T, str,], Any], import_func: T, - filename: str, *args, **kwargs, ) -> Any: storage = db_storage_to_storage_instance(db_storage) storage.download_file(key, filename) - return cleanup_func(import_func, filename, *args, **kwargs) + return import_func(filename, *args, **kwargs) def export_resource_to_cloud_storage( db_storage: Any, From 305aac9b5e2739a6b1b51db1bacb57c5001305bd Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Mon, 31 Mar 2025 18:30:47 +0200 Subject: [PATCH 032/103] p --- cvat/apps/redis_handler/permissions.py | 4 ++-- cvat/apps/redis_handler/rules/requests.rego | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cvat/apps/redis_handler/permissions.py b/cvat/apps/redis_handler/permissions.py index 1099f94a3180..8fd5a2d57b03 100644 --- a/cvat/apps/redis_handler/permissions.py +++ b/cvat/apps/redis_handler/permissions.py @@ -25,7 +25,7 @@ class RequestPermission(OpenPolicyAgentPermission): class Scopes(StrEnum): LIST = "list" VIEW = "view" - CANCEL = "cancel" + DELETE = "delete" @classmethod def create( @@ -76,7 +76,7 @@ def get_scopes(request: ExtendedRequest, view: ViewSet, obj: RQJob | None) -> li { ("list", "GET"): __class__.Scopes.LIST, ("retrieve", "GET"): __class__.Scopes.VIEW, - ("cancel", "POST"): __class__.Scopes.CANCEL, + ("cancel", "POST"): __class__.Scopes.DELETE, }[(view.action, request.method)] ] diff --git a/cvat/apps/redis_handler/rules/requests.rego b/cvat/apps/redis_handler/rules/requests.rego index 0c6e7321516f..eaf7d8114052 100644 --- a/cvat/apps/redis_handler/rules/requests.rego +++ b/cvat/apps/redis_handler/rules/requests.rego @@ -6,7 +6,7 @@ import data.utils import data.organizations # input: { -# "scope": <"view"|"cancel"> or null, +# "scope": <"view"|"delete"> or null, # "auth": { # "user": { # "id": , @@ -34,5 +34,6 @@ allow if { } allow if { + input.scope in {utils.VIEW, utils.DELETE} input.auth.user.id == input.resource.owner.id } From cf6f9327d438abf4c6aa678eb085728c26a1de3f Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Mon, 31 Mar 2025 21:50:23 +0200 Subject: [PATCH 033/103] Update test_resource_import_export.py --- .../rest_api/test_resource_import_export.py | 22 +++- .../shared/utils/resource_import_export.py | 121 ++++++------------ 2 files changed, 60 insertions(+), 83 deletions(-) diff --git a/tests/python/rest_api/test_resource_import_export.py b/tests/python/rest_api/test_resource_import_export.py index 84582871c2cc..13f0151563cc 100644 --- a/tests/python/rest_api/test_resource_import_export.py +++ b/tests/python/rest_api/test_resource_import_export.py @@ -202,7 +202,19 @@ def test_import_resource_from_cloud_storage_with_specific_location( resource, is_default=False, obj=obj, cloud_storage_id=cloud_storage_id ) self._export_resource(cloud_storage, obj_id, obj, resource, **export_kwargs) - self._import_resource(cloud_storage, resource, obj_id, obj, **kwargs) + self._import_resource( + cloud_storage, + resource, + *( + [ + obj_id, + ] + if resource != "backup" + else [] + ), + obj, + **kwargs, + ) @pytest.mark.usefixtures("restore_redis_inmem_per_function") @pytest.mark.parametrize( @@ -326,7 +338,13 @@ def test_user_cannot_import_from_cloud_storage_with_specific_location_without_ac self._import_resource( cloud_storage, resource, - obj_id, + *( + [ + obj_id, + ] + if resource != "backup" + else [] + ), obj, user=user, _expect_status=HTTPStatus.FORBIDDEN, diff --git a/tests/python/shared/utils/resource_import_export.py b/tests/python/shared/utils/resource_import_export.py index c61d5874b3fe..4e0133482051 100644 --- a/tests/python/shared/utils/resource_import_export.py +++ b/tests/python/shared/utils/resource_import_export.py @@ -10,7 +10,7 @@ T = TypeVar("T") -from shared.utils.config import get_method, post_method, put_method +from shared.utils.config import get_method, post_method FILENAME_TEMPLATE = "cvat/{}/{}.zip" EXPORT_FORMAT = "CVAT for images 1.1" @@ -53,6 +53,26 @@ def _make_import_resource_params( return params +# FUTURE-TODO: reuse common logic from rest_api/utils +def _wait_request( + user: str, + request_id: str, + *, + sleep_interval: float = 0.1, + number_of_checks: int = 100, +): + for _ in range(number_of_checks): + sleep(sleep_interval) + response = get_method(user, f"requests/{request_id}") + assert response.status_code == HTTPStatus.OK + + request_details = json.loads(response.content) + status = request_details["status"] + assert status in {"started", "queued", "finished", "failed"} + if status in {"finished", "failed"}: + return + + class _CloudStorageResourceTest(ABC): @staticmethod @abstractmethod @@ -90,13 +110,9 @@ def _export_resource_to_cloud_storage( resource: str, *, user: str, - _expect_status: Optional[int] = None, + _expect_status: HTTPStatus = HTTPStatus.ACCEPTED, **kwargs, ): - _expect_status = _expect_status or HTTPStatus.ACCEPTED - - sleep_interval = 0.1 - number_of_checks = 100 # initialize the export process response = post_method( @@ -113,47 +129,22 @@ def _export_resource_to_cloud_storage( rq_id = json.loads(response.content).get("rq_id") assert rq_id, "The rq_id was not found in server request" - for _ in range(number_of_checks): - sleep(sleep_interval) - # use new requests API for checking the status of the operation - response = get_method(user, f"requests/{rq_id}") - assert response.status_code == HTTPStatus.OK - - request_details = json.loads(response.content) - status = request_details["status"] - assert status in {"started", "queued", "finished", "failed"} - if status in {"finished", "failed"}: - break + _wait_request(user, rq_id) def _import_resource_from_cloud_storage( - self, url: str, *, user: str, _expect_status: Optional[int] = None, **kwargs + self, url: str, *, user: str, _expect_status: HTTPStatus = HTTPStatus.ACCEPTED, **kwargs ) -> None: - _expect_status = _expect_status or HTTPStatus.ACCEPTED - response = post_method(user, url, data=None, **kwargs) status = response.status_code - assert status == _expect_status + assert status == _expect_status, status if status == HTTPStatus.FORBIDDEN: return rq_id = response.json().get("rq_id") assert rq_id, "The rq_id parameter was not found in the server response" - number_of_checks = 100 - sleep_interval = 0.1 - - for _ in range(number_of_checks): - sleep(sleep_interval) - # use new requests API for checking the status of the operation - response = get_method(user, f"requests/{rq_id}") - assert response.status_code == HTTPStatus.OK - - request_details = json.loads(response.content) - status = request_details["status"] - assert status in {"started", "queued", "finished", "failed"} - if status in {"finished", "failed"}: - break + _wait_request(user, rq_id) def _import_annotations_from_cloud_storage( self, @@ -161,27 +152,17 @@ def _import_annotations_from_cloud_storage( obj, *, user, - _expect_status: Optional[int] = None, + _expect_status: HTTPStatus = HTTPStatus.ACCEPTED, _check_uploaded: bool = True, **kwargs, ): - _expect_status = _expect_status or HTTPStatus.CREATED - url = f"{obj}/{obj_id}/annotations" - response = post_method(user, url, data=None, **kwargs) - status = response.status_code - - # Only the first POST request contains rq_id in response. - # Exclude cases with 403 expected status. - rq_id = None - if status == HTTPStatus.ACCEPTED: - rq_id = response.json().get("rq_id") - assert rq_id, "The rq_id was not found in the response" + self._import_resource_from_cloud_storage( + url, user=user, _expect_status=_expect_status, **kwargs + ) - while status != _expect_status: - assert status == HTTPStatus.ACCEPTED - response = put_method(user, url, data=None, rq_id=rq_id, **kwargs) - status = response.status_code + if _expect_status == HTTPStatus.FORBIDDEN: + return if _check_uploaded: response = get_method(user, url) @@ -192,40 +173,18 @@ def _import_annotations_from_cloud_storage( assert len(annotations["shapes"]) def _import_backup_from_cloud_storage( - self, obj_id, obj, *, user, _expect_status: Optional[int] = None, **kwargs + self, obj, *, user, _expect_status: HTTPStatus = HTTPStatus.ACCEPTED, **kwargs ): - _expect_status = _expect_status or HTTPStatus.CREATED - - url = f"{obj}/backup" - response = post_method(user, url, data=None, **kwargs) - status = response.status_code - - while status != _expect_status: - assert status == HTTPStatus.ACCEPTED - data = json.loads(response.content.decode("utf8")) - response = post_method(user, url, data=data, **kwargs) - status = response.status_code + self._import_resource_from_cloud_storage( + f"{obj}/backup", user=user, _expect_status=_expect_status, **kwargs + ) def _import_dataset_from_cloud_storage( - self, obj_id, obj, *, user, _expect_status: Optional[int] = None, **kwargs + self, obj_id, obj, *, user, _expect_status: HTTPStatus = HTTPStatus.ACCEPTED, **kwargs ): - _expect_status = _expect_status or HTTPStatus.CREATED - - url = f"{obj}/{obj_id}/dataset" - response = post_method(user, url, data=None, **kwargs) - status = response.status_code - - # Only the first POST request contains rq_id in response. - # Exclude cases with 403 expected status. - rq_id = None - if status == HTTPStatus.ACCEPTED: - rq_id = response.json().get("rq_id") - assert rq_id, "The rq_id was not found in the response" - - while status != _expect_status: - assert status == HTTPStatus.ACCEPTED - response = get_method(user, url, action="import_status", rq_id=rq_id) - status = response.status_code + self._import_resource_from_cloud_storage( + f"{obj}/{obj_id}/dataset", user=user, _expect_status=_expect_status, **kwargs + ) def _import_resource(self, cloud_storage: dict[str, Any], resource_type: str, *args, **kwargs): methods = { From dd3d29261ff5cda66734a2ab48a1d41ccaa169a8 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Mon, 31 Mar 2025 22:20:37 +0200 Subject: [PATCH 034/103] Fix operation type returned by server for import requests --- cvat/apps/engine/rq.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cvat/apps/engine/rq.py b/cvat/apps/engine/rq.py index 0a40add05913..1172297a3abc 100644 --- a/cvat/apps/engine/rq.py +++ b/cvat/apps/engine/rq.py @@ -367,6 +367,10 @@ def format(self) -> str | None: # TODO: quote/unquote return self.extra.get("format") + @property + def type(self) -> str: + return self.TYPE_SEP.join([self.action, self.subresource or self.target]) + def define_dependent_job( queue: DjangoRQ, From 7d1bb101f9fe8f1dee3ecc8130f273838e4eea0d Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Mon, 31 Mar 2025 23:01:31 +0200 Subject: [PATCH 035/103] Skip broken test --- cvat/apps/dataset_manager/tests/test_rest_api_formats.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cvat/apps/dataset_manager/tests/test_rest_api_formats.py b/cvat/apps/dataset_manager/tests/test_rest_api_formats.py index c71456021188..36f2d17c8517 100644 --- a/cvat/apps/dataset_manager/tests/test_rest_api_formats.py +++ b/cvat/apps/dataset_manager/tests/test_rest_api_formats.py @@ -2097,6 +2097,11 @@ def test_api_v2_export_import_dataset(self): ]: # TO-DO: fix bug for this formats continue + if upload_format_name == "Ultralytics YOLO Classification 1.0": + # FUTURE-FIXME: + # cvat.apps.dataset_manager.bindings.CvatImportError: + # Could not match item id: \'image_1\' with any task frame + continue for user, edata in list(expected.items()): project = copy.deepcopy(projects['main']) if upload_format_name in tasks: From 6988d9524ebd1012e4dcd9a24877aa2365fa75f4 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Mon, 31 Mar 2025 23:04:28 +0200 Subject: [PATCH 036/103] black code --- cvat-sdk/cvat_sdk/core/proxies/annotations.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/cvat-sdk/cvat_sdk/core/proxies/annotations.py b/cvat-sdk/cvat_sdk/core/proxies/annotations.py index 03d03b5f0fea..8d3c45153a10 100644 --- a/cvat-sdk/cvat_sdk/core/proxies/annotations.py +++ b/cvat-sdk/cvat_sdk/core/proxies/annotations.py @@ -20,15 +20,12 @@ class AnnotationUpdateAction(Enum): class AnnotationCrudMixin(ABC): # TODO: refactor - def get_annotations(self: _EntityT) -> models.ILabeledData: (annotations, _) = self.api.retrieve_annotations(getattr(self, self._model_id_field)) return annotations def set_annotations(self: _EntityT, data: models.ILabeledDataRequest): - self.api.update_annotations( - getattr(self, self._model_id_field), labeled_data_request=data - ) + self.api.update_annotations(getattr(self, self._model_id_field), labeled_data_request=data) def update_annotations( self: _EntityT, From 6af24e12a3427435c468620341d856c7cce4efd1 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Mon, 31 Mar 2025 23:05:35 +0200 Subject: [PATCH 037/103] remove unused import --- tests/python/shared/utils/resource_import_export.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/shared/utils/resource_import_export.py b/tests/python/shared/utils/resource_import_export.py index 4e0133482051..e9ce58555013 100644 --- a/tests/python/shared/utils/resource_import_export.py +++ b/tests/python/shared/utils/resource_import_export.py @@ -4,7 +4,7 @@ from contextlib import ExitStack from http import HTTPStatus from time import sleep -from typing import Any, Optional, TypeVar +from typing import Any, TypeVar import pytest From d752b3eafc249f6333eabb8189db6197d4115259 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 1 Apr 2025 10:35:12 +0200 Subject: [PATCH 038/103] Fix server schema --- cvat/apps/engine/views.py | 1 + cvat/schema.yml | 24 ------------------------ 2 files changed, 1 insertion(+), 24 deletions(-) diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index a413d25d272d..73dd32e1cc39 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -360,6 +360,7 @@ def perform_create(self, serializer, **kwargs): # Required for the extra summary information added in the queryset serializer.instance = self.get_queryset().get(pk=serializer.instance.pk) + @extend_schema(methods=['GET'], exclude=True) @extend_schema(methods=['POST'], summary='Import a dataset into a project', description=textwrap.dedent(""" diff --git a/cvat/schema.yml b/cvat/schema.yml index 7681c96d920c..ee5a6a7dade1 100644 --- a/cvat/schema.yml +++ b/cvat/schema.yml @@ -3730,30 +3730,6 @@ paths: description: The backup process has already been initiated and is not yet finished /api/projects/{id}/dataset/: - get: - operationId: projects_retrieve_dataset - parameters: - - in: path - name: id - schema: - type: integer - description: A unique integer value identifying this project. - required: true - tags: - - projects - security: - - sessionAuth: [] - csrfAuth: [] - tokenAuth: [] - - signatureAuth: [] - - basicAuth: [] - responses: - '200': - content: - application/vnd.cvat+json: - schema: - $ref: '#/components/schemas/ProjectRead' - description: '' post: operationId: projects_create_dataset description: |2 From 12fc0ddba486061a2b741c4385720726b95cafaf Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 1 Apr 2025 10:35:35 +0200 Subject: [PATCH 039/103] Remove extra lines --- cvat/apps/engine/urls.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/cvat/apps/engine/urls.py b/cvat/apps/engine/urls.py index b52e5b6181e6..0c294aa5ab8f 100644 --- a/cvat/apps/engine/urls.py +++ b/cvat/apps/engine/urls.py @@ -12,7 +12,6 @@ from . import views router = routers.DefaultRouter(trailing_slash=False) - router.register("projects", views.ProjectViewSet) router.register("tasks", views.TaskViewSet) router.register("jobs", views.JobViewSet) @@ -25,7 +24,6 @@ router.register("assets", views.AssetsViewSet) router.register("guides", views.AnnotationGuidesViewSet) - urlpatterns = [ # Entry point for a client path("", RedirectView.as_view(url=settings.UI_URL, permanent=True, query_string=True)), From e1df007c8af87003282caaf6585afc3275488707 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 1 Apr 2025 13:23:07 +0200 Subject: [PATCH 040/103] Update POST /api/quality/reports API description --- cvat/apps/quality_control/views.py | 7 ++++++- cvat/schema.yml | 5 +++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/cvat/apps/quality_control/views.py b/cvat/apps/quality_control/views.py index 055501ff8e21..09c20a7e6aa1 100644 --- a/cvat/apps/quality_control/views.py +++ b/cvat/apps/quality_control/views.py @@ -226,7 +226,12 @@ def get_queryset(self): @extend_schema( operation_id="quality_create_report", summary="Create a quality report", - description="Deprecation warning: Do not use this endpoint ot check the report computation status", + description=textwrap.dedent( + """\ + Deprecation warning: Utilizing this endpoint to check the computation status is no longer possible. + Consider using common requests API: GET /api/requests/ + """ + ), parameters=[ OpenApiParameter( CREATE_REPORT_RQ_ID_PARAMETER, diff --git a/cvat/schema.yml b/cvat/schema.yml index ee5a6a7dade1..419168eff7b7 100644 --- a/cvat/schema.yml +++ b/cvat/schema.yml @@ -4168,8 +4168,9 @@ paths: description: '' post: operationId: quality_create_report - description: 'Deprecation warning: Do not use this endpoint ot check the report - computation status' + description: | + Deprecation warning: Utilizing this endpoint to check the computation status is no longer possible. + Consider using common requests API: GET /api/requests/ summary: Create a quality report parameters: - in: query From afce3d1e46b06266ccec3372e37ec35f136b49a1 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 1 Apr 2025 14:39:35 +0200 Subject: [PATCH 041/103] Cleanup API --- cvat/apps/engine/views.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 73dd32e1cc39..23fba37e2b77 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -163,8 +163,7 @@ def get_410_response_for_export_api(path: str) -> HttpResponseGone: def get_410_response_for_import_api() -> HttpResponseGone: return HttpResponseGone(textwrap.dedent("""\ - This endpoint is no longer supported. - To check the status of the import process, use GET /api/requests/rq_id, + This endpoint no longer supports checking the status of the import process, use GET /api/requests/rq_id, where rq_id is obtained from the response of the previous request. """)) @@ -474,6 +473,9 @@ def export_backup(self, request: ExtendedRequest, pk: int): serializer_class=None, parser_classes=_UPLOAD_PARSER_CLASSES) def import_backup(self, request: ExtendedRequest): + if request.query_params.get("rq_id"): + return get_410_response_for_import_api() + return self.upload_data(request) @tus_chunk_action(detail=False, suffix_base="backup") @@ -820,6 +822,9 @@ def get_queryset(self): serializer_class=None, parser_classes=_UPLOAD_PARSER_CLASSES) def import_backup(self, request: ExtendedRequest): + if request.query_params.get("rq_id"): + return get_410_response_for_import_api() + return self.upload_data(request) @tus_chunk_action(detail=False, suffix_base="backup") @@ -1252,7 +1257,7 @@ def annotations(self, request: ExtendedRequest, pk: int): return self.upload_data(request) elif request.method == 'PUT': - if "format" in request.query_params.keys(): + if {"format", "rq_id"} & set(request.query_params.keys()): return get_410_response_for_import_api() serializer = LabeledDataSerializer(data=request.data) @@ -1725,7 +1730,7 @@ def annotations(self, request: ExtendedRequest, pk: int): return self.upload_data(request) elif request.method == 'PUT': - if "format" in request.query_params.keys(): + if {"format", "rq_id"} & set(request.query_params.keys()): return get_410_response_for_import_api() serializer = LabeledDataSerializer(data=request.data) From 0de036cde4dd30c5d1bb76e37a760194d27cdaf1 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 1 Apr 2025 14:39:53 +0200 Subject: [PATCH 042/103] Fix typos --- cvat/apps/engine/background.py | 2 +- cvat/apps/engine/tests/utils.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index effab2b70c78..79548f411eae 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -344,7 +344,7 @@ def _handle_non_tus_file_upload(self): def _init_callback_with_params(self) -> tuple[Callable, tuple]: ... def init_callback_with_params(self): - # Note: self.import_args are changed here + # Note: self.import_args is changed here if self.import_args.location == Location.CLOUD_STORAGE: db_storage, key = self._handle_cloud_storage_file_upload() elif not self.import_args.file_path: diff --git a/cvat/apps/engine/tests/utils.py b/cvat/apps/engine/tests/utils.py index e3400ef0ae4f..48c4b3a2afdd 100644 --- a/cvat/apps/engine/tests/utils.py +++ b/cvat/apps/engine/tests/utils.py @@ -226,7 +226,7 @@ def _import_project_backup( expected_4xx_status_code: int | None = None ) -> int | None: response = self._import( - user, f"/api/projects/backup", file_content, + user, "/api/projects/backup", file_content, query_params=query_params, expected_4xx_status_code=expected_4xx_status_code ) if expected_4xx_status_code: @@ -239,7 +239,7 @@ def _import_task_backup( expected_4xx_status_code: int | None = None ) -> int | None: response = self._import( - user, f"/api/tasks/backup", file_content, + user, "/api/tasks/backup", file_content, query_params=query_params, expected_4xx_status_code=expected_4xx_status_code ) if expected_4xx_status_code: From b961e9c7abb0b89c1ee3ce2c82f483bf1fe8c395 Mon Sep 17 00:00:00 2001 From: maya Date: Fri, 4 Apr 2025 22:40:47 +0200 Subject: [PATCH 043/103] small refactoring --- cvat/apps/consensus/views.py | 12 ++--- cvat/apps/engine/background.py | 26 +++++------ cvat/apps/engine/mixins.py | 4 +- cvat/apps/engine/rq.py | 16 ++----- cvat/apps/engine/view_utils.py | 18 ++++++++ cvat/apps/engine/views.py | 38 +++++----------- cvat/apps/events/export.py | 12 +---- cvat/apps/events/views.py | 2 +- cvat/apps/quality_control/quality_reports.py | 14 +++++- cvat/apps/quality_control/views.py | 4 +- cvat/apps/redis_handler/background.py | 5 +- cvat/apps/redis_handler/permissions.py | 48 ++++++++++---------- cvat/apps/redis_handler/rq.py | 20 ++++++-- cvat/apps/redis_handler/serializers.py | 44 +++++++----------- cvat/apps/redis_handler/views.py | 29 ++++++------ cvat/settings/base.py | 1 + 16 files changed, 144 insertions(+), 149 deletions(-) diff --git a/cvat/apps/consensus/views.py b/cvat/apps/consensus/views.py index 6738304f668a..d746e53dfd13 100644 --- a/cvat/apps/consensus/views.py +++ b/cvat/apps/consensus/views.py @@ -26,6 +26,7 @@ from cvat.apps.engine.models import Job, Task from cvat.apps.engine.types import ExtendedRequest from cvat.apps.redis_handler.serializers import RequestIdSerializer +from cvat.apps.engine.view_utils import get_410_response_when_checking_process_status @extend_schema(tags=["consensus"]) @@ -55,14 +56,7 @@ def create(self, request: ExtendedRequest, *args, **kwargs): rq_id = request.query_params.get(self.CREATE_MERGE_RQ_ID_PARAMETER, None) if rq_id: - return HttpResponseGone( - textwrap.dedent( - """\ - This endpoint is no longer handles merge status checking. - The common requests API should be used instead: GET /api/requests/rq_id - """ - ) - ) + return get_410_response_when_checking_process_status("merge") input_serializer = ConsensusMergeCreateSerializer(data=request.data) input_serializer.is_valid(raise_exception=True) @@ -81,7 +75,7 @@ def create(self, request: ExtendedRequest, *args, **kwargs): raise NotFound(f"Jobs {job_id} do not exist") from ex manager = merging.MergingManager(request=request, db_instance=instance) - return manager.process() + return manager.schedule_job() @extend_schema(tags=["consensus"]) diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index 79548f411eae..7faecd731cb9 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -106,6 +106,7 @@ def build_request_id(self): action=RequestAction.EXPORT, target=RequestTarget(self.resource), id=self.db_instance.pk, + user_id=self.user_id, extra={ "subresource": ( RequestSubresource.DATASET @@ -113,18 +114,17 @@ def build_request_id(self): else RequestSubresource.ANNOTATIONS ), "format": self.export_args.format, - "user_id": self.user_id, }, ).render() def validate_request_id(self, request_id, /) -> None: - parsed_rq_id = ExportRequestId.parse(request_id) + parsed_request_id = ExportRequestId.parse(request_id) if ( - parsed_rq_id.action != RequestAction.EXPORT - or parsed_rq_id.target != RequestTarget(self.resource) - or parsed_rq_id.id != self.db_instance.pk - or parsed_rq_id.subresource + parsed_request_id.action != RequestAction.EXPORT + or parsed_request_id.target != RequestTarget(self.resource) + or parsed_request_id.id != self.db_instance.pk + or parsed_request_id.subresource not in {RequestSubresource.DATASET, RequestSubresource.ANNOTATIONS} ): raise ValueError("The provided request id does not match exported target or resource") @@ -180,13 +180,13 @@ class BackupExporter(AbstractExporter): SUPPORTED_RESOURCES = {RequestTarget.PROJECT, RequestTarget.TASK} def validate_request_id(self, request_id, /) -> None: - parsed_rq_id = ExportRequestId.parse(request_id) + parsed_request_id = ExportRequestId.parse(request_id) if ( - parsed_rq_id.action != RequestAction.EXPORT - or parsed_rq_id.target != RequestTarget(self.resource) - or parsed_rq_id.id != self.db_instance.pk - or parsed_rq_id.subresource is not RequestSubresource.BACKUP + parsed_request_id.action != RequestAction.EXPORT + or parsed_request_id.target != RequestTarget(self.resource) + or parsed_request_id.id != self.db_instance.pk + or parsed_request_id.subresource is not RequestSubresource.BACKUP ): raise ValueError("The provided request id does not match exported target or resource") @@ -227,9 +227,9 @@ def build_request_id(self): action=RequestAction.EXPORT, target=RequestTarget(self.resource), id=self.db_instance.pk, + user_id=self.user_id, extra={ "subresource": RequestSubresource.BACKUP, - "user_id": self.user_id, }, ).render() @@ -430,7 +430,7 @@ def validate_request(self): raise MethodNotAllowed(self.request.method, detail="Format is disabled") def build_request_id(self): - return ExportRequestId( + return ImportRequestId( queue=self.QUEUE_NAME, action=RequestAction.IMPORT, target=RequestTarget(self.resource), diff --git a/cvat/apps/engine/mixins.py b/cvat/apps/engine/mixins.py index 55d90a8a55b2..6b631f7c88f8 100644 --- a/cvat/apps/engine/mixins.py +++ b/cvat/apps/engine/mixins.py @@ -449,7 +449,7 @@ def initiate_dataset_export(self, request: ExtendedRequest, pk: int): self._object = self.get_object() # force call of check_object_permissions() export_manager = DatasetExporter(request=request, db_instance=self._object) - return export_manager.process() + return export_manager.schedule_job() @extend_schema(summary='Download a prepared dataset file', parameters=[ @@ -496,7 +496,7 @@ class BackupMixin: def initiate_backup_export(self, request: ExtendedRequest, pk: int): db_object = self.get_object() # force to call check_object_permissions export_manager = BackupExporter(request=request, db_instance=db_object) - return export_manager.process() + return export_manager.schedule_job() @extend_schema(summary='Download a prepared backup file', diff --git a/cvat/apps/engine/rq.py b/cvat/apps/engine/rq.py index 1172297a3abc..3e08ad4a4ee0 100644 --- a/cvat/apps/engine/rq.py +++ b/cvat/apps/engine/rq.py @@ -51,6 +51,7 @@ class RequestField: STATUS = "status" PROGRESS = "progress" HIDDEN = "hidden" + RESULT_ID = "result_id" # import specific fields TASK_PROGRESS = "task_progress" @@ -238,6 +239,9 @@ def request(self): RQJobMetaField.STATUS, validator=lambda x: isinstance(x, str), optional=True ) + # TODO: looks like result duplicating + result_id: int | None = MutableRQMetaAttribute(RQJobMetaField.RESULT_ID, optional=True) + @staticmethod def _get_resettable_fields() -> list[str]: return RQMetaWithFailureInfo._get_resettable_fields() + [ @@ -335,10 +339,6 @@ def is_rq_job_owner(rq_job: RQJob, user_id: int) -> bool: class ExportRequestId(RequestId): - @cached_property - def user_id(self) -> int: - return int(self.extra["user_id"]) - @cached_property def subresource(self) -> RequestSubresource | None: if subresource := self.extra.get("subresource"): @@ -349,10 +349,6 @@ def subresource(self) -> RequestSubresource | None: def format(self) -> str | None: return self.extra.get("format") - @property - def type(self) -> str: - return self.TYPE_SEP.join([self.action, self.subresource or self.target]) - class ImportRequestId(RequestId): @cached_property @@ -367,10 +363,6 @@ def format(self) -> str | None: # TODO: quote/unquote return self.extra.get("format") - @property - def type(self) -> str: - return self.TYPE_SEP.join([self.action, self.subresource or self.target]) - def define_dependent_job( queue: DjangoRQ, diff --git a/cvat/apps/engine/view_utils.py b/cvat/apps/engine/view_utils.py index 3d27deedd039..bb16eab89e56 100644 --- a/cvat/apps/engine/view_utils.py +++ b/cvat/apps/engine/view_utils.py @@ -17,6 +17,8 @@ from cvat.apps.engine.mixins import UploadMixin from cvat.apps.engine.parsers import TusUploadParser from cvat.apps.engine.types import ExtendedRequest +from django.http import HttpResponseGone +import textwrap def make_paginated_response( @@ -92,3 +94,19 @@ def decorator(f): return f return decorator + +def get_410_response_for_export_api(path: str) -> HttpResponseGone: + return HttpResponseGone(textwrap.dedent(f"""\ + This endpoint is no longer supported. + To initiate the export process, use POST {path}. + To check the process status, use GET /api/requests/rq_id, + where rq_id is obtained from the response of the previous request. + To download the prepared file, use the result_url obtained from the response of the previous request. + """)) + +def get_410_response_when_checking_process_status(process_type: str, /) -> HttpResponseGone: + return HttpResponseGone(textwrap.dedent(f"""\ + This endpoint no longer supports checking the status of the {process_type} process. + The common requests API should be used instead: GET /api/requests/rq_id, + where rq_id is obtained from the response of the initializing request. + """)) \ No newline at end of file diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 23fba37e2b77..d518ca71dcf8 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -135,7 +135,7 @@ ) from cvat.apps.engine.types import ExtendedRequest from cvat.apps.engine.utils import parse_exception_message, sendfile -from cvat.apps.engine.view_utils import tus_chunk_action +from cvat.apps.engine.view_utils import tus_chunk_action, get_410_response_when_checking_process_status, get_410_response_for_export_api from cvat.apps.iam.filters import ORGANIZATION_OPEN_API_PARAMETERS from cvat.apps.iam.permissions import IsAuthenticatedOrReadPublicResource, PolicyEnforcer from cvat.apps.redis_handler.serializers import RequestIdSerializer @@ -152,20 +152,6 @@ _DATA_UPDATED_DATE_HEADER_NAME = 'X-Updated-Date' _RETRY_AFTER_TIMEOUT = 10 -def get_410_response_for_export_api(path: str) -> HttpResponseGone: - return HttpResponseGone(textwrap.dedent(f"""\ - This endpoint is no longer supported. - To initiate the export process, use POST {path}. - To check the process status, use GET /api/requests/rq_id, - where rq_id is obtained from the response of the previous request. - To download the prepared file, use the result_url obtained from the response of the previous request. - """)) - -def get_410_response_for_import_api() -> HttpResponseGone: - return HttpResponseGone(textwrap.dedent("""\ - This endpoint no longer supports checking the status of the import process, use GET /api/requests/rq_id, - where rq_id is obtained from the response of the previous request. - """)) @extend_schema(tags=['server']) class ServerViewSet(viewsets.ViewSet): @@ -399,7 +385,7 @@ def dataset(self, request: ExtendedRequest, pk: int): # depends on rq job status (like 201 - finished), # while GET /api/requests/rq_id returns a 200 status code # if such a request exists regardless of job status. - return get_410_response_for_import_api() + return get_410_response_when_checking_process_status("import") # we cannot redirect to the new API here since this endpoint used not only to check the status # of exporting process|download a result file, but also to initiate export process @@ -423,11 +409,11 @@ def get_upload_dir(self): def upload_finished(self, request: ExtendedRequest): if self.action == 'dataset': importer = DatasetImporter(request=request, db_instance=self._object) - return importer.process() + return importer.schedule_job() elif self.action == 'import_backup': importer = BackupImporter(request=request, resource=RequestTarget.PROJECT) - return importer.process() + return importer.schedule_job() return Response(data='Unknown upload was finished', status=status.HTTP_400_BAD_REQUEST) @@ -474,7 +460,7 @@ def export_backup(self, request: ExtendedRequest, pk: int): parser_classes=_UPLOAD_PARSER_CLASSES) def import_backup(self, request: ExtendedRequest): if request.query_params.get("rq_id"): - return get_410_response_for_import_api() + return get_410_response_when_checking_process_status("import") return self.upload_data(request) @@ -823,7 +809,7 @@ def get_queryset(self): parser_classes=_UPLOAD_PARSER_CLASSES) def import_backup(self, request: ExtendedRequest): if request.query_params.get("rq_id"): - return get_410_response_for_import_api() + return get_410_response_when_checking_process_status("import") return self.upload_data(request) @@ -953,7 +939,7 @@ def upload_finished(self, request: ExtendedRequest): @transaction.atomic def _handle_upload_annotations(request: ExtendedRequest): importer = DatasetImporter(request=request, db_instance=self._object) - return importer.process() + return importer.schedule_job() def _handle_upload_data(request: ExtendedRequest): with transaction.atomic(): @@ -1017,12 +1003,12 @@ def _handle_upload_data(request: ExtendedRequest): # Need to process task data when the transaction is committed creator = TaskCreator(request=request, db_instance=self._object, db_data=data) - return creator.process() + return creator.schedule_job() @transaction.atomic def _handle_upload_backup(request: ExtendedRequest): importer = BackupImporter(request=request, resource=RequestTarget.TASK) - return importer.process() + return importer.schedule_job() if self.action == 'annotations': return _handle_upload_annotations(request) @@ -1258,7 +1244,7 @@ def annotations(self, request: ExtendedRequest, pk: int): elif request.method == 'PUT': if {"format", "rq_id"} & set(request.query_params.keys()): - return get_410_response_for_import_api() + return get_410_response_when_checking_process_status("import") serializer = LabeledDataSerializer(data=request.data) if serializer.is_valid(raise_exception=True): @@ -1615,7 +1601,7 @@ def get_upload_dir(self): def upload_finished(self, request: ExtendedRequest): if self.action == 'annotations': importer = DatasetImporter(request=request, db_instance=self._object) - return importer.process() + return importer.schedule_job() return Response(data='Unknown upload was finished', status=status.HTTP_400_BAD_REQUEST) @@ -1731,7 +1717,7 @@ def annotations(self, request: ExtendedRequest, pk: int): elif request.method == 'PUT': if {"format", "rq_id"} & set(request.query_params.keys()): - return get_410_response_for_import_api() + return get_410_response_when_checking_process_status("import") serializer = LabeledDataSerializer(data=request.data) if serializer.is_valid(raise_exception=True): diff --git a/cvat/apps/events/export.py b/cvat/apps/events/export.py index f822d23fc3d4..2a7ffed51910 100644 --- a/cvat/apps/events/export.py +++ b/cvat/apps/events/export.py @@ -87,12 +87,6 @@ def _create_csv(query_params: dict): raise -class EventsRequestId(RequestId): - @property - def user_id(self) -> int: - return self.extra["user_id"] - - @attrs.define(kw_only=True) class EventsExporter(AbstractExporter): @@ -104,14 +98,12 @@ def __attrs_post_init__(self): self.query_id = self.request.query_params.get("query_id") or uuid.uuid4() def build_request_id(self): - return EventsRequestId( + return RequestId( queue=self.QUEUE_NAME, action="export", target="events", id=self.query_id, - extra={ - "user_id": self.user_id, - }, + user_id=self.user_id, ).render() def init_request_args(self): diff --git a/cvat/apps/events/views.py b/cvat/apps/events/views.py index 378e482b5cbf..f6acd6aec3ac 100644 --- a/cvat/apps/events/views.py +++ b/cvat/apps/events/views.py @@ -186,7 +186,7 @@ def list(self, request: ExtendedRequest): def initiate_export(self, request: ExtendedRequest): self.check_permissions(request) exporter = EventsExporter(request=request) - return exporter.process() + return exporter.schedule_job() @extend_schema( summary="Download a prepared file with events", diff --git a/cvat/apps/quality_control/quality_reports.py b/cvat/apps/quality_control/quality_reports.py index 32b90fa6541a..5c1169f14f1d 100644 --- a/cvat/apps/quality_control/quality_reports.py +++ b/cvat/apps/quality_control/quality_reports.py @@ -2261,6 +2261,12 @@ def generate_report(self) -> ComparisonReport: ) +class QualityRequestId(RequestId): + @property + def subresource(self): + return self.extra["subresource"] + + @define(kw_only=True) class QualityReportRQJobManager(AbstractRequestManager): QUEUE_NAME = settings.CVAT_QUEUES.QUALITY_REPORTS.value @@ -2271,11 +2277,14 @@ def job_result_ttl(self): return 120 def build_request_id(self): - return RequestId( + return QualityRequestId( queue=self.QUEUE_NAME, - action="compute", + action="calculate", target=self.resource, id=self.db_instance.pk, + extra={ + "subresource": "quality" + } ).render() def validate_request(self): @@ -2308,6 +2317,7 @@ def _check_task_quality(cls, *, task_id: int) -> int: return cls()._compute_reports(task_id=task_id) def _compute_reports(self, task_id: int) -> int: + # raise Exception("Ooops") with transaction.atomic(): try: # Preload all the data for the computations. diff --git a/cvat/apps/quality_control/views.py b/cvat/apps/quality_control/views.py index 09c20a7e6aa1..e6d774976632 100644 --- a/cvat/apps/quality_control/views.py +++ b/cvat/apps/quality_control/views.py @@ -283,7 +283,7 @@ def create(self, request, *args, **kwargs): raise NotFound(f"Task {task_id} does not exist") from ex manager = qc.QualityReportRQJobManager(request=request, db_instance=task) - return manager.process() + return manager.schedule_job() else: deprecation_timestamp = int(datetime(2025, 3, 17, tzinfo=timezone.utc).timestamp()) @@ -335,7 +335,7 @@ def create(self, request, *args, **kwargs): headers=response_headers, ) - report = self.get_queryset().get(pk=return_value) + report = self.get_queryset().get(pk=return_value) # fixme: no result_id in response report_serializer = QualityReportSerializer( instance=report, context={"request": request} ) diff --git a/cvat/apps/redis_handler/background.py b/cvat/apps/redis_handler/background.py index 45a9416ab4fc..0cbd8dad2d39 100644 --- a/cvat/apps/redis_handler/background.py +++ b/cvat/apps/redis_handler/background.py @@ -156,7 +156,7 @@ def handle_existing_job(self, job: RQJob | None, queue: DjangoRQ) -> Response | def build_meta(self, *, request_id: str): return BaseRQMeta.build(request=self.request, db_obj=self.db_instance) - def setup_new_job(self, queue: DjangoRQ, request_id: str, /): + def setup_new_job(self, queue: DjangoRQ, request_id: str, /, **kwargs): with get_rq_lock_by_user(queue, self.user_id): queue.enqueue_call( func=self.callback, @@ -167,6 +167,7 @@ def setup_new_job(self, queue: DjangoRQ, request_id: str, /): depends_on=define_dependent_job(queue, self.user_id, rq_id=request_id), result_ttl=self.job_result_ttl, failure_ttl=self.job_failed_ttl, + **kwargs, ) def finalize_request(self) -> None: @@ -176,7 +177,7 @@ def get_response(self, request_id: str) -> Response: serializer = RequestIdSerializer({"rq_id": request_id}) return Response(serializer.data, status=status.HTTP_202_ACCEPTED) - def process(self) -> Response: + def schedule_job(self) -> Response: self.init_request_args() self.validate_request() self.init_callback_with_params() diff --git a/cvat/apps/redis_handler/permissions.py b/cvat/apps/redis_handler/permissions.py index 8fd5a2d57b03..bd9badff02bf 100644 --- a/cvat/apps/redis_handler/permissions.py +++ b/cvat/apps/redis_handler/permissions.py @@ -16,8 +16,9 @@ from rest_framework.viewsets import ViewSet from cvat.apps.engine.models import RequestTarget -from cvat.apps.engine.permissions import JobPermission, TaskPermission +from cvat.apps.engine.permissions import JobPermission, TaskPermission, ProjectPermission from cvat.apps.engine.rq import BaseRQMeta +from cvat.apps.redis_handler.rq import CustomRQJob class RequestPermission(OpenPolicyAgentPermission): @@ -29,7 +30,7 @@ class Scopes(StrEnum): @classmethod def create( - cls, request: ExtendedRequest, view: ViewSet, obj: RQJob | None, iam_context: dict + cls, request: ExtendedRequest, view: ViewSet, obj: CustomRQJob | None, iam_context: dict ) -> list[OpenPolicyAgentPermission]: permissions = [] if view.basename == "request": @@ -37,29 +38,26 @@ def create( if scope == cls.Scopes.LIST: continue elif scope == cls.Scopes.VIEW: - parsed_rq_id = obj.parsed_rq_id - - if ( - parsed_rq_id.queue - in ( - settings.CVAT_QUEUES.CONSENSUS, - settings.CVAT_QUEUES.QUALITY_REPORTS, - ) - and parsed_rq_id.target == RequestTarget.TASK - ): - permissions.append( - TaskPermission.create_scope_view(request, parsed_rq_id.id) - ) - continue - - if ( - parsed_rq_id.queue == settings.CVAT_QUEUES.CONSENSUS - and parsed_rq_id.target == RequestTarget.JOB - ): - permissions.append( - JobPermission.create_scope_view(request, parsed_rq_id.id) - ) - continue + parsed_request_id = obj.parsed_id + + # In case when background job is unique for a user, status check should be available only for this user + # In other cases, status check should be available for all users that have target resource VIEW permission + if not parsed_request_id.user_id: + if parsed_request_id.target == RequestTarget.PROJECT.value: + permissions.append( + ProjectPermission.create_scope_view(request, parsed_request_id.id) + ) + continue + elif parsed_request_id.target == RequestTarget.TASK.value: + permissions.append( + TaskPermission.create_scope_view(request, parsed_request_id.id) + ) + continue + elif parsed_request_id.target == RequestTarget.JOB.value: + permissions.append( + JobPermission.create_scope_view(request, parsed_request_id.id) + ) + continue self = cls.create_base_perm(request, view, scope, iam_context, obj) permissions.append(self) diff --git a/cvat/apps/redis_handler/rq.py b/cvat/apps/redis_handler/rq.py index d0ec05d6c19c..fd871d3224cd 100644 --- a/cvat/apps/redis_handler/rq.py +++ b/cvat/apps/redis_handler/rq.py @@ -6,6 +6,8 @@ import attrs from django.conf import settings +from rq.job import Job as RQJob +from typing import Protocol def convert_id(value: int | str | UUID) -> int | UUID: @@ -39,7 +41,7 @@ class RequestId: KEY_VAL_SEP: ClassVar[str] = "=" TYPE_SEP: ClassVar[str] = ":" - queue: settings.CVAT_QUEUES = attrs.field(converter=settings.CVAT_QUEUES) + queue: str = attrs.field(validator=attrs.validators.instance_of(str)) action: str = attrs.field(validator=attrs.validators.instance_of(str)) target: str = attrs.field(validator=attrs.validators.instance_of(str)) id: int | UUID = attrs.field( @@ -48,9 +50,13 @@ class RequestId: ) extra: dict[str, Any] = attrs.field(converter=convert_extra, factory=dict) + # todo: prohibit by default to set this field + user_id: int | None = attrs.field(converter=lambda x: x if x is None else int(x), default=None) + @property def type(self) -> str: - return self.TYPE_SEP.join([self.action, self.target]) + subresource = getattr(self, "subresource", None) + return self.TYPE_SEP.join([self.action, subresource or self.target]) def convert_to(self, child_class: type[RequestId], /): # method is going to be used by child classes @@ -59,6 +65,7 @@ def convert_to(self, child_class: type[RequestId], /): action=self.action, target=self.target, id=self.id, + user_id=self.user_id, extra=self.extra, ) @@ -67,10 +74,11 @@ def render(self) -> str: [ self.KEY_VAL_SEP.join([k, v]) for k, v in { - "queue": self.queue.value, + "queue": self.queue, # TODO: probably can be added in RequestIdSerializer? "action": str(self.action), "target": str(self.target), "id": str(self.id), + **({"user_id": str(self.user_id),} if self.user_id is not None else {}), **self.extra, }.items() ] @@ -96,3 +104,9 @@ def parse(cls, request_id: str, /): return cls(**params) except Exception as ex: raise IncorrectRequestIdError from ex + +class WithParsedId(Protocol): + parsed_id: RequestId + +class CustomRQJob(RQJob, WithParsedId): + pass \ No newline at end of file diff --git a/cvat/apps/redis_handler/serializers.py b/cvat/apps/redis_handler/serializers.py index 01cd47677795..4bed346e7144 100644 --- a/cvat/apps/redis_handler/serializers.py +++ b/cvat/apps/redis_handler/serializers.py @@ -9,12 +9,11 @@ from typing import Any import rq.defaults as rq_defaults -from django.conf import settings from django.utils import timezone from drf_spectacular.utils import extend_schema_field from rest_framework import serializers -from rq.job import Job as RQJob from rq.job import JobStatus as RQJobStatus +from cvat.apps.redis_handler.rq import CustomRQJob from cvat.apps.engine import models from cvat.apps.engine.log import ServerLogManager @@ -49,27 +48,27 @@ class RequestDataOperationSerializer(serializers.Serializer): format = serializers.CharField(required=False, allow_null=True) function_id = serializers.CharField(required=False, allow_null=True) - def to_representation(self, rq_job: RQJob) -> dict[str, Any]: - parsed_rq_id: RequestId = rq_job.parsed_rq_id + def to_representation(self, rq_job: CustomRQJob) -> dict[str, Any]: + parsed_request_id: RequestId = rq_job.parsed_id base_rq_job_meta = BaseRQMeta.for_job(rq_job) representation = { - "type": parsed_rq_id.type, - "target": parsed_rq_id.target, + "type": parsed_request_id.type, + "target": parsed_request_id.target, "project_id": base_rq_job_meta.project_id, "task_id": base_rq_job_meta.task_id, "job_id": base_rq_job_meta.job_id, } - if parsed_rq_id.action == RequestAction.AUTOANNOTATE: + if parsed_request_id.action == RequestAction.AUTOANNOTATE: representation["function_id"] = LambdaRQMeta.for_job(rq_job).function_id - elif parsed_rq_id.action in ( + elif parsed_request_id.action in ( RequestAction.IMPORT, RequestAction.EXPORT, - ) and parsed_rq_id.subresource in ( + ) and parsed_request_id.subresource in ( RequestSubresource.ANNOTATIONS, RequestSubresource.DATASET, ): - representation["format"] = parsed_rq_id.format + representation["format"] = parsed_request_id.format return representation @@ -105,21 +104,21 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @extend_schema_field(UserIdentifiersSerializer()) - def get_owner(self, rq_job: RQJob) -> dict[str, Any]: + def get_owner(self, rq_job: CustomRQJob) -> dict[str, Any]: assert self._base_rq_job_meta return UserIdentifiersSerializer(self._base_rq_job_meta.user).data @extend_schema_field( serializers.FloatField(min_value=0, max_value=1, required=False, allow_null=True) ) - def get_progress(self, rq_job: RQJob) -> Decimal: + def get_progress(self, rq_job: CustomRQJob) -> Decimal: rq_job_meta = ImportRQMeta.for_job(rq_job) # progress of task creation is stored in "task_progress" field # progress of project import is stored in "progress" field return Decimal(rq_job_meta.progress or rq_job_meta.task_progress or 0.0) @extend_schema_field(serializers.DateTimeField(required=False, allow_null=True)) - def get_expiry_date(self, rq_job: RQJob) -> str | None: + def get_expiry_date(self, rq_job: CustomRQJob) -> str | None: delta = None if rq_job.is_finished: delta = rq_job.result_ttl or rq_defaults.DEFAULT_RESULT_TTL @@ -133,9 +132,7 @@ def get_expiry_date(self, rq_job: RQJob) -> str | None: return None @extend_schema_field(serializers.CharField(allow_blank=True)) - def get_message(self, rq_job: RQJob) -> str: - # TODO: from cvat.apps.engine.utils import parse_exception_message - + def get_message(self, rq_job: CustomRQJob) -> str: assert self._base_rq_job_meta rq_job_status = rq_job.get_status() message = "" @@ -150,7 +147,7 @@ def get_message(self, rq_job: RQJob) -> str: return message - def to_representation(self, rq_job: RQJob) -> dict[str, Any]: + def to_representation(self, rq_job: CustomRQJob) -> dict[str, Any]: self._base_rq_job_meta = BaseRQMeta.for_job(rq_job) representation = super().to_representation(rq_job) @@ -159,16 +156,9 @@ def to_representation(self, rq_job: RQJob) -> dict[str, Any]: representation["status"] = RQJobStatus.QUEUED if representation["status"] == RQJobStatus.FINISHED: - - # TODO: move into a custom Job class - if rq_job.parsed_rq_id.action == models.RequestAction.EXPORT: + if rq_job.parsed_id.action == models.RequestAction.EXPORT: representation["result_url"] = ExportRQMeta.for_job(rq_job).result_url - - if ( - rq_job.parsed_rq_id.action == models.RequestAction.IMPORT - and rq_job.parsed_rq_id.subresource == models.RequestSubresource.BACKUP - or rq_job.parsed_rq_id.queue == settings.CVAT_QUEUES.QUALITY_REPORTS - ): - representation["result_id"] = rq_job.return_value() + elif self._base_rq_job_meta.result_id is not None: + representation["result_id"] = self._base_rq_job_meta.result_id return representation diff --git a/cvat/apps/redis_handler/views.py b/cvat/apps/redis_handler/views.py index f8686539909a..a08b1e3ec4f4 100644 --- a/cvat/apps/redis_handler/views.py +++ b/cvat/apps/redis_handler/views.py @@ -27,7 +27,7 @@ from cvat.apps.engine.models import RequestStatus # todo: move to the app from cvat.apps.engine.rq import is_rq_job_owner from cvat.apps.engine.types import ExtendedRequest -from cvat.apps.redis_handler.rq import RequestId +from cvat.apps.redis_handler.rq import RequestId, CustomRQJob from cvat.apps.redis_handler.serializers import RequestSerializer slogger = ServerLogManager(__name__) @@ -90,10 +90,10 @@ class RequestViewSet(viewsets.GenericViewSet): lookup_fields = { "created_date": "created_at", - "action": "parsed_rq_id.action", - "target": "parsed_rq_id.target", - "subresource": "parsed_rq_id.subresource", - "format": "parsed_rq_id.format", + "action": "parsed_id.action", + "target": "parsed_id.target", + "subresource": "parsed_id.subresource", + "format": "parsed_id.format", "status": "get_status", "project_id": "meta.project_id", "task_id": "meta.task_id", @@ -141,12 +141,11 @@ def _get_rq_jobs_from_queue(self, queue: DjangoRQ, user_id: int) -> list[RQJob]: for job in queue.job_class.fetch_many(job_ids, queue.connection): if job and is_rq_job_owner(job, user_id): try: - parsed_rq_id = ParsedIdClass.parse(job.id) + parsed_request_id = ParsedIdClass.parse(job.id) except Exception: # nosec B112 continue - # todo: fix type annotation - job.parsed_rq_id = parsed_rq_id + job.parsed_id = parsed_request_id jobs.append(job) return jobs @@ -179,24 +178,24 @@ def _get_rq_job_by_id(self, rq_id: str) -> RQJob | None: Optional[RQJob]: The retrieved RQJob, or None if not found. """ try: - parsed_rq_id = RequestId.parse(rq_id) + parsed_request_id = RequestId.parse(rq_id) except Exception: return None - job: RQJob | None = None + job: CustomRQJob | None = None - if parsed_rq_id.queue.value not in self.SUPPORTED_QUEUES: + if parsed_request_id.queue not in self.SUPPORTED_QUEUES: raise ValidationError("Unsupported queue") - queue: DjangoRQ = django_rq.get_queue(parsed_rq_id.queue.value) + queue: DjangoRQ = django_rq.get_queue(parsed_request_id.queue) job = queue.fetch_job(rq_id) if job: ParsedIdClass = self.get_parsed_id_class(queue.name) - if type(parsed_rq_id) is not ParsedIdClass: # pylint: disable=unidiomatic-typecheck - parsed_rq_id = parsed_rq_id.convert_to(ParsedIdClass) + if type(parsed_request_id) is not ParsedIdClass: # pylint: disable=unidiomatic-typecheck + parsed_request_id = parsed_request_id.convert_to(ParsedIdClass) - job.parsed_rq_id = parsed_rq_id + job.parsed_id = parsed_request_id return job diff --git a/cvat/settings/base.py b/cvat/settings/base.py index 23cbc7c9a551..4451939daca2 100644 --- a/cvat/settings/base.py +++ b/cvat/settings/base.py @@ -324,6 +324,7 @@ class CVAT_QUEUES(Enum): **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "1h", "VISIBLE_VIA_REQUESTS_API": True, + "PARSED_JOB_ID_CLASS": "cvat.apps.quality_control.quality_reports.QualityRequestId", }, CVAT_QUEUES.CLEANING.value: { **REDIS_INMEM_SETTINGS, From 2fa15048afe064df721746f72debd1b1632a14b0 Mon Sep 17 00:00:00 2001 From: maya Date: Mon, 7 Apr 2025 10:28:00 +0200 Subject: [PATCH 044/103] Use return value instead of custom meta key in serializer --- cvat/apps/engine/rq.py | 4 ---- cvat/apps/quality_control/views.py | 2 +- cvat/apps/redis_handler/serializers.py | 8 +++++--- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/cvat/apps/engine/rq.py b/cvat/apps/engine/rq.py index 3e08ad4a4ee0..b75157f13d59 100644 --- a/cvat/apps/engine/rq.py +++ b/cvat/apps/engine/rq.py @@ -51,7 +51,6 @@ class RequestField: STATUS = "status" PROGRESS = "progress" HIDDEN = "hidden" - RESULT_ID = "result_id" # import specific fields TASK_PROGRESS = "task_progress" @@ -239,9 +238,6 @@ def request(self): RQJobMetaField.STATUS, validator=lambda x: isinstance(x, str), optional=True ) - # TODO: looks like result duplicating - result_id: int | None = MutableRQMetaAttribute(RQJobMetaField.RESULT_ID, optional=True) - @staticmethod def _get_resettable_fields() -> list[str]: return RQMetaWithFailureInfo._get_resettable_fields() + [ diff --git a/cvat/apps/quality_control/views.py b/cvat/apps/quality_control/views.py index e6d774976632..4c2adf9c7f95 100644 --- a/cvat/apps/quality_control/views.py +++ b/cvat/apps/quality_control/views.py @@ -335,7 +335,7 @@ def create(self, request, *args, **kwargs): headers=response_headers, ) - report = self.get_queryset().get(pk=return_value) # fixme: no result_id in response + report = self.get_queryset().get(pk=return_value) report_serializer = QualityReportSerializer( instance=report, context={"request": request} ) diff --git a/cvat/apps/redis_handler/serializers.py b/cvat/apps/redis_handler/serializers.py index 4bed346e7144..5e2504a3c0cc 100644 --- a/cvat/apps/redis_handler/serializers.py +++ b/cvat/apps/redis_handler/serializers.py @@ -23,6 +23,7 @@ from cvat.apps.engine.utils import parse_exception_message from cvat.apps.lambda_manager.rq import LambdaRQMeta from cvat.apps.redis_handler.rq import RequestId +from uuid import UUID slogger = ServerLogManager(__name__) @@ -97,7 +98,6 @@ class RequestSerializer(serializers.Serializer): expiry_date = serializers.SerializerMethodField() owner = serializers.SerializerMethodField() result_url = serializers.URLField(required=False, allow_null=True) - result_id = serializers.IntegerField(required=False, allow_null=True) def __init__(self, *args, **kwargs): self._base_rq_job_meta: BaseRQMeta | None = None @@ -158,7 +158,9 @@ def to_representation(self, rq_job: CustomRQJob) -> dict[str, Any]: if representation["status"] == RQJobStatus.FINISHED: if rq_job.parsed_id.action == models.RequestAction.EXPORT: representation["result_url"] = ExportRQMeta.for_job(rq_job).result_url - elif self._base_rq_job_meta.result_id is not None: - representation["result_id"] = self._base_rq_job_meta.result_id + else: + return_value = rq_job.return_value() + if isinstance(return_value, (int, UUID)): + representation["result_id"] = return_value return representation From 8b4e946db5af7af2aba53c86fccb808e4f936350 Mon Sep 17 00:00:00 2001 From: maya Date: Mon, 7 Apr 2025 11:27:55 +0200 Subject: [PATCH 045/103] Fix permissions --- cvat/apps/redis_handler/permissions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cvat/apps/redis_handler/permissions.py b/cvat/apps/redis_handler/permissions.py index bd9badff02bf..a8f255d9fc6e 100644 --- a/cvat/apps/redis_handler/permissions.py +++ b/cvat/apps/redis_handler/permissions.py @@ -40,9 +40,9 @@ def create( elif scope == cls.Scopes.VIEW: parsed_request_id = obj.parsed_id - # In case when background job is unique for a user, status check should be available only for this user + # In case when background job is unique for a user, status check should be available only for this user/admin # In other cases, status check should be available for all users that have target resource VIEW permission - if not parsed_request_id.user_id: + if not parsed_request_id.user_id and isinstance(parsed_request_id.id, int): if parsed_request_id.target == RequestTarget.PROJECT.value: permissions.append( ProjectPermission.create_scope_view(request, parsed_request_id.id) From 4155f2a7f953422f0c322316a9c7c8f220c45c9c Mon Sep 17 00:00:00 2001 From: maya Date: Mon, 7 Apr 2025 13:39:53 +0200 Subject: [PATCH 046/103] isort && pylint --- cvat/apps/consensus/views.py | 3 +-- cvat/apps/engine/view_utils.py | 6 +++--- cvat/apps/engine/views.py | 10 +++++++--- cvat/apps/redis_handler/permissions.py | 2 +- cvat/apps/redis_handler/rq.py | 6 ++---- cvat/apps/redis_handler/serializers.py | 5 ++--- cvat/apps/redis_handler/views.py | 2 +- 7 files changed, 17 insertions(+), 17 deletions(-) diff --git a/cvat/apps/consensus/views.py b/cvat/apps/consensus/views.py index d746e53dfd13..134d81c75464 100644 --- a/cvat/apps/consensus/views.py +++ b/cvat/apps/consensus/views.py @@ -4,7 +4,6 @@ import textwrap -from django.http import HttpResponseGone from drf_spectacular.utils import ( OpenApiParameter, OpenApiResponse, @@ -25,8 +24,8 @@ from cvat.apps.engine.mixins import PartialUpdateModelMixin from cvat.apps.engine.models import Job, Task from cvat.apps.engine.types import ExtendedRequest -from cvat.apps.redis_handler.serializers import RequestIdSerializer from cvat.apps.engine.view_utils import get_410_response_when_checking_process_status +from cvat.apps.redis_handler.serializers import RequestIdSerializer @extend_schema(tags=["consensus"]) diff --git a/cvat/apps/engine/view_utils.py b/cvat/apps/engine/view_utils.py index bb16eab89e56..9311516b6932 100644 --- a/cvat/apps/engine/view_utils.py +++ b/cvat/apps/engine/view_utils.py @@ -4,9 +4,11 @@ # NOTE: importing in the utils.py header leads to circular importing +import textwrap from typing import Optional from django.db.models.query import QuerySet +from django.http import HttpResponseGone from django.http.response import HttpResponse from drf_spectacular.utils import extend_schema from rest_framework.decorators import action @@ -17,8 +19,6 @@ from cvat.apps.engine.mixins import UploadMixin from cvat.apps.engine.parsers import TusUploadParser from cvat.apps.engine.types import ExtendedRequest -from django.http import HttpResponseGone -import textwrap def make_paginated_response( @@ -109,4 +109,4 @@ def get_410_response_when_checking_process_status(process_type: str, /) -> HttpR This endpoint no longer supports checking the status of the {process_type} process. The common requests API should be used instead: GET /api/requests/rq_id, where rq_id is obtained from the response of the initializing request. - """)) \ No newline at end of file + """)) diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index d518ca71dcf8..8da674bc7dae 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -28,7 +28,7 @@ from django.db import models as django_models from django.db import transaction from django.db.models.query import Prefetch -from django.http import HttpResponse, HttpResponseBadRequest, HttpResponseGone, HttpResponseNotFound +from django.http import HttpResponse, HttpResponseBadRequest, HttpResponseNotFound from django.utils import timezone from drf_spectacular.types import OpenApiTypes from drf_spectacular.utils import ( @@ -135,7 +135,11 @@ ) from cvat.apps.engine.types import ExtendedRequest from cvat.apps.engine.utils import parse_exception_message, sendfile -from cvat.apps.engine.view_utils import tus_chunk_action, get_410_response_when_checking_process_status, get_410_response_for_export_api +from cvat.apps.engine.view_utils import ( + get_410_response_for_export_api, + get_410_response_when_checking_process_status, + tus_chunk_action, +) from cvat.apps.iam.filters import ORGANIZATION_OPEN_API_PARAMETERS from cvat.apps.iam.permissions import IsAuthenticatedOrReadPublicResource, PolicyEnforcer from cvat.apps.redis_handler.serializers import RequestIdSerializer @@ -808,7 +812,7 @@ def get_queryset(self): serializer_class=None, parser_classes=_UPLOAD_PARSER_CLASSES) def import_backup(self, request: ExtendedRequest): - if request.query_params.get("rq_id"): + if request.query_params.get("rq_id"): # permissions? return get_410_response_when_checking_process_status("import") return self.upload_data(request) diff --git a/cvat/apps/redis_handler/permissions.py b/cvat/apps/redis_handler/permissions.py index a8f255d9fc6e..8242660395eb 100644 --- a/cvat/apps/redis_handler/permissions.py +++ b/cvat/apps/redis_handler/permissions.py @@ -16,7 +16,7 @@ from rest_framework.viewsets import ViewSet from cvat.apps.engine.models import RequestTarget -from cvat.apps.engine.permissions import JobPermission, TaskPermission, ProjectPermission +from cvat.apps.engine.permissions import JobPermission, ProjectPermission, TaskPermission from cvat.apps.engine.rq import BaseRQMeta from cvat.apps.redis_handler.rq import CustomRQJob diff --git a/cvat/apps/redis_handler/rq.py b/cvat/apps/redis_handler/rq.py index fd871d3224cd..ba686e7d1814 100644 --- a/cvat/apps/redis_handler/rq.py +++ b/cvat/apps/redis_handler/rq.py @@ -1,13 +1,11 @@ from __future__ import annotations import base64 -from typing import Any, ClassVar +from typing import Any, ClassVar, Protocol from uuid import UUID import attrs -from django.conf import settings from rq.job import Job as RQJob -from typing import Protocol def convert_id(value: int | str | UUID) -> int | UUID: @@ -109,4 +107,4 @@ class WithParsedId(Protocol): parsed_id: RequestId class CustomRQJob(RQJob, WithParsedId): - pass \ No newline at end of file + pass diff --git a/cvat/apps/redis_handler/serializers.py b/cvat/apps/redis_handler/serializers.py index 5e2504a3c0cc..f22ac202117b 100644 --- a/cvat/apps/redis_handler/serializers.py +++ b/cvat/apps/redis_handler/serializers.py @@ -7,13 +7,13 @@ from datetime import timedelta from decimal import Decimal from typing import Any +from uuid import UUID import rq.defaults as rq_defaults from django.utils import timezone from drf_spectacular.utils import extend_schema_field from rest_framework import serializers from rq.job import JobStatus as RQJobStatus -from cvat.apps.redis_handler.rq import CustomRQJob from cvat.apps.engine import models from cvat.apps.engine.log import ServerLogManager @@ -22,8 +22,7 @@ from cvat.apps.engine.serializers import BasicUserSerializer from cvat.apps.engine.utils import parse_exception_message from cvat.apps.lambda_manager.rq import LambdaRQMeta -from cvat.apps.redis_handler.rq import RequestId -from uuid import UUID +from cvat.apps.redis_handler.rq import CustomRQJob, RequestId slogger = ServerLogManager(__name__) diff --git a/cvat/apps/redis_handler/views.py b/cvat/apps/redis_handler/views.py index a08b1e3ec4f4..11f16d97cb93 100644 --- a/cvat/apps/redis_handler/views.py +++ b/cvat/apps/redis_handler/views.py @@ -27,7 +27,7 @@ from cvat.apps.engine.models import RequestStatus # todo: move to the app from cvat.apps.engine.rq import is_rq_job_owner from cvat.apps.engine.types import ExtendedRequest -from cvat.apps.redis_handler.rq import RequestId, CustomRQJob +from cvat.apps.redis_handler.rq import CustomRQJob, RequestId from cvat.apps.redis_handler.serializers import RequestSerializer slogger = ServerLogManager(__name__) From 5593d942cea2aaa3d74927d6b83d7dabb315e640 Mon Sep 17 00:00:00 2001 From: maya Date: Mon, 7 Apr 2025 13:42:57 +0200 Subject: [PATCH 047/103] black --- cvat/apps/quality_control/quality_reports.py | 4 +--- cvat/apps/redis_handler/rq.py | 12 ++++++++++-- cvat/apps/redis_handler/views.py | 4 +++- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/cvat/apps/quality_control/quality_reports.py b/cvat/apps/quality_control/quality_reports.py index 5c1169f14f1d..0a04c45eb6e4 100644 --- a/cvat/apps/quality_control/quality_reports.py +++ b/cvat/apps/quality_control/quality_reports.py @@ -2282,9 +2282,7 @@ def build_request_id(self): action="calculate", target=self.resource, id=self.db_instance.pk, - extra={ - "subresource": "quality" - } + extra={"subresource": "quality"}, ).render() def validate_request(self): diff --git a/cvat/apps/redis_handler/rq.py b/cvat/apps/redis_handler/rq.py index ba686e7d1814..674d4bdfc671 100644 --- a/cvat/apps/redis_handler/rq.py +++ b/cvat/apps/redis_handler/rq.py @@ -72,11 +72,17 @@ def render(self) -> str: [ self.KEY_VAL_SEP.join([k, v]) for k, v in { - "queue": self.queue, # TODO: probably can be added in RequestIdSerializer? + "queue": self.queue, # TODO: probably can be added in RequestIdSerializer? "action": str(self.action), "target": str(self.target), "id": str(self.id), - **({"user_id": str(self.user_id),} if self.user_id is not None else {}), + **( + { + "user_id": str(self.user_id), + } + if self.user_id is not None + else {} + ), **self.extra, }.items() ] @@ -103,8 +109,10 @@ def parse(cls, request_id: str, /): except Exception as ex: raise IncorrectRequestIdError from ex + class WithParsedId(Protocol): parsed_id: RequestId + class CustomRQJob(RQJob, WithParsedId): pass diff --git a/cvat/apps/redis_handler/views.py b/cvat/apps/redis_handler/views.py index 11f16d97cb93..d3c9b50e2c25 100644 --- a/cvat/apps/redis_handler/views.py +++ b/cvat/apps/redis_handler/views.py @@ -192,7 +192,9 @@ def _get_rq_job_by_id(self, rq_id: str) -> RQJob | None: job = queue.fetch_job(rq_id) if job: ParsedIdClass = self.get_parsed_id_class(queue.name) - if type(parsed_request_id) is not ParsedIdClass: # pylint: disable=unidiomatic-typecheck + if ( + type(parsed_request_id) is not ParsedIdClass # pylint: disable=unidiomatic-typecheck + ): parsed_request_id = parsed_request_id.convert_to(ParsedIdClass) job.parsed_id = parsed_request_id From 43da03ad051e45e265df3d759c2358049da2b424 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Mon, 7 Apr 2025 13:46:21 +0200 Subject: [PATCH 048/103] Fix automatic change --- cvat/apps/redis_handler/serializers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cvat/apps/redis_handler/serializers.py b/cvat/apps/redis_handler/serializers.py index f22ac202117b..338a33909046 100644 --- a/cvat/apps/redis_handler/serializers.py +++ b/cvat/apps/redis_handler/serializers.py @@ -97,6 +97,7 @@ class RequestSerializer(serializers.Serializer): expiry_date = serializers.SerializerMethodField() owner = serializers.SerializerMethodField() result_url = serializers.URLField(required=False, allow_null=True) + result_id = serializers.IntegerField(required=False, allow_null=True) def __init__(self, *args, **kwargs): self._base_rq_job_meta: BaseRQMeta | None = None From 3a902c32aca2baf9a281501de0d2160d4b78ed69 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Mon, 7 Apr 2025 13:50:23 +0200 Subject: [PATCH 049/103] disable linter check --- cvat/apps/redis_handler/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cvat/apps/redis_handler/views.py b/cvat/apps/redis_handler/views.py index d3c9b50e2c25..79fb97090875 100644 --- a/cvat/apps/redis_handler/views.py +++ b/cvat/apps/redis_handler/views.py @@ -193,7 +193,7 @@ def _get_rq_job_by_id(self, rq_id: str) -> RQJob | None: if job: ParsedIdClass = self.get_parsed_id_class(queue.name) if ( - type(parsed_request_id) is not ParsedIdClass # pylint: disable=unidiomatic-typecheck + type(parsed_request_id) is not ParsedIdClass # fmt: skip # pylint: disable=unidiomatic-typecheck ): parsed_request_id = parsed_request_id.convert_to(ParsedIdClass) From 37d1a0c695a8f90c257aa0f7e5cadf4290eb5cc5 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Mon, 7 Apr 2025 16:36:55 +0200 Subject: [PATCH 050/103] Remove assert --- tests/python/rest_api/test_consensus.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/python/rest_api/test_consensus.py b/tests/python/rest_api/test_consensus.py index ec86180300fc..fe9879ba8773 100644 --- a/tests/python/rest_api/test_consensus.py +++ b/tests/python/rest_api/test_consensus.py @@ -192,14 +192,14 @@ class TestPostConsensusMerge(_PermissionTestBase): def test_can_merge_task_with_consensus_jobs(self, admin_user, tasks): task_id = next(t["id"] for t in tasks if t["consensus_enabled"]) - assert self.merge(user=admin_user, task_id=task_id) + self.merge(user=admin_user, task_id=task_id) def test_can_merge_consensus_job(self, admin_user, jobs): job_id = next( j["id"] for j in jobs if j["type"] == "annotation" and j["consensus_replicas"] > 0 ) - assert self.merge(user=admin_user, job_id=job_id) + self.merge(user=admin_user, job_id=job_id) def test_cannot_merge_task_without_consensus_jobs(self, admin_user, tasks): task_id = next(t["id"] for t in tasks if not t["consensus_enabled"]) From 5781f96e9dedefe9878fbbe79724820ce91f1acc Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 8 Apr 2025 12:04:23 +0200 Subject: [PATCH 051/103] Update events API --- cvat/apps/events/views.py | 4 ++-- cvat/schema.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cvat/apps/events/views.py b/cvat/apps/events/views.py index f6acd6aec3ac..297d93e27e66 100644 --- a/cvat/apps/events/views.py +++ b/cvat/apps/events/views.py @@ -182,7 +182,7 @@ def list(self, request: ExtendedRequest): "202": OpenApiResponse(RequestIdSerializer), }, ) - @action(detail=False, methods=["POST"], url_path="file/export") + @action(detail=False, methods=["POST"], url_path="export") def initiate_export(self, request: ExtendedRequest): self.check_permissions(request) exporter = EventsExporter(request=request) @@ -205,7 +205,7 @@ def initiate_export(self, request: ExtendedRequest): }, exclude=True, # private API endpoint that should be used only as result_url ) - @action(detail=False, methods=["GET"], url_path="file/download") + @action(detail=False, methods=["GET"], url_path="download") def download_file(self, request: ExtendedRequest): self.check_permissions(request) exporter = EventsExporter(request=request) diff --git a/cvat/schema.yml b/cvat/schema.yml index 419168eff7b7..963daedc7dcc 100644 --- a/cvat/schema.yml +++ b/cvat/schema.yml @@ -1239,9 +1239,9 @@ paths: schema: $ref: '#/components/schemas/ClientEvents' description: '' - /api/events/file/export: + /api/events/export: post: - operationId: events_create_file_export + operationId: events_create_export summary: Initiate a process to export events parameters: - in: query From dbd8b4655a4c666e4b235a02effe0f09269b9d8d Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Thu, 10 Apr 2025 13:54:29 +0200 Subject: [PATCH 052/103] [events] Use cache/export dir --- cvat/apps/dataset_manager/cron.py | 8 +- cvat/apps/dataset_manager/util.py | 141 +++++++++++++++++++----------- cvat/apps/events/export.py | 23 ++--- 3 files changed, 106 insertions(+), 66 deletions(-) diff --git a/cvat/apps/dataset_manager/cron.py b/cvat/apps/dataset_manager/cron.py index e2918de985ba..6d1f5b9c267f 100644 --- a/cvat/apps/dataset_manager/cron.py +++ b/cvat/apps/dataset_manager/cron.py @@ -16,6 +16,7 @@ from cvat.apps.dataset_manager.util import ( CacheFileOrDirPathParseError, + ConstructedFileId, ExportCacheManager, TmpDirManager, get_export_cache_lock, @@ -39,7 +40,10 @@ def clear_export_cache(file_path: Path) -> bool: ttl=EXPORT_CACHE_LOCK_TTL, ): parsed_filename = ExportCacheManager.parse_filename(file_path.name) - cache_ttl = get_export_cache_ttl(parsed_filename.instance_type) + if isinstance(parsed_filename.file_id, ConstructedFileId): + cache_ttl = get_export_cache_ttl(parsed_filename.file_id.instance_type) + else: + cache_ttl = get_export_cache_ttl(None) # use common default cache TTL if timezone.now().timestamp() <= file_path.stat().st_mtime + cache_ttl.total_seconds(): logger.debug(f"Export cache file {file_path.name!r} was recently accessed") @@ -100,7 +104,7 @@ class ExportCacheDirectoryCleaner(BaseCleaner): task_description: ClassVar[str] = "export cache directory cleanup" def do_cleanup(self) -> None: - export_cache_dir_path = settings.EXPORT_CACHE_ROOT + export_cache_dir_path = ExportCacheManager.ROOT assert os.path.exists(export_cache_dir_path) for child in os.scandir(export_cache_dir_path): diff --git a/cvat/apps/dataset_manager/util.py b/cvat/apps/dataset_manager/util.py index 6ab9ed71ce08..34c1e5c2b4c2 100644 --- a/cvat/apps/dataset_manager/util.py +++ b/cvat/apps/dataset_manager/util.py @@ -16,6 +16,7 @@ from enum import Enum from threading import Lock from typing import Any +from uuid import UUID import attrs import django_rq @@ -113,6 +114,7 @@ class ExportFileType(str, Enum): ANNOTATIONS = "annotations" BACKUP = "backup" DATASET = "dataset" + EVENTS = "events" @classmethod def values(cls) -> list[str]: @@ -127,23 +129,32 @@ class InstanceType(str, Enum): def values(cls) -> list[str]: return list(map(lambda x: x.value, cls)) -@attrs.frozen -class _ParsedExportFilename: - file_type: ExportFileType - file_ext: str - instance_type: InstanceType = attrs.field(converter=InstanceType) - instance_id: int - instance_timestamp: float = attrs.field(converter=float) +@attrs.define(kw_only=True) +class SimpleFileId: + value: str = attrs.field(converter=str) -@attrs.frozen -class ParsedDatasetFilename(_ParsedExportFilename): - format_repr: str +@attrs.define(kw_only=True) +class ConstructedFileId(SimpleFileId): + value: str = attrs.field(init=False) + + instance_type: InstanceType = attrs.field(converter=InstanceType, on_setattr=attrs.setters.frozen) + instance_id: int = attrs.field(converter=int, on_setattr=attrs.setters.frozen) + instance_timestamp: float = attrs.field(converter=float, on_setattr=attrs.setters.frozen) + + def __attrs_post_init__(self): + self.value = "-".join(map(str, [self.instance_type, self.instance_id, self.instance_timestamp])) @attrs.frozen -class ParsedBackupFilename(_ParsedExportFilename): - pass +class ParsedExportFilename: + file_type: ExportFileType = attrs.field(converter=ExportFileType) + file_ext: str + file_id: SimpleFileId = attrs.field(validator=attrs.validators.instance_of(SimpleFileId)) + +@attrs.frozen +class ParsedExportFilenameWithConstructedId(ParsedExportFilename): + file_id: ConstructedFileId = attrs.field(validator=attrs.validators.instance_of(ConstructedFileId)) class TmpDirManager: @@ -191,9 +202,11 @@ def get_tmp_directory_for_export( class ExportCacheManager: + ROOT = settings.EXPORT_CACHE_ROOT + SPLITTER = "-" INSTANCE_PREFIX = "instance" - FILE_NAME_TEMPLATE = SPLITTER.join([ + FILE_NAME_TEMPLATE_WITH_INSTANCE = SPLITTER.join([ "{instance_type}", "{instance_id}", "{file_type}", INSTANCE_PREFIX + # store the instance timestamp in the file name to reliably get this information # ctime / mtime do not return file creation time on linux @@ -201,6 +214,15 @@ class ExportCacheManager: "{instance_timestamp}{optional_suffix}.{file_ext}" ]) + FILE_NAME_TEMPLATE_WITHOUT_INSTANCE = SPLITTER.join([ + "{file_type}", "{file_id}.{file_ext}" + ]) + + @classmethod + def file_types_with_general_template(cls): + return (ExportFileType.EVENTS,) + + @classmethod def make_dataset_file_path( cls, @@ -219,7 +241,7 @@ def make_dataset_file_path( file_type = ExportFileType.DATASET if save_images else ExportFileType.ANNOTATIONS normalized_format_name = make_file_name(to_snake_case(format_name)) - filename = cls.FILE_NAME_TEMPLATE.format_map( + filename = cls.FILE_NAME_TEMPLATE_WITH_INSTANCE.format_map( { "instance_type": instance_type, "instance_id": instance_id, @@ -230,7 +252,7 @@ def make_dataset_file_path( } ) - return osp.join(settings.EXPORT_CACHE_ROOT, filename) + return osp.join(cls.ROOT, filename) @classmethod def make_backup_file_path( @@ -241,7 +263,7 @@ def make_backup_file_path( instance_timestamp: float, ) -> str: instance_type = InstanceType(instance_type.lower()) - filename = cls.FILE_NAME_TEMPLATE.format_map( + filename = cls.FILE_NAME_TEMPLATE_WITH_INSTANCE.format_map( { "instance_type": instance_type, "instance_id": instance_id, @@ -251,53 +273,72 @@ def make_backup_file_path( "file_ext": "zip", } ) - return osp.join(settings.EXPORT_CACHE_ROOT, filename) + return osp.join(cls.ROOT, filename) + + @classmethod + def make_file_path( + cls, + *, + file_type: str, + file_id: UUID, + file_ext: str, + ) -> str: + filename = cls.FILE_NAME_TEMPLATE_WITHOUT_INSTANCE.format_map({ + "file_type": ExportFileType(file_type), # convert here to be sure only expected types are used + "file_id": file_id, + "file_ext": file_ext, + }) + return osp.join(cls.ROOT, filename) @classmethod def parse_filename( cls, filename: str, - ) -> ParsedDatasetFilename | ParsedBackupFilename: + ) -> ParsedExportFilename | ParsedExportFilenameWithConstructedId: basename, file_ext = osp.splitext(filename) file_ext = file_ext.strip(".").lower() - basename_match = re.fullmatch( - ( - rf"^(?P{'|'.join(InstanceType.values())})" - rf"{cls.SPLITTER}(?P\d+)" - rf"{cls.SPLITTER}(?P{'|'.join(ExportFileType.values())})" - rf"{cls.SPLITTER}(?P.+)$" - ), - basename, - ) - if not basename_match: - raise CacheFileOrDirPathParseError(f"Couldn't parse file name: {basename!r}") - - fragments = basename_match.groupdict() - fragments["instance_id"] = int(fragments["instance_id"]) - - unparsed = fragments.pop("unparsed")[len(cls.INSTANCE_PREFIX):] - specific_params = {} + try: + for exp_file_type in cls.file_types_with_general_template(): + if basename.startswith(exp_file_type): + file_type, file_id = basename.split(cls.SPLITTER, maxsplit=1) + + return ParsedExportFilename( + file_type=file_type, + file_id=SimpleFileId(value=file_id), + file_ext=file_ext + ) + + basename_match = re.fullmatch( + ( + rf"^(?P{'|'.join(InstanceType.values())})" + rf"{cls.SPLITTER}(?P\d+)" + rf"{cls.SPLITTER}(?P{'|'.join(ExportFileType.values())})" + rf"{cls.SPLITTER}(?P.+)$" + ), + basename, + ) - if fragments["file_type"] in (ExportFileType.DATASET, ExportFileType.ANNOTATIONS): - try: - instance_timestamp, format_repr = unparsed.split(cls.SPLITTER, maxsplit=1) - except ValueError: - raise CacheFileOrDirPathParseError(f"Couldn't parse file name: {basename!r}") + if not basename_match: + assert False # will be handled - specific_params["format_repr"] = format_repr - ParsedFileNameClass = ParsedDatasetFilename - else: + fragments = basename_match.groupdict() + unparsed = fragments.pop("unparsed")[len(cls.INSTANCE_PREFIX):] instance_timestamp = unparsed - ParsedFileNameClass = ParsedBackupFilename - try: - parsed_file_name = ParsedFileNameClass( + if fragments["file_type"] in (ExportFileType.DATASET, ExportFileType.ANNOTATIONS): + # The "format" is a part of file id, but there is actually + # no need to use it after filename parsing, so just drop it. + instance_timestamp, _ = unparsed.split(cls.SPLITTER, maxsplit=1) + + parsed_file_name = ParsedExportFilenameWithConstructedId( + file_type=fragments.pop("file_type"), + file_id=ConstructedFileId( + instance_timestamp=instance_timestamp, + **fragments, + ), file_ext=file_ext, - instance_timestamp=instance_timestamp, - **fragments, - **specific_params, ) - except ValueError as ex: + except Exception as ex: raise CacheFileOrDirPathParseError(f"Couldn't parse file name: {basename!r}") from ex return parsed_file_name diff --git a/cvat/apps/events/export.py b/cvat/apps/events/export.py index 2a7ffed51910..68336a211d98 100644 --- a/cvat/apps/events/export.py +++ b/cvat/apps/events/export.py @@ -6,7 +6,6 @@ import os import uuid from datetime import datetime, timedelta -from pathlib import Path import attrs import clickhouse_connect @@ -16,9 +15,8 @@ from rest_framework import serializers, status from rest_framework.response import Response from rest_framework.reverse import reverse -from rq import get_current_job -from cvat.apps.dataset_manager.util import TmpDirManager +from cvat.apps.dataset_manager.util import ExportCacheManager from cvat.apps.dataset_manager.views import log_exception from cvat.apps.engine.log import ServerLogManager from cvat.apps.engine.rq import RQMetaWithFailureInfo @@ -33,7 +31,7 @@ DEFAULT_CACHE_TTL = timedelta(hours=1) -def _create_csv(query_params: dict): +def _create_csv(query_params: dict, output_filename: str): try: clickhouse_settings = settings.CLICKHOUSE["events"] @@ -73,9 +71,6 @@ def _create_csv(query_params: dict): ) as client: result = client.query(query, parameters=parameters) - current_job = get_current_job() - output_filename = Path(TmpDirManager.TMP_ROOT) / current_job.id - with open(output_filename, "w", encoding="UTF8") as f: writer = csv.writer(f) writer.writerow(result.column_names) @@ -111,7 +106,9 @@ def init_request_args(self): perm = EventsPermission.create_scope_list(self.request) self.filter_query = perm.filter(self.request.query_params) - def define_query_params(self) -> dict: + def _init_callback_with_params(self): + self.callback = _create_csv + query_params = { "org_id": self.filter_query.get("org_id", None), "project_id": self.filter_query.get("project_id", None), @@ -149,12 +146,10 @@ def define_query_params(self) -> dict: query_params["to"] = datetime.now(timezone.utc) query_params["from"] = query_params["to"] - timedelta(days=30) - return query_params - - def _init_callback_with_params(self): - self.callback = _create_csv - query_params = self.define_query_params() - self.callback_args = (query_params,) + output_filename = ExportCacheManager.make_file_path( + file_type="events", file_id=self.query_id, file_ext="csv" + ) + self.callback_args = (query_params, output_filename) def where_to_redirect(self) -> str: return reverse("events-download-file", request=self.request) From b7fa6611b83dd28c341f557f90eb6236f105223e Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Thu, 10 Apr 2025 13:55:21 +0200 Subject: [PATCH 053/103] [ResourceImporter] Cleanup callback/callback_args initialization --- cvat/apps/engine/background.py | 47 +++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index 7faecd731cb9..314b3619c6e7 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -179,6 +179,14 @@ def where_to_redirect(self) -> str: class BackupExporter(AbstractExporter): SUPPORTED_RESOURCES = {RequestTarget.PROJECT, RequestTarget.TASK} + # def validate_request(self): + # super().validate_request() + + # if isinstance(self.db_instance, Task) and self.db_instance.data is None: + # raise ValidationError("Backup of a task without data is not allowed") + # elif isinstance(self.db_instance, Project) and Data.objects.filter(): + # pass + def validate_request_id(self, request_id, /) -> None: parsed_request_id = ExportRequestId.parse(request_id) @@ -341,7 +349,7 @@ def _handle_non_tus_file_upload(self): tf.write(chunk) @abstractmethod - def _init_callback_with_params(self) -> tuple[Callable, tuple]: ... + def _init_callback_with_params(self): ... def init_callback_with_params(self): # Note: self.import_args is changed here @@ -350,26 +358,25 @@ def init_callback_with_params(self): elif not self.import_args.file_path: self._handle_non_tus_file_upload() + self._init_callback_with_params() + # redefine here callback and callback args in order to: # - (optional) download file from cloud storage # - remove uploaded file at the end - self.callback = import_resource_with_clean_up_after - import_func, import_func_args = self._init_callback_with_params() - if self.import_args.location == Location.LOCAL: - self.callback_args = ( - import_func, - *import_func_args, - ) - else: + if self.import_args.location == Location.CLOUD_STORAGE: self.callback_args = ( - import_resource_from_cloud_storage, - import_func_args[0], + *self.callback_args[0], db_storage, key, - import_func, - *import_func_args[1:], + self.callback, + *self.callback_args[1:], ) + self.callback = import_resource_from_cloud_storage + + # redefine callback to clean up uploaded file + self.callback_args = (self.callback, *self.callback_args) + self.callback = import_resource_with_clean_up_after @attrs.define(kw_only=True) @@ -403,20 +410,19 @@ def init_request_args(self) -> None: def _init_callback_with_params(self): if isinstance(self.db_instance, Project): - callback = dm.project.import_dataset_as_project + self.callback = dm.project.import_dataset_as_project elif isinstance(self.db_instance, Task): - callback = dm.task.import_task_annotations + self.callback = dm.task.import_task_annotations else: assert isinstance(self.db_instance, Job) - callback = dm.task.import_job_annotations + self.callback = dm.task.import_job_annotations - callback_args = ( + self.callback_args = ( self.import_args.file_path, self.db_instance.pk, self.import_args.format, self.import_args.conv_mask_to_poly, ) - return callback, callback_args def validate_request(self): super().validate_request() @@ -489,9 +495,8 @@ def build_request_id(self): ).render() def _init_callback_with_params(self): - callback = import_project if self.resource == RequestTarget.PROJECT else import_task - callback_args = (self.import_args.file_path, self.user_id, self.import_args.org_id) - return callback, callback_args + self.callback = import_project if self.resource == RequestTarget.PROJECT else import_task + self.callback_args = (self.import_args.file_path, self.user_id, self.import_args.org_id) def finalize_request(self): # FUTURE-TODO: send logs to event store From 510fef7bb65f4343aa5279be7290ac52224b7f78 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Thu, 10 Apr 2025 13:56:35 +0200 Subject: [PATCH 054/103] [AbstractExporter] Split logic into 2 classes --- cvat/apps/engine/mixins.py | 10 +- cvat/apps/events/views.py | 5 +- cvat/apps/redis_handler/background.py | 160 +++++++++++++++----------- 3 files changed, 100 insertions(+), 75 deletions(-) diff --git a/cvat/apps/engine/mixins.py b/cvat/apps/engine/mixins.py index 6b631f7c88f8..af31141169af 100644 --- a/cvat/apps/engine/mixins.py +++ b/cvat/apps/engine/mixins.py @@ -464,8 +464,9 @@ def initiate_dataset_export(self, request: ExtendedRequest, pk: int): @action(methods=['GET'], detail=True, url_path='dataset/download') def download_dataset(self, request: ExtendedRequest, pk: int): obj = self.get_object() # force to call check_object_permissions - export_manager = DatasetExporter(request=request, db_instance=obj) - return export_manager.download_file() + + downloader = DatasetExporter(request=request, db_instance=obj).get_downloader() + return downloader.download_file() class BackupMixin: @@ -512,5 +513,6 @@ def initiate_backup_export(self, request: ExtendedRequest, pk: int): @action(methods=['GET'], detail=True, url_path='backup/download') def download_backup(self, request: ExtendedRequest, pk: int): obj = self.get_object() # force to call check_object_permissions - export_manager = BackupExporter(request=request, db_instance=obj) - return export_manager.download_file() + + downloader = BackupExporter(request=request, db_instance=obj).get_downloader() + return downloader.download_file() diff --git a/cvat/apps/events/views.py b/cvat/apps/events/views.py index 297d93e27e66..164331932388 100644 --- a/cvat/apps/events/views.py +++ b/cvat/apps/events/views.py @@ -208,5 +208,6 @@ def initiate_export(self, request: ExtendedRequest): @action(detail=False, methods=["GET"], url_path="download") def download_file(self, request: ExtendedRequest): self.check_permissions(request) - exporter = EventsExporter(request=request) - return exporter.download_file() + + downloader = EventsExporter(request=request).get_downloader() + return downloader.download_file() diff --git a/cvat/apps/redis_handler/background.py b/cvat/apps/redis_handler/background.py index 0cbd8dad2d39..cb13cd41cae8 100644 --- a/cvat/apps/redis_handler/background.py +++ b/cvat/apps/redis_handler/background.py @@ -26,8 +26,6 @@ from cvat.apps.dataset_manager.util import get_export_cache_lock from cvat.apps.engine.cloud_provider import export_resource_to_cloud_storage - -# from cvat.apps.dataset_manager.views import get_export_cache_ttl from cvat.apps.engine.location import Location, StorageType, get_location_configuration from cvat.apps.engine.log import ServerLogManager from cvat.apps.engine.models import RequestTarget @@ -123,11 +121,10 @@ def init_callback_with_params(self) -> None: ... def validate_request(self) -> Response | None: """Hook to run some validations before processing a request""" - if self.request.method != "POST": - raise MethodNotAllowed( - self.request.method, - detail="Only POST requests can be used to initiate a background process", - ) + # prevent architecture bugs + assert ( + self.request.method == "POST" + ), "Only POST requests can be used to initiate a background process" def handle_existing_job(self, job: RQJob | None, queue: DjangoRQ) -> Response | None: if not job: @@ -135,15 +132,18 @@ def handle_existing_job(self, job: RQJob | None, queue: DjangoRQ) -> Response | job_status = job.get_status(refresh=False) - if job_status in {RQJobStatus.STARTED, RQJobStatus.QUEUED}: + if job_status in {RQJobStatus.STARTED, RQJobStatus.QUEUED, RQJobStatus.DEFERRED}: + from cvat.apps.redis_handler.serializers import ExistedRequestIdSerializer + + serializer = ExistedRequestIdSerializer( + {"reason": "Request is being processed", "rq_id": job.id} + ) + return Response( - data="Request is being processed", + serializer.data, status=status.HTTP_409_CONFLICT, ) - if job_status == RQJobStatus.DEFERRED: - job.cancel(enqueue_dependents=settings.ONE_RUNNING_JOB_IN_QUEUE_PER_USER) - if job_status == RQJobStatus.SCHEDULED: scheduler: DjangoScheduler = django_rq.get_scheduler(queue.name, queue=queue) # remove the job id from the set with scheduled keys @@ -200,6 +200,85 @@ def schedule_job(self) -> Response: @attrs.define(kw_only=True) class AbstractExporter(AbstractRequestManager): + + class Downloader: + def __init__( + self, + *, + request: ExtendedRequest, + queue: DjangoRQ, + request_id: str, + ): + self.request = request + self.queue = queue + self.request_id = request_id + + def validate_request(self): + # prevent architecture bugs + assert "GET" == self.request.method, "Only GET requests can be used to download a file" + + def download_file(self) -> Response: + self.validate_request() + + # ensure that there is no race condition when processing parallel requests + with get_rq_lock_for_job(self.queue, self.request_id): + job = self.queue.fetch_job(self.request_id) + + if not job: + return HttpResponseBadRequest("Unknown export request id") + + # define status once to avoid refreshing it on each check + # FUTURE-TODO: get_status will raise InvalidJobOperation exception instead of returning None in one of the next releases + job_status = job.get_status(refresh=False) + + if job_status != RQJobStatus.FINISHED: + return HttpResponseBadRequest("The export process is not finished") + + job_meta = ExportRQMeta.for_job(job) + file_path = job.return_value() + + if not file_path: + return ( + Response( + "A result for exporting job was not found for finished RQ job", + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + if job_meta.result_url + # user tries to download a final file locally while the export is made to cloud storage + else HttpResponseBadRequest( + "The export process has no result file to be downloaded locally" + ) + ) + + with get_export_cache_lock( + file_path, ttl=LOCK_TTL, acquire_timeout=LOCK_ACQUIRE_TIMEOUT + ): + if not osp.exists(file_path): + return Response( + "The exported file has expired, please retry exporting", + status=status.HTTP_404_NOT_FOUND, + ) + + return sendfile( + self.request, + file_path, + attachment=True, + attachment_filename=job_meta.result_filename, + ) + + def get_downloader(self): + request_id = self.request.query_params.get(self.REQUEST_ID_KEY) + + if not request_id: + raise ValidationError("Missing request id in the query parameters") + + try: + self.validate_request_id(request_id) + except ValueError: + raise ValidationError("Invalid export request id") + + return self.Downloader(request=self.request, queue=self.get_queue(), request_id=request_id) + QUEUE_NAME = settings.CVAT_QUEUES.EXPORT_DATA.value @property @@ -301,60 +380,3 @@ def build_meta(self, *, request_id): ), result_filename=self.get_result_filename(), ) - - def download_file(self) -> Response: - queue = self.get_queue() - request_id = self.request.query_params.get(self.REQUEST_ID_KEY) - - if not request_id: - return HttpResponseBadRequest("Missing request id in the query parameters") - - try: - self.validate_request_id(request_id) - except ValueError: - return HttpResponseBadRequest("Invalid export request id") - - # ensure that there is no race condition when processing parallel requests - with get_rq_lock_for_job(queue, request_id): - job = queue.fetch_job(request_id) - - if not job: - return HttpResponseBadRequest("Unknown export request id") - - # define status once to avoid refreshing it on each check - # FUTURE-TODO: get_status will raise InvalidJobOperation exception instead of returning None in one of the next releases - job_status = job.get_status(refresh=False) - - if job_status != RQJobStatus.FINISHED: - return HttpResponseBadRequest("The export process is not finished") - - job_meta = ExportRQMeta.for_job(job) - file_path = job.return_value() - - if not file_path: - return ( - Response( - "A result for exporting job was not found for finished RQ job", - status=status.HTTP_500_INTERNAL_SERVER_ERROR, - ) - if job_meta.result_url # user tries to download a final file locally while the export is made to cloud storage - else HttpResponseBadRequest( - "The export process has no result file to be downloaded locally" - ) - ) - - with get_export_cache_lock( - file_path, ttl=LOCK_TTL, acquire_timeout=LOCK_ACQUIRE_TIMEOUT - ): - if not osp.exists(file_path): - return Response( - "The exported file has expired, please retry exporting", - status=status.HTTP_404_NOT_FOUND, - ) - - return sendfile( - self.request, - file_path, - attachment=True, - attachment_filename=job_meta.result_filename, - ) From dbde0d222c6a642e6efe10bbcb4d57b2d803bbfa Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Thu, 10 Apr 2025 13:57:13 +0200 Subject: [PATCH 055/103] Drop hidden meta field --- cvat/apps/engine/rq.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/cvat/apps/engine/rq.py b/cvat/apps/engine/rq.py index b75157f13d59..5b3df4bf239b 100644 --- a/cvat/apps/engine/rq.py +++ b/cvat/apps/engine/rq.py @@ -50,7 +50,6 @@ class RequestField: JOB_ID = "job_id" STATUS = "status" PROGRESS = "progress" - HIDDEN = "hidden" # import specific fields TASK_PROGRESS = "task_progress" @@ -228,8 +227,6 @@ def request(self): task_id: int | None = ImmutableRQMetaAttribute(RQJobMetaField.TASK_ID, optional=True) job_id: int | None = ImmutableRQMetaAttribute(RQJobMetaField.JOB_ID, optional=True) - hidden: bool | None = ImmutableRQMetaAttribute(RQJobMetaField.HIDDEN, optional=True) - # mutable && optional fields progress: float | None = MutableRQMetaAttribute( RQJobMetaField.PROGRESS, validator=lambda x: isinstance(x, float), optional=True @@ -251,7 +248,6 @@ def build( *, request: ExtendedRequest, db_obj: Model | None, - hidden: bool | None = None, ): # to prevent circular import from cvat.apps.events.handlers import job_id, organization_slug, task_id @@ -280,7 +276,6 @@ def build( RQJobMetaField.PROJECT_ID: pid, RQJobMetaField.TASK_ID: tid, RQJobMetaField.JOB_ID: jid, - **({RQJobMetaField.HIDDEN: hidden} if hidden is not None else {}), } @@ -335,6 +330,7 @@ def is_rq_job_owner(rq_job: RQJob, user_id: int) -> bool: class ExportRequestId(RequestId): + # optional because export queue works also with events @cached_property def subresource(self) -> RequestSubresource | None: if subresource := self.extra.get("subresource"): @@ -356,7 +352,6 @@ def subresource(self) -> RequestSubresource | None: @cached_property def format(self) -> str | None: - # TODO: quote/unquote return self.extra.get("format") From 272afdfbcb6fbc73b530f4ea23757a24e0b3d1f5 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Thu, 10 Apr 2025 14:16:40 +0200 Subject: [PATCH 056/103] Revert changes not related to this PR --- cvat/apps/redis_handler/background.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/cvat/apps/redis_handler/background.py b/cvat/apps/redis_handler/background.py index cb13cd41cae8..c80ef1ba95a1 100644 --- a/cvat/apps/redis_handler/background.py +++ b/cvat/apps/redis_handler/background.py @@ -123,7 +123,7 @@ def validate_request(self) -> Response | None: # prevent architecture bugs assert ( - self.request.method == "POST" + "POST" == self.request.method ), "Only POST requests can be used to initiate a background process" def handle_existing_job(self, job: RQJob | None, queue: DjangoRQ) -> Response | None: @@ -132,18 +132,15 @@ def handle_existing_job(self, job: RQJob | None, queue: DjangoRQ) -> Response | job_status = job.get_status(refresh=False) - if job_status in {RQJobStatus.STARTED, RQJobStatus.QUEUED, RQJobStatus.DEFERRED}: - from cvat.apps.redis_handler.serializers import ExistedRequestIdSerializer - - serializer = ExistedRequestIdSerializer( - {"reason": "Request is being processed", "rq_id": job.id} - ) - + if job_status in {RQJobStatus.STARTED, RQJobStatus.QUEUED}: return Response( - serializer.data, + data="Request is being processed", status=status.HTTP_409_CONFLICT, ) + if job_status == RQJobStatus.DEFERRED: + job.cancel(enqueue_dependents=settings.ONE_RUNNING_JOB_IN_QUEUE_PER_USER) + if job_status == RQJobStatus.SCHEDULED: scheduler: DjangoScheduler = django_rq.get_scheduler(queue.name, queue=queue) # remove the job id from the set with scheduled keys From cf6118b6c961dcb0f71f21f356df9b8f69386774 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Thu, 10 Apr 2025 17:02:02 +0200 Subject: [PATCH 057/103] Add DeprecatedResponse --- cvat/apps/engine/view_utils.py | 19 ++++++++++++++ cvat/apps/events/export.py | 31 +++++++++++++++-------- cvat/apps/quality_control/views.py | 40 +++++++++++++++++------------- 3 files changed, 62 insertions(+), 28 deletions(-) diff --git a/cvat/apps/engine/view_utils.py b/cvat/apps/engine/view_utils.py index 9311516b6932..861f6f3f1115 100644 --- a/cvat/apps/engine/view_utils.py +++ b/cvat/apps/engine/view_utils.py @@ -5,6 +5,7 @@ # NOTE: importing in the utils.py header leads to circular importing import textwrap +from datetime import datetime from typing import Optional from django.db.models.query import QuerySet @@ -110,3 +111,21 @@ def get_410_response_when_checking_process_status(process_type: str, /) -> HttpR The common requests API should be used instead: GET /api/requests/rq_id, where rq_id is obtained from the response of the initializing request. """)) + +class DeprecatedResponse(Response): + def __init__(self, + data=None, + status=None, + template_name=None, + headers=None, + exception=False, + content_type=None, + *, + deprecation_date: datetime, + ): + headers = headers or {} + # https://greenbytes.de/tech/webdav/draft-ietf-httpapi-deprecation-header-latest.html#the-deprecation-http-response-header-field + deprecation_timestamp = int(deprecation_date.timestamp()) + headers["Deprecation"] = f"@{deprecation_timestamp}" + + super().__init__(data, status, template_name, headers, exception, content_type) \ No newline at end of file diff --git a/cvat/apps/events/export.py b/cvat/apps/events/export.py index 68336a211d98..96bdcbdd8eff 100644 --- a/cvat/apps/events/export.py +++ b/cvat/apps/events/export.py @@ -13,7 +13,6 @@ from django.conf import settings from django.utils import timezone from rest_framework import serializers, status -from rest_framework.response import Response from rest_framework.reverse import reverse from cvat.apps.dataset_manager.util import ExportCacheManager @@ -22,6 +21,7 @@ from cvat.apps.engine.rq import RQMetaWithFailureInfo from cvat.apps.engine.types import ExtendedRequest from cvat.apps.engine.utils import sendfile +from cvat.apps.engine.view_utils import DeprecatedResponse from cvat.apps.events.permissions import EventsPermission from cvat.apps.redis_handler.background import AbstractExporter from cvat.apps.redis_handler.rq import RequestId @@ -90,7 +90,10 @@ class EventsExporter(AbstractExporter): def __attrs_post_init__(self): super().__attrs_post_init__() - self.query_id = self.request.query_params.get("query_id") or uuid.uuid4() + if query_id := self.request.query_params.get("query_id"): + self.query_id = uuid.UUID(query_id) + else: + self.query_id = uuid.uuid4() def build_request_id(self): return RequestId( @@ -176,9 +179,7 @@ def export(request: ExtendedRequest): response_data = { "query_id": manager.query_id, } - deprecation_timestamp = int(datetime(2025, 3, 17, tzinfo=timezone.utc).timestamp()) - response_headers = {"Deprecation": f"@{deprecation_timestamp}"} - + deprecation_date = datetime(2025, 3, 17, tzinfo=timezone.utc) rq_job = queue.fetch_job(request_id) if rq_job: @@ -192,17 +193,23 @@ def export(request: ExtendedRequest): return sendfile(request, file_path, attachment=True, attachment_filename=filename) else: if os.path.exists(file_path): - return Response(status=status.HTTP_201_CREATED, headers=response_headers) + return DeprecatedResponse( + status=status.HTTP_201_CREATED, deprecation_date=deprecation_date + ) elif rq_job.is_failed: rq_job_meta = RQMetaWithFailureInfo.for_job(rq_job) exc_info = rq_job_meta.formatted_exception or str(rq_job.exc_info) rq_job.delete() - return Response( - exc_info, status=status.HTTP_500_INTERNAL_SERVER_ERROR, headers=response_headers + return DeprecatedResponse( + exc_info, + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + deprecation_date=deprecation_date, ) else: - return Response( - data=response_data, status=status.HTTP_202_ACCEPTED, headers=response_headers + return DeprecatedResponse( + data=response_data, + status=status.HTTP_202_ACCEPTED, + deprecation_date=deprecation_date, ) manager.init_request_args() @@ -210,4 +217,6 @@ def export(request: ExtendedRequest): manager.init_callback_with_params() manager.setup_new_job(queue, request_id) - return Response(data=response_data, status=status.HTTP_202_ACCEPTED) + return DeprecatedResponse( + data=response_data, status=status.HTTP_202_ACCEPTED, deprecation_date=deprecation_date + ) diff --git a/cvat/apps/quality_control/views.py b/cvat/apps/quality_control/views.py index 4c2adf9c7f95..8fe253c82cd9 100644 --- a/cvat/apps/quality_control/views.py +++ b/cvat/apps/quality_control/views.py @@ -6,7 +6,7 @@ from datetime import datetime from django.db.models import Q -from django.http import HttpResponse, HttpResponseNotFound +from django.http import HttpResponse from django.utils import timezone from drf_spectacular.types import OpenApiTypes from drf_spectacular.utils import ( @@ -18,13 +18,14 @@ from rest_framework import mixins, status, viewsets from rest_framework.decorators import action from rest_framework.exceptions import NotFound, ValidationError -from rest_framework.response import Response from rq.job import JobStatus as RqJobStatus from cvat.apps.engine.mixins import PartialUpdateModelMixin from cvat.apps.engine.models import Task from cvat.apps.engine.rq import BaseRQMeta +from cvat.apps.engine.types import ExtendedRequest from cvat.apps.engine.utils import get_server_url +from cvat.apps.engine.view_utils import DeprecatedResponse from cvat.apps.quality_control import quality_reports as qc from cvat.apps.quality_control.models import ( AnnotationConflict, @@ -266,7 +267,7 @@ def get_queryset(self): ), }, ) - def create(self, request, *args, **kwargs): + def create(self, request: ExtendedRequest, *args, **kwargs): self.check_permissions(request) rq_id = request.query_params.get(self.CREATE_REPORT_RQ_ID_PARAMETER, None) @@ -286,8 +287,7 @@ def create(self, request, *args, **kwargs): return manager.schedule_job() else: - deprecation_timestamp = int(datetime(2025, 3, 17, tzinfo=timezone.utc).timestamp()) - response_headers = {"Deprecation": f"@{deprecation_timestamp}"} + deprecation_date = datetime(2025, 3, 17, tzinfo=timezone.utc) serializer = RequestIdSerializer(data={"rq_id": rq_id}) serializer.is_valid(raise_exception=True) rq_id = serializer.validated_data["rq_id"] @@ -304,15 +304,21 @@ def create(self, request, *args, **kwargs): .allow ): # We should not provide job existence information to unauthorized users - return HttpResponseNotFound("Unknown request id", headers=response_headers) + return DeprecatedResponse( + "Unknown request id", + status=status.HTTP_404_NOT_FOUND, + deprecation_date=deprecation_date, + ) rq_job_status = rq_job.get_status(refresh=False) if rq_job_status == RqJobStatus.FAILED: message = str(rq_job.exc_info) rq_job.delete() - return Response( - message, status=status.HTTP_500_INTERNAL_SERVER_ERROR, headers=response_headers + return DeprecatedResponse( + message, + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + deprecation_date=deprecation_date, ) elif rq_job_status in ( @@ -321,31 +327,31 @@ def create(self, request, *args, **kwargs): RqJobStatus.SCHEDULED, RqJobStatus.DEFERRED, ): - return Response( - serializer.data, status=status.HTTP_202_ACCEPTED, headers=response_headers + return DeprecatedResponse( + serializer.data, + status=status.HTTP_202_ACCEPTED, + deprecation_date=deprecation_date, ) elif rq_job_status == RqJobStatus.FINISHED: return_value = rq_job.return_value() rq_job.delete() if not return_value: - raise Response( + return DeprecatedResponse( "No report has been computed", status=status.HTTP_500_INTERNAL_SERVER_ERROR, - headers=response_headers, + deprecation_date=deprecation_date, ) report = self.get_queryset().get(pk=return_value) report_serializer = QualityReportSerializer( instance=report, context={"request": request} ) - return Response( + return DeprecatedResponse( data=report_serializer.data, status=status.HTTP_201_CREATED, - headers={ - **self.get_success_headers(report_serializer.data), - **response_headers, - }, + headers=self.get_success_headers(report_serializer.data), + deprecation_date=deprecation_date, ) raise AssertionError(f"Unexpected rq job '{rq_id}' status '{rq_job_status}'") From 9e8ccb60b657d23d6c08a72fb0f5335b1ad8d9a9 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Thu, 10 Apr 2025 17:10:16 +0200 Subject: [PATCH 058/103] schedule_job -> enqueue_job --- cvat/apps/consensus/views.py | 2 +- cvat/apps/engine/background.py | 2 +- cvat/apps/engine/mixins.py | 4 ++-- cvat/apps/engine/views.py | 12 ++++++------ cvat/apps/events/views.py | 2 +- cvat/apps/quality_control/views.py | 2 +- cvat/apps/redis_handler/background.py | 13 ++++++------- 7 files changed, 18 insertions(+), 19 deletions(-) diff --git a/cvat/apps/consensus/views.py b/cvat/apps/consensus/views.py index 134d81c75464..82a33d65aa10 100644 --- a/cvat/apps/consensus/views.py +++ b/cvat/apps/consensus/views.py @@ -74,7 +74,7 @@ def create(self, request: ExtendedRequest, *args, **kwargs): raise NotFound(f"Jobs {job_id} do not exist") from ex manager = merging.MergingManager(request=request, db_instance=instance) - return manager.schedule_job() + return manager.enqueue_job() @extend_schema(tags=["consensus"]) diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index 314b3619c6e7..bb38db192c18 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -374,7 +374,7 @@ def init_callback_with_params(self): ) self.callback = import_resource_from_cloud_storage - # redefine callback to clean up uploaded file + # re-define callback to clean up uploaded file self.callback_args = (self.callback, *self.callback_args) self.callback = import_resource_with_clean_up_after diff --git a/cvat/apps/engine/mixins.py b/cvat/apps/engine/mixins.py index af31141169af..9a86f398122e 100644 --- a/cvat/apps/engine/mixins.py +++ b/cvat/apps/engine/mixins.py @@ -449,7 +449,7 @@ def initiate_dataset_export(self, request: ExtendedRequest, pk: int): self._object = self.get_object() # force call of check_object_permissions() export_manager = DatasetExporter(request=request, db_instance=self._object) - return export_manager.schedule_job() + return export_manager.enqueue_job() @extend_schema(summary='Download a prepared dataset file', parameters=[ @@ -497,7 +497,7 @@ class BackupMixin: def initiate_backup_export(self, request: ExtendedRequest, pk: int): db_object = self.get_object() # force to call check_object_permissions export_manager = BackupExporter(request=request, db_instance=db_object) - return export_manager.schedule_job() + return export_manager.enqueue_job() @extend_schema(summary='Download a prepared backup file', diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 8da674bc7dae..1029e3b4c69e 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -413,11 +413,11 @@ def get_upload_dir(self): def upload_finished(self, request: ExtendedRequest): if self.action == 'dataset': importer = DatasetImporter(request=request, db_instance=self._object) - return importer.schedule_job() + return importer.enqueue_job() elif self.action == 'import_backup': importer = BackupImporter(request=request, resource=RequestTarget.PROJECT) - return importer.schedule_job() + return importer.enqueue_job() return Response(data='Unknown upload was finished', status=status.HTTP_400_BAD_REQUEST) @@ -943,7 +943,7 @@ def upload_finished(self, request: ExtendedRequest): @transaction.atomic def _handle_upload_annotations(request: ExtendedRequest): importer = DatasetImporter(request=request, db_instance=self._object) - return importer.schedule_job() + return importer.enqueue_job() def _handle_upload_data(request: ExtendedRequest): with transaction.atomic(): @@ -1007,12 +1007,12 @@ def _handle_upload_data(request: ExtendedRequest): # Need to process task data when the transaction is committed creator = TaskCreator(request=request, db_instance=self._object, db_data=data) - return creator.schedule_job() + return creator.enqueue_job() @transaction.atomic def _handle_upload_backup(request: ExtendedRequest): importer = BackupImporter(request=request, resource=RequestTarget.TASK) - return importer.schedule_job() + return importer.enqueue_job() if self.action == 'annotations': return _handle_upload_annotations(request) @@ -1605,7 +1605,7 @@ def get_upload_dir(self): def upload_finished(self, request: ExtendedRequest): if self.action == 'annotations': importer = DatasetImporter(request=request, db_instance=self._object) - return importer.schedule_job() + return importer.enqueue_job() return Response(data='Unknown upload was finished', status=status.HTTP_400_BAD_REQUEST) diff --git a/cvat/apps/events/views.py b/cvat/apps/events/views.py index 164331932388..de2352dce0ed 100644 --- a/cvat/apps/events/views.py +++ b/cvat/apps/events/views.py @@ -186,7 +186,7 @@ def list(self, request: ExtendedRequest): def initiate_export(self, request: ExtendedRequest): self.check_permissions(request) exporter = EventsExporter(request=request) - return exporter.schedule_job() + return exporter.enqueue_job() @extend_schema( summary="Download a prepared file with events", diff --git a/cvat/apps/quality_control/views.py b/cvat/apps/quality_control/views.py index 8fe253c82cd9..cb1914199de8 100644 --- a/cvat/apps/quality_control/views.py +++ b/cvat/apps/quality_control/views.py @@ -284,7 +284,7 @@ def create(self, request: ExtendedRequest, *args, **kwargs): raise NotFound(f"Task {task_id} does not exist") from ex manager = qc.QualityReportRQJobManager(request=request, db_instance=task) - return manager.schedule_job() + return manager.enqueue_job() else: deprecation_date = datetime(2025, 3, 17, tzinfo=timezone.utc) diff --git a/cvat/apps/redis_handler/background.py b/cvat/apps/redis_handler/background.py index c80ef1ba95a1..e9b21f450ed3 100644 --- a/cvat/apps/redis_handler/background.py +++ b/cvat/apps/redis_handler/background.py @@ -100,12 +100,11 @@ def build_request_id(self): ... def validate_request_id(self, request_id: str, /) -> None: ... - def get_job_by_id(self, id_: str, /, *, validate: bool = True) -> RQJob | None: - if validate: - try: - self.validate_request_id(id_) - except Exception: - return None + def get_job_by_id(self, id_: str, /) -> RQJob | None: + try: + self.validate_request_id(id_) + except Exception: + return None queue = self.get_queue() return queue.fetch_job(id_) @@ -174,7 +173,7 @@ def get_response(self, request_id: str) -> Response: serializer = RequestIdSerializer({"rq_id": request_id}) return Response(serializer.data, status=status.HTTP_202_ACCEPTED) - def schedule_job(self) -> Response: + def enqueue_job(self) -> Response: self.init_request_args() self.validate_request() self.init_callback_with_params() From 5a4d6add39c8cac55b9d57d1c49b4be1fb005d96 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Thu, 10 Apr 2025 17:10:32 +0200 Subject: [PATCH 059/103] Fix analytics tests --- tests/python/rest_api/test_analytics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/rest_api/test_analytics.py b/tests/python/rest_api/test_analytics.py index 97fd3ff7abb7..082ee194eb66 100644 --- a/tests/python/rest_api/test_analytics.py +++ b/tests/python/rest_api/test_analytics.py @@ -185,7 +185,7 @@ def _export_events( assert api_version == 2 - request_id, response = api_client.events_api.create_file_export( + request_id, response = api_client.events_api.create_export( **kwargs, _check_status=False ) assert response.status == HTTPStatus.ACCEPTED From b8465b0470eb27bf53af354bca8458bbee89b1dd Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Thu, 10 Apr 2025 17:13:28 +0200 Subject: [PATCH 060/103] Fix requests permissions --- cvat/apps/redis_handler/permissions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cvat/apps/redis_handler/permissions.py b/cvat/apps/redis_handler/permissions.py index 8242660395eb..32af1685f8fa 100644 --- a/cvat/apps/redis_handler/permissions.py +++ b/cvat/apps/redis_handler/permissions.py @@ -58,6 +58,7 @@ def create( JobPermission.create_scope_view(request, parsed_request_id.id) ) continue + assert False, "Unsupported operation on resource" self = cls.create_base_perm(request, view, scope, iam_context, obj) permissions.append(self) @@ -79,7 +80,7 @@ def get_scopes(request: ExtendedRequest, view: ViewSet, obj: RQJob | None) -> li ] def get_resource(self): - if owner := BaseRQMeta.for_job(self.obj).user: + if self.obj and (owner := BaseRQMeta.for_job(self.obj).user): return { "owner": { "id": owner.id, From 74afbfcf1f03e8d7562c33feba6e08897c370e66 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Fri, 11 Apr 2025 11:29:16 +0200 Subject: [PATCH 061/103] Rename back: RequestIdSerializer -> RqIdSerializer --- cvat/apps/consensus/views.py | 4 +-- cvat/apps/engine/mixins.py | 6 ++-- cvat/apps/engine/view_utils.py | 2 +- cvat/apps/engine/views.py | 18 +++++------ cvat/apps/events/views.py | 4 +-- cvat/apps/quality_control/views.py | 6 ++-- cvat/apps/redis_handler/background.py | 4 +-- cvat/apps/redis_handler/rq.py | 2 +- cvat/apps/redis_handler/serializers.py | 2 +- cvat/schema.yml | 44 +++++++++++++------------- 10 files changed, 46 insertions(+), 46 deletions(-) diff --git a/cvat/apps/consensus/views.py b/cvat/apps/consensus/views.py index 82a33d65aa10..c1c4c8f00b54 100644 --- a/cvat/apps/consensus/views.py +++ b/cvat/apps/consensus/views.py @@ -25,7 +25,7 @@ from cvat.apps.engine.models import Job, Task from cvat.apps.engine.types import ExtendedRequest from cvat.apps.engine.view_utils import get_410_response_when_checking_process_status -from cvat.apps.redis_handler.serializers import RequestIdSerializer +from cvat.apps.redis_handler.serializers import RqIdSerializer @extend_schema(tags=["consensus"]) @@ -38,7 +38,7 @@ class ConsensusMergesViewSet(viewsets.GenericViewSet): request=ConsensusMergeCreateSerializer, responses={ "202": OpenApiResponse( - RequestIdSerializer, + RqIdSerializer, description=textwrap.dedent( """\ A consensus merge request has been enqueued, the request id is returned. diff --git a/cvat/apps/engine/mixins.py b/cvat/apps/engine/mixins.py index 9a86f398122e..63004bbfaafc 100644 --- a/cvat/apps/engine/mixins.py +++ b/cvat/apps/engine/mixins.py @@ -30,7 +30,7 @@ from cvat.apps.engine.rq import RequestId from cvat.apps.engine.serializers import DataSerializer from cvat.apps.engine.types import ExtendedRequest -from cvat.apps.redis_handler.serializers import RequestIdSerializer +from cvat.apps.redis_handler.serializers import RqIdSerializer slogger = ServerLogManager(__name__) @@ -439,7 +439,7 @@ class DatasetMixin: ], request=OpenApiTypes.NONE, responses={ - '202': OpenApiResponse(response=RequestIdSerializer, description='Exporting has been started'), + '202': OpenApiResponse(response=RqIdSerializer, description='Exporting has been started'), '405': OpenApiResponse(description='Format is not available'), '409': OpenApiResponse(description='Exporting is already in progress'), }, @@ -488,7 +488,7 @@ class BackupMixin: ], request=OpenApiTypes.NONE, responses={ - '202': OpenApiResponse(response=RequestIdSerializer, description='Creating a backup file has been started'), + '202': OpenApiResponse(response=RqIdSerializer, description='Creating a backup file has been started'), '400': OpenApiResponse(description='Wrong query parameters were passed'), '409': OpenApiResponse(description='The backup process has already been initiated and is not yet finished'), }, diff --git a/cvat/apps/engine/view_utils.py b/cvat/apps/engine/view_utils.py index 861f6f3f1115..2904e8595d06 100644 --- a/cvat/apps/engine/view_utils.py +++ b/cvat/apps/engine/view_utils.py @@ -128,4 +128,4 @@ def __init__(self, deprecation_timestamp = int(deprecation_date.timestamp()) headers["Deprecation"] = f"@{deprecation_timestamp}" - super().__init__(data, status, template_name, headers, exception, content_type) \ No newline at end of file + super().__init__(data, status, template_name, headers, exception, content_type) diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 1029e3b4c69e..97c481290335 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -142,7 +142,7 @@ ) from cvat.apps.iam.filters import ORGANIZATION_OPEN_API_PARAMETERS from cvat.apps.iam.permissions import IsAuthenticatedOrReadPublicResource, PolicyEnforcer -from cvat.apps.redis_handler.serializers import RequestIdSerializer +from cvat.apps.redis_handler.serializers import RqIdSerializer from utils.dataset_manifest import ImageManifestManager from . import models @@ -371,7 +371,7 @@ def perform_create(self, serializer, **kwargs): ], request=UploadedZipFileSerializer(required=False), responses={ - '202': OpenApiResponse(RequestIdSerializer, description='Importing has been started'), + '202': OpenApiResponse(RqIdSerializer, description='Importing has been started'), '400': OpenApiResponse(description='Failed to import dataset'), '405': OpenApiResponse(description='Format is not available'), }) @@ -457,7 +457,7 @@ def export_backup(self, request: ExtendedRequest, pk: int): ], request=UploadedZipFileSerializer(required=False), responses={ - '202': OpenApiResponse(RequestIdSerializer, description='Import of a backup file has started'), + '202': OpenApiResponse(RqIdSerializer, description='Import of a backup file has started'), }) @action(detail=False, methods=['OPTIONS', 'POST'], url_path=r'backup/?$', serializer_class=None, @@ -805,7 +805,7 @@ def get_queryset(self): ], request=UploadedZipFileSerializer(required=False), responses={ - '202': OpenApiResponse(RequestIdSerializer, description='Import of a backup file has started'), + '202': OpenApiResponse(RqIdSerializer, description='Import of a backup file has started'), }) @action(detail=False, methods=['OPTIONS', 'POST'], url_path=r'backup/?$', @@ -1098,10 +1098,10 @@ def _handle_upload_backup(request: ExtendedRequest): '202': OpenApiResponse( response=PolymorphicProxySerializer( component_name='DataResponse', - # FUTURE-FIXME: endpoint should return RequestIdSerializer or OpenApiTypes.NONE - # but SDK generated from a schema with nullable RequestIdSerializer + # FUTURE-FIXME: endpoint should return RqIdSerializer or OpenApiTypes.NONE + # but SDK generated from a schema with nullable RqIdSerializer # throws an error when tried to convert empty response to a specific type - serializers=[RequestIdSerializer, OpenApiTypes.BINARY], + serializers=[RqIdSerializer, OpenApiTypes.BINARY], resource_type_field_name=None ), @@ -1205,7 +1205,7 @@ def append_data_chunk(self, request: ExtendedRequest, pk: int, file_id: str): request=UploadedFileSerializer(required=False), responses={ '201': OpenApiResponse(description='Uploading has finished'), - '202': OpenApiResponse(RequestIdSerializer, description='Uploading has been started'), + '202': OpenApiResponse(RqIdSerializer, description='Uploading has been started'), '405': OpenApiResponse(description='Format is not available'), }) @extend_schema(methods=['PUT'], summary='Replace task annotations', @@ -1678,7 +1678,7 @@ def upload_finished(self, request: ExtendedRequest): request=UploadedFileSerializer(required=False), responses={ '201': OpenApiResponse(description='Uploading has finished'), - '202': OpenApiResponse(RequestIdSerializer, description='Uploading has been started'), + '202': OpenApiResponse(RqIdSerializer, description='Uploading has been started'), '405': OpenApiResponse(description='Format is not available'), }) @extend_schema( diff --git a/cvat/apps/events/views.py b/cvat/apps/events/views.py index de2352dce0ed..618779c6792e 100644 --- a/cvat/apps/events/views.py +++ b/cvat/apps/events/views.py @@ -16,7 +16,7 @@ from cvat.apps.events.export import EventsExporter from cvat.apps.events.serializers import ClientEventsSerializer from cvat.apps.iam.filters import ORGANIZATION_OPEN_API_PARAMETERS -from cvat.apps.redis_handler.serializers import RequestIdSerializer +from cvat.apps.redis_handler.serializers import RqIdSerializer from .const import USER_ACTIVITY_SCOPE from .export import export @@ -179,7 +179,7 @@ def list(self, request: ExtendedRequest): ), ], responses={ - "202": OpenApiResponse(RequestIdSerializer), + "202": OpenApiResponse(RqIdSerializer), }, ) @action(detail=False, methods=["POST"], url_path="export") diff --git a/cvat/apps/quality_control/views.py b/cvat/apps/quality_control/views.py index cb1914199de8..0d71940464a2 100644 --- a/cvat/apps/quality_control/views.py +++ b/cvat/apps/quality_control/views.py @@ -44,7 +44,7 @@ QualityReportSerializer, QualitySettingsSerializer, ) -from cvat.apps.redis_handler.serializers import RequestIdSerializer +from cvat.apps.redis_handler.serializers import RqIdSerializer @extend_schema(tags=["quality"]) @@ -250,7 +250,7 @@ def get_queryset(self): responses={ "201": QualityReportSerializer, "202": OpenApiResponse( - RequestIdSerializer, + RqIdSerializer, description=textwrap.dedent( """\ A quality report request has been enqueued, the request id is returned. @@ -288,7 +288,7 @@ def create(self, request: ExtendedRequest, *args, **kwargs): else: deprecation_date = datetime(2025, 3, 17, tzinfo=timezone.utc) - serializer = RequestIdSerializer(data={"rq_id": rq_id}) + serializer = RqIdSerializer(data={"rq_id": rq_id}) serializer.is_valid(raise_exception=True) rq_id = serializer.validated_data["rq_id"] rq_job = qc.QualityReportRQJobManager(request=request).get_job_by_id(rq_id) diff --git a/cvat/apps/redis_handler/background.py b/cvat/apps/redis_handler/background.py index e9b21f450ed3..4025362b2cd6 100644 --- a/cvat/apps/redis_handler/background.py +++ b/cvat/apps/redis_handler/background.py @@ -33,7 +33,7 @@ from cvat.apps.engine.rq import BaseRQMeta, ExportRQMeta, define_dependent_job from cvat.apps.engine.types import ExtendedRequest from cvat.apps.engine.utils import get_rq_lock_by_user, get_rq_lock_for_job, sendfile -from cvat.apps.redis_handler.serializers import RequestIdSerializer +from cvat.apps.redis_handler.serializers import RqIdSerializer slogger = ServerLogManager(__name__) @@ -170,7 +170,7 @@ def finalize_request(self) -> None: """Hook to run some actions (e.g. collect events) after processing a request""" def get_response(self, request_id: str) -> Response: - serializer = RequestIdSerializer({"rq_id": request_id}) + serializer = RqIdSerializer({"rq_id": request_id}) return Response(serializer.data, status=status.HTTP_202_ACCEPTED) def enqueue_job(self) -> Response: diff --git a/cvat/apps/redis_handler/rq.py b/cvat/apps/redis_handler/rq.py index 674d4bdfc671..a8e0ddb0a11e 100644 --- a/cvat/apps/redis_handler/rq.py +++ b/cvat/apps/redis_handler/rq.py @@ -72,7 +72,7 @@ def render(self) -> str: [ self.KEY_VAL_SEP.join([k, v]) for k, v in { - "queue": self.queue, # TODO: probably can be added in RequestIdSerializer? + "queue": self.queue, "action": str(self.action), "target": str(self.target), "id": str(self.id), diff --git a/cvat/apps/redis_handler/serializers.py b/cvat/apps/redis_handler/serializers.py index 338a33909046..012eb7d76278 100644 --- a/cvat/apps/redis_handler/serializers.py +++ b/cvat/apps/redis_handler/serializers.py @@ -27,7 +27,7 @@ slogger = ServerLogManager(__name__) -class RequestIdSerializer(serializers.Serializer): +class RqIdSerializer(serializers.Serializer): rq_id = serializers.CharField(help_text="Request id") diff --git a/cvat/schema.yml b/cvat/schema.yml index 963daedc7dcc..bcd6ec440e5e 100644 --- a/cvat/schema.yml +++ b/cvat/schema.yml @@ -982,7 +982,7 @@ paths: content: application/vnd.cvat+json: schema: - $ref: '#/components/schemas/RequestId' + $ref: '#/components/schemas/RqId' description: | A consensus merge request has been enqueued, the request id is returned. The request status can be checked by using common requests API: GET /api/requests/ @@ -1314,7 +1314,7 @@ paths: content: application/vnd.cvat+json: schema: - $ref: '#/components/schemas/RequestId' + $ref: '#/components/schemas/RqId' description: '' /api/guides: post: @@ -2355,7 +2355,7 @@ paths: content: application/vnd.cvat+json: schema: - $ref: '#/components/schemas/RequestId' + $ref: '#/components/schemas/RqId' description: Uploading has been started '405': description: Format is not available @@ -2620,7 +2620,7 @@ paths: content: application/vnd.cvat+json: schema: - $ref: '#/components/schemas/RequestId' + $ref: '#/components/schemas/RqId' description: Exporting has been started '405': description: Format is not available @@ -3722,7 +3722,7 @@ paths: content: application/vnd.cvat+json: schema: - $ref: '#/components/schemas/RequestId' + $ref: '#/components/schemas/RqId' description: Creating a backup file has been started '400': description: Wrong query parameters were passed @@ -3792,7 +3792,7 @@ paths: content: application/vnd.cvat+json: schema: - $ref: '#/components/schemas/RequestId' + $ref: '#/components/schemas/RqId' description: Importing has been started '400': description: Failed to import dataset @@ -3859,7 +3859,7 @@ paths: content: application/vnd.cvat+json: schema: - $ref: '#/components/schemas/RequestId' + $ref: '#/components/schemas/RqId' description: Exporting has been started '405': description: Format is not available @@ -3961,7 +3961,7 @@ paths: content: application/vnd.cvat+json: schema: - $ref: '#/components/schemas/RequestId' + $ref: '#/components/schemas/RqId' description: Import of a backup file has started /api/quality/conflicts: get: @@ -4205,7 +4205,7 @@ paths: content: application/vnd.cvat+json: schema: - $ref: '#/components/schemas/RequestId' + $ref: '#/components/schemas/RqId' description: | A quality report request has been enqueued, the request id is returned. The request status can be checked at this endpoint by passing the rq_id @@ -5149,7 +5149,7 @@ paths: content: application/vnd.cvat+json: schema: - $ref: '#/components/schemas/RequestId' + $ref: '#/components/schemas/RqId' description: Uploading has been started '405': description: Format is not available @@ -5291,7 +5291,7 @@ paths: content: application/vnd.cvat+json: schema: - $ref: '#/components/schemas/RequestId' + $ref: '#/components/schemas/RqId' description: Creating a backup file has been started '400': description: Wrong query parameters were passed @@ -5572,7 +5572,7 @@ paths: content: application/vnd.cvat+json: schema: - $ref: '#/components/schemas/RequestId' + $ref: '#/components/schemas/RqId' description: Exporting has been started '405': description: Format is not available @@ -5789,7 +5789,7 @@ paths: content: application/vnd.cvat+json: schema: - $ref: '#/components/schemas/RequestId' + $ref: '#/components/schemas/RqId' description: Import of a backup file has started /api/users: get: @@ -7144,7 +7144,7 @@ components: - image_quality DataResponse: oneOf: - - $ref: '#/components/schemas/RequestId' + - $ref: '#/components/schemas/RqId' - type: string format: binary DatasetFormat: @@ -9921,14 +9921,6 @@ components: required: - target - type - RequestId: - type: object - properties: - rq_id: - type: string - description: Request id - required: - - rq_id RequestStatus: enum: - queued @@ -9959,6 +9951,14 @@ components: * `supervisor` - Supervisor * `maintainer` - Maintainer * `owner` - Owner + RqId: + type: object + properties: + rq_id: + type: string + description: Request id + required: + - rq_id RqStatus: type: object properties: From 440f78a20f70303bd0d68cd6551c10166d7e05f9 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Mon, 14 Apr 2025 15:42:28 +0200 Subject: [PATCH 062/103] Refactor a bit --- cvat/apps/consensus/merging_manager.py | 2 - cvat/apps/dataset_manager/cron.py | 1 - cvat/apps/engine/background.py | 160 +++++++++++-------- cvat/apps/engine/location.py | 56 +++---- cvat/apps/engine/models.py | 4 + cvat/apps/engine/rq.py | 52 ++++-- cvat/apps/engine/serializers.py | 29 ++-- cvat/apps/engine/tests/utils.py | 13 +- cvat/apps/engine/views.py | 26 +-- cvat/apps/quality_control/quality_reports.py | 8 +- cvat/apps/redis_handler/background.py | 154 +++++++++--------- cvat/apps/redis_handler/permissions.py | 6 +- cvat/apps/redis_handler/rq.py | 104 +++++++----- cvat/apps/redis_handler/views.py | 23 +-- cvat/schema.yml | 70 +++++--- cvat/settings/base.py | 8 + tests/python/rest_api/test_analytics.py | 4 +- tests/python/rest_api/test_projects.py | 2 +- tests/python/rest_api/utils.py | 4 +- 19 files changed, 404 insertions(+), 322 deletions(-) diff --git a/cvat/apps/consensus/merging_manager.py b/cvat/apps/consensus/merging_manager.py index e39bcb3215de..16f9fd898dd0 100644 --- a/cvat/apps/consensus/merging_manager.py +++ b/cvat/apps/consensus/merging_manager.py @@ -5,7 +5,6 @@ import math from typing import Type -import attrs import datumaro as dm from django.conf import settings from django.db import transaction @@ -157,7 +156,6 @@ class MergingNotAvailable(Exception): pass -@attrs.define(kw_only=True) class MergingManager(AbstractRequestManager): QUEUE_NAME = settings.CVAT_QUEUES.CONSENSUS.value SUPPORTED_RESOURCES = {RequestTarget.TASK, RequestTarget.JOB} diff --git a/cvat/apps/dataset_manager/cron.py b/cvat/apps/dataset_manager/cron.py index 6d1f5b9c267f..6ec083bb0447 100644 --- a/cvat/apps/dataset_manager/cron.py +++ b/cvat/apps/dataset_manager/cron.py @@ -11,7 +11,6 @@ from pathlib import Path from typing import ClassVar, Type -from django.conf import settings from django.utils import timezone from cvat.apps.dataset_manager.util import ( diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index bb38db192c18..a32404eec1ec 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -7,12 +7,11 @@ from dataclasses import dataclass from pathlib import Path from tempfile import NamedTemporaryFile -from typing import Any, Callable from uuid import uuid4 -import attrs from attrs.converters import to_bool from django.conf import settings +from django.db.models import Model from rest_framework.exceptions import MethodNotAllowed, ValidationError from rest_framework.reverse import reverse from rq.job import Job as RQJob @@ -29,7 +28,7 @@ import_task, ) from cvat.apps.engine.cloud_provider import import_resource_from_cloud_storage -from cvat.apps.engine.location import StorageType, get_location_configuration +from cvat.apps.engine.location import LocationConfig, StorageType, get_location_configuration from cvat.apps.engine.log import ServerLogManager from cvat.apps.engine.models import ( Data, @@ -43,8 +42,14 @@ ) from cvat.apps.engine.permissions import get_cloud_storage_for_import_or_export from cvat.apps.engine.rq import ExportRequestId, ImportRequestId -from cvat.apps.engine.serializers import UploadedFileSerializer, UploadedZipFileSerializer +from cvat.apps.engine.serializers import ( + AnnotationFileSerializer, + DatasetFileSerializer, + ProjectFileSerializer, + TaskFileSerializer, +) from cvat.apps.engine.task import create_thread as create_task +from cvat.apps.engine.types import ExtendedRequest from cvat.apps.engine.utils import ( build_annotations_file_name, build_backup_file_name, @@ -83,7 +88,7 @@ def init_request_args(self) -> None: save_images = is_dataset_export(self.request) format_name = self.request.query_params.get("format", "") - self.export_args = self.ExportArgs( + self.export_args: DatasetExporter.ExportArgs = self.ExportArgs( **self.export_args.to_dict(), format=format_name, save_images=save_images, @@ -118,6 +123,7 @@ def build_request_id(self): ).render() def validate_request_id(self, request_id, /) -> None: + # FUTURE-TODO: optimize, request_id is parsed 2 times (first one when checking permissions) parsed_request_id = ExportRequestId.parse(request_id) if ( @@ -150,7 +156,7 @@ def finalize_request(self): handle_dataset_export( self.db_instance, format_name=self.export_args.format, - cloud_storage_id=self.export_args.location_config.get("storage_id"), + cloud_storage_id=self.export_args.location_config.storage_id, save_images=self.export_args.save_images, ) @@ -251,15 +257,22 @@ def finalize_request(self): pass -@attrs.define(kw_only=True) class ResourceImporter(AbstractRequestManager): QUEUE_NAME = settings.CVAT_QUEUES.IMPORT_DATA.value - upload_serializer_class: type[UploadedFileSerializer | UploadedZipFileSerializer] = attrs.field( - init=False - ) + @dataclass + class ImportArgs: + location_config: LocationConfig + file_path: str | None + + def to_dict(self): + return dataclass_asdict(self) + + import_args: ImportArgs | None - tmp_dir: Path = attrs.field(init=False) + def __init__(self, *, request: ExtendedRequest, db_instance: Model | None, tmp_dir: Path): + super().__init__(request=request, db_instance=db_instance) + self.tmp_dir = tmp_dir @property def job_result_ttl(self): @@ -269,20 +282,6 @@ def job_result_ttl(self): def job_failed_ttl(self): return int(settings.IMPORT_CACHE_FAILED_TTL.total_seconds()) - @dataclass - class ImportArgs: - location_config: dict[str, Any] - file_path: str | None - - @property - def location(self) -> Location: - return self.location_config["location"] - - def to_dict(self): - return dataclass_asdict(self) - - import_args: ImportArgs | None = attrs.field(init=False) - def init_request_args(self): file_path: str | None = None @@ -298,7 +297,7 @@ def init_request_args(self): if filename := self.request.query_params.get("filename"): file_path = ( str(self.tmp_dir / filename) - if location_config["location"] != Location.CLOUD_STORAGE + if location_config.location != Location.CLOUD_STORAGE else filename ) @@ -310,27 +309,18 @@ def init_request_args(self): def validate_request(self): super().validate_request() - if self.import_args.location not in Location.list(): - raise ValidationError( - f"Unexpected location {self.import_args.location} specified for the request" - ) - - if self.import_args.location == Location.CLOUD_STORAGE: - if not self.import_args.file_path: - raise ValidationError("The filename was not specified") - - if self.import_args.location_config.get("storage_id") is None: - raise ValidationError( - "Cloud storage location was selected as the source," - + " but cloud storage id was not specified" - ) + if ( + self.import_args.location_config.location == Location.CLOUD_STORAGE + and not self.import_args.file_path + ): + raise ValidationError("The filename was not specified") def _handle_cloud_storage_file_upload(self): - storage_id = self.import_args.location_config["storage_id"] + storage_id = self.import_args.location_config.storage_id db_storage = get_cloud_storage_for_import_or_export( storage_id=storage_id, request=self.request, - is_default=self.import_args.location_config["is_default"], + is_default=self.import_args.location_config.is_default, ) key = self.import_args.file_path @@ -338,10 +328,11 @@ def _handle_cloud_storage_file_upload(self): self.import_args.file_path = tf.name return db_storage, key + @abstractmethod + def _get_payload_file(self): ... + def _handle_non_tus_file_upload(self): - file_serializer = self.upload_serializer_class(data=self.request.data) - file_serializer.is_valid(raise_exception=True) - payload_file = file_serializer.validated_data["file"] + payload_file = self._get_payload_file() with NamedTemporaryFile(prefix="cvat_", dir=TmpDirManager.TMP_ROOT, delete=False) as tf: self.import_args.file_path = tf.name @@ -353,7 +344,7 @@ def _init_callback_with_params(self): ... def init_callback_with_params(self): # Note: self.import_args is changed here - if self.import_args.location == Location.CLOUD_STORAGE: + if self.import_args.location_config.location == Location.CLOUD_STORAGE: db_storage, key = self._handle_cloud_storage_file_upload() elif not self.import_args.file_path: self._handle_non_tus_file_upload() @@ -363,8 +354,7 @@ def init_callback_with_params(self): # redefine here callback and callback args in order to: # - (optional) download file from cloud storage # - remove uploaded file at the end - - if self.import_args.location == Location.CLOUD_STORAGE: + if self.import_args.location_config.location == Location.CLOUD_STORAGE: self.callback_args = ( *self.callback_args[0], db_storage, @@ -374,12 +364,10 @@ def init_callback_with_params(self): ) self.callback = import_resource_from_cloud_storage - # re-define callback to clean up uploaded file self.callback_args = (self.callback, *self.callback_args) self.callback = import_resource_with_clean_up_after -@attrs.define(kw_only=True) class DatasetImporter(ResourceImporter): SUPPORTED_RESOURCES = {RequestTarget.PROJECT, RequestTarget.TASK, RequestTarget.JOB} @@ -388,14 +376,15 @@ class ImportArgs(ResourceImporter.ImportArgs): format: str conv_mask_to_poly: bool - def __attrs_post_init__(self) -> None: - super().__attrs_post_init__() - self.upload_serializer_class = ( - UploadedZipFileSerializer - if isinstance(self.db_instance, Project) - else UploadedFileSerializer + def __init__( + self, + *, + request: ExtendedRequest, + db_instance: Project | Task | Job, + ): + super().__init__( + request=request, db_instance=db_instance, tmp_dir=Path(db_instance.get_tmp_dirname()) ) - self.tmp_dir = Path(self.db_instance.get_tmp_dirname()) def init_request_args(self) -> None: super().init_request_args() @@ -408,6 +397,19 @@ def init_request_args(self) -> None: conv_mask_to_poly=conv_mask_to_poly, ) + def _get_payload_file(self): + # Common serializer is not used to not break API + if isinstance(self.db_instance, Project): + serializer_class = DatasetFileSerializer + file_field = "dataset_file" + else: + serializer_class = AnnotationFileSerializer + file_field = "annotation_file" + + file_serializer = serializer_class(data=self.request.data) + file_serializer.is_valid(raise_exception=True) + return file_serializer.validated_data[file_field] + def _init_callback_with_params(self): if isinstance(self.db_instance, Project): self.callback = dm.project.import_dataset_as_project @@ -454,26 +456,26 @@ def finalize_request(self): handle_dataset_import( self.db_instance, format_name=self.import_args.format, - cloud_storage_id=self.import_args.location_config.get("storage_id"), + cloud_storage_id=self.import_args.location_config.storage_id, ) -@attrs.define(kw_only=True) class BackupImporter(ResourceImporter): SUPPORTED_RESOURCES = {RequestTarget.PROJECT, RequestTarget.TASK} - resource: RequestTarget = attrs.field(validator=attrs.validators.in_(SUPPORTED_RESOURCES)) - upload_serializer_class: type[UploadedZipFileSerializer] = attrs.field( - init=False, default=UploadedZipFileSerializer - ) - @dataclass class ImportArgs(ResourceImporter.ImportArgs): org_id: int | None - def __attrs_post_init__(self) -> None: - super().__attrs_post_init__() - self.tmp_dir = Path(TmpDirManager.TMP_ROOT) + def __init__( + self, + *, + request: ExtendedRequest, + resource: RequestTarget, + ): + super().__init__(request=request, db_instance=None, tmp_dir=Path(TmpDirManager.TMP_ROOT)) + assert resource in self.SUPPORTED_RESOURCES, f"Unsupported resource: {resource}" + self.resource = resource def init_request_args(self) -> None: super().init_request_args() @@ -494,6 +496,19 @@ def build_request_id(self): }, ).render() + def _get_payload_file(self): + # Common serializer is not used to not break API + if isinstance(self.db_instance, Project): + serializer_class = ProjectFileSerializer + file_field = "project_file" + else: + serializer_class = TaskFileSerializer + file_field = "task_file" + + file_serializer = serializer_class(data=self.request.data) + file_serializer.is_valid(raise_exception=True) + return file_serializer.validated_data[file_field] + def _init_callback_with_params(self): self.callback = import_project if self.resource == RequestTarget.PROJECT else import_task self.callback_args = (self.import_args.file_path, self.user_id, self.import_args.org_id) @@ -503,12 +518,19 @@ def finalize_request(self): pass -@attrs.define(kw_only=True) class TaskCreator(AbstractRequestManager): QUEUE_NAME = settings.CVAT_QUEUES.IMPORT_DATA.value SUPPORTED_RESOURCES = {RequestTarget.TASK} - db_data: Data = attrs.field() + def __init__( + self, + *, + request: ExtendedRequest, + db_instance: Task, + db_data: Data, + ): + super().__init__(request=request, db_instance=db_instance) + self.db_data = db_data @property def job_failure_ttl(self): diff --git a/cvat/apps/engine/location.py b/cvat/apps/engine/location.py index 35d903369186..5edf0665a4d8 100644 --- a/cvat/apps/engine/location.py +++ b/cvat/apps/engine/location.py @@ -5,7 +5,9 @@ from enum import Enum from typing import Any, Optional, Union -from cvat.apps.engine.models import Job, Location, Project, Task +import attrs + +from cvat.apps.engine.models import Job, Location, Project, Storage, Task class StorageType(str, Enum): @@ -16,47 +18,47 @@ def __str__(self): return self.value +@attrs.frozen(kw_only=True) +class LocationConfig: + is_default: bool = attrs.field(validator=attrs.validators.instance_of(bool), default=True) + location: Location = attrs.field(converter=Location) + storage_id: int | None = attrs.field( + converter=lambda x: x if x is None else int(x), default=None + ) + + def __attrs_post_init__(self): + if self.location == Location.CLOUD_STORAGE and not self.storage_id: + raise ValueError( + "Trying to use undefined cloud storage (cloud_storage_id was not provided)" + ) + + def get_location_configuration( query_params: dict[str, Any], field_name: str, *, db_instance: Optional[Union[Project, Task, Job]] = None, -) -> dict[str, Any]: +) -> LocationConfig: location = query_params.get("location") - # handle resource import + # handle backup imports if not location and not db_instance: location = Location.LOCAL use_default_settings = location is None - location_conf = {"is_default": use_default_settings} - if use_default_settings: - storage = ( + storage: Storage = ( getattr(db_instance, field_name) if not isinstance(db_instance, Job) else getattr(db_instance.segment.task, field_name) ) - if storage is None: - location_conf["location"] = Location.LOCAL - else: - location_conf["location"] = storage.location - if cloud_storage_id := storage.cloud_storage_id: - location_conf["storage_id"] = cloud_storage_id - else: - if location not in Location.list(): - raise ValueError(f"The specified location {location} is not supported") - - cloud_storage_id = query_params.get("cloud_storage_id") - - if location == Location.CLOUD_STORAGE and not cloud_storage_id: - raise ValueError( - "Cloud storage was selected as location but cloud_storage_id was not specified" - ) - - location_conf["location"] = location - if cloud_storage_id: - location_conf["storage_id"] = int(cloud_storage_id) + return ( + LocationConfig(location=Location.LOCAL) + if storage is None + else LocationConfig(location=storage.location, storage_id=storage.cloud_storage_id) + ) - return location_conf + return LocationConfig( + is_default=False, location=location, storage_id=query_params.get("cloud_storage_id") + ) diff --git a/cvat/apps/engine/models.py b/cvat/apps/engine/models.py index c915c0a47de2..7a561778ee55 100644 --- a/cvat/apps/engine/models.py +++ b/cvat/apps/engine/models.py @@ -1260,6 +1260,10 @@ def __str__(self): def list(cls): return [i.value for i in cls] + @classmethod + def _missing_(cls, value): + raise ValueError(f"The specified location {value} is not supported") + class CloudStorage(TimestampedModel): # restrictions: # AWS bucket name, Azure container name - 63, Google bucket name - 63 without dots and 222 with dots diff --git a/cvat/apps/engine/rq.py b/cvat/apps/engine/rq.py index 5b3df4bf239b..991120e3bf68 100644 --- a/cvat/apps/engine/rq.py +++ b/cvat/apps/engine/rq.py @@ -5,8 +5,9 @@ from __future__ import annotations from abc import ABCMeta, abstractmethod +from contextlib import suppress from functools import cached_property -from typing import TYPE_CHECKING, Any, Callable, Optional, Protocol +from typing import TYPE_CHECKING, Any, Callable, ClassVar, Optional, Protocol from django.conf import settings from django.db.models import Model @@ -329,30 +330,53 @@ def is_rq_job_owner(rq_job: RQJob, user_id: int) -> bool: return False -class ExportRequestId(RequestId): - # optional because export queue works also with events - @cached_property - def subresource(self) -> RequestSubresource | None: - if subresource := self.extra.get("subresource"): - return RequestSubresource(subresource) - return None +class RequestIdWithFormatMixin: + extra: dict[str, Any] @cached_property def format(self) -> str | None: return self.extra.get("format") -class ImportRequestId(RequestId): +class RequestIdWithSubresourceMixin: + TYPE_SEP: ClassVar[str] + + action: str + target: str + extra: dict[str, Any] + + @cached_property + def subresource(self) -> RequestSubresource: + return RequestSubresource(self.extra["subresource"]) + + @cached_property + def type(self) -> str: + return self.TYPE_SEP.join([self.action, self.subresource or self.target]) + + +class RequestIdWithOptionalSubresourceMixin(RequestIdWithSubresourceMixin): @cached_property def subresource(self) -> RequestSubresource | None: - if subresource := self.extra.get("subresource"): - return RequestSubresource(subresource) + with suppress(KeyError): + return super().subresource return None - @cached_property - def format(self) -> str | None: - return self.extra.get("format") + +class ExportRequestId( + RequestIdWithOptionalSubresourceMixin, # subresource is optional because export queue works also with events + RequestIdWithFormatMixin, + RequestId, +): + pass + + +class ImportRequestId( + RequestIdWithOptionalSubresourceMixin, # subresource is optional because import queue works also with backups/task creation jobs + RequestIdWithFormatMixin, + RequestId, +): + pass def define_dependent_job( diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 172421b3fdc9..b66dff9edae1 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -2934,27 +2934,24 @@ class FileInfoSerializer(serializers.Serializer): type = serializers.ChoiceField(choices=["REG", "DIR"]) mime_type = serializers.CharField(max_length=255) +class AnnotationFileSerializer(serializers.Serializer): + annotation_file = serializers.FileField() -class UploadedFileSerializer(serializers.Serializer): - file = serializers.FileField() +class DatasetFileSerializer(serializers.Serializer): + dataset_file = serializers.FileField() - def __init__(self, *args, only_zip: bool = False, **kwargs): - super().__init__(*args, **kwargs) - self._only_zip = only_zip - - # probably there is no need in such validation - def validate_file(self, value): - if self._only_zip and os.path.splitext(value.name)[1] != '.zip': - raise serializers.ValidationError('A file should be a zip archive') + @staticmethod + def validate_dataset_file(value): + if os.path.splitext(value.name)[1] != '.zip': + raise serializers.ValidationError('Dataset file should be zip archive') return value +class TaskFileSerializer(serializers.Serializer): + task_file = serializers.FileField() + +class ProjectFileSerializer(serializers.Serializer): + project_file = serializers.FileField() -@extend_schema_serializer( - component_name="UploadedFile", -) -class UploadedZipFileSerializer(UploadedFileSerializer): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs, only_zip=True) class CommentReadSerializer(serializers.ModelSerializer): owner = BasicUserSerializer(allow_null=True, required=False) diff --git a/cvat/apps/engine/tests/utils.py b/cvat/apps/engine/tests/utils.py index f7b2bd7e4891..3333712f1f8f 100644 --- a/cvat/apps/engine/tests/utils.py +++ b/cvat/apps/engine/tests/utils.py @@ -196,6 +196,7 @@ def _import( api_path: str, file_content: BytesIO, *, + through_field: str, query_params: dict[str, Any] | None = None, expected_4xx_status_code: int | None = None, ): @@ -205,7 +206,7 @@ def _import( response = self._post_request( api_path, user, - data={"file": file_content}, + data={through_field: file_content}, format="multipart", ) self.assertEqual(response.status_code, expected_4xx_status_code or status.HTTP_202_ACCEPTED) @@ -222,7 +223,7 @@ def _import_project_dataset( expected_4xx_status_code: int | None = None ): return self._import( - user, f"/api/projects/{projetc_id}/dataset", file_content, + user, f"/api/projects/{projetc_id}/dataset", file_content, through_field="dataset_file", query_params=query_params, expected_4xx_status_code=expected_4xx_status_code ) @@ -231,7 +232,7 @@ def _import_task_annotations( expected_4xx_status_code: int | None = None ): return self._import( - user, f"/api/tasks/{task_id}/annotations", file_content, + user, f"/api/tasks/{task_id}/annotations", file_content, through_field="annotation_file", query_params=query_params, expected_4xx_status_code=expected_4xx_status_code ) @@ -240,7 +241,7 @@ def _import_job_annotations( expected_4xx_status_code: int | None = None ): return self._import( - user, f"/api/jobs/{job_id}/annotations", file_content, + user, f"/api/jobs/{job_id}/annotations", file_content, through_field="annotation_file", query_params=query_params, expected_4xx_status_code=expected_4xx_status_code ) @@ -249,7 +250,7 @@ def _import_project_backup( expected_4xx_status_code: int | None = None ) -> int | None: response = self._import( - user, "/api/projects/backup", file_content, + user, "/api/projects/backup", file_content, through_field="project_file", query_params=query_params, expected_4xx_status_code=expected_4xx_status_code ) if expected_4xx_status_code: @@ -262,7 +263,7 @@ def _import_task_backup( expected_4xx_status_code: int | None = None ) -> int | None: response = self._import( - user, "/api/tasks/backup", file_content, + user, "/api/tasks/backup", file_content, through_field="task_file", query_params=query_params, expected_4xx_status_code=expected_4xx_status_code ) if expected_4xx_status_code: diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 97c481290335..f02337a327ae 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -98,6 +98,7 @@ from cvat.apps.engine.rq import ImportRQMeta, RequestId, RQMetaWithFailureInfo from cvat.apps.engine.serializers import ( AboutSerializer, + AnnotationFileSerializer, AnnotationGuideReadSerializer, AnnotationGuideWriteSerializer, AssetReadSerializer, @@ -111,6 +112,7 @@ DataMetaReadSerializer, DataMetaWriteSerializer, DataSerializer, + DatasetFileSerializer, FileInfoSerializer, IssueReadSerializer, IssueWriteSerializer, @@ -122,15 +124,15 @@ LabeledDataSerializer, LabelSerializer, PluginsSerializer, + ProjectFileSerializer, ProjectReadSerializer, ProjectWriteSerializer, RqStatusSerializer, + TaskFileSerializer, TaskReadSerializer, TaskValidationLayoutReadSerializer, TaskValidationLayoutWriteSerializer, TaskWriteSerializer, - UploadedFileSerializer, - UploadedZipFileSerializer, UserSerializer, ) from cvat.apps.engine.types import ExtendedRequest @@ -369,7 +371,7 @@ def perform_create(self, serializer, **kwargs): OpenApiParameter('filename', description='Dataset file name', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), ], - request=UploadedZipFileSerializer(required=False), + request=DatasetFileSerializer(required=False), responses={ '202': OpenApiResponse(RqIdSerializer, description='Importing has been started'), '400': OpenApiResponse(description='Failed to import dataset'), @@ -437,10 +439,10 @@ def export_backup(self, request: ExtendedRequest, pk: int): The backup import process is as follows: The first request POST /api/projects/backup schedules a background job on the server - in which the process of a project creating from an uploaded backup is carried out. + in which the process of creating a project from the uploaded backup is carried out. To check the status of the import process, use GET /api/requests/rq_id, - where rq_id is request ID obtained from the response of the previous request. + where rq_id is the request ID obtained from the response to the previous request. Once the import completes successfully, the response will contain the ID of the newly created project in the result_id field. @@ -455,9 +457,9 @@ def export_backup(self, request: ExtendedRequest, pk: int): OpenApiParameter('filename', description='Backup file name', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), ], - request=UploadedZipFileSerializer(required=False), + request=ProjectFileSerializer(required=False), responses={ - '202': OpenApiResponse(RqIdSerializer, description='Import of a backup file has started'), + '202': OpenApiResponse(RqIdSerializer, description='Import of the backup file has started'), }) @action(detail=False, methods=['OPTIONS', 'POST'], url_path=r'backup/?$', serializer_class=None, @@ -788,7 +790,7 @@ def get_queryset(self): in which the process of a task creating from an uploaded backup is carried out. To check the status of the import process, use GET /api/requests/rq_id, - where rq_id is request ID obtained from the response of the previous request. + where rq_id is the request ID obtained from the response to the previous request. Once the import completes successfully, the response will contain the ID of the newly created task in the result_id field. @@ -803,9 +805,9 @@ def get_queryset(self): OpenApiParameter('filename', description='Backup file name', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), ], - request=UploadedZipFileSerializer(required=False), + request=TaskFileSerializer(required=False), responses={ - '202': OpenApiResponse(RqIdSerializer, description='Import of a backup file has started'), + '202': OpenApiResponse(RqIdSerializer, description='Import of the backup file has started'), }) @action(detail=False, methods=['OPTIONS', 'POST'], url_path=r'backup/?$', @@ -1202,7 +1204,7 @@ def append_data_chunk(self, request: ExtendedRequest, pk: int, file_id: str): OpenApiParameter('filename', description='Annotation file name', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), ], - request=UploadedFileSerializer(required=False), + request=AnnotationFileSerializer(required=False), responses={ '201': OpenApiResponse(description='Uploading has finished'), '202': OpenApiResponse(RqIdSerializer, description='Uploading has been started'), @@ -1675,7 +1677,7 @@ def upload_finished(self, request: ExtendedRequest): OpenApiParameter('filename', description='Annotation file name', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), ], - request=UploadedFileSerializer(required=False), + request=AnnotationFileSerializer(required=False), responses={ '201': OpenApiResponse(description='Uploading has finished'), '202': OpenApiResponse(RqIdSerializer, description='Uploading has been started'), diff --git a/cvat/apps/quality_control/quality_reports.py b/cvat/apps/quality_control/quality_reports.py index 0a04c45eb6e4..5beff432032f 100644 --- a/cvat/apps/quality_control/quality_reports.py +++ b/cvat/apps/quality_control/quality_reports.py @@ -50,6 +50,7 @@ User, ValidationMode, ) +from cvat.apps.engine.rq import RequestIdWithSubresourceMixin from cvat.apps.profiler import silk_profile from cvat.apps.quality_control import models from cvat.apps.quality_control.models import ( @@ -2261,13 +2262,10 @@ def generate_report(self) -> ComparisonReport: ) -class QualityRequestId(RequestId): - @property - def subresource(self): - return self.extra["subresource"] +class QualityRequestId(RequestIdWithSubresourceMixin, RequestId): + pass -@define(kw_only=True) class QualityReportRQJobManager(AbstractRequestManager): QUEUE_NAME = settings.CVAT_QUEUES.QUALITY_REPORTS.value SUPPORTED_RESOURCES: ClassVar[set[RequestTarget]] = {RequestTarget.TASK} diff --git a/cvat/apps/redis_handler/background.py b/cvat/apps/redis_handler/background.py index 4025362b2cd6..7b258d7081a2 100644 --- a/cvat/apps/redis_handler/background.py +++ b/cvat/apps/redis_handler/background.py @@ -10,7 +10,6 @@ from typing import Any, Callable, ClassVar from urllib.parse import quote -import attrs import django_rq from django.conf import settings from django.db.models import Model @@ -18,7 +17,6 @@ from django.utils import timezone from django_rq.queues import DjangoRQ, DjangoScheduler from rest_framework import status -from rest_framework.exceptions import MethodNotAllowed from rest_framework.response import Response from rest_framework.serializers import ValidationError from rq.job import Job as RQJob @@ -26,7 +24,12 @@ from cvat.apps.dataset_manager.util import get_export_cache_lock from cvat.apps.engine.cloud_provider import export_resource_to_cloud_storage -from cvat.apps.engine.location import Location, StorageType, get_location_configuration +from cvat.apps.engine.location import ( + Location, + LocationConfig, + StorageType, + get_location_configuration, +) from cvat.apps.engine.log import ServerLogManager from cvat.apps.engine.models import RequestTarget from cvat.apps.engine.permissions import get_cloud_storage_for_import_or_export @@ -43,37 +46,31 @@ LOCK_ACQUIRE_TIMEOUT = LOCK_TTL - 5 -@attrs.define(kw_only=True) class AbstractRequestManager(metaclass=ABCMeta): - SUPPORTED_RESOURCES: ClassVar[set[RequestTarget]] + SUPPORTED_RESOURCES: ClassVar[set[RequestTarget] | None] = None QUEUE_NAME: ClassVar[str] REQUEST_ID_KEY = "rq_id" - # todo: frozen - request: ExtendedRequest = attrs.field() - user_id: int = attrs.field(init=False) - - callback: Callable = attrs.field(init=False, validator=attrs.validators.instance_of(Callable)) - callback_args: tuple | None = attrs.field(init=False, default=None) - callback_kwargs: dict[str, Any] | None = attrs.field(init=False, default=None) - - db_instance: Model | None = attrs.field(default=None) - resource: RequestTarget | None = attrs.field( - init=False, - default=None, - on_setattr=attrs.setters.validate, - ) - - @resource.validator - def validate_resource(self, attribute: attrs.Attribute, value: Any): - if value and value not in self.SUPPORTED_RESOURCES: - raise ValidationError(f"Unsupported resource: {self.resource}") - - def __attrs_post_init__(self): - self.user_id = self.request.user.id - - if self.db_instance is not None: - self.resource = RequestTarget(self.db_instance.__class__.__name__.lower()) + callback: Callable + callback_args: tuple | None + callback_kwargs: dict[str, Any] | None + + def __init__( + self, + *, + request: ExtendedRequest, + db_instance: Model | None = None, + ) -> None: + self.request = request + self.user_id = request.user.id + self.db_instance = db_instance + + if db_instance: + assert self.SUPPORTED_RESOURCES, "Should be defined" + self.resource = RequestTarget(db_instance.__class__.__name__.lower()) + assert ( + self.resource in self.SUPPORTED_RESOURCES + ), f"Unsupported resource: {self.resource}" @classmethod def get_queue(cls) -> DjangoRQ: @@ -115,7 +112,18 @@ def init_request_args(self): """ @abstractmethod - def init_callback_with_params(self) -> None: ... + def init_callback_with_params(self) -> None: + """ + Method should initialize callback function with its args/kwargs: + + self.callback = ... + (optional) self.callback_args = ... + (optional) self.callback_kwargs = ... + """ + + def _set_default_callback_params(self): + self.callback_args = None + self.callback_kwargs = None def validate_request(self) -> Response | None: """Hook to run some validations before processing a request""" @@ -176,6 +184,7 @@ def get_response(self, request_id: str) -> Response: def enqueue_job(self) -> Response: self.init_request_args() self.validate_request() + self._set_default_callback_params() self.init_callback_with_params() queue: DjangoRQ = django_rq.get_queue(self.QUEUE_NAME) @@ -194,7 +203,6 @@ def enqueue_job(self) -> Response: return self.get_response(request_id) -@attrs.define(kw_only=True) class AbstractExporter(AbstractRequestManager): class Downloader: @@ -262,21 +270,18 @@ def download_file(self) -> Response: attachment_filename=job_meta.result_filename, ) - def get_downloader(self): - request_id = self.request.query_params.get(self.REQUEST_ID_KEY) - - if not request_id: - raise ValidationError("Missing request id in the query parameters") - - try: - self.validate_request_id(request_id) - except ValueError: - raise ValidationError("Invalid export request id") + @dataclass + class ExportArgs: + filename: str | None + location_config: LocationConfig - return self.Downloader(request=self.request, queue=self.get_queue(), request_id=request_id) + def to_dict(self): + return dataclass_asdict(self) QUEUE_NAME = settings.CVAT_QUEUES.EXPORT_DATA.value + export_args: ExportArgs | None + @property def job_result_ttl(self): from cvat.apps.dataset_manager.views import get_export_cache_ttl @@ -287,20 +292,6 @@ def job_result_ttl(self): def job_failed_ttl(self): return self.job_result_ttl - @dataclass - class ExportArgs: - filename: str | None - location_config: dict[str, Any] - - @property - def location(self) -> Location: - return self.location_config["location"] - - def to_dict(self): - return dataclass_asdict(self) - - export_args: ExportArgs | None = attrs.field(init=False) - @abstractmethod def get_result_filename(self) -> str: ... @@ -331,17 +322,24 @@ def init_request_args(self) -> None: ) @abstractmethod - def _init_callback_with_params(self): ... + def _init_callback_with_params(self): + """ + Private method that should initialize callback function with its args/kwargs + like the init_callback_with_params method in the parent class. + """ def init_callback_with_params(self): + """ + Method should not be overridden + """ self._init_callback_with_params() - if self.export_args.location == Location.CLOUD_STORAGE: - storage_id = self.export_args.location_config["storage_id"] + if self.export_args.location_config.location == Location.CLOUD_STORAGE: + storage_id = self.export_args.location_config.storage_id db_storage = get_cloud_storage_for_import_or_export( storage_id=storage_id, request=self.request, - is_default=self.export_args.location_config["is_default"], + is_default=self.export_args.location_config.is_default, ) self.callback_args = (db_storage, self.callback) + self.callback_args @@ -350,20 +348,11 @@ def init_callback_with_params(self): def validate_request(self): super().validate_request() - if self.export_args.location not in Location.list(): - raise ValidationError( - f"Unexpected location {self.export_args.location} specified for the request" - ) - - if self.export_args.location == Location.CLOUD_STORAGE: - if not self.export_args.filename: - raise ValidationError("The filename was not specified") - - if self.export_args.location_config.get("storage_id") is None: - raise ValidationError( - "Cloud storage location was selected as the source," - + " but cloud storage id was not specified" - ) + if ( + self.export_args.location_config.location == Location.CLOUD_STORAGE + and not self.export_args.filename + ): + raise ValidationError("The filename was not specified") def build_meta(self, *, request_id): return ExportRQMeta.build_for( @@ -371,8 +360,21 @@ def build_meta(self, *, request_id): db_obj=self.db_instance, result_url=( self.make_result_url(request_id=request_id) - if self.export_args.location != Location.CLOUD_STORAGE + if self.export_args.location_config.location != Location.CLOUD_STORAGE else None ), result_filename=self.get_result_filename(), ) + + def get_downloader(self): + request_id = self.request.query_params.get(self.REQUEST_ID_KEY) + + if not request_id: + raise ValidationError("Missing request id in the query parameters") + + try: + self.validate_request_id(request_id) + except ValueError: + raise ValidationError("Invalid export request id") + + return self.Downloader(request=self.request, queue=self.get_queue(), request_id=request_id) diff --git a/cvat/apps/redis_handler/permissions.py b/cvat/apps/redis_handler/permissions.py index 32af1685f8fa..d92ff4920b4b 100644 --- a/cvat/apps/redis_handler/permissions.py +++ b/cvat/apps/redis_handler/permissions.py @@ -42,7 +42,11 @@ def create( # In case when background job is unique for a user, status check should be available only for this user/admin # In other cases, status check should be available for all users that have target resource VIEW permission - if not parsed_request_id.user_id and isinstance(parsed_request_id.id, int): + if parsed_request_id.user_id: + job_owner = BaseRQMeta.for_job(obj).user + assert job_owner and job_owner.id == parsed_request_id.user_id + + elif isinstance(parsed_request_id.id, int): if parsed_request_id.target == RequestTarget.PROJECT.value: permissions.append( ProjectPermission.create_scope_view(request, parsed_request_id.id) diff --git a/cvat/apps/redis_handler/rq.py b/cvat/apps/redis_handler/rq.py index a8e0ddb0a11e..1e6d40f13da9 100644 --- a/cvat/apps/redis_handler/rq.py +++ b/cvat/apps/redis_handler/rq.py @@ -1,10 +1,13 @@ from __future__ import annotations -import base64 +import urllib.parse +from functools import cached_property from typing import Any, ClassVar, Protocol from uuid import UUID import attrs +from django.conf import settings +from django.utils.module_loading import import_string from rq.job import Job as RQJob @@ -22,7 +25,9 @@ def convert_id(value: int | str | UUID) -> int | UUID: def convert_extra(value: dict) -> dict[str, Any]: assert isinstance(value, dict), f"Unexpected type: {type(value)}" + for k, v in value.items(): + assert v if not isinstance(v, str): value[k] = str(v) @@ -37,7 +42,14 @@ class IncorrectRequestIdError(ValueError): class RequestId: FIELD_SEP: ClassVar[str] = "&" KEY_VAL_SEP: ClassVar[str] = "=" - TYPE_SEP: ClassVar[str] = ":" + + SPECIAL_CHARS = {FIELD_SEP, KEY_VAL_SEP, "/", "."} + ENCODE_MAPPING = { + ".": "@", + } + DECODE_MAPPING = {v: k for k, v in ENCODE_MAPPING.items()} + + TYPE_SEP: ClassVar[str] = ":" # used in serialization logic queue: str = attrs.field(validator=attrs.validators.instance_of(str)) action: str = attrs.field(validator=attrs.validators.instance_of(str)) @@ -48,63 +60,67 @@ class RequestId: ) extra: dict[str, Any] = attrs.field(converter=convert_extra, factory=dict) - # todo: prohibit by default to set this field user_id: int | None = attrs.field(converter=lambda x: x if x is None else int(x), default=None) - @property + @cached_property def type(self) -> str: - subresource = getattr(self, "subresource", None) - return self.TYPE_SEP.join([self.action, subresource or self.target]) - - def convert_to(self, child_class: type[RequestId], /): - # method is going to be used by child classes - return child_class( - queue=self.queue, - action=self.action, - target=self.target, - id=self.id, - user_id=self.user_id, - extra=self.extra, - ) + return self.TYPE_SEP.join([self.action, self.target]) + + def to_dict(self) -> dict[str, Any]: + repr_ = attrs.asdict(self, filter=lambda _, v: bool(v)) + if extra := repr_.pop("extra", None): + repr_.update(extra) + + return repr_ + + @classmethod + def normalize(cls, repr_: dict[str, Any]) -> None: + for key, value in repr_.items(): + str_value = str(value) + + for spec_char in cls.SPECIAL_CHARS: + if spec_char in str_value: + if spec_char in cls.ENCODE_MAPPING: + str_value = str_value.replace(spec_char, cls.ENCODE_MAPPING[spec_char]) + continue + + raise IncorrectRequestIdError( + f"{key} contains special characters: {spec_char!r}" + ) + repr_[key] = str_value def render(self) -> str: - data = self.FIELD_SEP.join( - [ - self.KEY_VAL_SEP.join([k, v]) - for k, v in { - "queue": self.queue, - "action": str(self.action), - "target": str(self.target), - "id": str(self.id), - **( - { - "user_id": str(self.user_id), - } - if self.user_id is not None - else {} - ), - **self.extra, - }.items() - ] - ).encode() - - return base64.b64encode(data).decode() + rq_id_repr = self.to_dict() + + # rq_id is going to be used in urls as path parameter, so it should be URL safe. + self.normalize(rq_id_repr) + # urllib.parse.quote/urllib.parse.urlencode are not used here because: + # - it's client logic to encode request ID + # - return value is used as RQ job ID and should be + # a. in a decoded state + # b. readable + return self.FIELD_SEP.join([f"{k}{self.KEY_VAL_SEP}{v}" for k, v in rq_id_repr.items()]) @classmethod def parse(cls, request_id: str, /): try: - decoded_rq_id = base64.b64decode(request_id).decode() - - keys = set(attrs.fields_dict(cls).keys()) - {"extra"} + common_keys = set(attrs.fields_dict(cls).keys()) - {"extra"} params = {} - for pair in decoded_rq_id.split(RequestId.FIELD_SEP): - key, value = pair.split(RequestId.KEY_VAL_SEP, maxsplit=1) - if key in keys: + for key, value in dict(urllib.parse.parse_qsl(request_id)).items(): + for from_char, to_char in cls.DECODE_MAPPING.items(): + if from_char in value: + value = value.replace(from_char, to_char) + + if key in common_keys: params[key] = value else: params.setdefault("extra", {})[key] = value + if custom_cls_path := settings.RQ_QUEUES[params["queue"]].get("PARSED_JOB_ID_CLASS"): + custom_cls = import_string(custom_cls_path) + return custom_cls(**params) + return cls(**params) except Exception as ex: raise IncorrectRequestIdError from ex diff --git a/cvat/apps/redis_handler/views.py b/cvat/apps/redis_handler/views.py index 79fb97090875..3aae4c6d43ba 100644 --- a/cvat/apps/redis_handler/views.py +++ b/cvat/apps/redis_handler/views.py @@ -1,12 +1,12 @@ import functools from collections import namedtuple from collections.abc import Iterable +from typing import cast import django_rq from django.conf import settings from django.http import HttpResponseBadRequest, HttpResponseNotFound from django.utils.decorators import method_decorator -from django.utils.module_loading import import_string from django.views.decorators.cache import never_cache from django_rq.queues import DjangoRQ from drf_spectacular.utils import OpenApiResponse, extend_schema, extend_schema_view @@ -49,17 +49,11 @@ ), ) class RequestViewSet(viewsets.GenericViewSet): - # FUTURE-TODO: support re-enqueue action SUPPORTED_QUEUES = { queue_name for queue_name, queue_conf in settings.RQ_QUEUES.items() if queue_conf.get("VISIBLE_VIA_REQUESTS_API") } - PARSED_JOB_ID_CLASSES = { - queue_name: import_string(settings.RQ_QUEUES[queue_name]["PARSED_JOB_ID_CLASS"]) - for queue_name in SUPPORTED_QUEUES - if "PARSED_JOB_ID_CLASS" in settings.RQ_QUEUES[queue_name] - } serializer_class = RequestSerializer iam_organization_field = None @@ -122,10 +116,6 @@ def get_queryset(self): def queues(self) -> Iterable[DjangoRQ]: return (django_rq.get_queue(queue_name) for queue_name in self.SUPPORTED_QUEUES) - @classmethod - def get_parsed_id_class(cls, queue_name: str) -> type[RequestId]: - return cls.PARSED_JOB_ID_CLASSES.get(queue_name, RequestId) - def _get_rq_jobs_from_queue(self, queue: DjangoRQ, user_id: int) -> list[RQJob]: job_ids = set( queue.get_job_ids() @@ -136,12 +126,11 @@ def _get_rq_jobs_from_queue(self, queue: DjangoRQ, user_id: int) -> list[RQJob]: ) jobs = [] - ParsedIdClass = self.get_parsed_id_class(queue.name) - for job in queue.job_class.fetch_many(job_ids, queue.connection): if job and is_rq_job_owner(job, user_id): + job = cast(CustomRQJob, job) try: - parsed_request_id = ParsedIdClass.parse(job.id) + parsed_request_id = RequestId.parse(job.id) except Exception: # nosec B112 continue @@ -191,12 +180,6 @@ def _get_rq_job_by_id(self, rq_id: str) -> RQJob | None: job = queue.fetch_job(rq_id) if job: - ParsedIdClass = self.get_parsed_id_class(queue.name) - if ( - type(parsed_request_id) is not ParsedIdClass # fmt: skip # pylint: disable=unidiomatic-typecheck - ): - parsed_request_id = parsed_request_id.convert_to(ParsedIdClass) - job.parsed_id = parsed_request_id return job diff --git a/cvat/schema.yml b/cvat/schema.yml index bcd6ec440e5e..f9ea4fc637a6 100644 --- a/cvat/schema.yml +++ b/cvat/schema.yml @@ -2338,10 +2338,10 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/UploadedFileRequest' + $ref: '#/components/schemas/AnnotationFileRequest' multipart/form-data: schema: - $ref: '#/components/schemas/UploadedFileRequest' + $ref: '#/components/schemas/AnnotationFileRequest' security: - sessionAuth: [] csrfAuth: [] @@ -3777,10 +3777,10 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/UploadedFileRequest' + $ref: '#/components/schemas/DatasetFileRequest' multipart/form-data: schema: - $ref: '#/components/schemas/UploadedFileRequest' + $ref: '#/components/schemas/DatasetFileRequest' security: - sessionAuth: [] csrfAuth: [] @@ -3897,10 +3897,10 @@ paths: The backup import process is as follows: The first request POST /api/projects/backup schedules a background job on the server - in which the process of a project creating from an uploaded backup is carried out. + in which the process of creating a project from the uploaded backup is carried out. To check the status of the import process, use GET /api/requests/rq_id, - where rq_id is request ID obtained from the response of the previous request. + where rq_id is the request ID obtained from the response to the previous request. Once the import completes successfully, the response will contain the ID of the newly created project in the result_id field. @@ -3946,10 +3946,10 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/UploadedFileRequest' + $ref: '#/components/schemas/ProjectFileRequest' multipart/form-data: schema: - $ref: '#/components/schemas/UploadedFileRequest' + $ref: '#/components/schemas/ProjectFileRequest' security: - sessionAuth: [] csrfAuth: [] @@ -3962,7 +3962,7 @@ paths: application/vnd.cvat+json: schema: $ref: '#/components/schemas/RqId' - description: Import of a backup file has started + description: Import of the backup file has started /api/quality/conflicts: get: operationId: quality_list_conflicts @@ -5132,10 +5132,10 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/UploadedFileRequest' + $ref: '#/components/schemas/AnnotationFileRequest' multipart/form-data: schema: - $ref: '#/components/schemas/UploadedFileRequest' + $ref: '#/components/schemas/AnnotationFileRequest' security: - sessionAuth: [] csrfAuth: [] @@ -5728,7 +5728,7 @@ paths: in which the process of a task creating from an uploaded backup is carried out. To check the status of the import process, use GET /api/requests/rq_id, - where rq_id is request ID obtained from the response of the previous request. + where rq_id is the request ID obtained from the response to the previous request. Once the import completes successfully, the response will contain the ID of the newly created task in the result_id field. @@ -5774,10 +5774,10 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/UploadedFileRequest' + $ref: '#/components/schemas/TaskFileRequest' multipart/form-data: schema: - $ref: '#/components/schemas/UploadedFileRequest' + $ref: '#/components/schemas/TaskFileRequest' security: - sessionAuth: [] csrfAuth: [] @@ -5790,7 +5790,7 @@ paths: application/vnd.cvat+json: schema: $ref: '#/components/schemas/RqId' - description: Import of a backup file has started + description: Import of the backup file has started /api/users: get: operationId: users_list @@ -6461,6 +6461,14 @@ components: * `mismatching_attributes` - MISMATCHING_ATTRIBUTES * `mismatching_groups` - MISMATCHING_GROUPS * `covered_annotation` - COVERED_ANNOTATION + AnnotationFileRequest: + type: object + properties: + annotation_file: + type: string + format: binary + required: + - annotation_file AnnotationGuideRead: type: object properties: @@ -7147,6 +7155,14 @@ components: - $ref: '#/components/schemas/RqId' - type: string format: binary + DatasetFileRequest: + type: object + properties: + dataset_file: + type: string + format: binary + required: + - dataset_file DatasetFormat: type: object properties: @@ -9430,6 +9446,14 @@ components: description: |- * `image_size` - IMAGE_SIZE * `group_bbox_size` - GROUP_BBOX_SIZE + ProjectFileRequest: + type: object + properties: + project_file: + type: string + format: binary + required: + - project_file ProjectRead: type: object properties: @@ -10339,6 +10363,14 @@ components: * `accuracy` - ACCURACY * `precision` - PRECISION * `recall` - RECALL + TaskFileRequest: + type: object + properties: + task_file: + type: string + format: binary + required: + - task_file TaskRead: type: object properties: @@ -10650,14 +10682,6 @@ components: required: - frame - type - UploadedFileRequest: - type: object - properties: - file: - type: string - format: binary - required: - - file User: type: object properties: diff --git a/cvat/settings/base.py b/cvat/settings/base.py index 4451939daca2..836c8816f6e7 100644 --- a/cvat/settings/base.py +++ b/cvat/settings/base.py @@ -302,43 +302,51 @@ class CVAT_QUEUES(Enum): CVAT_QUEUES.EXPORT_DATA.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "4h", + # custom fields "VISIBLE_VIA_REQUESTS_API": True, "PARSED_JOB_ID_CLASS": "cvat.apps.engine.rq.ExportRequestId", }, CVAT_QUEUES.AUTO_ANNOTATION.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "24h", + # custom fields "VISIBLE_VIA_REQUESTS_API": True, }, CVAT_QUEUES.WEBHOOKS.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "1h", + # custom fields "VISIBLE_VIA_REQUESTS_API": False, }, CVAT_QUEUES.NOTIFICATIONS.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "1h", + # custom fields "VISIBLE_VIA_REQUESTS_API": False, }, CVAT_QUEUES.QUALITY_REPORTS.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "1h", + # custom fields "VISIBLE_VIA_REQUESTS_API": True, "PARSED_JOB_ID_CLASS": "cvat.apps.quality_control.quality_reports.QualityRequestId", }, CVAT_QUEUES.CLEANING.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "2h", + # custom fields "VISIBLE_VIA_REQUESTS_API": False, }, CVAT_QUEUES.CHUNKS.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "5m", + # custom fields "VISIBLE_VIA_REQUESTS_API": False, }, CVAT_QUEUES.CONSENSUS.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "1h", + # custom fields "VISIBLE_VIA_REQUESTS_API": True, }, } diff --git a/tests/python/rest_api/test_analytics.py b/tests/python/rest_api/test_analytics.py index 082ee194eb66..f1381091b160 100644 --- a/tests/python/rest_api/test_analytics.py +++ b/tests/python/rest_api/test_analytics.py @@ -185,9 +185,7 @@ def _export_events( assert api_version == 2 - request_id, response = api_client.events_api.create_export( - **kwargs, _check_status=False - ) + request_id, response = api_client.events_api.create_export(**kwargs, _check_status=False) assert response.status == HTTPStatus.ACCEPTED if "location" in kwargs and "cloud_storage_id" in kwargs: diff --git a/tests/python/rest_api/test_projects.py b/tests/python/rest_api/test_projects.py index 95961bd45fea..2589dcb2cc53 100644 --- a/tests/python/rest_api/test_projects.py +++ b/tests/python/rest_api/test_projects.py @@ -623,7 +623,7 @@ def _test_import_project(self, username, project_id, format_name, data): (_, response) = api_client.projects_api.create_dataset( id=project_id, format=format_name, - uploaded_file_request={"file": data}, + dataset_write_request={"dataset_file": data}, _content_type="multipart/form-data", ) assert response.status == HTTPStatus.ACCEPTED diff --git a/tests/python/rest_api/utils.py b/tests/python/rest_api/utils.py index 4531b09d899a..2ffde31d8882 100644 --- a/tests/python/rest_api/utils.py +++ b/tests/python/rest_api/utils.py @@ -256,14 +256,14 @@ def import_backup( def import_project_backup(username: str, file_content: BytesIO, **kwargs) -> None: with make_api_client(username) as api_client: return import_backup( - api_client.projects_api, uploaded_file_request={"file": file_content}, **kwargs + api_client.projects_api, project_file_request={"project_file": file_content}, **kwargs ) def import_task_backup(username: str, file_content: BytesIO, **kwargs) -> None: with make_api_client(username) as api_client: return import_backup( - api_client.tasks_api, uploaded_file_request={"file": file_content}, **kwargs + api_client.tasks_api, task_file_request={"task_file": file_content}, **kwargs ) From bf830f09532c4b0488e65a3fa24589a740207028 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 15 Apr 2025 09:26:08 +0200 Subject: [PATCH 063/103] t --- cvat/apps/engine/background.py | 4 +-- cvat/apps/engine/mixins.py | 4 +-- cvat/apps/engine/rq.py | 8 +++--- cvat/apps/engine/views.py | 2 +- cvat/apps/events/export.py | 27 +++++++++++++------- cvat/apps/quality_control/quality_reports.py | 1 - cvat/apps/redis_handler/rq.py | 4 +-- cvat/apps/redis_handler/serializers.py | 12 +++------ tests/python/rest_api/test_consensus.py | 2 +- tests/python/rest_api/test_projects.py | 2 +- 10 files changed, 33 insertions(+), 33 deletions(-) diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index a32404eec1ec..28521eb8b176 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -200,7 +200,7 @@ def validate_request_id(self, request_id, /) -> None: parsed_request_id.action != RequestAction.EXPORT or parsed_request_id.target != RequestTarget(self.resource) or parsed_request_id.id != self.db_instance.pk - or parsed_request_id.subresource is not RequestSubresource.BACKUP + or parsed_request_id.subresource != RequestSubresource.BACKUP ): raise ValueError("The provided request id does not match exported target or resource") @@ -498,7 +498,7 @@ def build_request_id(self): def _get_payload_file(self): # Common serializer is not used to not break API - if isinstance(self.db_instance, Project): + if self.resource == RequestTarget.PROJECT: serializer_class = ProjectFileSerializer file_field = "project_file" else: diff --git a/cvat/apps/engine/mixins.py b/cvat/apps/engine/mixins.py index 63004bbfaafc..12d0b8cc13ab 100644 --- a/cvat/apps/engine/mixins.py +++ b/cvat/apps/engine/mixins.py @@ -26,7 +26,7 @@ from cvat.apps.engine.background import BackupExporter, DatasetExporter from cvat.apps.engine.handlers import clear_import_cache from cvat.apps.engine.log import ServerLogManager -from cvat.apps.engine.models import Location, RequestAction, RequestSubresource, RequestTarget +from cvat.apps.engine.models import Location, RequestAction, RequestTarget from cvat.apps.engine.rq import RequestId from cvat.apps.engine.serializers import DataSerializer from cvat.apps.engine.types import ExtendedRequest @@ -275,7 +275,7 @@ def init_tus_upload(self, request: ExtendedRequest): target=RequestTarget(object_class_name), id=self._object.pk, extra={ - "subresource": RequestSubresource(import_type) + "subresource": import_type, } ).render() queue = django_rq.get_queue(settings.CVAT_QUEUES.IMPORT_DATA.value) diff --git a/cvat/apps/engine/rq.py b/cvat/apps/engine/rq.py index 991120e3bf68..80e23056b6f6 100644 --- a/cvat/apps/engine/rq.py +++ b/cvat/apps/engine/rq.py @@ -20,8 +20,6 @@ from cvat.apps.engine.types import ExtendedRequest from cvat.apps.redis_handler.rq import RequestId -from .models import RequestSubresource - if TYPE_CHECKING: from django.contrib.auth.models import User @@ -346,8 +344,8 @@ class RequestIdWithSubresourceMixin: extra: dict[str, Any] @cached_property - def subresource(self) -> RequestSubresource: - return RequestSubresource(self.extra["subresource"]) + def subresource(self) -> str: + return self.extra["subresource"] @cached_property def type(self) -> str: @@ -356,7 +354,7 @@ def type(self) -> str: class RequestIdWithOptionalSubresourceMixin(RequestIdWithSubresourceMixin): @cached_property - def subresource(self) -> RequestSubresource | None: + def subresource(self) -> str | None: with suppress(KeyError): return super().subresource diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index f02337a327ae..56794775e788 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -1311,7 +1311,6 @@ def status(self, request, pk): def _get_rq_response(queue, job_id): queue = django_rq.get_queue(queue) job = queue.fetch_job(job_id) - rq_job_meta = ImportRQMeta.for_job(job) response = {} if job is None or job.is_finished: response = { "state": "Finished" } @@ -1324,6 +1323,7 @@ def _get_rq_response(queue, job_id): # https://github.com/cvat-ai/cvat/issues/5215 response = { "state": "Failed", "message": parse_exception_message(job.exc_info or "Unknown error") } else: + rq_job_meta = ImportRQMeta.for_job(job) response = { "state": "Started" } if rq_job_meta.status: response['message'] = rq_job_meta.status diff --git a/cvat/apps/events/export.py b/cvat/apps/events/export.py index 96bdcbdd8eff..246b39f96141 100644 --- a/cvat/apps/events/export.py +++ b/cvat/apps/events/export.py @@ -7,7 +7,6 @@ import uuid from datetime import datetime, timedelta -import attrs import clickhouse_connect from dateutil import parser from django.conf import settings @@ -18,7 +17,8 @@ from cvat.apps.dataset_manager.util import ExportCacheManager from cvat.apps.dataset_manager.views import log_exception from cvat.apps.engine.log import ServerLogManager -from cvat.apps.engine.rq import RQMetaWithFailureInfo +from cvat.apps.engine.models import RequestAction +from cvat.apps.engine.rq import ExportRequestId, RQMetaWithFailureInfo from cvat.apps.engine.types import ExtendedRequest from cvat.apps.engine.utils import sendfile from cvat.apps.engine.view_utils import DeprecatedResponse @@ -29,6 +29,7 @@ slogger = ServerLogManager(__name__) DEFAULT_CACHE_TTL = timedelta(hours=1) +TARGET = "events" def _create_csv(query_params: dict, output_filename: str): @@ -82,14 +83,16 @@ def _create_csv(query_params: dict, output_filename: str): raise -@attrs.define(kw_only=True) class EventsExporter(AbstractExporter): - filter_query: dict = attrs.field(init=False) - query_id: uuid.UUID = attrs.field(init=False) # temporary arg + def __init__( + self, + *, + request: ExtendedRequest, + ) -> None: + super().__init__(request=request) - def __attrs_post_init__(self): - super().__attrs_post_init__() + # temporary arg if query_id := self.request.query_params.get("query_id"): self.query_id = uuid.UUID(query_id) else: @@ -98,12 +101,18 @@ def __attrs_post_init__(self): def build_request_id(self): return RequestId( queue=self.QUEUE_NAME, - action="export", - target="events", + action=RequestAction.EXPORT, + target=TARGET, id=self.query_id, user_id=self.user_id, ).render() + def validate_request_id(self, request_id, /) -> None: + parsed_request_id = ExportRequestId.parse(request_id) + + if parsed_request_id.action != RequestAction.EXPORT or parsed_request_id.target != TARGET: + raise ValueError("The provided request id does not match exported target") + def init_request_args(self): super().init_request_args() perm = EventsPermission.create_scope_list(self.request) diff --git a/cvat/apps/quality_control/quality_reports.py b/cvat/apps/quality_control/quality_reports.py index 5beff432032f..deed13603e45 100644 --- a/cvat/apps/quality_control/quality_reports.py +++ b/cvat/apps/quality_control/quality_reports.py @@ -2313,7 +2313,6 @@ def _check_task_quality(cls, *, task_id: int) -> int: return cls()._compute_reports(task_id=task_id) def _compute_reports(self, task_id: int) -> int: - # raise Exception("Ooops") with transaction.atomic(): try: # Preload all the data for the computations. diff --git a/cvat/apps/redis_handler/rq.py b/cvat/apps/redis_handler/rq.py index 1e6d40f13da9..e0d26ff7fea3 100644 --- a/cvat/apps/redis_handler/rq.py +++ b/cvat/apps/redis_handler/rq.py @@ -126,9 +126,9 @@ def parse(cls, request_id: str, /): raise IncorrectRequestIdError from ex -class WithParsedId(Protocol): +class _WithParsedId(Protocol): parsed_id: RequestId -class CustomRQJob(RQJob, WithParsedId): +class CustomRQJob(RQJob, _WithParsedId): pass diff --git a/cvat/apps/redis_handler/serializers.py b/cvat/apps/redis_handler/serializers.py index 012eb7d76278..db326e4cbc5d 100644 --- a/cvat/apps/redis_handler/serializers.py +++ b/cvat/apps/redis_handler/serializers.py @@ -17,8 +17,8 @@ from cvat.apps.engine import models from cvat.apps.engine.log import ServerLogManager -from cvat.apps.engine.models import RequestAction, RequestSubresource -from cvat.apps.engine.rq import BaseRQMeta, ExportRQMeta, ImportRQMeta +from cvat.apps.engine.models import RequestAction +from cvat.apps.engine.rq import BaseRQMeta, ExportRQMeta, ImportRQMeta, RequestIdWithFormatMixin from cvat.apps.engine.serializers import BasicUserSerializer from cvat.apps.engine.utils import parse_exception_message from cvat.apps.lambda_manager.rq import LambdaRQMeta @@ -61,13 +61,7 @@ def to_representation(self, rq_job: CustomRQJob) -> dict[str, Any]: } if parsed_request_id.action == RequestAction.AUTOANNOTATE: representation["function_id"] = LambdaRQMeta.for_job(rq_job).function_id - elif parsed_request_id.action in ( - RequestAction.IMPORT, - RequestAction.EXPORT, - ) and parsed_request_id.subresource in ( - RequestSubresource.ANNOTATIONS, - RequestSubresource.DATASET, - ): + elif isinstance(parsed_request_id, RequestIdWithFormatMixin): representation["format"] = parsed_request_id.format return representation diff --git a/tests/python/rest_api/test_consensus.py b/tests/python/rest_api/test_consensus.py index fe9879ba8773..9e06344df8be 100644 --- a/tests/python/rest_api/test_consensus.py +++ b/tests/python/rest_api/test_consensus.py @@ -285,7 +285,7 @@ def _test_check_merge_status( *, staff_user: str, another_user: str, - another_user_status: HTTPStatus = HTTPStatus.FORBIDDEN, + another_user_status: int = HTTPStatus.FORBIDDEN, ): with make_api_client(another_user) as api_client: (_, response) = api_client.requests_api.retrieve( diff --git a/tests/python/rest_api/test_projects.py b/tests/python/rest_api/test_projects.py index 2589dcb2cc53..dc75db71b043 100644 --- a/tests/python/rest_api/test_projects.py +++ b/tests/python/rest_api/test_projects.py @@ -623,7 +623,7 @@ def _test_import_project(self, username, project_id, format_name, data): (_, response) = api_client.projects_api.create_dataset( id=project_id, format=format_name, - dataset_write_request={"dataset_file": data}, + dataset_file_request={"dataset_file": data}, _content_type="multipart/form-data", ) assert response.status == HTTPStatus.ACCEPTED From db4380e366a5acc384bd004ac10a61c011c61616 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 15 Apr 2025 13:24:40 +0200 Subject: [PATCH 064/103] Update test_user_without_rights_cannot_check_status_of_report_creation_in_org && test_user_without_rights_cannot_check_status_of_merge_in_org --- cvat/apps/engine/rq.py | 9 ++- cvat/apps/redis_handler/rq.py | 3 +- tests/python/rest_api/test_consensus.py | 62 +++++-------------- tests/python/rest_api/test_quality_control.py | 62 +++++-------------- tests/python/rest_api/utils.py | 34 +++++++++- 5 files changed, 69 insertions(+), 101 deletions(-) diff --git a/cvat/apps/engine/rq.py b/cvat/apps/engine/rq.py index 80e23056b6f6..e3f6e4f2712b 100644 --- a/cvat/apps/engine/rq.py +++ b/cvat/apps/engine/rq.py @@ -6,7 +6,6 @@ from abc import ABCMeta, abstractmethod from contextlib import suppress -from functools import cached_property from typing import TYPE_CHECKING, Any, Callable, ClassVar, Optional, Protocol from django.conf import settings @@ -331,7 +330,7 @@ def is_rq_job_owner(rq_job: RQJob, user_id: int) -> bool: class RequestIdWithFormatMixin: extra: dict[str, Any] - @cached_property + @property def format(self) -> str | None: return self.extra.get("format") @@ -343,17 +342,17 @@ class RequestIdWithSubresourceMixin: target: str extra: dict[str, Any] - @cached_property + @property def subresource(self) -> str: return self.extra["subresource"] - @cached_property + @property def type(self) -> str: return self.TYPE_SEP.join([self.action, self.subresource or self.target]) class RequestIdWithOptionalSubresourceMixin(RequestIdWithSubresourceMixin): - @cached_property + @property def subresource(self) -> str | None: with suppress(KeyError): return super().subresource diff --git a/cvat/apps/redis_handler/rq.py b/cvat/apps/redis_handler/rq.py index e0d26ff7fea3..66addd08459d 100644 --- a/cvat/apps/redis_handler/rq.py +++ b/cvat/apps/redis_handler/rq.py @@ -1,7 +1,6 @@ from __future__ import annotations import urllib.parse -from functools import cached_property from typing import Any, ClassVar, Protocol from uuid import UUID @@ -62,7 +61,7 @@ class RequestId: user_id: int | None = attrs.field(converter=lambda x: x if x is None else int(x), default=None) - @cached_property + @property def type(self) -> str: return self.TYPE_SEP.join([self.action, self.target]) diff --git a/tests/python/rest_api/test_consensus.py b/tests/python/rest_api/test_consensus.py index 9e06344df8be..8eabc2fec1e0 100644 --- a/tests/python/rest_api/test_consensus.py +++ b/tests/python/rest_api/test_consensus.py @@ -18,7 +18,13 @@ from shared.utils.config import USER_PASS, make_api_client -from .utils import CollectionSimpleFilterTestBase, compare_annotations, wait_background_request +from .utils import ( + CollectionSimpleFilterTestBase, + compare_annotations, + invite_user_to_org, + register_new_user, + wait_background_request, +) class _PermissionTestBase: @@ -330,58 +336,20 @@ def test_user_without_rights_cannot_check_status_of_merge_in_sandbox( def test_user_without_rights_cannot_check_status_of_merge_in_org( self, find_org_task_with_consensus, - find_users, same_org: bool, role: str, - admin_user, organizations, ): task, task_staff = find_org_task_with_consensus(is_staff=True, user_org_role="supervisor") - org_filter = "org" - if not same_org: - org_filter = "exclude_" + org_filter - - try: - another_user = next( - u - for u in find_users( - role=role, **{org_filter: task["organization"]}, exclude_is_superuser=True - ) - if ( - u["id"] != task_staff["id"] - and u["id"] != task["owner"]["id"] - and u["id"] != (task["assignee"] or {}).get("id") - ) - ) - except StopIteration: - # create a new user that passes the requirements - with make_api_client(admin_user) as api_client: - user_name = f"{same_org}{role}" - another_user, _ = api_client.auth_api.create_register( - models.RegisterSerializerExRequest( - username=user_name, - password1=USER_PASS, - password2=USER_PASS, - email=f"{user_name}@email.com", - ) - ) - - org_id = ( - task["organization"] - if same_org - else next(o for o in organizations if o["id"] != task["organization"])["id"] - ) - - # looks like a bug in SDK, second post request fails with CSRF issue when the same api_client is used - with make_api_client(admin_user) as api_client: - api_client.invitations_api.create( - models.InvitationWriteRequest( - role=role, - email=another_user["email"], - ), - org_id=org_id, - ) + # create a new user that passes the requirements + another_user = register_new_user(f"{same_org}{role}") + org_id = ( + task["organization"] + if same_org + else next(o for o in organizations if o["id"] != task["organization"])["id"] + ) + invite_user_to_org(another_user["email"], org_id, role) rq_id = self.request_merge(task_id=task["id"], user=task_staff["username"]) self._test_check_merge_status( diff --git a/tests/python/rest_api/test_quality_control.py b/tests/python/rest_api/test_quality_control.py index 001776cf564c..ae21c88fadfc 100644 --- a/tests/python/rest_api/test_quality_control.py +++ b/tests/python/rest_api/test_quality_control.py @@ -16,9 +16,15 @@ from cvat_sdk.core.helpers import get_paginated_collection from deepdiff import DeepDiff -from shared.utils.config import USER_PASS, make_api_client +from shared.utils.config import make_api_client -from .utils import CollectionSimpleFilterTestBase, parse_frame_step, wait_background_request +from .utils import ( + CollectionSimpleFilterTestBase, + invite_user_to_org, + parse_frame_step, + register_new_user, + wait_background_request, +) class _PermissionTestBase: @@ -677,57 +683,21 @@ def test_user_without_rights_cannot_check_status_of_report_creation_in_org( role: str, admin_user: str, find_org_task_without_gt: Callable[[bool, str], tuple[dict[str, Any], dict[str, Any]]], - find_users: Callable[..., list[dict[str, Any]]], organizations, ): task, task_staff = find_org_task_without_gt(is_staff=True, user_org_role="supervisor") self.create_gt_job(admin_user, task["id"]) - org_filter = "org" - if not same_org: - org_filter = "exclude_" + org_filter - - try: - another_user = next( - u - for u in find_users( - role=role, exclude_is_superuser=True, **{org_filter: task["organization"]} - ) - if ( - u["id"] != task_staff["id"] - and u["id"] != task["owner"]["id"] - and u["id"] != (task["assignee"] or {}).get("id") - ) - ) - except StopIteration: - # create a new user that passes the requirements - with make_api_client(admin_user) as api_client: - user_name = f"{same_org}{role}" - another_user, _ = api_client.auth_api.create_register( - models.RegisterSerializerExRequest( - username=user_name, - password1=USER_PASS, - password2=USER_PASS, - email=f"{user_name}@email.com", - ) - ) - - org_id = ( - task["organization"] - if same_org - else next(o for o in organizations if o["id"] != task["organization"])["id"] - ) + # create another user that passes the requirements + another_user = register_new_user(f"{same_org}{role}") + org_id = ( + task["organization"] + if same_org + else next(o for o in organizations if o["id"] != task["organization"])["id"] + ) + invite_user_to_org(another_user["email"], org_id, role) - # looks like a bug in SDK, second post request fails with CSRF issue when the same api_client is used - with make_api_client(admin_user) as api_client: - api_client.invitations_api.create( - models.InvitationWriteRequest( - role=role, - email=another_user["email"], - ), - org_id=org_id, - ) rq_id = self._initialize_report_creation(task["id"], task_staff["username"]) self._test_check_status_of_report_creation( rq_id, task_staff=task_staff["username"], another_user=another_user["username"] diff --git a/tests/python/rest_api/utils.py b/tests/python/rest_api/utils.py index 2ffde31d8882..3d4d560f4a82 100644 --- a/tests/python/rest_api/utils.py +++ b/tests/python/rest_api/utils.py @@ -22,7 +22,7 @@ from deepdiff import DeepDiff from urllib3 import HTTPResponse -from shared.utils.config import make_api_client +from shared.utils.config import USER_PASS, make_api_client, post_method def initialize_export(endpoint: Endpoint, *, expect_forbidden: bool = False, **kwargs) -> str: @@ -491,3 +491,35 @@ def unique( it: Union[Iterator[_T], Iterable[_T]], *, key: Callable[[_T], Hashable] = None ) -> Iterable[_T]: return {key(v): v for v in it}.values() + + +def register_new_user(username: str) -> dict[str, Any]: + response = post_method( + "admin1", + "auth/register", + data={ + "username": username, + "password1": USER_PASS, + "password2": USER_PASS, + "email": f"{username}@email.com", + }, + ) + + assert response.status_code == HTTPStatus.CREATED + return response.json() + + +def invite_user_to_org( + user_email: str, + org_id: int, + role: str, +): + with make_api_client("admin1") as api_client: + invitation, _ = api_client.invitations_api.create( + models.InvitationWriteRequest( + role=role, + email=user_email, + ), + org_id=org_id, + ) + return invitation From 277a14004349300d4318717cf09cb7337e95c567 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 15 Apr 2025 13:32:45 +0200 Subject: [PATCH 065/103] Remove unused import --- tests/python/rest_api/test_consensus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/rest_api/test_consensus.py b/tests/python/rest_api/test_consensus.py index 8eabc2fec1e0..81c6bbfa937c 100644 --- a/tests/python/rest_api/test_consensus.py +++ b/tests/python/rest_api/test_consensus.py @@ -16,7 +16,7 @@ from cvat_sdk.core.helpers import get_paginated_collection from deepdiff import DeepDiff -from shared.utils.config import USER_PASS, make_api_client +from shared.utils.config import make_api_client from .utils import ( CollectionSimpleFilterTestBase, From bf5f09d88d0743f38cd8f7b88be4c0dec7a0d92f Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 15 Apr 2025 13:33:46 +0200 Subject: [PATCH 066/103] Fix type --- tests/python/rest_api/test_quality_control.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/rest_api/test_quality_control.py b/tests/python/rest_api/test_quality_control.py index ae21c88fadfc..9da1381ee56f 100644 --- a/tests/python/rest_api/test_quality_control.py +++ b/tests/python/rest_api/test_quality_control.py @@ -595,7 +595,7 @@ def _test_check_status_of_report_creation( *, task_staff: str, another_user: str, - another_user_status: HTTPStatus = HTTPStatus.FORBIDDEN, + another_user_status: int = HTTPStatus.FORBIDDEN, ): with make_api_client(another_user) as api_client: (_, response) = api_client.requests_api.retrieve( From 0aea750824d549ac303e860243b6a7b053c210aa Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 15 Apr 2025 14:11:47 +0200 Subject: [PATCH 067/103] test_cant_import_annotations_as_project: revert expected exception back --- tests/python/rest_api/test_projects.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/rest_api/test_projects.py b/tests/python/rest_api/test_projects.py index dc75db71b043..5bb5f2b1723c 100644 --- a/tests/python/rest_api/test_projects.py +++ b/tests/python/rest_api/test_projects.py @@ -946,7 +946,7 @@ def _export_task(task_id: int, format_name: str) -> io.BytesIO: ) ) - with pytest.raises(exceptions.ApiException, match="A file should be a zip archive"): + with pytest.raises(exceptions.ApiException, match="Dataset file should be zip archive"): self._test_import_project( admin_user, project.id, From 546b5276cf5c749fefbaec050adc3d9084256d9e Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 15 Apr 2025 14:16:47 +0200 Subject: [PATCH 068/103] fix legacy events api --- cvat/apps/events/export.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cvat/apps/events/export.py b/cvat/apps/events/export.py index 246b39f96141..fe98dc1b896c 100644 --- a/cvat/apps/events/export.py +++ b/cvat/apps/events/export.py @@ -223,6 +223,7 @@ def export(request: ExtendedRequest): manager.init_request_args() # request validation is missed here since exporting to a cloud_storage is disabled + manager._set_default_callback_params() manager.init_callback_with_params() manager.setup_new_job(queue, request_id) From fdc8f081ab91f812926b61821fae8d971ada6384 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 15 Apr 2025 14:32:58 +0200 Subject: [PATCH 069/103] Fix typo --- cvat/apps/engine/background.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index 28521eb8b176..71ac692bb31b 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -356,7 +356,7 @@ def init_callback_with_params(self): # - remove uploaded file at the end if self.import_args.location_config.location == Location.CLOUD_STORAGE: self.callback_args = ( - *self.callback_args[0], + self.callback_args[0], db_storage, key, self.callback, From 4a4c871d02289ce7cd7785cd9cef0849349fb25b Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 15 Apr 2025 14:33:38 +0200 Subject: [PATCH 070/103] Remove wrong validation --- cvat/apps/redis_handler/background.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/cvat/apps/redis_handler/background.py b/cvat/apps/redis_handler/background.py index 7b258d7081a2..6cf5d44ff0c2 100644 --- a/cvat/apps/redis_handler/background.py +++ b/cvat/apps/redis_handler/background.py @@ -345,15 +345,6 @@ def init_callback_with_params(self): self.callback_args = (db_storage, self.callback) + self.callback_args self.callback = export_resource_to_cloud_storage - def validate_request(self): - super().validate_request() - - if ( - self.export_args.location_config.location == Location.CLOUD_STORAGE - and not self.export_args.filename - ): - raise ValidationError("The filename was not specified") - def build_meta(self, *, request_id): return ExportRQMeta.build_for( request=self.request, @@ -370,6 +361,7 @@ def get_downloader(self): request_id = self.request.query_params.get(self.REQUEST_ID_KEY) if not request_id: + # TODO: check response content type raise ValidationError("Missing request id in the query parameters") try: From ac5f8d169bcfb3fca456509f90d9c2d561ee99e5 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 15 Apr 2025 17:58:28 +0200 Subject: [PATCH 071/103] Do not use spaces in rq job ids && fix test_list_requests_when_there_is_job_with_non_regular_or_corrupted_meta --- cvat/apps/dataset_manager/cron.py | 2 +- cvat/apps/dataset_manager/views.py | 2 +- cvat/apps/engine/models.py | 2 +- cvat/apps/redis_handler/rq.py | 27 +++++++++++++------------- tests/python/rest_api/test_requests.py | 4 ++-- 5 files changed, 18 insertions(+), 19 deletions(-) diff --git a/cvat/apps/dataset_manager/cron.py b/cvat/apps/dataset_manager/cron.py index 6ec083bb0447..0bb336af93e4 100644 --- a/cvat/apps/dataset_manager/cron.py +++ b/cvat/apps/dataset_manager/cron.py @@ -42,7 +42,7 @@ def clear_export_cache(file_path: Path) -> bool: if isinstance(parsed_filename.file_id, ConstructedFileId): cache_ttl = get_export_cache_ttl(parsed_filename.file_id.instance_type) else: - cache_ttl = get_export_cache_ttl(None) # use common default cache TTL + cache_ttl = get_export_cache_ttl() # use common default cache TTL if timezone.now().timestamp() <= file_path.stat().st_mtime + cache_ttl.total_seconds(): logger.debug(f"Export cache file {file_path.name!r} was recently accessed") diff --git a/cvat/apps/dataset_manager/views.py b/cvat/apps/dataset_manager/views.py index c1a80906f40e..0204ce7de425 100644 --- a/cvat/apps/dataset_manager/views.py +++ b/cvat/apps/dataset_manager/views.py @@ -59,7 +59,7 @@ def log_exception(logger: logging.Logger | None = None, exc_info: bool = True): EXPORT_LOCKED_RETRY_INTERVAL = timedelta(seconds=settings.EXPORT_LOCKED_RETRY_INTERVAL) -def get_export_cache_ttl(db_instance: str | Project | Task | Job | None) -> timedelta: +def get_export_cache_ttl(db_instance: str | Project | Task | Job | None = None) -> timedelta: if not db_instance: return DEFAULT_CACHE_TTL diff --git a/cvat/apps/engine/models.py b/cvat/apps/engine/models.py index 7a561778ee55..02909b1cc1d0 100644 --- a/cvat/apps/engine/models.py +++ b/cvat/apps/engine/models.py @@ -1262,7 +1262,7 @@ def list(cls): @classmethod def _missing_(cls, value): - raise ValueError(f"The specified location {value} is not supported") + raise ValueError(f"The specified location {value!r} is not supported") class CloudStorage(TimestampedModel): # restrictions: diff --git a/cvat/apps/redis_handler/rq.py b/cvat/apps/redis_handler/rq.py index 66addd08459d..f86d0a539dd1 100644 --- a/cvat/apps/redis_handler/rq.py +++ b/cvat/apps/redis_handler/rq.py @@ -42,11 +42,12 @@ class RequestId: FIELD_SEP: ClassVar[str] = "&" KEY_VAL_SEP: ClassVar[str] = "=" - SPECIAL_CHARS = {FIELD_SEP, KEY_VAL_SEP, "/", "."} ENCODE_MAPPING = { ".": "@", + " ": "__", # one underscore can be used in the queue name } DECODE_MAPPING = {v: k for k, v in ENCODE_MAPPING.items()} + NOT_ALLOWED_CHARS = {FIELD_SEP, KEY_VAL_SEP, "/"} | set(DECODE_MAPPING.keys()) TYPE_SEP: ClassVar[str] = ":" # used in serialization logic @@ -66,26 +67,24 @@ def type(self) -> str: return self.TYPE_SEP.join([self.action, self.target]) def to_dict(self) -> dict[str, Any]: - repr_ = attrs.asdict(self, filter=lambda _, v: bool(v)) - if extra := repr_.pop("extra", None): - repr_.update(extra) - - return repr_ + base = attrs.asdict(self, filter=lambda _, v: bool(v)) + extra_data = base.pop("extra", {}) + return {**base, **extra_data} @classmethod def normalize(cls, repr_: dict[str, Any]) -> None: for key, value in repr_.items(): str_value = str(value) - - for spec_char in cls.SPECIAL_CHARS: - if spec_char in str_value: - if spec_char in cls.ENCODE_MAPPING: - str_value = str_value.replace(spec_char, cls.ENCODE_MAPPING[spec_char]) - continue - + for reserved in cls.NOT_ALLOWED_CHARS: + if reserved in str_value: raise IncorrectRequestIdError( - f"{key} contains special characters: {spec_char!r}" + f"{key} contains special character/sequence of characters: {reserved!r}" ) + + for from_char, to_char in cls.ENCODE_MAPPING.items(): + if from_char in str_value: + str_value = str_value.replace(from_char, to_char) + repr_[key] = str_value def render(self) -> str: diff --git a/tests/python/rest_api/test_requests.py b/tests/python/rest_api/test_requests.py index d9b8a223026c..dcc56d78231b 100644 --- a/tests/python/rest_api/test_requests.py +++ b/tests/python/rest_api/test_requests.py @@ -255,8 +255,8 @@ def test_list_requests_when_there_is_job_with_non_regular_or_corrupted_meta( assert 2 == background_requests.count corrupted_job, normal_job = background_requests.results - - remove_meta_command = f'redis-cli -e HDEL rq:job:{corrupted_job["id"]} meta' + corrupted_job_key = f"rq:job:{corrupted_job['id']}" + remove_meta_command = f'redis-cli -e HDEL "{corrupted_job_key}" meta' if request.config.getoption("--platform") == "local": stdout, _ = docker_exec_redis_inmem(["sh", "-c", remove_meta_command]) From aa8a9bd7342d65203c1414bd121803725632f091 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Thu, 17 Apr 2025 13:42:18 +0200 Subject: [PATCH 072/103] remove queue from ids && split parsed.id to target_id & id && declare supported actions for each queue in settings --- cvat/apps/consensus/merging_manager.py | 3 +- cvat/apps/engine/background.py | 26 +++--- cvat/apps/engine/mixins.py | 3 +- cvat/apps/engine/permissions.py | 2 +- cvat/apps/engine/rq.py | 30 +----- cvat/apps/engine/views.py | 3 +- cvat/apps/events/export.py | 5 +- cvat/apps/lambda_manager/views.py | 2 +- cvat/apps/quality_control/quality_reports.py | 6 +- cvat/apps/redis_handler/apps.py | 28 ++++++ cvat/apps/redis_handler/background.py | 12 ++- cvat/apps/redis_handler/permissions.py | 14 ++- cvat/apps/redis_handler/rq.py | 96 +++++++++++++++----- cvat/apps/redis_handler/views.py | 23 ++--- cvat/settings/base.py | 20 ++-- 15 files changed, 151 insertions(+), 122 deletions(-) diff --git a/cvat/apps/consensus/merging_manager.py b/cvat/apps/consensus/merging_manager.py index 16f9fd898dd0..09014e34c976 100644 --- a/cvat/apps/consensus/merging_manager.py +++ b/cvat/apps/consensus/merging_manager.py @@ -166,10 +166,9 @@ def job_result_ttl(self): def build_request_id(self) -> str: return RequestId( - queue=self.QUEUE_NAME, action="merge", target=self.resource, - id=self.db_instance.pk, + target_id=self.db_instance.pk, ).render() def init_callback_with_params(self): diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index 71ac692bb31b..dd8da3e9e6bc 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -107,10 +107,9 @@ def validate_request(self): def build_request_id(self): return ExportRequestId( - queue=self.QUEUE_NAME, action=RequestAction.EXPORT, target=RequestTarget(self.resource), - id=self.db_instance.pk, + target_id=self.db_instance.pk, user_id=self.user_id, extra={ "subresource": ( @@ -122,14 +121,14 @@ def build_request_id(self): }, ).render() - def validate_request_id(self, request_id, /) -> None: + def validate_request_id(self, request_id, /, queue_name) -> None: # FUTURE-TODO: optimize, request_id is parsed 2 times (first one when checking permissions) - parsed_request_id = ExportRequestId.parse(request_id) + parsed_request_id: ExportRequestId = ExportRequestId.parse(request_id, queue=queue_name) if ( parsed_request_id.action != RequestAction.EXPORT or parsed_request_id.target != RequestTarget(self.resource) - or parsed_request_id.id != self.db_instance.pk + or parsed_request_id.target_id != self.db_instance.pk or parsed_request_id.subresource not in {RequestSubresource.DATASET, RequestSubresource.ANNOTATIONS} ): @@ -193,13 +192,14 @@ class BackupExporter(AbstractExporter): # elif isinstance(self.db_instance, Project) and Data.objects.filter(): # pass - def validate_request_id(self, request_id, /) -> None: - parsed_request_id = ExportRequestId.parse(request_id) + def validate_request_id(self, request_id, /, queue_name) -> None: + # FUTURE-TODO: optimize, request_id is parsed 2 times (first one when checking permissions) + parsed_request_id: ExportRequestId = ExportRequestId.parse(request_id, queue=queue_name) if ( parsed_request_id.action != RequestAction.EXPORT or parsed_request_id.target != RequestTarget(self.resource) - or parsed_request_id.id != self.db_instance.pk + or parsed_request_id.target_id != self.db_instance.pk or parsed_request_id.subresource != RequestSubresource.BACKUP ): raise ValueError("The provided request id does not match exported target or resource") @@ -237,10 +237,9 @@ def get_result_filename(self): def build_request_id(self): return ExportRequestId( - queue=self.QUEUE_NAME, action=RequestAction.EXPORT, target=RequestTarget(self.resource), - id=self.db_instance.pk, + target_id=self.db_instance.pk, user_id=self.user_id, extra={ "subresource": RequestSubresource.BACKUP, @@ -439,10 +438,9 @@ def validate_request(self): def build_request_id(self): return ImportRequestId( - queue=self.QUEUE_NAME, action=RequestAction.IMPORT, target=RequestTarget(self.resource), - id=self.db_instance.pk, + target_id=self.db_instance.pk, extra={ "subresource": ( RequestSubresource.DATASET @@ -487,7 +485,6 @@ def init_request_args(self) -> None: def build_request_id(self): return ImportRequestId( - queue=self.QUEUE_NAME, action=RequestAction.IMPORT, target=self.resource, id=uuid4(), @@ -538,10 +535,9 @@ def job_failure_ttl(self): def build_request_id(self): return RequestId( - queue=self.QUEUE_NAME, action=RequestAction.CREATE, target=RequestTarget.TASK, - id=self.db_instance.pk, + target_id=self.db_instance.pk, ).render() def init_callback_with_params(self): diff --git a/cvat/apps/engine/mixins.py b/cvat/apps/engine/mixins.py index 12d0b8cc13ab..7052a5b30d98 100644 --- a/cvat/apps/engine/mixins.py +++ b/cvat/apps/engine/mixins.py @@ -270,10 +270,9 @@ def init_tus_upload(self, request: ExtendedRequest): # check whether the rq_job is in progress or has been finished/failed object_class_name = self._object.__class__.__name__.lower() template = RequestId( - queue=settings.CVAT_QUEUES.IMPORT_DATA.value, action=RequestAction.IMPORT, target=RequestTarget(object_class_name), - id=self._object.pk, + target_id=self._object.pk, extra={ "subresource": import_type, } diff --git a/cvat/apps/engine/permissions.py b/cvat/apps/engine/permissions.py index e9df673357b9..b870121cf9c5 100644 --- a/cvat/apps/engine/permissions.py +++ b/cvat/apps/engine/permissions.py @@ -56,7 +56,7 @@ class Scopes(StrEnum): def extend_params_with_rq_job_details(*, request: ExtendedRequest, params: dict[str, Any]) -> None: if rq_id := request.query_params.get("rq_id"): try: - params["rq_job_id"] = ExportRequestId.parse(rq_id) + params["rq_job_id"] = ExportRequestId.parse(rq_id)[0] return except Exception: raise ValidationError("Unexpected request id format") diff --git a/cvat/apps/engine/rq.py b/cvat/apps/engine/rq.py index e3f6e4f2712b..48373b7a8e1e 100644 --- a/cvat/apps/engine/rq.py +++ b/cvat/apps/engine/rq.py @@ -5,8 +5,7 @@ from __future__ import annotations from abc import ABCMeta, abstractmethod -from contextlib import suppress -from typing import TYPE_CHECKING, Any, Callable, ClassVar, Optional, Protocol +from typing import TYPE_CHECKING, Any, Callable, Optional, Protocol from django.conf import settings from django.db.models import Model @@ -17,7 +16,7 @@ from rq.registry import BaseRegistry as RQBaseRegistry from cvat.apps.engine.types import ExtendedRequest -from cvat.apps.redis_handler.rq import RequestId +from cvat.apps.redis_handler.rq import RequestId, RequestIdWithOptionalSubresourceMixin if TYPE_CHECKING: from django.contrib.auth.models import User @@ -335,31 +334,6 @@ def format(self) -> str | None: return self.extra.get("format") -class RequestIdWithSubresourceMixin: - TYPE_SEP: ClassVar[str] - - action: str - target: str - extra: dict[str, Any] - - @property - def subresource(self) -> str: - return self.extra["subresource"] - - @property - def type(self) -> str: - return self.TYPE_SEP.join([self.action, self.subresource or self.target]) - - -class RequestIdWithOptionalSubresourceMixin(RequestIdWithSubresourceMixin): - @property - def subresource(self) -> str | None: - with suppress(KeyError): - return super().subresource - - return None - - class ExportRequestId( RequestIdWithOptionalSubresourceMixin, # subresource is optional because export queue works also with events RequestIdWithFormatMixin, diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 56794775e788..73e161ee1921 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -1295,10 +1295,9 @@ def status(self, request, pk): response = self._get_rq_response( queue=settings.CVAT_QUEUES.IMPORT_DATA.value, job_id=RequestId( - queue=settings.CVAT_QUEUES.IMPORT_DATA.value, action=RequestAction.CREATE, target=RequestTarget.TASK, - id=task.id + target_id=task.id ).render() ) serializer = RqStatusSerializer(data=response) diff --git a/cvat/apps/events/export.py b/cvat/apps/events/export.py index fe98dc1b896c..6cd0ef24799e 100644 --- a/cvat/apps/events/export.py +++ b/cvat/apps/events/export.py @@ -100,15 +100,14 @@ def __init__( def build_request_id(self): return RequestId( - queue=self.QUEUE_NAME, action=RequestAction.EXPORT, target=TARGET, id=self.query_id, user_id=self.user_id, ).render() - def validate_request_id(self, request_id, /) -> None: - parsed_request_id = ExportRequestId.parse(request_id) + def validate_request_id(self, request_id, /, queue_name) -> None: + parsed_request_id: ExportRequestId = ExportRequestId.parse(request_id, queue=queue_name) if parsed_request_id.action != RequestAction.EXPORT or parsed_request_id.target != TARGET: raise ValueError("The provided request id does not match exported target") diff --git a/cvat/apps/lambda_manager/views.py b/cvat/apps/lambda_manager/views.py index 2e144a3cca87..e4fd1788f2dd 100644 --- a/cvat/apps/lambda_manager/views.py +++ b/cvat/apps/lambda_manager/views.py @@ -608,7 +608,7 @@ def enqueue( ) -> LambdaJob: queue = self._get_queue() rq_id = RequestId( - queue=queue.name, action=RequestAction.AUTOANNOTATE, target=RequestTarget.TASK, id=task + action=RequestAction.AUTOANNOTATE, target=RequestTarget.TASK, target_id=task ).render() # Ensure that there is no race condition when processing parallel requests. diff --git a/cvat/apps/quality_control/quality_reports.py b/cvat/apps/quality_control/quality_reports.py index deed13603e45..1b136073be6d 100644 --- a/cvat/apps/quality_control/quality_reports.py +++ b/cvat/apps/quality_control/quality_reports.py @@ -50,7 +50,6 @@ User, ValidationMode, ) -from cvat.apps.engine.rq import RequestIdWithSubresourceMixin from cvat.apps.profiler import silk_profile from cvat.apps.quality_control import models from cvat.apps.quality_control.models import ( @@ -59,7 +58,7 @@ AnnotationType, ) from cvat.apps.redis_handler.background import AbstractRequestManager -from cvat.apps.redis_handler.rq import RequestId +from cvat.apps.redis_handler.rq import RequestId, RequestIdWithSubresourceMixin class Serializable: @@ -2276,10 +2275,9 @@ def job_result_ttl(self): def build_request_id(self): return QualityRequestId( - queue=self.QUEUE_NAME, action="calculate", target=self.resource, - id=self.db_instance.pk, + target_id=self.db_instance.pk, extra={"subresource": "quality"}, ).render() diff --git a/cvat/apps/redis_handler/apps.py b/cvat/apps/redis_handler/apps.py index be23d57b5d2b..0880fc82ef4a 100644 --- a/cvat/apps/redis_handler/apps.py +++ b/cvat/apps/redis_handler/apps.py @@ -3,7 +3,34 @@ # SPDX-License-Identifier: MIT +from contextlib import suppress + from django.apps import AppConfig +from django.conf import settings + + +class LayeredKeyDict(dict): + def __getitem__(self, key: str | tuple) -> str: + if isinstance(key, tuple) and (len(key) == 3): # action, target, subresource + with suppress(KeyError): + return self.__getitem__(key[0]) + return self.__getitem__((key[0], key[2])) # (action, subresource) + return super().__getitem__(key) + + +MAPPING = LayeredKeyDict() + + +def initialize_mapping(): + for queue_name, queue_conf in settings.RQ_QUEUES.items(): + if supported_actions := queue_conf.get("SUPPORTED_ACTIONS"): + for action in supported_actions: + if isinstance(action, str): + MAPPING[action] = queue_name + continue + + assert isinstance(action, tuple) + MAPPING[action] = queue_name class RedisHandlerConfig(AppConfig): @@ -13,3 +40,4 @@ def ready(self) -> None: from cvat.apps.iam.permissions import load_app_permissions load_app_permissions(self) + initialize_mapping() diff --git a/cvat/apps/redis_handler/background.py b/cvat/apps/redis_handler/background.py index 6cf5d44ff0c2..6b06e0cad182 100644 --- a/cvat/apps/redis_handler/background.py +++ b/cvat/apps/redis_handler/background.py @@ -95,15 +95,16 @@ def job_failed_ttl(self) -> int | None: @abstractmethod def build_request_id(self): ... - def validate_request_id(self, request_id: str, /) -> None: ... + def validate_request_id(self, request_id: str, /, queue_name: str) -> None: ... def get_job_by_id(self, id_: str, /) -> RQJob | None: + queue = self.get_queue() + try: - self.validate_request_id(id_) + self.validate_request_id(id_, queue_name=queue.name) except Exception: return None - queue = self.get_queue() return queue.fetch_job(id_) def init_request_args(self): @@ -364,9 +365,10 @@ def get_downloader(self): # TODO: check response content type raise ValidationError("Missing request id in the query parameters") + queue = self.get_queue() try: - self.validate_request_id(request_id) + self.validate_request_id(request_id, queue_name=queue.name) except ValueError: raise ValidationError("Invalid export request id") - return self.Downloader(request=self.request, queue=self.get_queue(), request_id=request_id) + return self.Downloader(request=self.request, queue=queue, request_id=request_id) diff --git a/cvat/apps/redis_handler/permissions.py b/cvat/apps/redis_handler/permissions.py index d92ff4920b4b..51b3d047a37a 100644 --- a/cvat/apps/redis_handler/permissions.py +++ b/cvat/apps/redis_handler/permissions.py @@ -46,20 +46,26 @@ def create( job_owner = BaseRQMeta.for_job(obj).user assert job_owner and job_owner.id == parsed_request_id.user_id - elif isinstance(parsed_request_id.id, int): + elif parsed_request_id.target_id is not None: if parsed_request_id.target == RequestTarget.PROJECT.value: permissions.append( - ProjectPermission.create_scope_view(request, parsed_request_id.id) + ProjectPermission.create_scope_view( + request, parsed_request_id.target_id + ) ) continue elif parsed_request_id.target == RequestTarget.TASK.value: permissions.append( - TaskPermission.create_scope_view(request, parsed_request_id.id) + TaskPermission.create_scope_view( + request, parsed_request_id.target_id + ) ) continue elif parsed_request_id.target == RequestTarget.JOB.value: permissions.append( - JobPermission.create_scope_view(request, parsed_request_id.id) + JobPermission.create_scope_view( + request, parsed_request_id.target_id + ) ) continue assert False, "Unsupported operation on resource" diff --git a/cvat/apps/redis_handler/rq.py b/cvat/apps/redis_handler/rq.py index f86d0a539dd1..ff109f2be4ac 100644 --- a/cvat/apps/redis_handler/rq.py +++ b/cvat/apps/redis_handler/rq.py @@ -1,7 +1,9 @@ from __future__ import annotations import urllib.parse -from typing import Any, ClassVar, Protocol +from contextlib import suppress +from types import NoneType +from typing import Any, ClassVar, Protocol, overload from uuid import UUID import attrs @@ -9,17 +11,7 @@ from django.utils.module_loading import import_string from rq.job import Job as RQJob - -def convert_id(value: int | str | UUID) -> int | UUID: - if isinstance(value, (int, UUID)): - return value - - assert isinstance(value, str) - - if value.isnumeric(): - return int(value) - - return UUID(value) +from cvat.apps.redis_handler.apps import MAPPING def convert_extra(value: dict) -> dict[str, Any]: @@ -37,6 +29,31 @@ class IncorrectRequestIdError(ValueError): pass +class RequestIdWithSubresourceMixin: + TYPE_SEP: ClassVar[str] + + action: str + target: str + extra: dict[str, Any] + + @property + def subresource(self) -> str: + return self.extra["subresource"] + + @property + def type(self) -> str: + return self.TYPE_SEP.join([self.action, self.subresource or self.target]) + + +class RequestIdWithOptionalSubresourceMixin(RequestIdWithSubresourceMixin): + @property + def subresource(self) -> str | None: + with suppress(KeyError): + return super().subresource + + return None + + @attrs.frozen(kw_only=True) class RequestId: FIELD_SEP: ClassVar[str] = "&" @@ -44,24 +61,31 @@ class RequestId: ENCODE_MAPPING = { ".": "@", - " ": "__", # one underscore can be used in the queue name + " ": "_", } DECODE_MAPPING = {v: k for k, v in ENCODE_MAPPING.items()} NOT_ALLOWED_CHARS = {FIELD_SEP, KEY_VAL_SEP, "/"} | set(DECODE_MAPPING.keys()) TYPE_SEP: ClassVar[str] = ":" # used in serialization logic - queue: str = attrs.field(validator=attrs.validators.instance_of(str)) action: str = attrs.field(validator=attrs.validators.instance_of(str)) target: str = attrs.field(validator=attrs.validators.instance_of(str)) - id: int | UUID = attrs.field( - validator=attrs.validators.instance_of((int, UUID)), - converter=convert_id, + target_id: int | None = attrs.field( + converter=lambda x: x if x is None else int(x), default=None ) - extra: dict[str, Any] = attrs.field(converter=convert_extra, factory=dict) + id: UUID | None = attrs.field( + converter=lambda x: x if isinstance(x, (NoneType, UUID)) else UUID(x), + default=None, + ) # operation id + extra: dict[str, str] = attrs.field(converter=convert_extra, factory=dict) user_id: int | None = attrs.field(converter=lambda x: x if x is None else int(x), default=None) + def __attrs_post_init__(self): + assert ( + sum(1 for i in (self.target_id, self.id) if i) == 1 + ), "Only one of target_id or id should be set" + @property def type(self) -> str: return self.TYPE_SEP.join([self.action, self.target]) @@ -77,9 +101,7 @@ def normalize(cls, repr_: dict[str, Any]) -> None: str_value = str(value) for reserved in cls.NOT_ALLOWED_CHARS: if reserved in str_value: - raise IncorrectRequestIdError( - f"{key} contains special character/sequence of characters: {reserved!r}" - ) + raise IncorrectRequestIdError(f"{key} contains special character: {reserved!r}") for from_char, to_char in cls.ENCODE_MAPPING.items(): if from_char in str_value: @@ -100,7 +122,20 @@ def render(self) -> str: return self.FIELD_SEP.join([f"{k}{self.KEY_VAL_SEP}{v}" for k, v in rq_id_repr.items()]) @classmethod - def parse(cls, request_id: str, /): + @overload + def parse(cls, request_id: str, /, *, queue: str) -> RequestId: ... + + @classmethod + @overload + def parse(cls, request_id: str, /, *, queue: None = None) -> tuple[RequestId, str]: ... + + @classmethod + def parse( + cls, request_id: str, /, *, queue: str | None = None + ) -> RequestId | tuple[RequestId, str]: + class _RequestIdForMapping(RequestIdWithOptionalSubresourceMixin, RequestId): + pass + try: common_keys = set(attrs.fields_dict(cls).keys()) - {"extra"} params = {} @@ -115,11 +150,22 @@ def parse(cls, request_id: str, /): else: params.setdefault("extra", {})[key] = value - if custom_cls_path := settings.RQ_QUEUES[params["queue"]].get("PARSED_JOB_ID_CLASS"): + queue_is_known = bool(queue) + + if not queue_is_known: + _parsed = _RequestIdForMapping(**params) + queue = MAPPING[(_parsed.action, _parsed.target, _parsed.subresource)] + + if custom_cls_path := settings.RQ_QUEUES[queue].get("PARSED_JOB_ID_CLASS"): custom_cls = import_string(custom_cls_path) - return custom_cls(**params) + result = custom_cls(**params) + else: + result = cls(**params) + + if not queue_is_known: + result = (result, queue) - return cls(**params) + return result except Exception as ex: raise IncorrectRequestIdError from ex diff --git a/cvat/apps/redis_handler/views.py b/cvat/apps/redis_handler/views.py index 3aae4c6d43ba..ab957e0e1f70 100644 --- a/cvat/apps/redis_handler/views.py +++ b/cvat/apps/redis_handler/views.py @@ -13,7 +13,6 @@ from redis.exceptions import ConnectionError as RedisConnectionError from rest_framework import status, viewsets from rest_framework.decorators import action -from rest_framework.exceptions import ValidationError from rest_framework.response import Response from rq.job import Job as RQJob from rq.job import JobStatus as RQJobStatus @@ -27,6 +26,7 @@ from cvat.apps.engine.models import RequestStatus # todo: move to the app from cvat.apps.engine.rq import is_rq_job_owner from cvat.apps.engine.types import ExtendedRequest +from cvat.apps.redis_handler.apps import MAPPING from cvat.apps.redis_handler.rq import CustomRQJob, RequestId from cvat.apps.redis_handler.serializers import RequestSerializer @@ -49,12 +49,6 @@ ), ) class RequestViewSet(viewsets.GenericViewSet): - SUPPORTED_QUEUES = { - queue_name - for queue_name, queue_conf in settings.RQ_QUEUES.items() - if queue_conf.get("VISIBLE_VIA_REQUESTS_API") - } - serializer_class = RequestSerializer iam_organization_field = None filter_backends = [ @@ -114,7 +108,7 @@ def get_queryset(self): @property def queues(self) -> Iterable[DjangoRQ]: - return (django_rq.get_queue(queue_name) for queue_name in self.SUPPORTED_QUEUES) + return (django_rq.get_queue(queue_name) for queue_name in set(MAPPING.values())) def _get_rq_jobs_from_queue(self, queue: DjangoRQ, user_id: int) -> list[RQJob]: job_ids = set( @@ -130,7 +124,7 @@ def _get_rq_jobs_from_queue(self, queue: DjangoRQ, user_id: int) -> list[RQJob]: if job and is_rq_job_owner(job, user_id): job = cast(CustomRQJob, job) try: - parsed_request_id = RequestId.parse(job.id) + parsed_request_id = RequestId.parse(job.id, queue=queue.name) except Exception: # nosec B112 continue @@ -167,18 +161,13 @@ def _get_rq_job_by_id(self, rq_id: str) -> RQJob | None: Optional[RQJob]: The retrieved RQJob, or None if not found. """ try: - parsed_request_id = RequestId.parse(rq_id) + parsed_request_id, queue_name = RequestId.parse(rq_id) except Exception: return None - job: CustomRQJob | None = None - - if parsed_request_id.queue not in self.SUPPORTED_QUEUES: - raise ValidationError("Unsupported queue") - - queue: DjangoRQ = django_rq.get_queue(parsed_request_id.queue) + queue: DjangoRQ = django_rq.get_queue(queue_name) + job: CustomRQJob | None = queue.fetch_job(rq_id) - job = queue.fetch_job(rq_id) if job: job.parsed_id = parsed_request_id diff --git a/cvat/settings/base.py b/cvat/settings/base.py index 836c8816f6e7..8b8b3fad03f4 100644 --- a/cvat/settings/base.py +++ b/cvat/settings/base.py @@ -296,58 +296,52 @@ class CVAT_QUEUES(Enum): **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "4h", # custom fields - "VISIBLE_VIA_REQUESTS_API": True, "PARSED_JOB_ID_CLASS": "cvat.apps.engine.rq.ImportRequestId", + "SUPPORTED_ACTIONS": ["create", "import"], }, CVAT_QUEUES.EXPORT_DATA.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "4h", # custom fields - "VISIBLE_VIA_REQUESTS_API": True, "PARSED_JOB_ID_CLASS": "cvat.apps.engine.rq.ExportRequestId", + "SUPPORTED_ACTIONS": ["export"], }, CVAT_QUEUES.AUTO_ANNOTATION.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "24h", # custom fields - "VISIBLE_VIA_REQUESTS_API": True, + "SUPPORTED_ACTIONS": ["autoannotate"], }, CVAT_QUEUES.WEBHOOKS.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "1h", - # custom fields - "VISIBLE_VIA_REQUESTS_API": False, }, CVAT_QUEUES.NOTIFICATIONS.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "1h", - # custom fields - "VISIBLE_VIA_REQUESTS_API": False, }, CVAT_QUEUES.QUALITY_REPORTS.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "1h", # custom fields - "VISIBLE_VIA_REQUESTS_API": True, "PARSED_JOB_ID_CLASS": "cvat.apps.quality_control.quality_reports.QualityRequestId", + "SUPPORTED_ACTIONS": [ + ("calculate", "quality"), + ], }, CVAT_QUEUES.CLEANING.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "2h", - # custom fields - "VISIBLE_VIA_REQUESTS_API": False, }, CVAT_QUEUES.CHUNKS.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "5m", - # custom fields - "VISIBLE_VIA_REQUESTS_API": False, }, CVAT_QUEUES.CONSENSUS.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "1h", # custom fields - "VISIBLE_VIA_REQUESTS_API": True, + "SUPPORTED_ACTIONS": ["merge"], }, } From 9044c0091b91e87fb79aef9553bc61b2e757f181 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Fri, 18 Apr 2025 14:14:56 +0200 Subject: [PATCH 073/103] [consensus] drop outdated permissions --- cvat/apps/consensus/permissions.py | 31 +++---------------- .../consensus/rules/consensus_merges.rego | 7 +---- 2 files changed, 5 insertions(+), 33 deletions(-) diff --git a/cvat/apps/consensus/permissions.py b/cvat/apps/consensus/permissions.py index 3eb24f59cb09..9c160d4034fc 100644 --- a/cvat/apps/consensus/permissions.py +++ b/cvat/apps/consensus/permissions.py @@ -16,43 +16,25 @@ class ConsensusMergePermission(OpenPolicyAgentPermission): - rq_job_owner_id: int | None task_id: int | None class Scopes(StrEnum): CREATE = "create" - VIEW_STATUS = "view:status" @classmethod - def create_scope_check_status( - cls, request: ExtendedRequest, rq_job_owner_id: int, iam_context=None - ): - if not iam_context and request: - iam_context = get_iam_context(request, None) - return cls(**iam_context, scope=cls.Scopes.VIEW_STATUS, rq_job_owner_id=rq_job_owner_id) - - @classmethod - def create(cls, request, view, obj, iam_context): + def create(cls, request: ExtendedRequest, view, obj, iam_context): Scopes = __class__.Scopes permissions = [] if view.basename == "consensus_merges": for scope in cls.get_scopes(request, view, obj): if scope == Scopes.CREATE: - # Note: POST /api/consensus/merges is used to initiate report creation - # and to check the operation status - rq_id = request.query_params.get("rq_id") + # FUTURE-FIXME: use serializers for validation task_id = request.data.get("task_id") job_id = request.data.get("job_id") - if not (task_id or job_id or rq_id): - raise PermissionDenied( - "Either task_id or job_id or rq_id must be specified" - ) - - if rq_id: - # There will be another check for this case during request processing - continue + if not (task_id or job_id): + raise PermissionDenied("Either task_id or job_id must be specified") # merge is always at least at the task level, even for specific jobs if task_id is not None or job_id is not None: @@ -90,9 +72,6 @@ def create(cls, request, view, obj, iam_context): return permissions def __init__(self, **kwargs): - if "rq_job_owner_id" in kwargs: - self.rq_job_owner_id = int(kwargs.pop("rq_job_owner_id")) - super().__init__(**kwargs) self.url = settings.IAM_OPA_DATA_URL + "/consensus_merges/allow" @@ -143,8 +122,6 @@ def get_resource(self): else None ), } - elif self.scope == self.Scopes.VIEW_STATUS: - data = {"owner": {"id": self.rq_job_owner_id}} return data diff --git a/cvat/apps/consensus/rules/consensus_merges.rego b/cvat/apps/consensus/rules/consensus_merges.rego index 113ff7885595..ef0618ece2b7 100644 --- a/cvat/apps/consensus/rules/consensus_merges.rego +++ b/cvat/apps/consensus/rules/consensus_merges.rego @@ -7,7 +7,7 @@ import data.organizations import data.quality_utils # input: { -# "scope": <"create"|"view"|"view:status"|"list"> or null, +# "scope": <"create"|"view"|"list"> or null, # "auth": { # "user": { # "id": , @@ -57,11 +57,6 @@ allow if { organizations.is_member } -allow if { - input.scope == utils.VIEW_STATUS - utils.is_resource_owner -} - allow if { input.scope in {utils.CREATE, utils.VIEW} utils.is_sandbox From d041f588afa50beda2245fde441179cbcd44c475 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Fri, 18 Apr 2025 14:20:55 +0200 Subject: [PATCH 074/103] Support legacy requests formats --- cvat/apps/engine/background.py | 10 +- cvat/apps/engine/permissions.py | 5 +- cvat/apps/engine/rq.py | 13 ++- cvat/apps/engine/views.py | 6 +- cvat/apps/events/export.py | 8 +- cvat/apps/quality_control/permissions.py | 2 + cvat/apps/quality_control/quality_reports.py | 15 ++- cvat/apps/redis_handler/apps.py | 18 +++- cvat/apps/redis_handler/rq.py | 107 +++++++++++++++---- cvat/apps/redis_handler/views.py | 8 +- 10 files changed, 148 insertions(+), 44 deletions(-) diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index dd8da3e9e6bc..c0e7d4f208b2 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -123,7 +123,9 @@ def build_request_id(self): def validate_request_id(self, request_id, /, queue_name) -> None: # FUTURE-TODO: optimize, request_id is parsed 2 times (first one when checking permissions) - parsed_request_id: ExportRequestId = ExportRequestId.parse(request_id, queue=queue_name) + parsed_request_id: ExportRequestId = ExportRequestId.parse( + request_id, queue=queue_name, try_legacy_format=True + ) if ( parsed_request_id.action != RequestAction.EXPORT @@ -194,7 +196,9 @@ class BackupExporter(AbstractExporter): def validate_request_id(self, request_id, /, queue_name) -> None: # FUTURE-TODO: optimize, request_id is parsed 2 times (first one when checking permissions) - parsed_request_id: ExportRequestId = ExportRequestId.parse(request_id, queue=queue_name) + parsed_request_id: ExportRequestId = ExportRequestId.parse( + request_id, queue=queue_name, try_legacy_format=True + ) if ( parsed_request_id.action != RequestAction.EXPORT @@ -534,7 +538,7 @@ def job_failure_ttl(self): return int(settings.IMPORT_CACHE_FAILED_TTL.total_seconds()) def build_request_id(self): - return RequestId( + return ImportRequestId( action=RequestAction.CREATE, target=RequestTarget.TASK, target_id=self.db_instance.pk, diff --git a/cvat/apps/engine/permissions.py b/cvat/apps/engine/permissions.py index b870121cf9c5..f3fefa133435 100644 --- a/cvat/apps/engine/permissions.py +++ b/cvat/apps/engine/permissions.py @@ -54,9 +54,12 @@ class Scopes(StrEnum): @staticmethod def extend_params_with_rq_job_details(*, request: ExtendedRequest, params: dict[str, Any]) -> None: + # prevent importing from partially initialized module + from cvat.apps.redis_handler.background import AbstractExporter + if rq_id := request.query_params.get("rq_id"): try: - params["rq_job_id"] = ExportRequestId.parse(rq_id)[0] + params["rq_job_id"] = ExportRequestId.parse(rq_id, queue=AbstractExporter.QUEUE_NAME, try_legacy_format=True) return except Exception: raise ValidationError("Unexpected request id format") diff --git a/cvat/apps/engine/rq.py b/cvat/apps/engine/rq.py index 48373b7a8e1e..5093a27dd8e4 100644 --- a/cvat/apps/engine/rq.py +++ b/cvat/apps/engine/rq.py @@ -339,7 +339,12 @@ class ExportRequestId( RequestIdWithFormatMixin, RequestId, ): - pass + LEGACY_FORMAT_PATTERNS = ( + r"export:(?P(task|project))-(?P[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12})" + + r"-(?P(backup))-by-(?P\d+)", + r"export:(?P(project|task|job))-(?P\d+)-(?P(annotations|dataset))" + + r"-in-(?P[\w@]+)-format-by-(?P\d+)", + ) class ImportRequestId( @@ -347,7 +352,11 @@ class ImportRequestId( RequestIdWithFormatMixin, RequestId, ): - pass + LEGACY_FORMAT_PATTERNS = ( + r"create:task-(?P\d+)", + r"import:(?P(task|project|job))-(?P\d+)-(?P(annotations|dataset))", + r"import:(?P(task|project))-(?P[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12})-(?P(backup))", + ) def define_dependent_job( diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 73e161ee1921..deaa7d739909 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -95,7 +95,7 @@ UserPermission, get_iam_context, ) -from cvat.apps.engine.rq import ImportRQMeta, RequestId, RQMetaWithFailureInfo +from cvat.apps.engine.rq import ImportRequestId, ImportRQMeta, RQMetaWithFailureInfo from cvat.apps.engine.serializers import ( AboutSerializer, AnnotationFileSerializer, @@ -814,7 +814,7 @@ def get_queryset(self): serializer_class=None, parser_classes=_UPLOAD_PARSER_CLASSES) def import_backup(self, request: ExtendedRequest): - if request.query_params.get("rq_id"): # permissions? + if request.query_params.get("rq_id"): return get_410_response_when_checking_process_status("import") return self.upload_data(request) @@ -1294,7 +1294,7 @@ def status(self, request, pk): task = self.get_object() # force call of check_object_permissions() response = self._get_rq_response( queue=settings.CVAT_QUEUES.IMPORT_DATA.value, - job_id=RequestId( + job_id=ImportRequestId( action=RequestAction.CREATE, target=RequestTarget.TASK, target_id=task.id diff --git a/cvat/apps/events/export.py b/cvat/apps/events/export.py index 6cd0ef24799e..9d1b77e6cf05 100644 --- a/cvat/apps/events/export.py +++ b/cvat/apps/events/export.py @@ -24,7 +24,6 @@ from cvat.apps.engine.view_utils import DeprecatedResponse from cvat.apps.events.permissions import EventsPermission from cvat.apps.redis_handler.background import AbstractExporter -from cvat.apps.redis_handler.rq import RequestId slogger = ServerLogManager(__name__) @@ -99,7 +98,7 @@ def __init__( self.query_id = uuid.uuid4() def build_request_id(self): - return RequestId( + return ExportRequestId( action=RequestAction.EXPORT, target=TARGET, id=self.query_id, @@ -107,7 +106,10 @@ def build_request_id(self): ).render() def validate_request_id(self, request_id, /, queue_name) -> None: - parsed_request_id: ExportRequestId = ExportRequestId.parse(request_id, queue=queue_name) + parsed_request_id: ExportRequestId = ExportRequestId.parse( + request_id, + queue=queue_name, # try_legacy_format is not set here since deprecated API accepts query_id, not the whole Request ID + ) if parsed_request_id.action != RequestAction.EXPORT or parsed_request_id.target != TARGET: raise ValueError("The provided request id does not match exported target") diff --git a/cvat/apps/quality_control/permissions.py b/cvat/apps/quality_control/permissions.py index 39af9fe3649c..c95de30d37c2 100644 --- a/cvat/apps/quality_control/permissions.py +++ b/cvat/apps/quality_control/permissions.py @@ -62,7 +62,9 @@ def create(cls, request, view, obj, iam_context): permissions.append(TaskPermission.create_scope_view(request, task=obj)) elif scope == Scopes.CREATE: # Note: POST /api/quality/reports is used to initiate report creation and to check the process status + # FUTURE-TODO: delete after several releases rq_id = request.query_params.get("rq_id") + # FUTURE-FIXME: use serializers for validation task_id = request.data.get("task_id") if not (task_id or rq_id): diff --git a/cvat/apps/quality_control/quality_reports.py b/cvat/apps/quality_control/quality_reports.py index 1b136073be6d..840a8620f546 100644 --- a/cvat/apps/quality_control/quality_reports.py +++ b/cvat/apps/quality_control/quality_reports.py @@ -2262,7 +2262,10 @@ def generate_report(self) -> ComparisonReport: class QualityRequestId(RequestIdWithSubresourceMixin, RequestId): - pass + LEGACY_FORMAT_PATTERNS = ( + r"quality-check-(?P(task))-(?P\d+)-user-(\d+)", # user id is excluded in the new format + ) + LEGACY_FORMAT_EXTRA = (("subresource", "quality"), ("action", "calculate")) class QualityReportRQJobManager(AbstractRequestManager): @@ -2273,6 +2276,16 @@ class QualityReportRQJobManager(AbstractRequestManager): def job_result_ttl(self): return 120 + def get_job_by_id(self, id_, /): + try: + id_ = QualityRequestId.parse( + id_, queue=self.QUEUE_NAME, try_legacy_format=True + ).render() + except ValueError: + raise ValidationError("Provider request ID is invalid") + + return super().get_job_by_id(id_) + def build_request_id(self): return QualityRequestId( action="calculate", diff --git a/cvat/apps/redis_handler/apps.py b/cvat/apps/redis_handler/apps.py index 0880fc82ef4a..621ef693bbc0 100644 --- a/cvat/apps/redis_handler/apps.py +++ b/cvat/apps/redis_handler/apps.py @@ -18,19 +18,27 @@ def __getitem__(self, key: str | tuple) -> str: return super().__getitem__(key) -MAPPING = LayeredKeyDict() +ACTION_TO_QUEUE = LayeredKeyDict() +QUEUE_TO_PARSED_JOB_ID_CLS = {} +PARSED_JOB_ID_CLS_TO_QUEUE = {} -def initialize_mapping(): +def initialize_mappings(): for queue_name, queue_conf in settings.RQ_QUEUES.items(): + # initialize ACTION_TO_QUEUE mapping if supported_actions := queue_conf.get("SUPPORTED_ACTIONS"): for action in supported_actions: if isinstance(action, str): - MAPPING[action] = queue_name + ACTION_TO_QUEUE[action] = queue_name continue assert isinstance(action, tuple) - MAPPING[action] = queue_name + ACTION_TO_QUEUE[action] = queue_name + + # initialize QUEUE_TO_PARSED_JOB_ID_CLS/PARSED_JOB_ID_CLS_TO_QUEUE mappings + if parsed_job_id_cls := queue_conf.get("PARSED_JOB_ID_CLASS"): + QUEUE_TO_PARSED_JOB_ID_CLS[queue_name] = parsed_job_id_cls + PARSED_JOB_ID_CLS_TO_QUEUE[parsed_job_id_cls] = queue_name class RedisHandlerConfig(AppConfig): @@ -40,4 +48,4 @@ def ready(self) -> None: from cvat.apps.iam.permissions import load_app_permissions load_app_permissions(self) - initialize_mapping() + initialize_mappings() diff --git a/cvat/apps/redis_handler/rq.py b/cvat/apps/redis_handler/rq.py index ff109f2be4ac..6b9e63cb59fe 100644 --- a/cvat/apps/redis_handler/rq.py +++ b/cvat/apps/redis_handler/rq.py @@ -1,5 +1,6 @@ from __future__ import annotations +import re import urllib.parse from contextlib import suppress from types import NoneType @@ -7,11 +8,16 @@ from uuid import UUID import attrs -from django.conf import settings +from django.core.exceptions import ImproperlyConfigured +from django.utils.html import escape from django.utils.module_loading import import_string from rq.job import Job as RQJob -from cvat.apps.redis_handler.apps import MAPPING +from cvat.apps.redis_handler.apps import ( + ACTION_TO_QUEUE, + PARSED_JOB_ID_CLS_TO_QUEUE, + QUEUE_TO_PARSED_JOB_ID_CLS, +) def convert_extra(value: dict) -> dict[str, Any]: @@ -68,6 +74,11 @@ class RequestId: TYPE_SEP: ClassVar[str] = ":" # used in serialization logic + # FUTURE-TODO: remove after several releases + # backward compatibility with previous ID formats + LEGACY_FORMAT_PATTERNS: ClassVar[tuple[str]] = () + LEGACY_FORMAT_EXTRA: tuple[tuple[str, str]] = () + action: str = attrs.field(validator=attrs.validators.instance_of(str)) target: str = attrs.field(validator=attrs.validators.instance_of(str)) target_id: int | None = attrs.field( @@ -123,49 +134,99 @@ def render(self) -> str: @classmethod @overload - def parse(cls, request_id: str, /, *, queue: str) -> RequestId: ... + def parse( + cls, request_id: str, /, *, queue: str, try_legacy_format: bool = False + ) -> RequestId: ... @classmethod @overload - def parse(cls, request_id: str, /, *, queue: None = None) -> tuple[RequestId, str]: ... + def parse( + cls, request_id: str, /, *, queue: None = None, try_legacy_format: bool = False + ) -> tuple[RequestId, str]: ... @classmethod def parse( - cls, request_id: str, /, *, queue: str | None = None + cls, + request_id: str, + /, + *, + queue: str | None = None, + try_legacy_format: bool = False, ) -> RequestId | tuple[RequestId, str]: class _RequestIdForMapping(RequestIdWithOptionalSubresourceMixin, RequestId): pass - try: - common_keys = set(attrs.fields_dict(cls).keys()) - {"extra"} - params = {} + queue_provided = bool(queue) + is_new_format = True + common_keys = set(attrs.fields_dict(cls).keys()) - {"extra"} + dict_repr = {} + fragments = {} + + actual_cls = cls - for key, value in dict(urllib.parse.parse_qsl(request_id)).items(): + try: + # try to parse ID as key=value pairs (newly introduced format) + fragments = dict(urllib.parse.parse_qsl(request_id)) + + if not fragments: + # try to use legacy format + if not try_legacy_format: + raise IncorrectRequestIdError( + f"Unable to parse request ID: {escape(request_id)!r}" + ) + + match: re.Match | None = None + subclasses = (cls,) if cls is not RequestId else RequestId.__subclasses__() + + for subclass in subclasses: + for pattern in subclass.LEGACY_FORMAT_PATTERNS: + match = re.match(pattern, request_id) + if match: + break + if match: + break + + if not match: + raise IncorrectRequestIdError( + f"Unable to parse request ID: {escape(request_id)!r}" + ) + + is_new_format = False + fragments = {**match.groupdict(), **dict(subclass.LEGACY_FORMAT_EXTRA)} + + queue = PARSED_JOB_ID_CLS_TO_QUEUE.get(f"{subclass.__module__}.{subclass.__name__}") + if not queue: + raise ImproperlyConfigured( + "Job ID class must be set in the related queue config" + ) + actual_cls = subclass + + # init dict representation for request ID + for key, value in fragments.items(): for from_char, to_char in cls.DECODE_MAPPING.items(): if from_char in value: value = value.replace(from_char, to_char) if key in common_keys: - params[key] = value + dict_repr[key] = value else: - params.setdefault("extra", {})[key] = value + dict_repr.setdefault("extra", {})[key] = value - queue_is_known = bool(queue) + if is_new_format: + # try to define queue dynamically based on action/target/subresource + if not queue_provided: + _parsed = _RequestIdForMapping(**dict_repr) + queue = ACTION_TO_QUEUE[(_parsed.action, _parsed.target, _parsed.subresource)] - if not queue_is_known: - _parsed = _RequestIdForMapping(**params) - queue = MAPPING[(_parsed.action, _parsed.target, _parsed.subresource)] + if actual_cls_path := QUEUE_TO_PARSED_JOB_ID_CLS.get(queue): + actual_cls = import_string(actual_cls_path) - if custom_cls_path := settings.RQ_QUEUES[queue].get("PARSED_JOB_ID_CLASS"): - custom_cls = import_string(custom_cls_path) - result = custom_cls(**params) - else: - result = cls(**params) + result = actual_cls(**dict_repr) - if not queue_is_known: - result = (result, queue) + return (result, queue) if not queue_provided else result - return result + except ImproperlyConfigured: + raise except Exception as ex: raise IncorrectRequestIdError from ex diff --git a/cvat/apps/redis_handler/views.py b/cvat/apps/redis_handler/views.py index ab957e0e1f70..1d93990b447f 100644 --- a/cvat/apps/redis_handler/views.py +++ b/cvat/apps/redis_handler/views.py @@ -26,7 +26,7 @@ from cvat.apps.engine.models import RequestStatus # todo: move to the app from cvat.apps.engine.rq import is_rq_job_owner from cvat.apps.engine.types import ExtendedRequest -from cvat.apps.redis_handler.apps import MAPPING +from cvat.apps.redis_handler.apps import ACTION_TO_QUEUE from cvat.apps.redis_handler.rq import CustomRQJob, RequestId from cvat.apps.redis_handler.serializers import RequestSerializer @@ -108,7 +108,7 @@ def get_queryset(self): @property def queues(self) -> Iterable[DjangoRQ]: - return (django_rq.get_queue(queue_name) for queue_name in set(MAPPING.values())) + return (django_rq.get_queue(queue_name) for queue_name in set(ACTION_TO_QUEUE.values())) def _get_rq_jobs_from_queue(self, queue: DjangoRQ, user_id: int) -> list[RQJob]: job_ids = set( @@ -161,7 +161,9 @@ def _get_rq_job_by_id(self, rq_id: str) -> RQJob | None: Optional[RQJob]: The retrieved RQJob, or None if not found. """ try: - parsed_request_id, queue_name = RequestId.parse(rq_id) + parsed_request_id, queue_name = RequestId.parse(rq_id, try_legacy_format=True) + # TODO: return flag that legacy format is used + rq_id = parsed_request_id.render() except Exception: return None From a368dba4da1432674c23fe1d061ccbb3be87a336 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Fri, 18 Apr 2025 14:31:27 +0200 Subject: [PATCH 075/103] pylint && typo --- cvat/apps/engine/background.py | 1 - cvat/apps/quality_control/quality_reports.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index c0e7d4f208b2..8f99699f6aa5 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -58,7 +58,6 @@ ) from cvat.apps.events.handlers import handle_dataset_export, handle_dataset_import from cvat.apps.redis_handler.background import AbstractExporter, AbstractRequestManager -from cvat.apps.redis_handler.rq import RequestId slogger = ServerLogManager(__name__) diff --git a/cvat/apps/quality_control/quality_reports.py b/cvat/apps/quality_control/quality_reports.py index 840a8620f546..9c5dd0c8d2b5 100644 --- a/cvat/apps/quality_control/quality_reports.py +++ b/cvat/apps/quality_control/quality_reports.py @@ -2282,7 +2282,7 @@ def get_job_by_id(self, id_, /): id_, queue=self.QUEUE_NAME, try_legacy_format=True ).render() except ValueError: - raise ValidationError("Provider request ID is invalid") + raise ValidationError("Provided request ID is invalid") return super().get_job_by_id(id_) From f9525a1f4883baada73bb8882805cd22b9c7e622 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Mon, 21 Apr 2025 13:24:14 +0200 Subject: [PATCH 076/103] resource -> target --- cvat/apps/consensus/merging_manager.py | 4 +- cvat/apps/engine/background.py | 44 ++++++++++---------- cvat/apps/engine/views.py | 4 +- cvat/apps/quality_control/quality_reports.py | 4 +- cvat/apps/redis_handler/background.py | 10 ++--- 5 files changed, 33 insertions(+), 33 deletions(-) diff --git a/cvat/apps/consensus/merging_manager.py b/cvat/apps/consensus/merging_manager.py index 09014e34c976..425a01b0b9cf 100644 --- a/cvat/apps/consensus/merging_manager.py +++ b/cvat/apps/consensus/merging_manager.py @@ -158,7 +158,7 @@ class MergingNotAvailable(Exception): class MergingManager(AbstractRequestManager): QUEUE_NAME = settings.CVAT_QUEUES.CONSENSUS.value - SUPPORTED_RESOURCES = {RequestTarget.TASK, RequestTarget.JOB} + SUPPORTED_TARGETS = {RequestTarget.TASK, RequestTarget.JOB} @property def job_result_ttl(self): @@ -167,7 +167,7 @@ def job_result_ttl(self): def build_request_id(self) -> str: return RequestId( action="merge", - target=self.resource, + target=self.target, target_id=self.db_instance.pk, ).render() diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index 8f99699f6aa5..4d4e5f007ac6 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -75,7 +75,7 @@ def cancel_and_delete(rq_job: RQJob) -> None: class DatasetExporter(AbstractExporter): - SUPPORTED_RESOURCES = {RequestTarget.PROJECT, RequestTarget.TASK, RequestTarget.JOB} + SUPPORTED_TARGETS = {RequestTarget.PROJECT, RequestTarget.TASK, RequestTarget.JOB} @dataclass class ExportArgs(AbstractExporter.ExportArgs): @@ -107,7 +107,7 @@ def validate_request(self): def build_request_id(self): return ExportRequestId( action=RequestAction.EXPORT, - target=RequestTarget(self.resource), + target=RequestTarget(self.target), target_id=self.db_instance.pk, user_id=self.user_id, extra={ @@ -128,12 +128,12 @@ def validate_request_id(self, request_id, /, queue_name) -> None: if ( parsed_request_id.action != RequestAction.EXPORT - or parsed_request_id.target != RequestTarget(self.resource) + or parsed_request_id.target != RequestTarget(self.target) or parsed_request_id.target_id != self.db_instance.pk or parsed_request_id.subresource not in {RequestSubresource.DATASET, RequestSubresource.ANNOTATIONS} ): - raise ValueError("The provided request id does not match exported target or resource") + raise ValueError("The provided request id does not match exported target or subresource") def _init_callback_with_params(self): self.callback = get_export_callback( @@ -166,7 +166,7 @@ def get_result_filename(self) -> str: if not filename: timestamp = self.get_file_timestamp() filename = build_annotations_file_name( - class_name=self.resource, + class_name=self.target, identifier=self.db_instance.pk, timestamp=timestamp, format_name=self.export_args.format, @@ -178,12 +178,12 @@ def get_result_filename(self) -> str: def where_to_redirect(self) -> str: return reverse( - f"{self.resource}-download-dataset", args=[self.db_instance.pk], request=self.request + f"{self.target}-download-dataset", args=[self.db_instance.pk], request=self.request ) class BackupExporter(AbstractExporter): - SUPPORTED_RESOURCES = {RequestTarget.PROJECT, RequestTarget.TASK} + SUPPORTED_TARGETS = {RequestTarget.PROJECT, RequestTarget.TASK} # def validate_request(self): # super().validate_request() @@ -201,11 +201,11 @@ def validate_request_id(self, request_id, /, queue_name) -> None: if ( parsed_request_id.action != RequestAction.EXPORT - or parsed_request_id.target != RequestTarget(self.resource) + or parsed_request_id.target != RequestTarget(self.target) or parsed_request_id.target_id != self.db_instance.pk or parsed_request_id.subresource != RequestSubresource.BACKUP ): - raise ValueError("The provided request id does not match exported target or resource") + raise ValueError("The provided request id does not match exported target or subresource") def _init_callback_with_params(self): self.callback = create_backup @@ -231,7 +231,7 @@ def get_result_filename(self): instance_timestamp = self.get_file_timestamp() filename = build_backup_file_name( - class_name=self.resource, + class_name=self.target, identifier=self.db_instance.name, timestamp=instance_timestamp, ) @@ -241,7 +241,7 @@ def get_result_filename(self): def build_request_id(self): return ExportRequestId( action=RequestAction.EXPORT, - target=RequestTarget(self.resource), + target=RequestTarget(self.target), target_id=self.db_instance.pk, user_id=self.user_id, extra={ @@ -251,7 +251,7 @@ def build_request_id(self): def where_to_redirect(self) -> str: return reverse( - f"{self.resource}-download-backup", args=[self.db_instance.pk], request=self.request + f"{self.target}-download-backup", args=[self.db_instance.pk], request=self.request ) def finalize_request(self): @@ -371,7 +371,7 @@ def init_callback_with_params(self): class DatasetImporter(ResourceImporter): - SUPPORTED_RESOURCES = {RequestTarget.PROJECT, RequestTarget.TASK, RequestTarget.JOB} + SUPPORTED_TARGETS = {RequestTarget.PROJECT, RequestTarget.TASK, RequestTarget.JOB} @dataclass class ImportArgs(ResourceImporter.ImportArgs): @@ -442,7 +442,7 @@ def validate_request(self): def build_request_id(self): return ImportRequestId( action=RequestAction.IMPORT, - target=RequestTarget(self.resource), + target=RequestTarget(self.target), target_id=self.db_instance.pk, extra={ "subresource": ( @@ -462,7 +462,7 @@ def finalize_request(self): class BackupImporter(ResourceImporter): - SUPPORTED_RESOURCES = {RequestTarget.PROJECT, RequestTarget.TASK} + SUPPORTED_TARGETS = {RequestTarget.PROJECT, RequestTarget.TASK} @dataclass class ImportArgs(ResourceImporter.ImportArgs): @@ -472,11 +472,11 @@ def __init__( self, *, request: ExtendedRequest, - resource: RequestTarget, + target: RequestTarget, ): super().__init__(request=request, db_instance=None, tmp_dir=Path(TmpDirManager.TMP_ROOT)) - assert resource in self.SUPPORTED_RESOURCES, f"Unsupported resource: {resource}" - self.resource = resource + assert target in self.SUPPORTED_TARGETS, f"Unsupported target: {target}" + self.target = target def init_request_args(self) -> None: super().init_request_args() @@ -489,7 +489,7 @@ def init_request_args(self) -> None: def build_request_id(self): return ImportRequestId( action=RequestAction.IMPORT, - target=self.resource, + target=self.target, id=uuid4(), extra={ "subresource": RequestSubresource.BACKUP, @@ -498,7 +498,7 @@ def build_request_id(self): def _get_payload_file(self): # Common serializer is not used to not break API - if self.resource == RequestTarget.PROJECT: + if self.target == RequestTarget.PROJECT: serializer_class = ProjectFileSerializer file_field = "project_file" else: @@ -510,7 +510,7 @@ def _get_payload_file(self): return file_serializer.validated_data[file_field] def _init_callback_with_params(self): - self.callback = import_project if self.resource == RequestTarget.PROJECT else import_task + self.callback = import_project if self.target == RequestTarget.PROJECT else import_task self.callback_args = (self.import_args.file_path, self.user_id, self.import_args.org_id) def finalize_request(self): @@ -520,7 +520,7 @@ def finalize_request(self): class TaskCreator(AbstractRequestManager): QUEUE_NAME = settings.CVAT_QUEUES.IMPORT_DATA.value - SUPPORTED_RESOURCES = {RequestTarget.TASK} + SUPPORTED_TARGETS = {RequestTarget.TASK} def __init__( self, diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index deaa7d739909..e85831297eb1 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -418,7 +418,7 @@ def upload_finished(self, request: ExtendedRequest): return importer.enqueue_job() elif self.action == 'import_backup': - importer = BackupImporter(request=request, resource=RequestTarget.PROJECT) + importer = BackupImporter(request=request, target=RequestTarget.PROJECT) return importer.enqueue_job() return Response(data='Unknown upload was finished', @@ -1013,7 +1013,7 @@ def _handle_upload_data(request: ExtendedRequest): @transaction.atomic def _handle_upload_backup(request: ExtendedRequest): - importer = BackupImporter(request=request, resource=RequestTarget.TASK) + importer = BackupImporter(request=request, target=RequestTarget.TASK) return importer.enqueue_job() if self.action == 'annotations': diff --git a/cvat/apps/quality_control/quality_reports.py b/cvat/apps/quality_control/quality_reports.py index 9c5dd0c8d2b5..79803daea127 100644 --- a/cvat/apps/quality_control/quality_reports.py +++ b/cvat/apps/quality_control/quality_reports.py @@ -2270,7 +2270,7 @@ class QualityRequestId(RequestIdWithSubresourceMixin, RequestId): class QualityReportRQJobManager(AbstractRequestManager): QUEUE_NAME = settings.CVAT_QUEUES.QUALITY_REPORTS.value - SUPPORTED_RESOURCES: ClassVar[set[RequestTarget]] = {RequestTarget.TASK} + SUPPORTED_TARGETS: ClassVar[set[RequestTarget]] = {RequestTarget.TASK} @property def job_result_ttl(self): @@ -2289,7 +2289,7 @@ def get_job_by_id(self, id_, /): def build_request_id(self): return QualityRequestId( action="calculate", - target=self.resource, + target=self.target, target_id=self.db_instance.pk, extra={"subresource": "quality"}, ).render() diff --git a/cvat/apps/redis_handler/background.py b/cvat/apps/redis_handler/background.py index 6b06e0cad182..ba9c0a31dd7a 100644 --- a/cvat/apps/redis_handler/background.py +++ b/cvat/apps/redis_handler/background.py @@ -47,7 +47,7 @@ class AbstractRequestManager(metaclass=ABCMeta): - SUPPORTED_RESOURCES: ClassVar[set[RequestTarget] | None] = None + SUPPORTED_TARGETS: ClassVar[set[RequestTarget] | None] = None QUEUE_NAME: ClassVar[str] REQUEST_ID_KEY = "rq_id" @@ -66,11 +66,11 @@ def __init__( self.db_instance = db_instance if db_instance: - assert self.SUPPORTED_RESOURCES, "Should be defined" - self.resource = RequestTarget(db_instance.__class__.__name__.lower()) + assert self.SUPPORTED_TARGETS, "Should be defined" + self.target = RequestTarget(db_instance.__class__.__name__.lower()) assert ( - self.resource in self.SUPPORTED_RESOURCES - ), f"Unsupported resource: {self.resource}" + self.target in self.SUPPORTED_TARGETS + ), f"Unsupported target: {self.target}" @classmethod def get_queue(cls) -> DjangoRQ: From 023cb1c809e0ac9f184348096f6fd540d617963b Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Mon, 21 Apr 2025 13:27:49 +0200 Subject: [PATCH 077/103] Remove queue_name arg for the validate_request_id method --- cvat/apps/engine/background.py | 8 ++++---- cvat/apps/events/export.py | 4 ++-- cvat/apps/redis_handler/background.py | 13 +++++-------- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index 4d4e5f007ac6..f866d722613a 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -120,10 +120,10 @@ def build_request_id(self): }, ).render() - def validate_request_id(self, request_id, /, queue_name) -> None: + def validate_request_id(self, request_id, /) -> None: # FUTURE-TODO: optimize, request_id is parsed 2 times (first one when checking permissions) parsed_request_id: ExportRequestId = ExportRequestId.parse( - request_id, queue=queue_name, try_legacy_format=True + request_id, queue=self.QUEUE_NAME, try_legacy_format=True ) if ( @@ -193,10 +193,10 @@ class BackupExporter(AbstractExporter): # elif isinstance(self.db_instance, Project) and Data.objects.filter(): # pass - def validate_request_id(self, request_id, /, queue_name) -> None: + def validate_request_id(self, request_id, /) -> None: # FUTURE-TODO: optimize, request_id is parsed 2 times (first one when checking permissions) parsed_request_id: ExportRequestId = ExportRequestId.parse( - request_id, queue=queue_name, try_legacy_format=True + request_id, queue=self.QUEUE_NAME, try_legacy_format=True ) if ( diff --git a/cvat/apps/events/export.py b/cvat/apps/events/export.py index 9d1b77e6cf05..65e0af684753 100644 --- a/cvat/apps/events/export.py +++ b/cvat/apps/events/export.py @@ -105,10 +105,10 @@ def build_request_id(self): user_id=self.user_id, ).render() - def validate_request_id(self, request_id, /, queue_name) -> None: + def validate_request_id(self, request_id, /) -> None: parsed_request_id: ExportRequestId = ExportRequestId.parse( request_id, - queue=queue_name, # try_legacy_format is not set here since deprecated API accepts query_id, not the whole Request ID + queue=self.QUEUE_NAME, # try_legacy_format is not set here since deprecated API accepts query_id, not the whole Request ID ) if parsed_request_id.action != RequestAction.EXPORT or parsed_request_id.target != TARGET: diff --git a/cvat/apps/redis_handler/background.py b/cvat/apps/redis_handler/background.py index ba9c0a31dd7a..d6df35179717 100644 --- a/cvat/apps/redis_handler/background.py +++ b/cvat/apps/redis_handler/background.py @@ -95,16 +95,15 @@ def job_failed_ttl(self) -> int | None: @abstractmethod def build_request_id(self): ... - def validate_request_id(self, request_id: str, /, queue_name: str) -> None: ... + def validate_request_id(self, request_id: str, /) -> None: ... def get_job_by_id(self, id_: str, /) -> RQJob | None: - queue = self.get_queue() - try: - self.validate_request_id(id_, queue_name=queue.name) + self.validate_request_id(id_) except Exception: return None + queue = self.get_queue() return queue.fetch_job(id_) def init_request_args(self): @@ -362,13 +361,11 @@ def get_downloader(self): request_id = self.request.query_params.get(self.REQUEST_ID_KEY) if not request_id: - # TODO: check response content type raise ValidationError("Missing request id in the query parameters") - queue = self.get_queue() try: - self.validate_request_id(request_id, queue_name=queue.name) + self.validate_request_id(request_id) except ValueError: raise ValidationError("Invalid export request id") - return self.Downloader(request=self.request, queue=queue, request_id=request_id) + return self.Downloader(request=self.request, queue=self.get_queue(), request_id=request_id) From 69e0ed3a73d962528ea4e5e2caab55ffad893ca9 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Mon, 21 Apr 2025 13:29:55 +0200 Subject: [PATCH 078/103] Add return type for the build_meta method --- cvat/apps/redis_handler/background.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cvat/apps/redis_handler/background.py b/cvat/apps/redis_handler/background.py index d6df35179717..776c01f21554 100644 --- a/cvat/apps/redis_handler/background.py +++ b/cvat/apps/redis_handler/background.py @@ -157,7 +157,7 @@ def handle_existing_job(self, job: RQJob | None, queue: DjangoRQ) -> Response | job.delete() return None - def build_meta(self, *, request_id: str): + def build_meta(self, *, request_id: str) -> dict[str, Any]: return BaseRQMeta.build(request=self.request, db_obj=self.db_instance) def setup_new_job(self, queue: DjangoRQ, request_id: str, /, **kwargs): From e75917b798de7c277110ced6320fb10462a326c1 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 22 Apr 2025 16:57:34 +0200 Subject: [PATCH 079/103] Apply comments --- cvat/apps/consensus/merging_manager.py | 4 +- cvat/apps/consensus/rq.py | 14 ++ cvat/apps/engine/background.py | 54 ++--- cvat/apps/engine/mixins.py | 4 +- cvat/apps/engine/permissions.py | 2 +- cvat/apps/engine/rq.py | 43 ++-- cvat/apps/events/export.py | 7 +- cvat/apps/quality_control/quality_reports.py | 15 +- cvat/apps/quality_control/rq.py | 27 +++ cvat/apps/redis_handler/apps.py | 29 +-- cvat/apps/redis_handler/background.py | 13 +- cvat/apps/redis_handler/rq.py | 230 ++++++++++--------- cvat/apps/redis_handler/serializers.py | 4 +- cvat/apps/redis_handler/views.py | 5 +- cvat/settings/base.py | 11 +- 15 files changed, 259 insertions(+), 203 deletions(-) create mode 100644 cvat/apps/consensus/rq.py create mode 100644 cvat/apps/quality_control/rq.py diff --git a/cvat/apps/consensus/merging_manager.py b/cvat/apps/consensus/merging_manager.py index 425a01b0b9cf..542948e235fa 100644 --- a/cvat/apps/consensus/merging_manager.py +++ b/cvat/apps/consensus/merging_manager.py @@ -12,6 +12,7 @@ from cvat.apps.consensus.intersect_merge import IntersectMerge from cvat.apps.consensus.models import ConsensusSettings +from cvat.apps.consensus.rq import ConsensusRequestId from cvat.apps.dataset_manager.bindings import import_dm_annotations from cvat.apps.dataset_manager.task import PatchAction, patch_job_data from cvat.apps.engine.models import ( @@ -165,8 +166,7 @@ def job_result_ttl(self): return 300 def build_request_id(self) -> str: - return RequestId( - action="merge", + return ConsensusRequestId( target=self.target, target_id=self.db_instance.pk, ).render() diff --git a/cvat/apps/consensus/rq.py b/cvat/apps/consensus/rq.py new file mode 100644 index 000000000000..8243aa9b61bf --- /dev/null +++ b/cvat/apps/consensus/rq.py @@ -0,0 +1,14 @@ +# Copyright (C) CVAT.ai Corporation +# +# SPDX-License-Identifier: MIT + +from typing import ClassVar + +from cvat.apps.redis_handler.rq import RequestId + + +class ConsensusRequestId(RequestId): + ACTION_DEFAULT_VALUE: ClassVar[str] = "merge" + ACTION_ALLOWED_VALUES: ClassVar[tuple[str]] = (ACTION_DEFAULT_VALUE,) + + QUEUE_SELECTORS: ClassVar[tuple[str]] = ACTION_ALLOWED_VALUES diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index f866d722613a..fead28291be3 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -110,20 +110,18 @@ def build_request_id(self): target=RequestTarget(self.target), target_id=self.db_instance.pk, user_id=self.user_id, - extra={ - "subresource": ( - RequestSubresource.DATASET - if self.export_args.save_images - else RequestSubresource.ANNOTATIONS - ), - "format": self.export_args.format, - }, + subresource=( + RequestSubresource.DATASET + if self.export_args.save_images + else RequestSubresource.ANNOTATIONS + ), + format=self.export_args.format, ).render() def validate_request_id(self, request_id, /) -> None: # FUTURE-TODO: optimize, request_id is parsed 2 times (first one when checking permissions) - parsed_request_id: ExportRequestId = ExportRequestId.parse( - request_id, queue=self.QUEUE_NAME, try_legacy_format=True + parsed_request_id: ExportRequestId = ExportRequestId.parse_and_validate_queue( + request_id, expected_queue=self.QUEUE_NAME, try_legacy_format=True ) if ( @@ -133,7 +131,9 @@ def validate_request_id(self, request_id, /) -> None: or parsed_request_id.subresource not in {RequestSubresource.DATASET, RequestSubresource.ANNOTATIONS} ): - raise ValueError("The provided request id does not match exported target or subresource") + raise ValueError( + "The provided request id does not match exported target or subresource" + ) def _init_callback_with_params(self): self.callback = get_export_callback( @@ -176,7 +176,7 @@ def get_result_filename(self) -> str: return filename - def where_to_redirect(self) -> str: + def get_result_endpoint_url(self) -> str: return reverse( f"{self.target}-download-dataset", args=[self.db_instance.pk], request=self.request ) @@ -195,8 +195,8 @@ class BackupExporter(AbstractExporter): def validate_request_id(self, request_id, /) -> None: # FUTURE-TODO: optimize, request_id is parsed 2 times (first one when checking permissions) - parsed_request_id: ExportRequestId = ExportRequestId.parse( - request_id, queue=self.QUEUE_NAME, try_legacy_format=True + parsed_request_id: ExportRequestId = ExportRequestId.parse_and_validate_queue( + request_id, expected_queue=self.QUEUE_NAME, try_legacy_format=True ) if ( @@ -205,7 +205,9 @@ def validate_request_id(self, request_id, /) -> None: or parsed_request_id.target_id != self.db_instance.pk or parsed_request_id.subresource != RequestSubresource.BACKUP ): - raise ValueError("The provided request id does not match exported target or subresource") + raise ValueError( + "The provided request id does not match exported target or subresource" + ) def _init_callback_with_params(self): self.callback = create_backup @@ -244,12 +246,10 @@ def build_request_id(self): target=RequestTarget(self.target), target_id=self.db_instance.pk, user_id=self.user_id, - extra={ - "subresource": RequestSubresource.BACKUP, - }, + subresource=RequestSubresource.BACKUP, ).render() - def where_to_redirect(self) -> str: + def get_result_endpoint_url(self) -> str: return reverse( f"{self.target}-download-backup", args=[self.db_instance.pk], request=self.request ) @@ -444,13 +444,11 @@ def build_request_id(self): action=RequestAction.IMPORT, target=RequestTarget(self.target), target_id=self.db_instance.pk, - extra={ - "subresource": ( - RequestSubresource.DATASET - if isinstance(self.db_instance, Project) - else RequestSubresource.ANNOTATIONS - ), - }, + subresource=( + RequestSubresource.DATASET + if isinstance(self.db_instance, Project) + else RequestSubresource.ANNOTATIONS + ), ).render() def finalize_request(self): @@ -491,9 +489,7 @@ def build_request_id(self): action=RequestAction.IMPORT, target=self.target, id=uuid4(), - extra={ - "subresource": RequestSubresource.BACKUP, - }, + subresource=RequestSubresource.BACKUP, ).render() def _get_payload_file(self): diff --git a/cvat/apps/engine/mixins.py b/cvat/apps/engine/mixins.py index 7052a5b30d98..ab1cb10dee7d 100644 --- a/cvat/apps/engine/mixins.py +++ b/cvat/apps/engine/mixins.py @@ -273,9 +273,7 @@ def init_tus_upload(self, request: ExtendedRequest): action=RequestAction.IMPORT, target=RequestTarget(object_class_name), target_id=self._object.pk, - extra={ - "subresource": import_type, - } + subresource=import_type, ).render() queue = django_rq.get_queue(settings.CVAT_QUEUES.IMPORT_DATA.value) finished_job_ids = queue.finished_job_registry.get_job_ids() diff --git a/cvat/apps/engine/permissions.py b/cvat/apps/engine/permissions.py index f3fefa133435..a39b6a8cab7c 100644 --- a/cvat/apps/engine/permissions.py +++ b/cvat/apps/engine/permissions.py @@ -59,7 +59,7 @@ def extend_params_with_rq_job_details(*, request: ExtendedRequest, params: dict[ if rq_id := request.query_params.get("rq_id"): try: - params["rq_job_id"] = ExportRequestId.parse(rq_id, queue=AbstractExporter.QUEUE_NAME, try_legacy_format=True) + params["rq_job_id"] = ExportRequestId.parse_and_validate_queue(rq_id, expected_queue=AbstractExporter.QUEUE_NAME, try_legacy_format=True) return except Exception: raise ValidationError("Unexpected request id format") diff --git a/cvat/apps/engine/rq.py b/cvat/apps/engine/rq.py index 5093a27dd8e4..ea821d3abe28 100644 --- a/cvat/apps/engine/rq.py +++ b/cvat/apps/engine/rq.py @@ -5,8 +5,10 @@ from __future__ import annotations from abc import ABCMeta, abstractmethod -from typing import TYPE_CHECKING, Any, Callable, Optional, Protocol +from types import NoneType +from typing import TYPE_CHECKING, Any, Callable, ClassVar, Optional, Protocol +import attrs from django.conf import settings from django.db.models import Model from django.utils import timezone @@ -16,7 +18,7 @@ from rq.registry import BaseRegistry as RQBaseRegistry from cvat.apps.engine.types import ExtendedRequest -from cvat.apps.redis_handler.rq import RequestId, RequestIdWithOptionalSubresourceMixin +from cvat.apps.redis_handler.rq import RequestId, RequestIdWithOptionalSubresource if TYPE_CHECKING: from django.contrib.auth.models import User @@ -326,20 +328,26 @@ def is_rq_job_owner(rq_job: RQJob, user_id: int) -> bool: return False -class RequestIdWithFormatMixin: - extra: dict[str, Any] - - @property - def format(self) -> str | None: - return self.extra.get("format") +@attrs.frozen(kw_only=True, slots=False) +class RequestIdWithOptionalFormat(RequestId): + format: str | None = attrs.field( + validator=attrs.validators.instance_of((str, NoneType)), default=None + ) +@attrs.frozen(kw_only=True, slots=False) class ExportRequestId( - RequestIdWithOptionalSubresourceMixin, # subresource is optional because export queue works also with events - RequestIdWithFormatMixin, - RequestId, + RequestIdWithOptionalSubresource, # subresource is optional because export queue works also with events + RequestIdWithOptionalFormat, ): - LEGACY_FORMAT_PATTERNS = ( + ACTION_DEFAULT_VALUE: ClassVar[str] = "export" + ACTION_ALLOWED_VALUES: ClassVar[tuple[str]] = (ACTION_DEFAULT_VALUE,) + + SUBRESOURCE_ALLOWED_VALUES: ClassVar[tuple[str]] = ("backup", "dataset", "annotations") + QUEUE_SELECTORS: ClassVar[tuple[str]] = ACTION_ALLOWED_VALUES + + # will be deleted after several releases + LEGACY_FORMAT_PATTERNS: ClassVar[tuple[str]] = ( r"export:(?P(task|project))-(?P[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12})" + r"-(?P(backup))-by-(?P\d+)", r"export:(?P(project|task|job))-(?P\d+)-(?P(annotations|dataset))" @@ -347,11 +355,16 @@ class ExportRequestId( ) +@attrs.frozen(kw_only=True, slots=False) class ImportRequestId( - RequestIdWithOptionalSubresourceMixin, # subresource is optional because import queue works also with backups/task creation jobs - RequestIdWithFormatMixin, - RequestId, + RequestIdWithOptionalSubresource, # subresource is optional because import queue works also with task creation jobs + RequestIdWithOptionalFormat, ): + ACTION_ALLOWED_VALUES: ClassVar[tuple[str]] = ("create", "import") + SUBRESOURCE_ALLOWED_VALUES: ClassVar[tuple[str]] = ("backup", "dataset", "annotations") + QUEUE_SELECTORS: ClassVar[tuple[str]] = ACTION_ALLOWED_VALUES + + # will be deleted after several releases LEGACY_FORMAT_PATTERNS = ( r"create:task-(?P\d+)", r"import:(?P(task|project|job))-(?P\d+)-(?P(annotations|dataset))", diff --git a/cvat/apps/events/export.py b/cvat/apps/events/export.py index 65e0af684753..2d98793e9193 100644 --- a/cvat/apps/events/export.py +++ b/cvat/apps/events/export.py @@ -99,16 +99,15 @@ def __init__( def build_request_id(self): return ExportRequestId( - action=RequestAction.EXPORT, target=TARGET, id=self.query_id, user_id=self.user_id, ).render() def validate_request_id(self, request_id, /) -> None: - parsed_request_id: ExportRequestId = ExportRequestId.parse( + parsed_request_id: ExportRequestId = ExportRequestId.parse_and_validate_queue( request_id, - queue=self.QUEUE_NAME, # try_legacy_format is not set here since deprecated API accepts query_id, not the whole Request ID + expected_queue=self.QUEUE_NAME, # try_legacy_format is not set here since deprecated API accepts query_id, not the whole Request ID ) if parsed_request_id.action != RequestAction.EXPORT or parsed_request_id.target != TARGET: @@ -164,7 +163,7 @@ def _init_callback_with_params(self): ) self.callback_args = (query_params, output_filename) - def where_to_redirect(self) -> str: + def get_result_endpoint_url(self) -> str: return reverse("events-download-file", request=self.request) def get_result_filename(self): diff --git a/cvat/apps/quality_control/quality_reports.py b/cvat/apps/quality_control/quality_reports.py index 79803daea127..64c90ac0a3a7 100644 --- a/cvat/apps/quality_control/quality_reports.py +++ b/cvat/apps/quality_control/quality_reports.py @@ -57,8 +57,8 @@ AnnotationConflictType, AnnotationType, ) +from cvat.apps.quality_control.rq import QualityRequestId from cvat.apps.redis_handler.background import AbstractRequestManager -from cvat.apps.redis_handler.rq import RequestId, RequestIdWithSubresourceMixin class Serializable: @@ -2261,13 +2261,6 @@ def generate_report(self) -> ComparisonReport: ) -class QualityRequestId(RequestIdWithSubresourceMixin, RequestId): - LEGACY_FORMAT_PATTERNS = ( - r"quality-check-(?P(task))-(?P\d+)-user-(\d+)", # user id is excluded in the new format - ) - LEGACY_FORMAT_EXTRA = (("subresource", "quality"), ("action", "calculate")) - - class QualityReportRQJobManager(AbstractRequestManager): QUEUE_NAME = settings.CVAT_QUEUES.QUALITY_REPORTS.value SUPPORTED_TARGETS: ClassVar[set[RequestTarget]] = {RequestTarget.TASK} @@ -2278,8 +2271,8 @@ def job_result_ttl(self): def get_job_by_id(self, id_, /): try: - id_ = QualityRequestId.parse( - id_, queue=self.QUEUE_NAME, try_legacy_format=True + id_ = QualityRequestId.parse_and_validate_queue( + id_, expected_queue=self.QUEUE_NAME, try_legacy_format=True ).render() except ValueError: raise ValidationError("Provided request ID is invalid") @@ -2288,10 +2281,8 @@ def get_job_by_id(self, id_, /): def build_request_id(self): return QualityRequestId( - action="calculate", target=self.target, target_id=self.db_instance.pk, - extra={"subresource": "quality"}, ).render() def validate_request(self): diff --git a/cvat/apps/quality_control/rq.py b/cvat/apps/quality_control/rq.py new file mode 100644 index 000000000000..0212045764eb --- /dev/null +++ b/cvat/apps/quality_control/rq.py @@ -0,0 +1,27 @@ +# Copyright (C) CVAT.ai Corporation +# +# SPDX-License-Identifier: MIT + +from typing import ClassVar + +import attrs + +from cvat.apps.redis_handler.rq import RequestIdWithSubresource + + +@attrs.frozen(kw_only=True, slots=False) +class QualityRequestId(RequestIdWithSubresource): + ACTION_DEFAULT_VALUE: ClassVar[str] = "calculate" + ACTION_ALLOWED_VALUES: ClassVar[tuple[str]] = (ACTION_DEFAULT_VALUE,) + + SUBRESOURCE_DEFAULT_VALUE: ClassVar[str] = "quality" + SUBRESOURCE_ALLOWED_VALUES: ClassVar[tuple[str]] = (SUBRESOURCE_DEFAULT_VALUE,) + + QUEUE_SELECTORS: ClassVar[tuple[tuple[str, str]]] = ( + (ACTION_DEFAULT_VALUE, SUBRESOURCE_DEFAULT_VALUE), + ) + + # will be deleted after several releases + LEGACY_FORMAT_PATTERNS = ( + r"quality-check-(?P(task))-(?P\d+)-user-(\d+)", # user id is excluded in the new format + ) diff --git a/cvat/apps/redis_handler/apps.py b/cvat/apps/redis_handler/apps.py index 621ef693bbc0..38b6fc60da25 100644 --- a/cvat/apps/redis_handler/apps.py +++ b/cvat/apps/redis_handler/apps.py @@ -7,6 +7,8 @@ from django.apps import AppConfig from django.conf import settings +from django.core.exceptions import ImproperlyConfigured +from django.utils.module_loading import import_string class LayeredKeyDict(dict): @@ -20,25 +22,24 @@ def __getitem__(self, key: str | tuple) -> str: ACTION_TO_QUEUE = LayeredKeyDict() QUEUE_TO_PARSED_JOB_ID_CLS = {} -PARSED_JOB_ID_CLS_TO_QUEUE = {} def initialize_mappings(): + from cvat.apps.redis_handler.rq import RequestId + for queue_name, queue_conf in settings.RQ_QUEUES.items(): - # initialize ACTION_TO_QUEUE mapping - if supported_actions := queue_conf.get("SUPPORTED_ACTIONS"): - for action in supported_actions: - if isinstance(action, str): - ACTION_TO_QUEUE[action] = queue_name - continue - - assert isinstance(action, tuple) - ACTION_TO_QUEUE[action] = queue_name - - # initialize QUEUE_TO_PARSED_JOB_ID_CLS/PARSED_JOB_ID_CLS_TO_QUEUE mappings - if parsed_job_id_cls := queue_conf.get("PARSED_JOB_ID_CLASS"): + if path_to_parsed_job_id_cls := queue_conf.get("PARSED_JOB_ID_CLASS"): + parsed_job_id_cls = import_string(path_to_parsed_job_id_cls) + + if not issubclass(parsed_job_id_cls, RequestId): + raise ImproperlyConfigured( + f"The {path_to_parsed_job_id_cls!r} must be inherited from the RequestId class" + ) + + for queue_selector in parsed_job_id_cls.QUEUE_SELECTORS: + ACTION_TO_QUEUE[queue_selector] = queue_name + QUEUE_TO_PARSED_JOB_ID_CLS[queue_name] = parsed_job_id_cls - PARSED_JOB_ID_CLS_TO_QUEUE[parsed_job_id_cls] = queue_name class RedisHandlerConfig(AppConfig): diff --git a/cvat/apps/redis_handler/background.py b/cvat/apps/redis_handler/background.py index 776c01f21554..b719b3d90138 100644 --- a/cvat/apps/redis_handler/background.py +++ b/cvat/apps/redis_handler/background.py @@ -68,9 +68,7 @@ def __init__( if db_instance: assert self.SUPPORTED_TARGETS, "Should be defined" self.target = RequestTarget(db_instance.__class__.__name__.lower()) - assert ( - self.target in self.SUPPORTED_TARGETS - ), f"Unsupported target: {self.target}" + assert self.target in self.SUPPORTED_TARGETS, f"Unsupported target: {self.target}" @classmethod def get_queue(cls) -> DjangoRQ: @@ -219,7 +217,10 @@ def __init__( def validate_request(self): # prevent architecture bugs - assert "GET" == self.request.method, "Only GET requests can be used to download a file" + assert self.request.method in ( + "GET", + "HEAD", + ), "Only GET/HEAD requests can be used to download a file" def download_file(self) -> Response: self.validate_request() @@ -296,10 +297,10 @@ def job_failed_ttl(self): def get_result_filename(self) -> str: ... @abstractmethod - def where_to_redirect(self) -> str: ... + def get_result_endpoint_url(self) -> str: ... def make_result_url(self, *, request_id: str) -> str: - return self.where_to_redirect() + f"?{self.REQUEST_ID_KEY}={quote(request_id)}" + return self.get_result_endpoint_url() + f"?{self.REQUEST_ID_KEY}={quote(request_id)}" def get_file_timestamp(self) -> str: # use only updated_date for the related resource, don't check children objects diff --git a/cvat/apps/redis_handler/rq.py b/cvat/apps/redis_handler/rq.py index 6b9e63cb59fe..a3331fffb45a 100644 --- a/cvat/apps/redis_handler/rq.py +++ b/cvat/apps/redis_handler/rq.py @@ -2,84 +2,69 @@ import re import urllib.parse -from contextlib import suppress from types import NoneType -from typing import Any, ClassVar, Protocol, overload +from typing import Any, ClassVar, Protocol from uuid import UUID import attrs from django.core.exceptions import ImproperlyConfigured from django.utils.html import escape -from django.utils.module_loading import import_string from rq.job import Job as RQJob -from cvat.apps.redis_handler.apps import ( - ACTION_TO_QUEUE, - PARSED_JOB_ID_CLS_TO_QUEUE, - QUEUE_TO_PARSED_JOB_ID_CLS, -) - - -def convert_extra(value: dict) -> dict[str, Any]: - assert isinstance(value, dict), f"Unexpected type: {type(value)}" - - for k, v in value.items(): - assert v - if not isinstance(v, str): - value[k] = str(v) - - return value +from cvat.apps.redis_handler.apps import ACTION_TO_QUEUE, QUEUE_TO_PARSED_JOB_ID_CLS class IncorrectRequestIdError(ValueError): pass -class RequestIdWithSubresourceMixin: - TYPE_SEP: ClassVar[str] +def _default_from_class_attr(attr_name: str): + def factory(self): + cls = type(self) + if attrs_value := getattr(cls, attr_name, None): + return attrs_value + raise AttributeError( + f"[{cls.__name__}] Unable to set default value for the {attr_name} attribute" + ) - action: str - target: str - extra: dict[str, Any] + return attrs.Factory(factory, takes_self=True) - @property - def subresource(self) -> str: - return self.extra["subresource"] - - @property - def type(self) -> str: - return self.TYPE_SEP.join([self.action, self.subresource or self.target]) - -class RequestIdWithOptionalSubresourceMixin(RequestIdWithSubresourceMixin): - @property - def subresource(self) -> str | None: - with suppress(KeyError): - return super().subresource - - return None - - -@attrs.frozen(kw_only=True) +@attrs.frozen(kw_only=True, slots=False) # to be able to inherit from RequestId class RequestId: - FIELD_SEP: ClassVar[str] = "&" - KEY_VAL_SEP: ClassVar[str] = "=" - + # https://datatracker.ietf.org/doc/html/rfc3986#section-2.3 - ALPHA / DIGIT / "-" / "." / "_" / "~" + UNRESERVED_BY_RFC3986_SPECIAL_CHARACTERS: ClassVar[tuple[str]] = ("-", ".", "_", "~") ENCODE_MAPPING = { - ".": "@", + ".": "~", # dot is a default DRF path parameter pattern " ": "_", } - DECODE_MAPPING = {v: k for k, v in ENCODE_MAPPING.items()} - NOT_ALLOWED_CHARS = {FIELD_SEP, KEY_VAL_SEP, "/"} | set(DECODE_MAPPING.keys()) + # "&" and "=" characters are reserved sub-delims symbols, but request ID is going to be used only as path parameter + FIELD_SEP: ClassVar[str] = "&" + KEY_VAL_SEP: ClassVar[str] = "=" TYPE_SEP: ClassVar[str] = ":" # used in serialization logic - # FUTURE-TODO: remove after several releases - # backward compatibility with previous ID formats - LEGACY_FORMAT_PATTERNS: ClassVar[tuple[str]] = () - LEGACY_FORMAT_EXTRA: tuple[tuple[str, str]] = () + STR_WITH_UNRESERVED_SPECIAL_CHARACTERS: ClassVar[str] = "".join( + re.escape(c) + for c in ( + set(UNRESERVED_BY_RFC3986_SPECIAL_CHARACTERS) + - {FIELD_SEP, KEY_VAL_SEP, *ENCODE_MAPPING.values()} + ) + ) + VALIDATION_PATTERN: ClassVar[str] = rf"[\w{STR_WITH_UNRESERVED_SPECIAL_CHARACTERS}]+" + + action: str = attrs.field( + validator=attrs.validators.instance_of(str), + default=_default_from_class_attr("ACTION_DEFAULT_VALUE"), + ) + ACTION_ALLOWED_VALUES: ClassVar[tuple[str]] + QUEUE_SELECTORS: ClassVar[tuple] + + @action.validator + def validate_action(self, attribute: attrs.Attribute, value: Any): + if hasattr(self, "ACTION_ALLOWED_VALUES") and value not in self.ACTION_ALLOWED_VALUES: + raise ValueError(f"Action must be one of {self.ACTION_ALLOWED_VALUES!r}") - action: str = attrs.field(validator=attrs.validators.instance_of(str)) target: str = attrs.field(validator=attrs.validators.instance_of(str)) target_id: int | None = attrs.field( converter=lambda x: x if x is None else int(x), default=None @@ -89,9 +74,12 @@ class RequestId: converter=lambda x: x if isinstance(x, (NoneType, UUID)) else UUID(x), default=None, ) # operation id - extra: dict[str, str] = attrs.field(converter=convert_extra, factory=dict) user_id: int | None = attrs.field(converter=lambda x: x if x is None else int(x), default=None) + # FUTURE-TODO: remove after several releases + # backward compatibility with previous ID formats + LEGACY_FORMAT_PATTERNS: ClassVar[tuple[str]] = () + def __attrs_post_init__(self): assert ( sum(1 for i in (self.target_id, self.id) if i) == 1 @@ -102,21 +90,19 @@ def type(self) -> str: return self.TYPE_SEP.join([self.action, self.target]) def to_dict(self) -> dict[str, Any]: - base = attrs.asdict(self, filter=lambda _, v: bool(v)) - extra_data = base.pop("extra", {}) - return {**base, **extra_data} + return attrs.asdict(self, filter=lambda _, v: bool(v)) @classmethod def normalize(cls, repr_: dict[str, Any]) -> None: for key, value in repr_.items(): str_value = str(value) - for reserved in cls.NOT_ALLOWED_CHARS: - if reserved in str_value: - raise IncorrectRequestIdError(f"{key} contains special character: {reserved!r}") + if not re.match(cls.VALIDATION_PATTERN, str_value): + raise IncorrectRequestIdError( + f"{key} does not match allowed format: {cls.VALIDATION_PATTERN}" + ) for from_char, to_char in cls.ENCODE_MAPPING.items(): - if from_char in str_value: - str_value = str_value.replace(from_char, to_char) + str_value = str_value.replace(from_char, to_char) repr_[key] = str_value @@ -132,37 +118,25 @@ def render(self) -> str: # b. readable return self.FIELD_SEP.join([f"{k}{self.KEY_VAL_SEP}{v}" for k, v in rq_id_repr.items()]) - @classmethod - @overload - def parse( - cls, request_id: str, /, *, queue: str, try_legacy_format: bool = False - ) -> RequestId: ... - - @classmethod - @overload - def parse( - cls, request_id: str, /, *, queue: None = None, try_legacy_format: bool = False - ) -> tuple[RequestId, str]: ... - @classmethod def parse( cls, request_id: str, /, *, - queue: str | None = None, try_legacy_format: bool = False, - ) -> RequestId | tuple[RequestId, str]: - class _RequestIdForMapping(RequestIdWithOptionalSubresourceMixin, RequestId): - pass + ) -> tuple[RequestId, str]: - queue_provided = bool(queue) - is_new_format = True - common_keys = set(attrs.fields_dict(cls).keys()) - {"extra"} + actual_cls = cls + subclasses = set() + queue: str | None = None dict_repr = {} fragments = {} - actual_cls = cls + def init_subclasses(cur_cls: type[RequestId] = RequestId): + for subclass in cur_cls.__subclasses__(): + subclasses.add(subclass) + init_subclasses(subclass) try: # try to parse ID as key=value pairs (newly introduced format) @@ -176,60 +150,108 @@ class _RequestIdForMapping(RequestIdWithOptionalSubresourceMixin, RequestId): ) match: re.Match | None = None - subclasses = (cls,) if cls is not RequestId else RequestId.__subclasses__() + + if cls is RequestId: + init_subclasses() + else: + subclasses = (cls,) for subclass in subclasses: for pattern in subclass.LEGACY_FORMAT_PATTERNS: match = re.match(pattern, request_id) if match: + actual_cls = subclass break if match: break - - if not match: + else: raise IncorrectRequestIdError( f"Unable to parse request ID: {escape(request_id)!r}" ) - is_new_format = False - fragments = {**match.groupdict(), **dict(subclass.LEGACY_FORMAT_EXTRA)} - - queue = PARSED_JOB_ID_CLS_TO_QUEUE.get(f"{subclass.__module__}.{subclass.__name__}") + queue = ACTION_TO_QUEUE.get( + actual_cls.QUEUE_SELECTORS[0] + ) # each selector match the same queue if not queue: raise ImproperlyConfigured( "Job ID class must be set in the related queue config" ) - actual_cls = subclass + + fragments = match.groupdict() # init dict representation for request ID for key, value in fragments.items(): - for from_char, to_char in cls.DECODE_MAPPING.items(): - if from_char in value: - value = value.replace(from_char, to_char) + for to_char, from_char in cls.ENCODE_MAPPING.items(): + value = value.replace(from_char, to_char) - if key in common_keys: - dict_repr[key] = value - else: - dict_repr.setdefault("extra", {})[key] = value + dict_repr[key] = value - if is_new_format: + if not queue: # try to define queue dynamically based on action/target/subresource - if not queue_provided: - _parsed = _RequestIdForMapping(**dict_repr) - queue = ACTION_TO_QUEUE[(_parsed.action, _parsed.target, _parsed.subresource)] + queue = ACTION_TO_QUEUE[ + (dict_repr["action"], dict_repr["target"], dict_repr.get("subresource")) + ] - if actual_cls_path := QUEUE_TO_PARSED_JOB_ID_CLS.get(queue): - actual_cls = import_string(actual_cls_path) + if queue in QUEUE_TO_PARSED_JOB_ID_CLS: + actual_cls = QUEUE_TO_PARSED_JOB_ID_CLS[queue] result = actual_cls(**dict_repr) - return (result, queue) if not queue_provided else result + return (result, queue) except ImproperlyConfigured: raise except Exception as ex: raise IncorrectRequestIdError from ex + @classmethod + def parse_and_validate_queue( + cls, + request_id: str, + /, + *, + expected_queue: str, + try_legacy_format: bool = False, + ) -> RequestId: + parsed_request_id, queue = cls.parse(request_id, try_legacy_format=try_legacy_format) + assert queue == expected_queue + return parsed_request_id + + +@attrs.frozen(kw_only=True, slots=False) +class RequestIdWithSubresource(RequestId): + SUBRESOURCE_ALLOWED_VALUES: ClassVar[tuple[str]] + + subresource: str = attrs.field( + validator=attrs.validators.instance_of(str), + default=_default_from_class_attr("SUBRESOURCE_DEFAULT_VALUE"), + ) + + @subresource.validator + def validate_subresource(self, attribute: attrs.Attribute, value: Any): + if value not in self.SUBRESOURCE_ALLOWED_VALUES: + raise ValueError(f"Subresource must be one of {self.SUBRESOURCE_ALLOWED_VALUES!r}") + + @property + def type(self) -> str: + return self.TYPE_SEP.join([self.action, self.subresource]) + + +@attrs.frozen(kw_only=True, slots=False) +class RequestIdWithOptionalSubresource(RequestIdWithSubresource): + subresource: str | None = attrs.field( + validator=attrs.validators.instance_of((str, NoneType)), default=None + ) + + @subresource.validator + def validate_subresource(self, attribute: attrs.Attribute, value: Any): + if value is not None: + super().validate_subresource(attribute, value) + + @property + def type(self) -> str: + return self.TYPE_SEP.join([self.action, self.subresource or self.target]) + class _WithParsedId(Protocol): parsed_id: RequestId diff --git a/cvat/apps/redis_handler/serializers.py b/cvat/apps/redis_handler/serializers.py index db326e4cbc5d..45975f72cbba 100644 --- a/cvat/apps/redis_handler/serializers.py +++ b/cvat/apps/redis_handler/serializers.py @@ -18,7 +18,7 @@ from cvat.apps.engine import models from cvat.apps.engine.log import ServerLogManager from cvat.apps.engine.models import RequestAction -from cvat.apps.engine.rq import BaseRQMeta, ExportRQMeta, ImportRQMeta, RequestIdWithFormatMixin +from cvat.apps.engine.rq import BaseRQMeta, ExportRQMeta, ImportRQMeta, RequestIdWithOptionalFormat from cvat.apps.engine.serializers import BasicUserSerializer from cvat.apps.engine.utils import parse_exception_message from cvat.apps.lambda_manager.rq import LambdaRQMeta @@ -61,7 +61,7 @@ def to_representation(self, rq_job: CustomRQJob) -> dict[str, Any]: } if parsed_request_id.action == RequestAction.AUTOANNOTATE: representation["function_id"] = LambdaRQMeta.for_job(rq_job).function_id - elif isinstance(parsed_request_id, RequestIdWithFormatMixin): + elif isinstance(parsed_request_id, RequestIdWithOptionalFormat): representation["format"] = parsed_request_id.format return representation diff --git a/cvat/apps/redis_handler/views.py b/cvat/apps/redis_handler/views.py index 1d93990b447f..58144408c4f7 100644 --- a/cvat/apps/redis_handler/views.py +++ b/cvat/apps/redis_handler/views.py @@ -124,7 +124,9 @@ def _get_rq_jobs_from_queue(self, queue: DjangoRQ, user_id: int) -> list[RQJob]: if job and is_rq_job_owner(job, user_id): job = cast(CustomRQJob, job) try: - parsed_request_id = RequestId.parse(job.id, queue=queue.name) + parsed_request_id = RequestId.parse_and_validate_queue( + job.id, expected_queue=queue.name + ) except Exception: # nosec B112 continue @@ -162,7 +164,6 @@ def _get_rq_job_by_id(self, rq_id: str) -> RQJob | None: """ try: parsed_request_id, queue_name = RequestId.parse(rq_id, try_legacy_format=True) - # TODO: return flag that legacy format is used rq_id = parsed_request_id.render() except Exception: return None diff --git a/cvat/settings/base.py b/cvat/settings/base.py index 8b8b3fad03f4..61465c5dbe51 100644 --- a/cvat/settings/base.py +++ b/cvat/settings/base.py @@ -297,20 +297,16 @@ class CVAT_QUEUES(Enum): "DEFAULT_TIMEOUT": "4h", # custom fields "PARSED_JOB_ID_CLASS": "cvat.apps.engine.rq.ImportRequestId", - "SUPPORTED_ACTIONS": ["create", "import"], }, CVAT_QUEUES.EXPORT_DATA.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "4h", # custom fields "PARSED_JOB_ID_CLASS": "cvat.apps.engine.rq.ExportRequestId", - "SUPPORTED_ACTIONS": ["export"], }, CVAT_QUEUES.AUTO_ANNOTATION.value: { **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "24h", - # custom fields - "SUPPORTED_ACTIONS": ["autoannotate"], }, CVAT_QUEUES.WEBHOOKS.value: { **REDIS_INMEM_SETTINGS, @@ -324,10 +320,7 @@ class CVAT_QUEUES(Enum): **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "1h", # custom fields - "PARSED_JOB_ID_CLASS": "cvat.apps.quality_control.quality_reports.QualityRequestId", - "SUPPORTED_ACTIONS": [ - ("calculate", "quality"), - ], + "PARSED_JOB_ID_CLASS": "cvat.apps.quality_control.rq.QualityRequestId", }, CVAT_QUEUES.CLEANING.value: { **REDIS_INMEM_SETTINGS, @@ -341,7 +334,7 @@ class CVAT_QUEUES(Enum): **REDIS_INMEM_SETTINGS, "DEFAULT_TIMEOUT": "1h", # custom fields - "SUPPORTED_ACTIONS": ["merge"], + "PARSED_JOB_ID_CLASS": "cvat.apps.consensus.rq.ConsensusRequestId", }, } From 68af8d52f009eba39cb0e757cdb85fdd51e75166 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 22 Apr 2025 20:46:51 +0200 Subject: [PATCH 080/103] Add some REST API tests to check legacy rq_id format usage --- cvat/apps/consensus/merging_manager.py | 1 - cvat/apps/engine/rq.py | 3 +- cvat/apps/redis_handler/rq.py | 3 + tests/python/rest_api/test_requests.py | 116 ++++++++++++++++++++++++- 4 files changed, 116 insertions(+), 7 deletions(-) diff --git a/cvat/apps/consensus/merging_manager.py b/cvat/apps/consensus/merging_manager.py index 542948e235fa..bc681992e242 100644 --- a/cvat/apps/consensus/merging_manager.py +++ b/cvat/apps/consensus/merging_manager.py @@ -29,7 +29,6 @@ from cvat.apps.profiler import silk_profile from cvat.apps.quality_control.quality_reports import ComparisonParameters, JobDataProvider from cvat.apps.redis_handler.background import AbstractRequestManager -from cvat.apps.redis_handler.rq import RequestId class _TaskMerger: diff --git a/cvat/apps/engine/rq.py b/cvat/apps/engine/rq.py index ea821d3abe28..c96c187e2f6a 100644 --- a/cvat/apps/engine/rq.py +++ b/cvat/apps/engine/rq.py @@ -348,8 +348,7 @@ class ExportRequestId( # will be deleted after several releases LEGACY_FORMAT_PATTERNS: ClassVar[tuple[str]] = ( - r"export:(?P(task|project))-(?P[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12})" - + r"-(?P(backup))-by-(?P\d+)", + r"export:(?P(task|project))-(?P\d+)-(?P(backup))-by-(?P\d+)", r"export:(?P(project|task|job))-(?P\d+)-(?P(annotations|dataset))" + r"-in-(?P[\w@]+)-format-by-(?P\d+)", ) diff --git a/cvat/apps/redis_handler/rq.py b/cvat/apps/redis_handler/rq.py index a3331fffb45a..3024e8a76ef7 100644 --- a/cvat/apps/redis_handler/rq.py +++ b/cvat/apps/redis_handler/rq.py @@ -178,6 +178,9 @@ def init_subclasses(cur_cls: type[RequestId] = RequestId): ) fragments = match.groupdict() + # "." was replaced with "@" in previous format + if "format" in fragments: + fragments["format"] = fragments["format"].replace("@", cls.ENCODE_MAPPING["."]) # init dict representation for request ID for key, value in fragments.items(): diff --git a/tests/python/rest_api/test_requests.py b/tests/python/rest_api/test_requests.py index dcc56d78231b..f319d266ca1a 100644 --- a/tests/python/rest_api/test_requests.py +++ b/tests/python/rest_api/test_requests.py @@ -282,10 +282,14 @@ def test_list_requests_when_there_is_job_with_non_regular_or_corrupted_meta( @pytest.mark.usefixtures("restore_redis_inmem_per_function") class TestGetRequests: - def _test_get_request_200(self, api_client: ApiClient, rq_id: str, **kwargs) -> models.Request: + def _test_get_request_200( + self, api_client: ApiClient, rq_id: str, validate_rq_id: bool = True, **kwargs + ) -> models.Request: (background_request, response) = api_client.requests_api.retrieve(rq_id, **kwargs) assert response.status == HTTPStatus.OK - assert background_request.id == rq_id + + if validate_rq_id: + assert background_request.id == rq_id return background_request @@ -314,6 +318,7 @@ def test_owner_can_retrieve_request(self, format_name: str, save_images: bool, p id=project["id"], download_result=False, return_request_id=True, + format=format_name, ) with make_api_client(owner["username"]) as owner_client: @@ -337,8 +342,7 @@ def test_owner_can_retrieve_request(self, format_name: str, save_images: bool, p parsed_url = urlparse(bg_request.result_url) assert all([parsed_url.scheme, parsed_url.netloc, parsed_url.path, parsed_url.query]) - @pytest.mark.parametrize("format_name", ("CVAT for images 1.1",)) - def test_non_owner_cannot_retrieve_request(self, find_users, projects, format_name: str): + def test_non_owner_cannot_retrieve_request(self, find_users, projects): project = next( ( p @@ -358,3 +362,107 @@ def test_non_owner_cannot_retrieve_request(self, find_users, projects, format_na ) with make_api_client(malefactor["username"]) as malefactor_client: self._test_get_request_403(malefactor_client, request_id) + + @pytest.mark.parametrize("target_type", ("project", "task", "job")) + @pytest.mark.parametrize("save_images", (True, False)) + @pytest.mark.parametrize("format_name", ("CVAT for images 1.1",)) + def test_can_retrieve_export_dataset_requests_using_legacy_ids( + self, + target_type: str, + save_images: bool, + format_name: str, + projects, + tasks, + jobs, + ): + def build_legacy_request_id( + target_type: str, + target_id: int, + subresource: str, + format_name: str, + user_id: int, + ): + return f"export:{target_type}-{target_id}-{subresource}-in-{format_name.replace(' ', '_').replace('.', '@')}-format-by-{user_id}" + + if target_type == "project": + export_func = export_project_dataset + target = next(iter(projects)) + owner = target["owner"] + elif target_type == "task": + export_func = export_task_dataset + target = next(iter(tasks)) + owner = target["owner"] + else: + assert target_type == "job" + export_func = export_job_dataset + target = next(iter(jobs)) + owner = tasks[target["task_id"]]["owner"] + + request_id = export_func( + owner["username"], + save_images=save_images, + format=format_name, + id=target["id"], + download_result=False, + return_request_id=True, + ) + + legacy_request_id = build_legacy_request_id( + target_type, + target["id"], + "dataset" if save_images else "annotations", + format_name, + owner["id"], + ) + + with make_api_client(owner["username"]) as owner_client: + self._test_get_request_200(owner_client, request_id) + bg_request = self._test_get_request_200( + owner_client, legacy_request_id, validate_rq_id=False + ) + assert bg_request.id == request_id + + @pytest.mark.parametrize("target_type", ("project", "task")) + def test_can_retrieve_export_backup_requests_using_legacy_ids( + self, + target_type: str, + projects, + tasks, + ): + def build_legacy_request_id( + target_type: str, + target_id: int, + user_id: int, + ): + return f"export:{target_type}-{target_id}-backup-by-{user_id}" + + if target_type == "project": + export_func = export_project_backup + target = next(iter(projects)) + else: + assert target_type == "task" + export_func = export_task_backup + target = next(iter(tasks)) + + owner = target["owner"] + + request_id = export_func( + owner["username"], + id=target["id"], + download_result=False, + return_request_id=True, + ) + + legacy_request_id = build_legacy_request_id(target_type, target["id"], owner["id"]) + + with make_api_client(owner["username"]) as owner_client: + self._test_get_request_200(owner_client, request_id) + bg_request = self._test_get_request_200( + owner_client, legacy_request_id, validate_rq_id=False + ) + assert bg_request.id == request_id + + # TODO: + # - quality + # - task creation + # - import From 27fdb93e344cde3c30ec8bb3eac74ab63d9e7fee Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Wed, 23 Apr 2025 10:59:14 +0200 Subject: [PATCH 081/103] Move RequestStatus into redis_handler --- cvat/apps/engine/models.py | 6 ------ cvat/apps/redis_handler/serializers.py | 9 ++++++++- cvat/apps/redis_handler/views.py | 2 +- cvat/settings/base.py | 2 +- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/cvat/apps/engine/models.py b/cvat/apps/engine/models.py index 02909b1cc1d0..b7c043b1b458 100644 --- a/cvat/apps/engine/models.py +++ b/cvat/apps/engine/models.py @@ -1350,12 +1350,6 @@ def organization_id(self): def get_asset_dir(self): return os.path.join(settings.ASSETS_ROOT, str(self.uuid)) -class RequestStatus(TextChoices): - QUEUED = "queued" - STARTED = "started" - FAILED = "failed" - FINISHED = "finished" - class RequestAction(TextChoices): AUTOANNOTATE = "autoannotate" CREATE = "create" diff --git a/cvat/apps/redis_handler/serializers.py b/cvat/apps/redis_handler/serializers.py index 45975f72cbba..230da1f7c8df 100644 --- a/cvat/apps/redis_handler/serializers.py +++ b/cvat/apps/redis_handler/serializers.py @@ -23,9 +23,16 @@ from cvat.apps.engine.utils import parse_exception_message from cvat.apps.lambda_manager.rq import LambdaRQMeta from cvat.apps.redis_handler.rq import CustomRQJob, RequestId +from django.db.models import TextChoices slogger = ServerLogManager(__name__) +class RequestStatus(TextChoices): + QUEUED = "queued" + STARTED = "started" + FAILED = "failed" + FINISHED = "finished" + class RqIdSerializer(serializers.Serializer): rq_id = serializers.CharField(help_text="Request id") @@ -72,7 +79,7 @@ class RequestSerializer(serializers.Serializer): # Marking them as read_only leads to generating type as allOf with one reference to RequestStatus component. # The client generated using openapi-generator from such a schema contains wrong type like: # status (bool, date, datetime, dict, float, int, list, str, none_type): [optional] - status = serializers.ChoiceField(source="get_status", choices=models.RequestStatus.choices) + status = serializers.ChoiceField(source="get_status", choices=RequestStatus.choices) message = serializers.SerializerMethodField() id = serializers.CharField() operation = RequestDataOperationSerializer(source="*") diff --git a/cvat/apps/redis_handler/views.py b/cvat/apps/redis_handler/views.py index 58144408c4f7..384ec0a3f181 100644 --- a/cvat/apps/redis_handler/views.py +++ b/cvat/apps/redis_handler/views.py @@ -23,7 +23,7 @@ NonModelSimpleFilter, ) from cvat.apps.engine.log import ServerLogManager -from cvat.apps.engine.models import RequestStatus # todo: move to the app +from cvat.apps.redis_handler.serializers import RequestStatus from cvat.apps.engine.rq import is_rq_job_owner from cvat.apps.engine.types import ExtendedRequest from cvat.apps.redis_handler.apps import ACTION_TO_QUEUE diff --git a/cvat/settings/base.py b/cvat/settings/base.py index 61465c5dbe51..976fc58d629f 100644 --- a/cvat/settings/base.py +++ b/cvat/settings/base.py @@ -673,7 +673,7 @@ class CVAT_QUEUES(Enum): "SortingMethod": "cvat.apps.engine.models.SortingMethod", "WebhookType": "cvat.apps.webhooks.models.WebhookTypeChoice", "WebhookContentType": "cvat.apps.webhooks.models.WebhookContentTypeChoice", - "RequestStatus": "cvat.apps.engine.models.RequestStatus", + "RequestStatus": "cvat.apps.redis_handler.serializers.RequestStatus", "ValidationMode": "cvat.apps.engine.models.ValidationMode", "FrameSelectionMethod": "cvat.apps.engine.models.JobFrameSelectionMethod", }, From 4ef2279cfe8452827ae312bcbfc6b9d762af5dcd Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Wed, 23 Apr 2025 11:01:54 +0200 Subject: [PATCH 082/103] Add REQUEST_ID_SUBCLASSES --- cvat/apps/redis_handler/apps.py | 26 ++++++++++++++++++++ cvat/apps/redis_handler/rq.py | 34 +++++++------------------- cvat/apps/redis_handler/serializers.py | 3 ++- cvat/apps/redis_handler/views.py | 3 +-- 4 files changed, 38 insertions(+), 28 deletions(-) diff --git a/cvat/apps/redis_handler/apps.py b/cvat/apps/redis_handler/apps.py index 38b6fc60da25..67131856aa31 100644 --- a/cvat/apps/redis_handler/apps.py +++ b/cvat/apps/redis_handler/apps.py @@ -4,6 +4,7 @@ from contextlib import suppress +from typing import cast from django.apps import AppConfig from django.conf import settings @@ -23,10 +24,17 @@ def __getitem__(self, key: str | tuple) -> str: ACTION_TO_QUEUE = LayeredKeyDict() QUEUE_TO_PARSED_JOB_ID_CLS = {} +REQUEST_ID_SUBCLASSES = set() + def initialize_mappings(): from cvat.apps.redis_handler.rq import RequestId + def init_subclasses(cur_cls: type[RequestId] = RequestId): + for subclass in cur_cls.__subclasses__(): + REQUEST_ID_SUBCLASSES.add(subclass) + init_subclasses(subclass) + for queue_name, queue_conf in settings.RQ_QUEUES.items(): if path_to_parsed_job_id_cls := queue_conf.get("PARSED_JOB_ID_CLASS"): parsed_job_id_cls = import_string(path_to_parsed_job_id_cls) @@ -37,10 +45,28 @@ def initialize_mappings(): ) for queue_selector in parsed_job_id_cls.QUEUE_SELECTORS: + if not isinstance(queue_selector, (tuple, str)): + raise ImproperlyConfigured("Wrong queue selector, must be either tuple or str") ACTION_TO_QUEUE[queue_selector] = queue_name QUEUE_TO_PARSED_JOB_ID_CLS[queue_name] = parsed_job_id_cls + init_subclasses() + # check that each subclass that has QUEUE_SELECTORS can be used to determine the queue + for subclass in REQUEST_ID_SUBCLASSES: + subclass = cast(RequestId, subclass) + if subclass.LEGACY_FORMAT_PATTERNS and not subclass.QUEUE_SELECTORS: + raise ImproperlyConfigured( + f"Subclass {subclass.__name__} has LEGACY_FORMAT_PATTERNS - QUEUE_SELECTORS must be defined" + ) + + if subclass.QUEUE_SELECTORS: + for queue_selector in subclass.QUEUE_SELECTORS: + if not ACTION_TO_QUEUE.get(queue_selector): + raise ImproperlyConfigured( + f"Queue selector {queue_selector!r} for the class {subclass.__name__!r} is missed in the queue configuration" + ) + class RedisHandlerConfig(AppConfig): name = "cvat.apps.redis_handler" diff --git a/cvat/apps/redis_handler/rq.py b/cvat/apps/redis_handler/rq.py index 3024e8a76ef7..84c887a84f39 100644 --- a/cvat/apps/redis_handler/rq.py +++ b/cvat/apps/redis_handler/rq.py @@ -7,11 +7,14 @@ from uuid import UUID import attrs -from django.core.exceptions import ImproperlyConfigured from django.utils.html import escape from rq.job import Job as RQJob -from cvat.apps.redis_handler.apps import ACTION_TO_QUEUE, QUEUE_TO_PARSED_JOB_ID_CLS +from cvat.apps.redis_handler.apps import ( + ACTION_TO_QUEUE, + QUEUE_TO_PARSED_JOB_ID_CLS, + REQUEST_ID_SUBCLASSES, +) class IncorrectRequestIdError(ValueError): @@ -58,7 +61,7 @@ class RequestId: default=_default_from_class_attr("ACTION_DEFAULT_VALUE"), ) ACTION_ALLOWED_VALUES: ClassVar[tuple[str]] - QUEUE_SELECTORS: ClassVar[tuple] + QUEUE_SELECTORS: ClassVar[tuple] = () @action.validator def validate_action(self, attribute: attrs.Attribute, value: Any): @@ -128,16 +131,10 @@ def parse( ) -> tuple[RequestId, str]: actual_cls = cls - subclasses = set() queue: str | None = None dict_repr = {} fragments = {} - def init_subclasses(cur_cls: type[RequestId] = RequestId): - for subclass in cur_cls.__subclasses__(): - subclasses.add(subclass) - init_subclasses(subclass) - try: # try to parse ID as key=value pairs (newly introduced format) fragments = dict(urllib.parse.parse_qsl(request_id)) @@ -151,12 +148,7 @@ def init_subclasses(cur_cls: type[RequestId] = RequestId): match: re.Match | None = None - if cls is RequestId: - init_subclasses() - else: - subclasses = (cls,) - - for subclass in subclasses: + for subclass in REQUEST_ID_SUBCLASSES if cls is RequestId else (cls,): for pattern in subclass.LEGACY_FORMAT_PATTERNS: match = re.match(pattern, request_id) if match: @@ -169,14 +161,9 @@ def init_subclasses(cur_cls: type[RequestId] = RequestId): f"Unable to parse request ID: {escape(request_id)!r}" ) - queue = ACTION_TO_QUEUE.get( + queue = ACTION_TO_QUEUE[ actual_cls.QUEUE_SELECTORS[0] - ) # each selector match the same queue - if not queue: - raise ImproperlyConfigured( - "Job ID class must be set in the related queue config" - ) - + ] # each selector match the same queue fragments = match.groupdict() # "." was replaced with "@" in previous format if "format" in fragments: @@ -201,9 +188,6 @@ def init_subclasses(cur_cls: type[RequestId] = RequestId): result = actual_cls(**dict_repr) return (result, queue) - - except ImproperlyConfigured: - raise except Exception as ex: raise IncorrectRequestIdError from ex diff --git a/cvat/apps/redis_handler/serializers.py b/cvat/apps/redis_handler/serializers.py index 230da1f7c8df..f7ebd9b1fcf6 100644 --- a/cvat/apps/redis_handler/serializers.py +++ b/cvat/apps/redis_handler/serializers.py @@ -10,6 +10,7 @@ from uuid import UUID import rq.defaults as rq_defaults +from django.db.models import TextChoices from django.utils import timezone from drf_spectacular.utils import extend_schema_field from rest_framework import serializers @@ -23,10 +24,10 @@ from cvat.apps.engine.utils import parse_exception_message from cvat.apps.lambda_manager.rq import LambdaRQMeta from cvat.apps.redis_handler.rq import CustomRQJob, RequestId -from django.db.models import TextChoices slogger = ServerLogManager(__name__) + class RequestStatus(TextChoices): QUEUED = "queued" STARTED = "started" diff --git a/cvat/apps/redis_handler/views.py b/cvat/apps/redis_handler/views.py index 384ec0a3f181..2135a6f03586 100644 --- a/cvat/apps/redis_handler/views.py +++ b/cvat/apps/redis_handler/views.py @@ -23,12 +23,11 @@ NonModelSimpleFilter, ) from cvat.apps.engine.log import ServerLogManager -from cvat.apps.redis_handler.serializers import RequestStatus from cvat.apps.engine.rq import is_rq_job_owner from cvat.apps.engine.types import ExtendedRequest from cvat.apps.redis_handler.apps import ACTION_TO_QUEUE from cvat.apps.redis_handler.rq import CustomRQJob, RequestId -from cvat.apps.redis_handler.serializers import RequestSerializer +from cvat.apps.redis_handler.serializers import RequestSerializer, RequestStatus slogger = ServerLogManager(__name__) From 620b81bb0b9bf58f71e278223cd4e76dff38c25b Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Wed, 23 Apr 2025 11:42:02 +0200 Subject: [PATCH 083/103] Rename to SELECTOR_TO_QUEUE --- cvat/apps/redis_handler/apps.py | 6 +++--- cvat/apps/redis_handler/rq.py | 6 +++--- cvat/apps/redis_handler/views.py | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cvat/apps/redis_handler/apps.py b/cvat/apps/redis_handler/apps.py index 67131856aa31..f6edc97d9311 100644 --- a/cvat/apps/redis_handler/apps.py +++ b/cvat/apps/redis_handler/apps.py @@ -21,7 +21,7 @@ def __getitem__(self, key: str | tuple) -> str: return super().__getitem__(key) -ACTION_TO_QUEUE = LayeredKeyDict() +SELECTOR_TO_QUEUE = LayeredKeyDict() QUEUE_TO_PARSED_JOB_ID_CLS = {} REQUEST_ID_SUBCLASSES = set() @@ -47,7 +47,7 @@ def init_subclasses(cur_cls: type[RequestId] = RequestId): for queue_selector in parsed_job_id_cls.QUEUE_SELECTORS: if not isinstance(queue_selector, (tuple, str)): raise ImproperlyConfigured("Wrong queue selector, must be either tuple or str") - ACTION_TO_QUEUE[queue_selector] = queue_name + SELECTOR_TO_QUEUE[queue_selector] = queue_name QUEUE_TO_PARSED_JOB_ID_CLS[queue_name] = parsed_job_id_cls @@ -62,7 +62,7 @@ def init_subclasses(cur_cls: type[RequestId] = RequestId): if subclass.QUEUE_SELECTORS: for queue_selector in subclass.QUEUE_SELECTORS: - if not ACTION_TO_QUEUE.get(queue_selector): + if not SELECTOR_TO_QUEUE.get(queue_selector): raise ImproperlyConfigured( f"Queue selector {queue_selector!r} for the class {subclass.__name__!r} is missed in the queue configuration" ) diff --git a/cvat/apps/redis_handler/rq.py b/cvat/apps/redis_handler/rq.py index 84c887a84f39..11faaa9945f5 100644 --- a/cvat/apps/redis_handler/rq.py +++ b/cvat/apps/redis_handler/rq.py @@ -11,7 +11,7 @@ from rq.job import Job as RQJob from cvat.apps.redis_handler.apps import ( - ACTION_TO_QUEUE, + SELECTOR_TO_QUEUE, QUEUE_TO_PARSED_JOB_ID_CLS, REQUEST_ID_SUBCLASSES, ) @@ -161,7 +161,7 @@ def parse( f"Unable to parse request ID: {escape(request_id)!r}" ) - queue = ACTION_TO_QUEUE[ + queue = SELECTOR_TO_QUEUE[ actual_cls.QUEUE_SELECTORS[0] ] # each selector match the same queue fragments = match.groupdict() @@ -178,7 +178,7 @@ def parse( if not queue: # try to define queue dynamically based on action/target/subresource - queue = ACTION_TO_QUEUE[ + queue = SELECTOR_TO_QUEUE[ (dict_repr["action"], dict_repr["target"], dict_repr.get("subresource")) ] diff --git a/cvat/apps/redis_handler/views.py b/cvat/apps/redis_handler/views.py index 2135a6f03586..d73bab153db4 100644 --- a/cvat/apps/redis_handler/views.py +++ b/cvat/apps/redis_handler/views.py @@ -25,7 +25,7 @@ from cvat.apps.engine.log import ServerLogManager from cvat.apps.engine.rq import is_rq_job_owner from cvat.apps.engine.types import ExtendedRequest -from cvat.apps.redis_handler.apps import ACTION_TO_QUEUE +from cvat.apps.redis_handler.apps import SELECTOR_TO_QUEUE from cvat.apps.redis_handler.rq import CustomRQJob, RequestId from cvat.apps.redis_handler.serializers import RequestSerializer, RequestStatus @@ -107,7 +107,7 @@ def get_queryset(self): @property def queues(self) -> Iterable[DjangoRQ]: - return (django_rq.get_queue(queue_name) for queue_name in set(ACTION_TO_QUEUE.values())) + return (django_rq.get_queue(queue_name) for queue_name in set(SELECTOR_TO_QUEUE.values())) def _get_rq_jobs_from_queue(self, queue: DjangoRQ, user_id: int) -> list[RQJob]: job_ids = set( From e48dffc2e19514a44b7d4e5ff02dd6e3db5c58f6 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Wed, 23 Apr 2025 13:44:46 +0200 Subject: [PATCH 084/103] More REST API tests --- cvat/apps/engine/rq.py | 8 +- cvat/apps/quality_control/rq.py | 2 +- cvat/apps/redis_handler/rq.py | 2 +- tests/python/rest_api/test_requests.py | 121 +++++++++++++++++++++---- tests/python/rest_api/utils.py | 9 +- 5 files changed, 114 insertions(+), 28 deletions(-) diff --git a/cvat/apps/engine/rq.py b/cvat/apps/engine/rq.py index c96c187e2f6a..ce3a91ca82d8 100644 --- a/cvat/apps/engine/rq.py +++ b/cvat/apps/engine/rq.py @@ -348,7 +348,7 @@ class ExportRequestId( # will be deleted after several releases LEGACY_FORMAT_PATTERNS: ClassVar[tuple[str]] = ( - r"export:(?P(task|project))-(?P\d+)-(?P(backup))-by-(?P\d+)", + r"export:(?P(task|project))-(?P\d+)-(?Pbackup)-by-(?P\d+)", r"export:(?P(project|task|job))-(?P\d+)-(?P(annotations|dataset))" + r"-in-(?P[\w@]+)-format-by-(?P\d+)", ) @@ -365,9 +365,9 @@ class ImportRequestId( # will be deleted after several releases LEGACY_FORMAT_PATTERNS = ( - r"create:task-(?P\d+)", - r"import:(?P(task|project|job))-(?P\d+)-(?P(annotations|dataset))", - r"import:(?P(task|project))-(?P[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12})-(?P(backup))", + r"(?Pcreate):(?Ptask)-(?P\d+)", + r"(?Pimport):(?P(task|project|job))-(?P\d+)-(?P(annotations|dataset))", + r"(?Pimport):(?P(task|project))-(?P[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12})-(?Pbackup)", ) diff --git a/cvat/apps/quality_control/rq.py b/cvat/apps/quality_control/rq.py index 0212045764eb..9242db551205 100644 --- a/cvat/apps/quality_control/rq.py +++ b/cvat/apps/quality_control/rq.py @@ -23,5 +23,5 @@ class QualityRequestId(RequestIdWithSubresource): # will be deleted after several releases LEGACY_FORMAT_PATTERNS = ( - r"quality-check-(?P(task))-(?P\d+)-user-(\d+)", # user id is excluded in the new format + r"quality-check-(?Ptask)-(?P\d+)-user-(\d+)", # user id is excluded in the new format ) diff --git a/cvat/apps/redis_handler/rq.py b/cvat/apps/redis_handler/rq.py index 11faaa9945f5..ff393fb0bb0f 100644 --- a/cvat/apps/redis_handler/rq.py +++ b/cvat/apps/redis_handler/rq.py @@ -11,9 +11,9 @@ from rq.job import Job as RQJob from cvat.apps.redis_handler.apps import ( - SELECTOR_TO_QUEUE, QUEUE_TO_PARSED_JOB_ID_CLS, REQUEST_ID_SUBCLASSES, + SELECTOR_TO_QUEUE, ) diff --git a/tests/python/rest_api/test_requests.py b/tests/python/rest_api/test_requests.py index f319d266ca1a..2f8cf9da1be7 100644 --- a/tests/python/rest_api/test_requests.py +++ b/tests/python/rest_api/test_requests.py @@ -5,7 +5,7 @@ import io import json from http import HTTPStatus -from urllib.parse import urlparse +from urllib.parse import parse_qsl, urlparse import pytest from cvat_sdk.api_client import ApiClient, models @@ -25,7 +25,9 @@ export_project_dataset, export_task_backup, export_task_dataset, + import_project_backup, import_task_backup, + wait_background_request, ) @@ -366,7 +368,7 @@ def test_non_owner_cannot_retrieve_request(self, find_users, projects): @pytest.mark.parametrize("target_type", ("project", "task", "job")) @pytest.mark.parametrize("save_images", (True, False)) @pytest.mark.parametrize("format_name", ("CVAT for images 1.1",)) - def test_can_retrieve_export_dataset_requests_using_legacy_ids( + def test_can_retrieve_dataset_import_export_requests_using_legacy_ids( # todo: self, target_type: str, save_images: bool, @@ -423,46 +425,129 @@ def build_legacy_request_id( assert bg_request.id == request_id @pytest.mark.parametrize("target_type", ("project", "task")) - def test_can_retrieve_export_backup_requests_using_legacy_ids( + def test_can_retrieve_backup_import_export_requests_using_legacy_ids( self, target_type: str, projects, tasks, ): - def build_legacy_request_id( + def build_legacy_id_for_export_request( target_type: str, target_id: int, user_id: int, ): return f"export:{target_type}-{target_id}-backup-by-{user_id}" + def build_legacy_id_for_import_request( + target_type: str, + uuid_: str, + ): + return f"import:{target_type}-{uuid_}-backup" + if target_type == "project": - export_func = export_project_backup + export_func, import_func = export_project_backup, import_project_backup target = next(iter(projects)) else: assert target_type == "task" - export_func = export_task_backup + export_func, import_func = export_task_backup, import_task_backup target = next(iter(tasks)) owner = target["owner"] - request_id = export_func( + # check export requests + backup_file = io.BytesIO( + export_func( + owner["username"], + id=target["id"], + ) + ) + backup_file.name = "file.zip" + + legacy_request_id = build_legacy_id_for_export_request( + target_type, target["id"], owner["id"] + ) + + with make_api_client(owner["username"]) as api_client: + paginated_list, _ = api_client.requests_api.list(action="export", target=target_type) + assert len(paginated_list.results) == 1 + request_id = paginated_list.results[0].id + + bg_request = self._test_get_request_200( + api_client, legacy_request_id, validate_rq_id=False + ) + assert bg_request.id == request_id + + # check import requests + result_id = import_func( owner["username"], - id=target["id"], - download_result=False, - return_request_id=True, + file_content=backup_file, + ).id + legacy_request_id = build_legacy_id_for_import_request( + target_type, dict(parse_qsl(result_id))["id"] + ) + + with make_api_client(owner["username"]) as api_client: + bg_request = self._test_get_request_200( + api_client, legacy_request_id, validate_rq_id=False + ) + assert bg_request.id == result_id + + def test_can_retrieve_task_creation_requests_using_legacy_ids(self, admin_user: str): + task_id = create_task( + admin_user, + spec={"name": f"Test task", "labels": [{"name": "car"}]}, + data={ + "image_quality": 75, + "client_files": generate_image_files(2), + "segment_size": 1, + }, + )[0] + + legacy_request_id = f"create:task-{task_id}" + + with make_api_client(admin_user) as api_client: + paginated_list, _ = api_client.requests_api.list(action="create", target="task") + assert len(paginated_list.results) == 1 + request_id = paginated_list.results[0].id + + bg_request = self._test_get_request_200( + api_client, legacy_request_id, validate_rq_id=False + ) + assert bg_request.id == request_id + + def test_can_retrieve_quality_calculation_requests_using_legacy_ids(self, jobs, tasks): + gt_job = next( + j + for j in jobs + if ( + j["type"] == "ground_truth" + and j["stage"] == "acceptance" + and j["state"] == "completed" + ) ) + task_id = gt_job["task_id"] + owner = tasks[task_id]["owner"] - legacy_request_id = build_legacy_request_id(target_type, target["id"], owner["id"]) + legacy_request_id = f"quality-check-task-{task_id}-user-{owner['id']}" - with make_api_client(owner["username"]) as owner_client: - self._test_get_request_200(owner_client, request_id) + with make_api_client(owner["username"]) as api_client: + # initiate quality report calculation + (_, response) = api_client.quality_api.create_report( + quality_report_create_request=models.QualityReportCreateRequest(task_id=task_id), + _parse_response=False, + ) + assert response.status == HTTPStatus.ACCEPTED + request_id = json.loads(response.data)["rq_id"] + + # get background request details using common request API bg_request = self._test_get_request_200( - owner_client, legacy_request_id, validate_rq_id=False + api_client, legacy_request_id, validate_rq_id=False ) assert bg_request.id == request_id - # TODO: - # - quality - # - task creation - # - import + # get quality report by legacy request ID using the deprecated API endpoint + wait_background_request(api_client, request_id) + api_client.quality_api.create_report( + quality_report_create_request=models.QualityReportCreateRequest(task_id=task_id), + rq_id=request_id, + ) diff --git a/tests/python/rest_api/utils.py b/tests/python/rest_api/utils.py index 3d4d560f4a82..75d2f8c0654e 100644 --- a/tests/python/rest_api/utils.py +++ b/tests/python/rest_api/utils.py @@ -204,7 +204,7 @@ def import_resource( expect_forbidden: bool = False, wait_result: bool = True, **kwargs, -) -> None: +) -> Optional[models.Request]: # initialize background process and ensure that the first request returns 403 code if request should be forbidden (_, response) = endpoint.call_with_http_info( **kwargs, @@ -240,6 +240,7 @@ def import_resource( f"Import process was not finished within allowed time ({interval * max_retries}, sec). " + f"Last status was: {background_request.status.value}" ) + return background_request def import_backup( @@ -248,19 +249,19 @@ def import_backup( max_retries: int = 50, interval: float = 0.1, **kwargs, -) -> None: +): endpoint = api.create_backup_endpoint return import_resource(endpoint, max_retries=max_retries, interval=interval, **kwargs) -def import_project_backup(username: str, file_content: BytesIO, **kwargs) -> None: +def import_project_backup(username: str, file_content: BytesIO, **kwargs): with make_api_client(username) as api_client: return import_backup( api_client.projects_api, project_file_request={"project_file": file_content}, **kwargs ) -def import_task_backup(username: str, file_content: BytesIO, **kwargs) -> None: +def import_task_backup(username: str, file_content: BytesIO, **kwargs): with make_api_client(username) as api_client: return import_backup( api_client.tasks_api, task_file_request={"task_file": file_content}, **kwargs From bd69c9607c92331e38e2d6782044c945afe80361 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Wed, 23 Apr 2025 18:02:02 +0200 Subject: [PATCH 085/103] Update cvat/apps/engine/rq.py Co-authored-by: Roman Donchenko --- cvat/apps/engine/rq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cvat/apps/engine/rq.py b/cvat/apps/engine/rq.py index ce3a91ca82d8..d070a0e51355 100644 --- a/cvat/apps/engine/rq.py +++ b/cvat/apps/engine/rq.py @@ -50,7 +50,7 @@ class RequestField: STATUS = "status" PROGRESS = "progress" - # import specific fields + # import-specific fields TASK_PROGRESS = "task_progress" # export specific fields From 779ef18bed205d3641dbe43497b269bdf340b8dd Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Wed, 23 Apr 2025 18:02:17 +0200 Subject: [PATCH 086/103] Update cvat/apps/engine/views.py Co-authored-by: Roman Donchenko --- cvat/apps/engine/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index e85831297eb1..cdc03d250441 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -465,7 +465,7 @@ def export_backup(self, request: ExtendedRequest, pk: int): serializer_class=None, parser_classes=_UPLOAD_PARSER_CLASSES) def import_backup(self, request: ExtendedRequest): - if request.query_params.get("rq_id"): + if "rq_id" in request.query_params: return get_410_response_when_checking_process_status("import") return self.upload_data(request) From 48dd5774f08e4511870df27a7f62ae76351dd7d1 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Wed, 23 Apr 2025 21:13:57 +0200 Subject: [PATCH 087/103] Rename LocationConfig.storage_id -> cloud_storage_id --- cvat/apps/engine/background.py | 6 +++--- cvat/apps/engine/location.py | 12 ++++++------ cvat/apps/redis_handler/background.py | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index fead28291be3..cdee5eaa3dba 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -156,7 +156,7 @@ def finalize_request(self): handle_dataset_export( self.db_instance, format_name=self.export_args.format, - cloud_storage_id=self.export_args.location_config.storage_id, + cloud_storage_id=self.export_args.location_config.cloud_storage_id, save_images=self.export_args.save_images, ) @@ -318,7 +318,7 @@ def validate_request(self): raise ValidationError("The filename was not specified") def _handle_cloud_storage_file_upload(self): - storage_id = self.import_args.location_config.storage_id + storage_id = self.import_args.location_config.cloud_storage_id db_storage = get_cloud_storage_for_import_or_export( storage_id=storage_id, request=self.request, @@ -455,7 +455,7 @@ def finalize_request(self): handle_dataset_import( self.db_instance, format_name=self.import_args.format, - cloud_storage_id=self.import_args.location_config.storage_id, + cloud_storage_id=self.import_args.location_config.cloud_storage_id, ) diff --git a/cvat/apps/engine/location.py b/cvat/apps/engine/location.py index 5edf0665a4d8..b26d598fae6a 100644 --- a/cvat/apps/engine/location.py +++ b/cvat/apps/engine/location.py @@ -20,14 +20,14 @@ def __str__(self): @attrs.frozen(kw_only=True) class LocationConfig: - is_default: bool = attrs.field(validator=attrs.validators.instance_of(bool), default=True) + is_default: bool = attrs.field(validator=attrs.validators.instance_of(bool)) location: Location = attrs.field(converter=Location) - storage_id: int | None = attrs.field( + cloud_storage_id: int | None = attrs.field( converter=lambda x: x if x is None else int(x), default=None ) def __attrs_post_init__(self): - if self.location == Location.CLOUD_STORAGE and not self.storage_id: + if self.location == Location.CLOUD_STORAGE and not self.cloud_storage_id: raise ValueError( "Trying to use undefined cloud storage (cloud_storage_id was not provided)" ) @@ -54,11 +54,11 @@ def get_location_configuration( else getattr(db_instance.segment.task, field_name) ) return ( - LocationConfig(location=Location.LOCAL) + LocationConfig(is_default=True, location=Location.LOCAL) if storage is None - else LocationConfig(location=storage.location, storage_id=storage.cloud_storage_id) + else LocationConfig(is_default=True, location=storage.location, cloud_storage_id=storage.cloud_storage_id) ) return LocationConfig( - is_default=False, location=location, storage_id=query_params.get("cloud_storage_id") + is_default=False, location=location, cloud_storage_id=query_params.get("cloud_storage_id") ) diff --git a/cvat/apps/redis_handler/background.py b/cvat/apps/redis_handler/background.py index b719b3d90138..780856503a08 100644 --- a/cvat/apps/redis_handler/background.py +++ b/cvat/apps/redis_handler/background.py @@ -336,7 +336,7 @@ def init_callback_with_params(self): self._init_callback_with_params() if self.export_args.location_config.location == Location.CLOUD_STORAGE: - storage_id = self.export_args.location_config.storage_id + storage_id = self.export_args.location_config.cloud_storage_id db_storage = get_cloud_storage_for_import_or_export( storage_id=storage_id, request=self.request, From db76a560c995cef0a31de25ffc40e1a6cd995af2 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Wed, 23 Apr 2025 21:14:41 +0200 Subject: [PATCH 088/103] [unit tests] pass query_params to post method --- cvat/apps/engine/tests/utils.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/cvat/apps/engine/tests/utils.py b/cvat/apps/engine/tests/utils.py index 3333712f1f8f..0c22feadf1fa 100644 --- a/cvat/apps/engine/tests/utils.py +++ b/cvat/apps/engine/tests/utils.py @@ -200,14 +200,11 @@ def _import( query_params: dict[str, Any] | None = None, expected_4xx_status_code: int | None = None, ): - if query_params: - assert "?" not in api_path - api_path += "?" + urlencode(query_params) - response = self._post_request( api_path, user, data={through_field: file_content}, format="multipart", + query_params=query_params, ) self.assertEqual(response.status_code, expected_4xx_status_code or status.HTTP_202_ACCEPTED) From 087d53cd1e0c62749c7874d3f4852fb5cb8ea1e8 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Wed, 23 Apr 2025 21:56:44 +0200 Subject: [PATCH 089/103] Add assert --- cvat/apps/redis_handler/rq.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cvat/apps/redis_handler/rq.py b/cvat/apps/redis_handler/rq.py index ff393fb0bb0f..15668a70fbdb 100644 --- a/cvat/apps/redis_handler/rq.py +++ b/cvat/apps/redis_handler/rq.py @@ -182,9 +182,12 @@ def parse( (dict_repr["action"], dict_repr["target"], dict_repr.get("subresource")) ] - if queue in QUEUE_TO_PARSED_JOB_ID_CLS: - actual_cls = QUEUE_TO_PARSED_JOB_ID_CLS[queue] + # queue that could be determined using SELECTOR_TO_QUEUE + # must also be included into QUEUE_TO_PARSED_JOB_ID_CLS + assert queue in QUEUE_TO_PARSED_JOB_ID_CLS + actual_cls = QUEUE_TO_PARSED_JOB_ID_CLS[queue] + assert issubclass(actual_cls, cls) result = actual_cls(**dict_repr) return (result, queue) From a21950924fadb44a07b57f68807dca2954389cd4 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Wed, 23 Apr 2025 22:02:13 +0200 Subject: [PATCH 090/103] Distinguish drf ValidationError from django one --- cvat/apps/consensus/merging_manager.py | 4 ++-- cvat/apps/engine/background.py | 13 +++++++------ cvat/apps/events/views.py | 4 ++-- cvat/apps/quality_control/quality_reports.py | 8 ++++---- cvat/apps/redis_handler/background.py | 8 ++++---- 5 files changed, 19 insertions(+), 18 deletions(-) diff --git a/cvat/apps/consensus/merging_manager.py b/cvat/apps/consensus/merging_manager.py index bc681992e242..98a304ef4943 100644 --- a/cvat/apps/consensus/merging_manager.py +++ b/cvat/apps/consensus/merging_manager.py @@ -8,7 +8,7 @@ import datumaro as dm from django.conf import settings from django.db import transaction -from rest_framework.serializers import ValidationError +from rest_framework import serializers from cvat.apps.consensus.intersect_merge import IntersectMerge from cvat.apps.consensus.models import ConsensusSettings @@ -186,7 +186,7 @@ def validate_request(self): try: _TaskMerger(task=task).check_merging_available(parent_job_id=job.pk if job else None) except MergingNotAvailable as ex: - raise ValidationError(str(ex)) from ex + raise serializers.ValidationError(str(ex)) from ex def _split_to_task_and_job(self) -> tuple[Task, Job | None]: if isinstance(self.db_instance, Job): diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index cdee5eaa3dba..d4c4bd5d6107 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -12,7 +12,8 @@ from attrs.converters import to_bool from django.conf import settings from django.db.models import Model -from rest_framework.exceptions import MethodNotAllowed, ValidationError +from rest_framework.exceptions import MethodNotAllowed +from rest_framework import serializers from rest_framework.reverse import reverse from rq.job import Job as RQJob @@ -100,7 +101,7 @@ def validate_request(self): self.export_args.format ) if format_desc is None: - raise ValidationError("Unknown format specified for the request") + raise serializers.ValidationError("Unknown format specified for the request") elif not format_desc.ENABLED: raise MethodNotAllowed(self.request.method, detail="Format is disabled") @@ -189,7 +190,7 @@ class BackupExporter(AbstractExporter): # super().validate_request() # if isinstance(self.db_instance, Task) and self.db_instance.data is None: - # raise ValidationError("Backup of a task without data is not allowed") + # raise serializers.ValidationError("Backup of a task without data is not allowed") # elif isinstance(self.db_instance, Project) and Data.objects.filter(): # pass @@ -294,7 +295,7 @@ def init_request_args(self): field_name=StorageType.SOURCE, ) except ValueError as ex: - raise ValidationError(str(ex)) from ex + raise serializers.ValidationError(str(ex)) from ex if filename := self.request.query_params.get("filename"): file_path = ( @@ -315,7 +316,7 @@ def validate_request(self): self.import_args.location_config.location == Location.CLOUD_STORAGE and not self.import_args.file_path ): - raise ValidationError("The filename was not specified") + raise serializers.ValidationError("The filename was not specified") def _handle_cloud_storage_file_upload(self): storage_id = self.import_args.location_config.cloud_storage_id @@ -435,7 +436,7 @@ def validate_request(self): self.import_args.format ) if format_desc is None: - raise ValidationError(f"Unknown input format {self.import_args.format!r}") + raise serializers.ValidationError(f"Unknown input format {self.import_args.format!r}") elif not format_desc.ENABLED: raise MethodNotAllowed(self.request.method, detail="Format is disabled") diff --git a/cvat/apps/events/views.py b/cvat/apps/events/views.py index 618779c6792e..d0a77fb480e2 100644 --- a/cvat/apps/events/views.py +++ b/cvat/apps/events/views.py @@ -6,7 +6,7 @@ from drf_spectacular.utils import OpenApiParameter, OpenApiResponse, extend_schema from rest_framework import status, viewsets from rest_framework.decorators import action -from rest_framework.exceptions import ValidationError +from rest_framework import serializers from rest_framework.renderers import JSONRenderer from rest_framework.response import Response @@ -151,7 +151,7 @@ def list(self, request: ExtendedRequest): request.query_params.get("cloud_storage_id") or request.query_params.get("location") == Location.CLOUD_STORAGE ): - raise ValidationError( + raise serializers.ValidationError( "This endpoint does not support exporting events to cloud storage" ) diff --git a/cvat/apps/quality_control/quality_reports.py b/cvat/apps/quality_control/quality_reports.py index 64c90ac0a3a7..c047e7bc660e 100644 --- a/cvat/apps/quality_control/quality_reports.py +++ b/cvat/apps/quality_control/quality_reports.py @@ -22,7 +22,7 @@ from datumaro.util import dump_json, parse_json from django.conf import settings from django.db import transaction -from rest_framework.serializers import ValidationError +from rest_framework import serializers from scipy.optimize import linear_sum_assignment from cvat.apps.dataset_manager.bindings import ( @@ -2275,7 +2275,7 @@ def get_job_by_id(self, id_, /): id_, expected_queue=self.QUEUE_NAME, try_legacy_format=True ).render() except ValueError: - raise ValidationError("Provided request ID is invalid") + raise serializers.ValidationError("Provided request ID is invalid") return super().get_job_by_id(id_) @@ -2289,13 +2289,13 @@ def validate_request(self): super().validate_request() if self.db_instance.dimension != DimensionType.DIM_2D: - raise ValidationError("Quality reports are only supported in 2d tasks") + raise serializers.ValidationError("Quality reports are only supported in 2d tasks") gt_job = self.db_instance.gt_job if gt_job is None or not ( gt_job.stage == StageChoice.ACCEPTANCE and gt_job.state == StatusChoice.COMPLETED ): - raise ValidationError( + raise serializers.ValidationError( "Quality reports require a Ground Truth job in the task " f"at the {StageChoice.ACCEPTANCE} stage " f"and in the {StatusChoice.COMPLETED} state" diff --git a/cvat/apps/redis_handler/background.py b/cvat/apps/redis_handler/background.py index 780856503a08..9fb3797c6bdf 100644 --- a/cvat/apps/redis_handler/background.py +++ b/cvat/apps/redis_handler/background.py @@ -18,7 +18,7 @@ from django_rq.queues import DjangoRQ, DjangoScheduler from rest_framework import status from rest_framework.response import Response -from rest_framework.serializers import ValidationError +from rest_framework import serializers from rq.job import Job as RQJob from rq.job import JobStatus as RQJobStatus @@ -316,7 +316,7 @@ def init_request_args(self) -> None: field_name=StorageType.TARGET, ) except ValueError as ex: - raise ValidationError(str(ex)) from ex + raise serializers.ValidationError(str(ex)) from ex self.export_args = AbstractExporter.ExportArgs( location_config=location_config, filename=self.request.query_params.get("filename") @@ -362,11 +362,11 @@ def get_downloader(self): request_id = self.request.query_params.get(self.REQUEST_ID_KEY) if not request_id: - raise ValidationError("Missing request id in the query parameters") + raise serializers.ValidationError("Missing request id in the query parameters") try: self.validate_request_id(request_id) except ValueError: - raise ValidationError("Invalid export request id") + raise serializers.ValidationError("Invalid export request id") return self.Downloader(request=self.request, queue=self.get_queue(), request_id=request_id) From 6746b00c610e741dd39dba49871b1dbb16cde13f Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Wed, 23 Apr 2025 22:27:49 +0200 Subject: [PATCH 091/103] Change link --- cvat/apps/engine/view_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cvat/apps/engine/view_utils.py b/cvat/apps/engine/view_utils.py index 2904e8595d06..4ebe944d167a 100644 --- a/cvat/apps/engine/view_utils.py +++ b/cvat/apps/engine/view_utils.py @@ -124,7 +124,7 @@ def __init__(self, deprecation_date: datetime, ): headers = headers or {} - # https://greenbytes.de/tech/webdav/draft-ietf-httpapi-deprecation-header-latest.html#the-deprecation-http-response-header-field + # https://www.rfc-editor.org/rfc/rfc9745 deprecation_timestamp = int(deprecation_date.timestamp()) headers["Deprecation"] = f"@{deprecation_timestamp}" From f40dcdc3fd473875103c8b4201f71acd2703d8ae Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Wed, 23 Apr 2025 22:29:59 +0200 Subject: [PATCH 092/103] isort && black --- cvat/apps/engine/background.py | 2 +- cvat/apps/engine/location.py | 6 +++++- cvat/apps/events/views.py | 3 +-- cvat/apps/redis_handler/background.py | 3 +-- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index d4c4bd5d6107..5686aacfb5f4 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -12,8 +12,8 @@ from attrs.converters import to_bool from django.conf import settings from django.db.models import Model -from rest_framework.exceptions import MethodNotAllowed from rest_framework import serializers +from rest_framework.exceptions import MethodNotAllowed from rest_framework.reverse import reverse from rq.job import Job as RQJob diff --git a/cvat/apps/engine/location.py b/cvat/apps/engine/location.py index b26d598fae6a..ffc7ae39ea5f 100644 --- a/cvat/apps/engine/location.py +++ b/cvat/apps/engine/location.py @@ -56,7 +56,11 @@ def get_location_configuration( return ( LocationConfig(is_default=True, location=Location.LOCAL) if storage is None - else LocationConfig(is_default=True, location=storage.location, cloud_storage_id=storage.cloud_storage_id) + else LocationConfig( + is_default=True, + location=storage.location, + cloud_storage_id=storage.cloud_storage_id, + ) ) return LocationConfig( diff --git a/cvat/apps/events/views.py b/cvat/apps/events/views.py index d0a77fb480e2..22de4cfb76eb 100644 --- a/cvat/apps/events/views.py +++ b/cvat/apps/events/views.py @@ -4,9 +4,8 @@ from drf_spectacular.types import OpenApiTypes from drf_spectacular.utils import OpenApiParameter, OpenApiResponse, extend_schema -from rest_framework import status, viewsets +from rest_framework import serializers, status, viewsets from rest_framework.decorators import action -from rest_framework import serializers from rest_framework.renderers import JSONRenderer from rest_framework.response import Response diff --git a/cvat/apps/redis_handler/background.py b/cvat/apps/redis_handler/background.py index 9fb3797c6bdf..29a717263cf3 100644 --- a/cvat/apps/redis_handler/background.py +++ b/cvat/apps/redis_handler/background.py @@ -16,9 +16,8 @@ from django.http.response import HttpResponseBadRequest from django.utils import timezone from django_rq.queues import DjangoRQ, DjangoScheduler -from rest_framework import status +from rest_framework import serializers, status from rest_framework.response import Response -from rest_framework import serializers from rq.job import Job as RQJob from rq.job import JobStatus as RQJobStatus From 06ee6379ccb81b17caba50516d28a457605f97c3 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Wed, 23 Apr 2025 22:41:28 +0200 Subject: [PATCH 093/103] t --- cvat/apps/redis_handler/rq.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cvat/apps/redis_handler/rq.py b/cvat/apps/redis_handler/rq.py index 15668a70fbdb..15b82d50b4d1 100644 --- a/cvat/apps/redis_handler/rq.py +++ b/cvat/apps/redis_handler/rq.py @@ -191,6 +191,8 @@ def parse( result = actual_cls(**dict_repr) return (result, queue) + except AssertionError: + raise except Exception as ex: raise IncorrectRequestIdError from ex From f89a73b7eb661c04420479e7fefe0f5697b0b5b6 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Thu, 24 Apr 2025 11:48:53 +0200 Subject: [PATCH 094/103] remove comment --- cvat/apps/engine/task.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index 020ef1229a51..ee13c8fc8365 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -57,9 +57,6 @@ slogger = ServerLogManager(__name__) - -############################# Internal implementation for server API - JobFileMapping = list[list[str]] class SegmentParams(NamedTuple): From 38bac3f4960014df20229e2b4363296f8ea089fd Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Thu, 24 Apr 2025 11:58:16 +0200 Subject: [PATCH 095/103] Update test_cannot_export_backup_for_task_without_data --- cvat/apps/engine/background.py | 11 +++++------ tests/python/rest_api/test_requests.py | 2 +- tests/python/rest_api/test_tasks.py | 4 ++-- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index 5686aacfb5f4..8d9e49057089 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -186,13 +186,12 @@ def get_result_endpoint_url(self) -> str: class BackupExporter(AbstractExporter): SUPPORTED_TARGETS = {RequestTarget.PROJECT, RequestTarget.TASK} - # def validate_request(self): - # super().validate_request() + def validate_request(self): + super().validate_request() - # if isinstance(self.db_instance, Task) and self.db_instance.data is None: - # raise serializers.ValidationError("Backup of a task without data is not allowed") - # elif isinstance(self.db_instance, Project) and Data.objects.filter(): - # pass + # do not add this check when a project is backed up, as empty tasks are skipped + if isinstance(self.db_instance, Task) and not self.db_instance.data: + raise serializers.ValidationError("Backup of a task without data is not allowed") def validate_request_id(self, request_id, /) -> None: # FUTURE-TODO: optimize, request_id is parsed 2 times (first one when checking permissions) diff --git a/tests/python/rest_api/test_requests.py b/tests/python/rest_api/test_requests.py index 2f8cf9da1be7..65fca7c64710 100644 --- a/tests/python/rest_api/test_requests.py +++ b/tests/python/rest_api/test_requests.py @@ -495,7 +495,7 @@ def build_legacy_id_for_import_request( def test_can_retrieve_task_creation_requests_using_legacy_ids(self, admin_user: str): task_id = create_task( admin_user, - spec={"name": f"Test task", "labels": [{"name": "car"}]}, + spec={"name": "Test task", "labels": [{"name": "car"}]}, data={ "image_quality": 75, "client_files": generate_image_files(2), diff --git a/tests/python/rest_api/test_tasks.py b/tests/python/rest_api/test_tasks.py index 3f2339774589..0658dd605bef 100644 --- a/tests/python/rest_api/test_tasks.py +++ b/tests/python/rest_api/test_tasks.py @@ -4116,8 +4116,8 @@ def test_cannot_export_backup_for_task_without_data(self, tasks): with pytest.raises(ApiException) as exc: self._test_can_export_backup(task_id) - assert exc.status == HTTPStatus.BAD_REQUEST - assert "Backup of a task without data is not allowed" == exc.body.encode() + assert exc.value.status == HTTPStatus.BAD_REQUEST + assert "Backup of a task without data is not allowed" == exc.value.body.encode() @pytest.mark.with_external_services def test_can_export_and_import_backup_task_with_cloud_storage(self, tasks): From 30fc0dc07c200a644150d270d734d889830e65b6 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Thu, 24 Apr 2025 12:28:21 +0200 Subject: [PATCH 096/103] Add FileId class with attribute declaration --- cvat/apps/dataset_manager/util.py | 38 +++++++++++++++++++------------ 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/cvat/apps/dataset_manager/util.py b/cvat/apps/dataset_manager/util.py index 34c1e5c2b4c2..4e938ce70f5e 100644 --- a/cvat/apps/dataset_manager/util.py +++ b/cvat/apps/dataset_manager/util.py @@ -15,7 +15,7 @@ from datetime import timedelta from enum import Enum from threading import Lock -from typing import Any +from typing import Any, Protocol from uuid import UUID import attrs @@ -109,6 +109,9 @@ def get_export_cache_lock( class OperationType(str, Enum): EXPORT = "export" + def __str__(self): + return self.value + class ExportFileType(str, Enum): ANNOTATIONS = "annotations" @@ -120,6 +123,9 @@ class ExportFileType(str, Enum): def values(cls) -> list[str]: return list(map(lambda x: x.value, cls)) + def __str__(self): + return self.value + class InstanceType(str, Enum): PROJECT = "project" TASK = "task" @@ -129,30 +135,34 @@ class InstanceType(str, Enum): def values(cls) -> list[str]: return list(map(lambda x: x.value, cls)) + def __str__(self): + return self.value -@attrs.define(kw_only=True) -class SimpleFileId: - value: str = attrs.field(converter=str) +class FileId(Protocol): + value: str -@attrs.define(kw_only=True) -class ConstructedFileId(SimpleFileId): - value: str = attrs.field(init=False) +@attrs.frozen(kw_only=True) +class SimpleFileId(FileId): + value: str = attrs.field() - instance_type: InstanceType = attrs.field(converter=InstanceType, on_setattr=attrs.setters.frozen) - instance_id: int = attrs.field(converter=int, on_setattr=attrs.setters.frozen) - instance_timestamp: float = attrs.field(converter=float, on_setattr=attrs.setters.frozen) +@attrs.frozen(kw_only=True) +class ConstructedFileId(FileId): + instance_type: InstanceType = attrs.field(converter=InstanceType) + instance_id: int = attrs.field(converter=int) + instance_timestamp: float = attrs.field(converter=float) - def __attrs_post_init__(self): - self.value = "-".join(map(str, [self.instance_type, self.instance_id, self.instance_timestamp])) + @property + def value(self): + return "-".join(map(str, [self.instance_type, self.instance_id, self.instance_timestamp])) -@attrs.frozen +@attrs.frozen(kw_only=True) class ParsedExportFilename: file_type: ExportFileType = attrs.field(converter=ExportFileType) file_ext: str file_id: SimpleFileId = attrs.field(validator=attrs.validators.instance_of(SimpleFileId)) -@attrs.frozen +@attrs.frozen(kw_only=True) class ParsedExportFilenameWithConstructedId(ParsedExportFilename): file_id: ConstructedFileId = attrs.field(validator=attrs.validators.instance_of(ConstructedFileId)) From 719dda2737b3ab9d2bf1cc7c88fdebd2401a8848 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Thu, 24 Apr 2025 13:22:47 +0200 Subject: [PATCH 097/103] t --- tests/python/rest_api/test_tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/rest_api/test_tasks.py b/tests/python/rest_api/test_tasks.py index 0658dd605bef..337e3769c6d8 100644 --- a/tests/python/rest_api/test_tasks.py +++ b/tests/python/rest_api/test_tasks.py @@ -4117,7 +4117,7 @@ def test_cannot_export_backup_for_task_without_data(self, tasks): self._test_can_export_backup(task_id) assert exc.value.status == HTTPStatus.BAD_REQUEST - assert "Backup of a task without data is not allowed" == exc.value.body.encode() + assert "Backup of a task without data is not allowed" in exc.value.body.decode() @pytest.mark.with_external_services def test_can_export_and_import_backup_task_with_cloud_storage(self, tasks): From f89f7cf70ce6e98fc4295715da293aae446f5945 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Mon, 28 Apr 2025 12:33:43 +0200 Subject: [PATCH 098/103] [REST API tests] add missing test && reduce code duplication --- tests/python/rest_api/test_requests.py | 164 +++++++++++++++++-------- tests/python/rest_api/utils.py | 27 ++++ 2 files changed, 139 insertions(+), 52 deletions(-) diff --git a/tests/python/rest_api/test_requests.py b/tests/python/rest_api/test_requests.py index 65fca7c64710..00a6a382edf9 100644 --- a/tests/python/rest_api/test_requests.py +++ b/tests/python/rest_api/test_requests.py @@ -5,6 +5,7 @@ import io import json from http import HTTPStatus +from typing import Optional from urllib.parse import parse_qsl, urlparse import pytest @@ -25,7 +26,10 @@ export_project_dataset, export_task_backup, export_task_dataset, + import_job_annotations, import_project_backup, + import_project_dataset, + import_task_annotations, import_task_backup, wait_background_request, ) @@ -365,19 +369,44 @@ def test_non_owner_cannot_retrieve_request(self, find_users, projects): with make_api_client(malefactor["username"]) as malefactor_client: self._test_get_request_403(malefactor_client, request_id) + def _test_get_request_using_legacy_id( + self, + legacy_request_id: str, + username: str, + *, + action: str, + target_type: str, + subresource: Optional[str] = None, + ): + with make_api_client(username) as api_client: + bg_requests, _ = api_client.requests_api.list( + target=target_type, + action=action, + **({"subresource": subresource} if subresource else {}), + ) + assert len(bg_requests.results) == 1 + request_id = bg_requests.results[0].id + bg_request = self._test_get_request_200( + api_client, legacy_request_id, validate_rq_id=False + ) + assert bg_request.id == request_id + @pytest.mark.parametrize("target_type", ("project", "task", "job")) @pytest.mark.parametrize("save_images", (True, False)) - @pytest.mark.parametrize("format_name", ("CVAT for images 1.1",)) - def test_can_retrieve_dataset_import_export_requests_using_legacy_ids( # todo: + @pytest.mark.parametrize("export_format", ("CVAT for images 1.1",)) + @pytest.mark.parametrize("import_format", ("CVAT 1.1",)) + def test_can_retrieve_dataset_import_export_requests_using_legacy_ids( self, target_type: str, save_images: bool, - format_name: str, + export_format: str, + import_format: str, projects, tasks, jobs, ): - def build_legacy_request_id( + def build_legacy_id_for_export_request( + *, target_type: str, target_id: int, subresource: str, @@ -386,43 +415,80 @@ def build_legacy_request_id( ): return f"export:{target_type}-{target_id}-{subresource}-in-{format_name.replace(' ', '_').replace('.', '@')}-format-by-{user_id}" + def build_legacy_id_for_import_request( + *, + target_type: str, + target_id: int, + subresource: str, + ): + return f"import:{target_type}-{target_id}-{subresource}" + if target_type == "project": - export_func = export_project_dataset + export_func, import_func = export_project_dataset, import_project_dataset target = next(iter(projects)) owner = target["owner"] elif target_type == "task": - export_func = export_task_dataset + export_func, import_func = export_task_dataset, import_task_annotations target = next(iter(tasks)) owner = target["owner"] else: assert target_type == "job" - export_func = export_job_dataset + export_func, import_func = export_job_dataset, import_job_annotations target = next(iter(jobs)) owner = tasks[target["task_id"]]["owner"] - request_id = export_func( + target_id = target["id"] + subresource = "dataset" if save_images else "annotations" + file_content = io.BytesIO( + export_func( + owner["username"], + save_images=save_images, + format=export_format, + id=target_id, + ) + ) + file_content.name = "file.zip" + + legacy_request_id = build_legacy_id_for_export_request( + target_type=target_type, + target_id=target["id"], + subresource=subresource, + format_name=export_format, + user_id=owner["id"], + ) + + self._test_get_request_using_legacy_id( + legacy_request_id, owner["username"], - save_images=save_images, - format=format_name, - id=target["id"], - download_result=False, - return_request_id=True, + action="export", + target_type=target_type, + subresource=subresource, ) - legacy_request_id = build_legacy_request_id( - target_type, - target["id"], - "dataset" if save_images else "annotations", - format_name, - owner["id"], + # check import requests + if not save_images and target_type == "project" or save_images and target_type != "project": + # skip: + # importing annotations into a project + # importing datasets into a task or job + return + + import_func( + owner["username"], + file_content=file_content, + id=target_id, + format=import_format, ) - with make_api_client(owner["username"]) as owner_client: - self._test_get_request_200(owner_client, request_id) - bg_request = self._test_get_request_200( - owner_client, legacy_request_id, validate_rq_id=False - ) - assert bg_request.id == request_id + legacy_request_id = build_legacy_id_for_import_request( + target_type=target_type, target_id=target_id, subresource=subresource + ) + self._test_get_request_using_legacy_id( + legacy_request_id, + owner["username"], + action="import", + target_type=target_type, + subresource=subresource, + ) @pytest.mark.parametrize("target_type", ("project", "task")) def test_can_retrieve_backup_import_export_requests_using_legacy_ids( @@ -432,6 +498,7 @@ def test_can_retrieve_backup_import_export_requests_using_legacy_ids( tasks, ): def build_legacy_id_for_export_request( + *, target_type: str, target_id: int, user_id: int, @@ -439,6 +506,7 @@ def build_legacy_id_for_export_request( return f"export:{target_type}-{target_id}-backup-by-{user_id}" def build_legacy_id_for_import_request( + *, target_type: str, uuid_: str, ): @@ -464,18 +532,15 @@ def build_legacy_id_for_import_request( backup_file.name = "file.zip" legacy_request_id = build_legacy_id_for_export_request( - target_type, target["id"], owner["id"] + target_type=target_type, target_id=target["id"], user_id=owner["id"] + ) + self._test_get_request_using_legacy_id( + legacy_request_id, + owner["username"], + action="export", + target_type=target_type, + subresource="backup", ) - - with make_api_client(owner["username"]) as api_client: - paginated_list, _ = api_client.requests_api.list(action="export", target=target_type) - assert len(paginated_list.results) == 1 - request_id = paginated_list.results[0].id - - bg_request = self._test_get_request_200( - api_client, legacy_request_id, validate_rq_id=False - ) - assert bg_request.id == request_id # check import requests result_id = import_func( @@ -483,14 +548,16 @@ def build_legacy_id_for_import_request( file_content=backup_file, ).id legacy_request_id = build_legacy_id_for_import_request( - target_type, dict(parse_qsl(result_id))["id"] + target_type=target_type, uuid_=dict(parse_qsl(result_id))["id"] ) - with make_api_client(owner["username"]) as api_client: - bg_request = self._test_get_request_200( - api_client, legacy_request_id, validate_rq_id=False - ) - assert bg_request.id == result_id + self._test_get_request_using_legacy_id( + legacy_request_id, + owner["username"], + action="import", + target_type=target_type, + subresource="backup", + ) def test_can_retrieve_task_creation_requests_using_legacy_ids(self, admin_user: str): task_id = create_task( @@ -504,16 +571,9 @@ def test_can_retrieve_task_creation_requests_using_legacy_ids(self, admin_user: )[0] legacy_request_id = f"create:task-{task_id}" - - with make_api_client(admin_user) as api_client: - paginated_list, _ = api_client.requests_api.list(action="create", target="task") - assert len(paginated_list.results) == 1 - request_id = paginated_list.results[0].id - - bg_request = self._test_get_request_200( - api_client, legacy_request_id, validate_rq_id=False - ) - assert bg_request.id == request_id + self._test_get_request_using_legacy_id( + legacy_request_id, admin_user, action="create", target_type="task" + ) def test_can_retrieve_quality_calculation_requests_using_legacy_ids(self, jobs, tasks): gt_job = next( diff --git a/tests/python/rest_api/utils.py b/tests/python/rest_api/utils.py index 75d2f8c0654e..f5ff47a7a07b 100644 --- a/tests/python/rest_api/utils.py +++ b/tests/python/rest_api/utils.py @@ -268,6 +268,33 @@ def import_task_backup(username: str, file_content: BytesIO, **kwargs): ) +def import_project_dataset(username: str, file_content: BytesIO, **kwargs): + with make_api_client(username) as api_client: + return import_resource( + api_client.projects_api.create_dataset_endpoint, + dataset_file_request={"dataset_file": file_content}, + **kwargs, + ) + + +def import_task_annotations(username: str, file_content: BytesIO, **kwargs): + with make_api_client(username) as api_client: + return import_resource( + api_client.tasks_api.create_annotations_endpoint, + annotation_file_request={"annotation_file": file_content}, + **kwargs, + ) + + +def import_job_annotations(username: str, file_content: BytesIO, **kwargs): + with make_api_client(username) as api_client: + return import_resource( + api_client.jobs_api.create_annotations_endpoint, + annotation_file_request={"annotation_file": file_content}, + **kwargs, + ) + + FieldPath = Sequence[Union[str, Callable]] From 6c93d3570d3b33f09aa9174aed5619dfbe8534ee Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 29 Apr 2025 12:32:50 +0200 Subject: [PATCH 099/103] Use deprecate_response func --- cvat/apps/engine/view_utils.py | 21 ++++----------------- cvat/apps/engine/views.py | 2 +- cvat/apps/events/export.py | 26 +++++++++++++++----------- cvat/apps/quality_control/views.py | 28 +++++++++++++++++----------- 4 files changed, 37 insertions(+), 40 deletions(-) diff --git a/cvat/apps/engine/view_utils.py b/cvat/apps/engine/view_utils.py index 4ebe944d167a..2503c0f0b226 100644 --- a/cvat/apps/engine/view_utils.py +++ b/cvat/apps/engine/view_utils.py @@ -112,20 +112,7 @@ def get_410_response_when_checking_process_status(process_type: str, /) -> HttpR where rq_id is obtained from the response of the initializing request. """)) -class DeprecatedResponse(Response): - def __init__(self, - data=None, - status=None, - template_name=None, - headers=None, - exception=False, - content_type=None, - *, - deprecation_date: datetime, - ): - headers = headers or {} - # https://www.rfc-editor.org/rfc/rfc9745 - deprecation_timestamp = int(deprecation_date.timestamp()) - headers["Deprecation"] = f"@{deprecation_timestamp}" - - super().__init__(data, status, template_name, headers, exception, content_type) +def deprecate_response(response: Response, *, deprecation_date: datetime) -> None: + # https://www.rfc-editor.org/rfc/rfc9745 + deprecation_timestamp = int(deprecation_date.timestamp()) + response.headers["Deprecation"] = f"@{deprecation_timestamp}" diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index cdc03d250441..3e5f011b579f 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -814,7 +814,7 @@ def get_queryset(self): serializer_class=None, parser_classes=_UPLOAD_PARSER_CLASSES) def import_backup(self, request: ExtendedRequest): - if request.query_params.get("rq_id"): + if "rq_id" in request.query_params: return get_410_response_when_checking_process_status("import") return self.upload_data(request) diff --git a/cvat/apps/events/export.py b/cvat/apps/events/export.py index 2d98793e9193..9ac4d383476d 100644 --- a/cvat/apps/events/export.py +++ b/cvat/apps/events/export.py @@ -12,6 +12,7 @@ from django.conf import settings from django.utils import timezone from rest_framework import serializers, status +from rest_framework.response import Response from rest_framework.reverse import reverse from cvat.apps.dataset_manager.util import ExportCacheManager @@ -21,7 +22,7 @@ from cvat.apps.engine.rq import ExportRequestId, RQMetaWithFailureInfo from cvat.apps.engine.types import ExtendedRequest from cvat.apps.engine.utils import sendfile -from cvat.apps.engine.view_utils import DeprecatedResponse +from cvat.apps.engine.view_utils import deprecate_response from cvat.apps.events.permissions import EventsPermission from cvat.apps.redis_handler.background import AbstractExporter @@ -202,24 +203,27 @@ def export(request: ExtendedRequest): return sendfile(request, file_path, attachment=True, attachment_filename=filename) else: if os.path.exists(file_path): - return DeprecatedResponse( - status=status.HTTP_201_CREATED, deprecation_date=deprecation_date - ) + response = Response(status=status.HTTP_201_CREATED) + deprecate_response(response, deprecation_date=deprecation_date) + return response + elif rq_job.is_failed: rq_job_meta = RQMetaWithFailureInfo.for_job(rq_job) exc_info = rq_job_meta.formatted_exception or str(rq_job.exc_info) rq_job.delete() - return DeprecatedResponse( + response = Response( exc_info, status=status.HTTP_500_INTERNAL_SERVER_ERROR, - deprecation_date=deprecation_date, ) + deprecate_response(response, deprecation_date=deprecation_date) + return response else: - return DeprecatedResponse( + response = Response( data=response_data, status=status.HTTP_202_ACCEPTED, - deprecation_date=deprecation_date, ) + deprecate_response(response, deprecation_date=deprecation_date) + return response manager.init_request_args() # request validation is missed here since exporting to a cloud_storage is disabled @@ -227,6 +231,6 @@ def export(request: ExtendedRequest): manager.init_callback_with_params() manager.setup_new_job(queue, request_id) - return DeprecatedResponse( - data=response_data, status=status.HTTP_202_ACCEPTED, deprecation_date=deprecation_date - ) + response = Response(data=response_data, status=status.HTTP_202_ACCEPTED) + deprecate_response(response, deprecation_date=deprecation_date) + return response diff --git a/cvat/apps/quality_control/views.py b/cvat/apps/quality_control/views.py index 0d71940464a2..8f60bf5ee088 100644 --- a/cvat/apps/quality_control/views.py +++ b/cvat/apps/quality_control/views.py @@ -18,6 +18,7 @@ from rest_framework import mixins, status, viewsets from rest_framework.decorators import action from rest_framework.exceptions import NotFound, ValidationError +from rest_framework.response import Response from rq.job import JobStatus as RqJobStatus from cvat.apps.engine.mixins import PartialUpdateModelMixin @@ -25,7 +26,7 @@ from cvat.apps.engine.rq import BaseRQMeta from cvat.apps.engine.types import ExtendedRequest from cvat.apps.engine.utils import get_server_url -from cvat.apps.engine.view_utils import DeprecatedResponse +from cvat.apps.engine.view_utils import deprecate_response from cvat.apps.quality_control import quality_reports as qc from cvat.apps.quality_control.models import ( AnnotationConflict, @@ -304,22 +305,24 @@ def create(self, request: ExtendedRequest, *args, **kwargs): .allow ): # We should not provide job existence information to unauthorized users - return DeprecatedResponse( + response = Response( "Unknown request id", status=status.HTTP_404_NOT_FOUND, - deprecation_date=deprecation_date, ) + deprecate_response(response, deprecation_date=deprecation_date) + return response rq_job_status = rq_job.get_status(refresh=False) if rq_job_status == RqJobStatus.FAILED: message = str(rq_job.exc_info) rq_job.delete() - return DeprecatedResponse( + response = Response( message, status=status.HTTP_500_INTERNAL_SERVER_ERROR, - deprecation_date=deprecation_date, ) + deprecate_response(response, deprecation_date=deprecation_date) + return response elif rq_job_status in ( RqJobStatus.QUEUED, @@ -327,32 +330,35 @@ def create(self, request: ExtendedRequest, *args, **kwargs): RqJobStatus.SCHEDULED, RqJobStatus.DEFERRED, ): - return DeprecatedResponse( + response = Response( serializer.data, status=status.HTTP_202_ACCEPTED, - deprecation_date=deprecation_date, ) + deprecate_response(response, deprecation_date=deprecation_date) + return response elif rq_job_status == RqJobStatus.FINISHED: return_value = rq_job.return_value() rq_job.delete() if not return_value: - return DeprecatedResponse( + response = Response( "No report has been computed", status=status.HTTP_500_INTERNAL_SERVER_ERROR, - deprecation_date=deprecation_date, ) + deprecate_response(response, deprecation_date=deprecation_date) + return response report = self.get_queryset().get(pk=return_value) report_serializer = QualityReportSerializer( instance=report, context={"request": request} ) - return DeprecatedResponse( + response = Response( data=report_serializer.data, status=status.HTTP_201_CREATED, headers=self.get_success_headers(report_serializer.data), - deprecation_date=deprecation_date, ) + deprecate_response(response, deprecation_date=deprecation_date) + return response raise AssertionError(f"Unexpected rq job '{rq_id}' status '{rq_job_status}'") From 6d9e5c221a2a3678658676b5148ff6c78a84409f Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 29 Apr 2025 13:00:42 +0200 Subject: [PATCH 100/103] empty commit: check notifications From 8bc0b7dca22c1537832a00221f3a23c1a1974d22 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 29 Apr 2025 16:19:25 +0200 Subject: [PATCH 101/103] Remove return_request_id arg --- tests/python/rest_api/test_requests.py | 2 - tests/python/rest_api/utils.py | 54 +++++++++++--------------- 2 files changed, 23 insertions(+), 33 deletions(-) diff --git a/tests/python/rest_api/test_requests.py b/tests/python/rest_api/test_requests.py index 00a6a382edf9..797307e3c60e 100644 --- a/tests/python/rest_api/test_requests.py +++ b/tests/python/rest_api/test_requests.py @@ -323,7 +323,6 @@ def test_owner_can_retrieve_request(self, format_name: str, save_images: bool, p save_images=save_images, id=project["id"], download_result=False, - return_request_id=True, format=format_name, ) @@ -364,7 +363,6 @@ def test_non_owner_cannot_retrieve_request(self, find_users, projects): save_images=True, id=project["id"], download_result=False, - return_request_id=True, ) with make_api_client(malefactor["username"]) as malefactor_client: self._test_get_request_403(malefactor_client, request_id) diff --git a/tests/python/rest_api/utils.py b/tests/python/rest_api/utils.py index f5ff47a7a07b..822dd97dfc52 100644 --- a/tests/python/rest_api/utils.py +++ b/tests/python/rest_api/utils.py @@ -72,26 +72,19 @@ def wait_and_download_v2( *, max_retries: int = 50, interval: float = 0.1, - download_result: bool = True, -) -> Optional[bytes]: +) -> bytes: background_request, _ = wait_background_request( api_client, rq_id, max_retries=max_retries, interval=interval ) - if not download_result: - return None - - # return downloaded file in case of local downloading or None otherwise - if download_result and background_request.result_url: - response = requests.get( - background_request.result_url, - auth=(api_client.configuration.username, api_client.configuration.password), - ) - assert response.status_code == HTTPStatus.OK, f"Status: {response.status_code}" - - return response.content - - return None + # return downloaded file in case of local downloading + assert background_request.result_url + response = requests.get( + background_request.result_url, + auth=(api_client.configuration.username, api_client.configuration.password), + ) + assert response.status_code == HTTPStatus.OK, f"Status: {response.status_code}" + return response.content def export_v2( @@ -102,9 +95,8 @@ def export_v2( expect_forbidden: bool = False, wait_result: bool = True, download_result: bool = True, - return_request_id: bool = False, **kwargs, -) -> Optional[Union[bytes, str]]: +) -> Union[bytes, str]: """Export datasets|annotations|backups using the second version of export API Args: @@ -116,27 +108,27 @@ def export_v2( Returns: bytes: The content of the file if downloaded locally. - None: If `wait_result` or `download_result` were False or the file is downloaded to cloud storage. - str: If `download_result` was False and `return_request_id` was True. + str: If `wait_result` or `download_result` were False. """ # initialize background process and ensure that the first request returns 403 code if request should be forbidden rq_id = initialize_export(endpoint, expect_forbidden=expect_forbidden, **kwargs) if not wait_result: - return None + return rq_id # check status of background process - result = wait_and_download_v2( - endpoint.api_client, - rq_id, - max_retries=max_retries, - interval=interval, - download_result=download_result, - ) - if not download_result and return_request_id: - return rq_id + if download_result: + return wait_and_download_v2( + endpoint.api_client, + rq_id, + max_retries=max_retries, + interval=interval, + ) - return result + background_request, _ = wait_background_request( + endpoint.api_client, rq_id, max_retries=max_retries, interval=interval + ) + return background_request.id def export_dataset( From 1b9a9cd72a5088f597ee02c3a18c88cf50ee1e55 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 29 Apr 2025 16:30:28 +0200 Subject: [PATCH 102/103] Remove ParsedExportFilenameWithConstructedId class --- cvat/apps/dataset_manager/util.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/cvat/apps/dataset_manager/util.py b/cvat/apps/dataset_manager/util.py index 4e938ce70f5e..49d55ba885b6 100644 --- a/cvat/apps/dataset_manager/util.py +++ b/cvat/apps/dataset_manager/util.py @@ -160,11 +160,7 @@ def value(self): class ParsedExportFilename: file_type: ExportFileType = attrs.field(converter=ExportFileType) file_ext: str - file_id: SimpleFileId = attrs.field(validator=attrs.validators.instance_of(SimpleFileId)) - -@attrs.frozen(kw_only=True) -class ParsedExportFilenameWithConstructedId(ParsedExportFilename): - file_id: ConstructedFileId = attrs.field(validator=attrs.validators.instance_of(ConstructedFileId)) + file_id: FileId class TmpDirManager: @@ -303,7 +299,7 @@ def make_file_path( @classmethod def parse_filename( cls, filename: str, - ) -> ParsedExportFilename | ParsedExportFilenameWithConstructedId: + ) -> ParsedExportFilename: basename, file_ext = osp.splitext(filename) file_ext = file_ext.strip(".").lower() @@ -340,7 +336,7 @@ def parse_filename( # no need to use it after filename parsing, so just drop it. instance_timestamp, _ = unparsed.split(cls.SPLITTER, maxsplit=1) - parsed_file_name = ParsedExportFilenameWithConstructedId( + parsed_file_name = ParsedExportFilename( file_type=fragments.pop("file_type"), file_id=ConstructedFileId( instance_timestamp=instance_timestamp, From 678ec1b18e456f9ef17aed031f969b954267f5de Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 29 Apr 2025 16:53:48 +0200 Subject: [PATCH 103/103] update changelog --- ...1_maria_reusable_requests_functionality.md | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 changelog.d/20250429_163951_maria_reusable_requests_functionality.md diff --git a/changelog.d/20250429_163951_maria_reusable_requests_functionality.md b/changelog.d/20250429_163951_maria_reusable_requests_functionality.md new file mode 100644 index 000000000000..d136d0f8928d --- /dev/null +++ b/changelog.d/20250429_163951_maria_reusable_requests_functionality.md @@ -0,0 +1,36 @@ +### Removed + +- The `POST /api/consensus/merges?rq_id=rq_id` endpoint no longer supports + process status checking + () +- The `GET /api/projects/id/dataset?action=import_status` endpoint no longer + supports process status checking + () +- The `POST /api/projects/backup?rq_id=rq_id` endpoint no longer supports + process status checking + () +- The `POST /api/tasks/backup?rq_id=rq_id` endpoint no longer supports + process status checking + () +- The `PUT /api/tasks/id/annotations?rq_id=rq_id&format=format` endpoint + no longer supports process status checking + () +- The `PUT /api/jobs/id/annotations?rq_id=rq_id&format=format` endpoint + no longer supports process status checking + () + +### Deprecated + +- The `GET /api/events` endpoint is deprecated in favor of the `POST /api/events/export`, + `GET /api/requests/rq_id`, and `GET result_url`, where `result_url` is obtained from + background request details + () +- The `POST /api/quality/reports/rq_id=rq_id` is deprecated in favor of + `GET /api/requests/rq_id` + () + +### Changed +- Cache files with exported events now are stored in `/data/cache/export/` instead of + `/data/tmp/`. These files are periodically deleted by the + `cleanup_export_cache_directory` cron job + ()