supervisely-6.73.357-py3-none-any.whl → supervisely-6.73.358-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. supervisely/_utils.py +12 -0
  2. supervisely/api/annotation_api.py +3 -0
  3. supervisely/api/api.py +2 -2
  4. supervisely/api/app_api.py +27 -2
  5. supervisely/api/entity_annotation/tag_api.py +0 -1
  6. supervisely/api/nn/__init__.py +0 -0
  7. supervisely/api/nn/deploy_api.py +821 -0
  8. supervisely/api/nn/neural_network_api.py +248 -0
  9. supervisely/api/task_api.py +26 -467
  10. supervisely/app/fastapi/subapp.py +1 -0
  11. supervisely/nn/__init__.py +2 -1
  12. supervisely/nn/artifacts/artifacts.py +5 -5
  13. supervisely/nn/benchmark/object_detection/metric_provider.py +3 -0
  14. supervisely/nn/experiments.py +28 -5
  15. supervisely/nn/inference/cache.py +178 -114
  16. supervisely/nn/inference/gui/gui.py +18 -35
  17. supervisely/nn/inference/gui/serving_gui.py +3 -1
  18. supervisely/nn/inference/inference.py +1421 -1265
  19. supervisely/nn/inference/inference_request.py +412 -0
  20. supervisely/nn/inference/object_detection_3d/object_detection_3d.py +31 -24
  21. supervisely/nn/inference/session.py +2 -2
  22. supervisely/nn/inference/tracking/base_tracking.py +45 -79
  23. supervisely/nn/inference/tracking/bbox_tracking.py +220 -155
  24. supervisely/nn/inference/tracking/mask_tracking.py +274 -250
  25. supervisely/nn/inference/tracking/tracker_interface.py +23 -0
  26. supervisely/nn/inference/uploader.py +164 -0
  27. supervisely/nn/model/__init__.py +0 -0
  28. supervisely/nn/model/model_api.py +259 -0
  29. supervisely/nn/model/prediction.py +311 -0
  30. supervisely/nn/model/prediction_session.py +632 -0
  31. supervisely/nn/tracking/__init__.py +1 -0
  32. supervisely/nn/tracking/boxmot.py +114 -0
  33. supervisely/nn/tracking/tracking.py +24 -0
  34. supervisely/nn/training/train_app.py +61 -19
  35. supervisely/nn/utils.py +43 -3
  36. supervisely/task/progress.py +12 -2
  37. supervisely/video/video.py +107 -1
  38. {supervisely-6.73.357.dist-info → supervisely-6.73.358.dist-info}/METADATA +2 -1
  39. {supervisely-6.73.357.dist-info → supervisely-6.73.358.dist-info}/RECORD +43 -32
  40. supervisely/api/neural_network_api.py +0 -202
  41. {supervisely-6.73.357.dist-info → supervisely-6.73.358.dist-info}/LICENSE +0 -0
  42. {supervisely-6.73.357.dist-info → supervisely-6.73.358.dist-info}/WHEEL +0 -0
  43. {supervisely-6.73.357.dist-info → supervisely-6.73.358.dist-info}/entry_points.txt +0 -0
  44. {supervisely-6.73.357.dist-info → supervisely-6.73.358.dist-info}/top_level.txt +0 -0
supervisely/nn/inference/inference.py  +1421 -1265

@@ -1,21 +1,22 @@
+from __future__ import annotations
+
 import argparse
 import asyncio
 import inspect
 import json
 import os
 import re
+import shutil
 import subprocess
-import sys
+import tempfile
 import threading
 import time
-import uuid
 from collections import OrderedDict, defaultdict
 from concurrent.futures import ThreadPoolExecutor
 from dataclasses import asdict, dataclass
 from functools import partial, wraps
 from pathlib import Path
-from queue import Queue
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
 from urllib.request import urlopen
 
 import numpy as np
@@ -25,6 +26,7 @@ import yaml
 from fastapi import Form, HTTPException, Request, Response, UploadFile, status
 from fastapi.responses import JSONResponse
 from requests.structures import CaseInsensitiveDict
+from tqdm import tqdm
 
 import supervisely.app.development as sly_app_development
 import supervisely.imaging.image as sly_image
@@ -32,7 +34,7 @@ import supervisely.io.env as sly_env
 import supervisely.io.fs as sly_fs
 import supervisely.io.json as sly_json
 import supervisely.nn.inference.gui as GUI
-from supervisely import DatasetInfo, ProjectInfo, VideoAnnotation, batched
+from supervisely import DatasetInfo, batched
 from supervisely._utils import (
     add_callback,
     get_filename_from_headers,
@@ -49,8 +51,7 @@ from supervisely.annotation.tag_meta import TagMeta, TagValueType
 from supervisely.api.api import Api, ApiField
 from supervisely.api.app_api import WorkflowMeta, WorkflowSettings
 from supervisely.api.image_api import ImageInfo
-from supervisely.app.content import StateJson, get_data_dir
-from supervisely.app.exceptions import DialogWindowError
+from supervisely.app.content import get_data_dir
 from supervisely.app.fastapi.subapp import (
     Application,
     call_on_autostart,
@@ -68,7 +69,13 @@ from supervisely.geometry.any_geometry import AnyGeometry
 from supervisely.imaging.color import get_predefined_colors
 from supervisely.io.fs import list_files
 from supervisely.nn.inference.cache import InferenceImageCache
-from supervisely.nn.prediction_dto import Prediction
+from supervisely.nn.inference.inference_request import (
+    InferenceRequest,
+    InferenceRequestsManager,
+)
+from supervisely.nn.inference.uploader import Uploader
+from supervisely.nn.model.model_api import Prediction
+from supervisely.nn.prediction_dto import Prediction as PredictionDTO
 from supervisely.nn.utils import (
     CheckpointInfo,
     DeployInfo,
@@ -76,13 +83,15 @@ from supervisely.nn.utils import (
     ModelSource,
     RuntimeType,
     _get_model_name,
+    get_gpu_usage,
+    get_ram_usage,
 )
 from supervisely.project import ProjectType
 from supervisely.project.download import download_to_cache, read_from_cached_project
 from supervisely.project.project_meta import ProjectMeta
 from supervisely.sly_logger import logger
 from supervisely.task.progress import Progress
-from supervisely.video.video import ALLOWED_VIDEO_EXTENSIONS
+from supervisely.video.video import ALLOWED_VIDEO_EXTENSIONS, VideoFrameReader
 
 try:
     from typing import Literal
@@ -283,6 +292,8 @@ class Inference:
             log_progress=True,
         )
 
+        self.inference_requests_manager = InferenceRequestsManager(executor=self._executor)
+
     def get_batch_size(self):
         if self.max_batch_size is not None:
             return min(self.DEFAULT_BATCH_SIZE, self.max_batch_size)
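Note: the InferenceRequestsManager created above takes over the ad-hoc self._inference_requests bookkeeping that the removed code further down relied on; synchronous entry points now hand a worker function to the manager and read results back from the finished request. A minimal sketch of that pattern, taken from the _inference hunk later in this diff (the image ids 123 and 456 are placeholders):

    settings = self._get_inference_settings({})
    results = self.inference_requests_manager.run(
        self._inference_image_ids,  # worker that fills an InferenceRequest
        self.api,
        {"batch_ids": [123, 456], "settings": settings},
    )
    anns = [Annotation.from_json(r["annotation"], self.model_meta) for r in results]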
@@ -595,10 +606,55 @@ class Inference:
     def _checkpoints_cache_dir(self):
         return os.path.join(os.path.expanduser("~"), ".cache", "supervisely", "checkpoints")
 
+    def _build_deploy_params_from_api(self, model_name: str, deploy_params: dict = None) -> dict:
+        if deploy_params is None:
+            deploy_params = {}
+        selected_model = None
+        for model in self.pretrained_models:
+            if model["meta"]["model_name"].lower() == model_name.lower():
+                selected_model = model
+                break
+        if selected_model is None:
+            raise ValueError(f"Model {model_name} not found in models.json of serving app")
+        deploy_params["model_files"] = selected_model["meta"]["model_files"]
+        deploy_params["model_info"] = selected_model
+        return deploy_params
+
+    def _build_legacy_deploy_params_from_api(self, model_name: str) -> dict:
+        selected_model = None
+        if hasattr(self, "pretrained_models_table"):
+            selected_model = self.pretrained_models_table.get_by_model_name(model_name)
+        if selected_model is None:
+            # @TODO: Improve error message
+            raise ValueError("This app doesn't support new deploy api")
+
+        self.pretrained_models_table.set_by_model_name(model_name)
+        deploy_params = self.pretrained_models_table.get_selected_model_params()
+        return deploy_params
+
+    # @TODO: method name should be better?
+    def _set_common_deploy_params(self, deploy_params: dict) -> dict:
+        load_model_params = inspect.signature(self.load_model).parameters
+        has_runtime_param = "runtime" in load_model_params
+
+        if has_runtime_param:
+            if deploy_params.get("runtime", None) is None:
+                deploy_params["runtime"] = RuntimeType.PYTORCH
+        if deploy_params.get("device", None) is None:
+            deploy_params["device"] = "cuda:0" if get_gpu_count() > 0 else "cpu"
+        return deploy_params
+
     def _download_model_files(self, deploy_params: dict, log_progress: bool = True) -> dict:
-        if deploy_params["runtime"] != RuntimeType.PYTORCH:
-            export = deploy_params["model_info"].get("export", {})
-            if export is not None:
+        if deploy_params["model_source"] == ModelSource.PRETRAINED:
+            headless = self.gui is None
+            return self._download_pretrained_model(
+                deploy_params["model_files"], log_progress, headless
+            )
+        elif deploy_params["model_source"] == ModelSource.CUSTOM:
+            if deploy_params["runtime"] != RuntimeType.PYTORCH:
+                export = deploy_params["model_info"].get("export", {})
+                if export is None:
+                    export = {}
                 export_model = export.get(deploy_params["runtime"], None)
                 if export_model is not None:
                     if sly_fs.get_file_name(export_model) == sly_fs.get_file_name(
@@ -608,13 +664,11 @@ class Inference:
                         deploy_params["model_info"]["artifacts_dir"] + export_model
                     )
                     logger.info(f"Found model checkpoint for '{deploy_params['runtime']}'")
-
-        if deploy_params["model_source"] == ModelSource.PRETRAINED:
-            return self._download_pretrained_model(deploy_params["model_files"], log_progress)
-        elif deploy_params["model_source"] == ModelSource.CUSTOM:
             return self._download_custom_model(deploy_params["model_files"], log_progress)
 
-    def _download_pretrained_model(self, model_files: dict, log_progress: bool = True):
+    def _download_pretrained_model(
+        self, model_files: dict, log_progress: bool = True, headless: bool = False
+    ):
         """
         Downloads the pretrained model data.
         """
@@ -642,26 +696,39 @@ class Inference:
                     continue
 
                 if log_progress:
-                    with self.gui.download_progress(
-                        message=f"Downloading: '{file_name}'",
-                        total=file_size,
-                        unit="bytes",
-                        unit_scale=True,
-                    ) as download_pbar:
-                        self.gui.download_progress.show()
-                        sly_fs.download(
-                            url=file_url,
-                            save_path=file_path,
-                            progress=download_pbar.update,
-                        )
+                    if not headless:
+                        with self.gui.download_progress(
+                            message=f"Downloading: '{file_name}'",
+                            total=file_size,
+                            unit="bytes",
+                            unit_scale=True,
+                        ) as download_pbar:
+                            self.gui.download_progress.show()
+                            sly_fs.download(
+                                url=file_url,
+                                save_path=file_path,
+                                progress=download_pbar.update,
+                            )
+                    else:
+                        with tqdm(
+                            total=file_size,
+                            unit="bytes",
+                            unit_scale=True,
+                        ) as download_pbar:
+                            logger.info(f"Downloading: '{file_name}'")
+                            sly_fs.download(
+                                url=file_url, save_path=file_path, progress=download_pbar.update
+                            )
                 else:
+                    logger.info(f"Downloading: '{file_name}'")
                     sly_fs.download(url=file_url, save_path=file_path)
                 local_model_files[file] = file_path
             else:
                 local_model_files[file] = file_url
 
         if log_progress:
-            self.gui.download_progress.hide()
+            if self.gui is not None:
+                self.gui.download_progress.hide()
         return local_model_files
 
     def _download_custom_model(self, model_files: dict, log_progress: bool = True):
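Note: the three helpers added above (_build_deploy_params_from_api, _build_legacy_deploy_params_from_api, _set_common_deploy_params) let a serving app resolve deploy parameters from a bare model name before calling _download_model_files. A rough sketch of how they compose; the model name is a placeholder and setting model_source by hand is an assumption, since this diff does not show the caller:

    deploy_params = self._build_deploy_params_from_api("RT-DETRv2-S")   # placeholder name from models.json
    deploy_params["model_source"] = ModelSource.PRETRAINED              # assumed to be set by the caller
    deploy_params = self._set_common_deploy_params(deploy_params)       # fills runtime/device defaults
    model_files = self._download_model_files(deploy_params)             # local paths (or original URLs) per file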
@@ -732,7 +799,7 @@ class Inference:
             self.gui.show_deployed_model_info(self)
 
     def load_custom_checkpoint(
-        self, model_files: dict, model_meta: dict, device: str = "cuda", **kwargs
+        self, model_files: dict, model_meta: dict, device: Optional[str] = None, **kwargs
     ):
         """
         Loads local custom model checkpoint.
@@ -886,7 +953,8 @@ class Inference:
         classes = None
         try:
             classes = self.get_classes()
-            num_classes = len(classes)
+            if classes is not None:
+                num_classes = len(classes)
         except NotImplementedError:
             logger.warn(f"get_classes() function not implemented for {type(self)} object.")
         except AttributeError:
@@ -1002,13 +1070,13 @@ class Inference:
             self._model_meta = self._model_meta.add_tag_meta(tag_meta)
         return tag_meta
 
-    def _create_label(self, dto: Prediction) -> Label:
+    def _create_label(self, dto: PredictionDTO) -> Label:
         raise NotImplementedError("Have to be implemented in child class")
 
     def _predictions_to_annotation(
         self,
         image_path: Union[str, np.ndarray],
-        predictions: List[Prediction],
+        predictions: List[PredictionDTO],
         classes_whitelist: Optional[List[str]] = None,
     ) -> Annotation:
         labels = []
@@ -1067,6 +1135,15 @@ class Inference:
             logger.error(f"Error in {func.__name__} function: {e}", exc_info=True)
             raise e
 
+    def api_from_request(self, request) -> Api:
+        """
+        Get API from request. If not found, use self.api.
+        """
+        api = request.state.api
+        if api is None:
+            api = self.api
+        return api
+
     def _inference_auto(
         self,
         source: List[Union[str, np.ndarray]],
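Note: api_from_request is a small convenience for custom endpoints. A minimal sketch, assuming server is the FastAPI instance behind this serving app and that middleware may have attached a per-request Api to request.state; the route itself is illustrative, not part of this diff:

    from fastapi import Request

    @server.post("/custom_endpoint")  # illustrative route
    def custom_endpoint(request: Request):
        api = self.api_from_request(request)  # per-request Api when present, otherwise self.api
        return {"server_address": api.server_address}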
@@ -1117,10 +1194,12 @@ class Inference:
         settings = self._get_inference_settings({})
 
         if isinstance(source[0], int):
-            ann_jsons = self._inference_batch_ids(
-                self.api, {"batch_ids": source, "settings": settings}
+            results = self.inference_requests_manager.run(
+                self._inference_image_ids, self.api, {"batch_ids": source, "settings": settings}
             )
-            anns = [Annotation.from_json(ann_json, self.model_meta) for ann_json in ann_jsons]
+            anns = [
+                Annotation.from_json(result["annotation"], self.model_meta) for result in results
+            ]
         else:
             anns, _ = self._inference_auto(source, settings)
         if not input_is_list:
@@ -1240,17 +1319,17 @@ class Inference:
         return anns, benchmark
 
     # pylint: disable=method-hidden
-    def predict(self, image_path: str, settings: Dict[str, Any]) -> List[Prediction]:
+    def predict(self, image_path: str, settings: Dict[str, Any]) -> List[PredictionDTO]:
         raise NotImplementedError("Have to be implemented in child class")
 
-    def predict_raw(self, image_path: str, settings: Dict[str, Any]) -> List[Prediction]:
+    def predict_raw(self, image_path: str, settings: Dict[str, Any]) -> List[PredictionDTO]:
         raise NotImplementedError(
             "Have to be implemented in child class If sliding_window_mode is 'advanced'."
         )
 
     def predict_batch(
         self, images_np: List[np.ndarray], settings: Dict[str, Any]
-    ) -> List[List[Prediction]]:
+    ) -> List[List[PredictionDTO]]:
         """Predict batch of images. `images_np` is a list of numpy arrays in RGB format
 
         If this method is not overridden in a subclass, the following fallback logic works:
@@ -1267,7 +1346,7 @@ class Inference:
 
     def predict_batch_raw(
         self, images_np: List[np.ndarray], settings: Dict[str, Any]
-    ) -> List[List[Prediction]]:
+    ) -> List[List[PredictionDTO]]:
         """Predict batch of images. `source` is a list of numpy arrays in RGB format"""
         raise NotImplementedError(
             "Have to be implemented in child class If sliding_window_mode is 'advanced'."
@@ -1275,7 +1354,7 @@ class Inference:
 
     def predict_benchmark(
         self, images_np: List[np.ndarray], settings: dict
-    ) -> Tuple[List[List[Prediction]], dict]:
+    ) -> Tuple[List[List[PredictionDTO]], dict]:
         """
         Inference a batch of images with speedtest benchmarking.
 
@@ -1318,15 +1397,24 @@ class Inference:
         )
         return is_predict_batch_overridden or is_predict_benchmark_overridden
 
+    def set_conf_auto(self, conf: float, inference_settings: dict):
+        conf_names = ["conf", "confidence", "confidence_threshold", "confidence_thresh"]
+        for name in conf_names:
+            if name in inference_settings:
+                inference_settings[name] = conf
+        return inference_settings
+
     # pylint: enable=method-hidden
     def _get_inference_settings(self, state: dict):
-        settings = state.get("settings", {})
+        settings = state.get("settings")
         if settings is None:
             settings = {}
         if "rectangle" in state.keys():
             settings["rectangle"] = state["rectangle"]
+        conf = settings.get("conf", None)
+        if conf is not None:
+            settings = self.set_conf_auto(conf, settings)
         settings["sliding_window_mode"] = self.sliding_window_mode
-
         for key, value in self.custom_inference_settings_dict.items():
             if key not in settings:
                 logger.debug(
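Note: set_conf_auto is what makes the new generic "conf" field work regardless of which confidence key a given model's settings actually use. A short worked example following the code above (values are illustrative):

    settings = {"conf": 0.25, "confidence_threshold": 0.5}
    settings = self.set_conf_auto(settings["conf"], settings)
    # -> {"conf": 0.25, "confidence_threshold": 0.25}
    # Every known alias present in the dict ("conf", "confidence",
    # "confidence_threshold", "confidence_thresh") is overwritten with the same value.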
@@ -1335,13 +1423,19 @@ class Inference:
                 settings[key] = value
         return settings
 
+    def _get_batch_size_from_state(self, state: dict):
+        batch_size = state.get("batch_size", None)
+        if batch_size is None:
+            batch_size = self.get_batch_size()
+        return batch_size
+
     @property
     def app(self) -> Application:
         return self._app
 
     def visualize(
         self,
-        predictions: List[Prediction],
+        predictions: List[PredictionDTO],
         image_path: str,
         vis_path: str,
         thickness: Optional[int] = None,
@@ -1358,194 +1452,79 @@ class Inference:
1358
1452
 
1359
1453
  def _format_output(
1360
1454
  self,
1361
- anns: List[Annotation],
1362
- slides_data: List[dict] = None,
1455
+ predictions: List[Prediction],
1363
1456
  ) -> List[dict]:
1364
- if not slides_data:
1365
- slides_data = [{} for _ in range(len(anns))]
1366
- assert len(anns) == len(slides_data)
1367
- return [{"annotation": ann.to_json(), "data": data} for ann, data in zip(anns, slides_data)]
1368
-
1369
- def _inference_image(self, state: dict, file: UploadFile):
1370
- logger.debug("Inferring image...", extra={"state": state})
1371
- settings = self._get_inference_settings(state)
1372
- image_np = sly_image.read_bytes(file.file.read())
1373
- logger.debug("Inference settings:", extra=settings)
1374
- logger.debug("Image info:", extra={"w": image_np.shape[1], "h": image_np.shape[0]})
1375
- anns, slides_data = self._inference_auto(
1376
- [image_np],
1377
- settings=settings,
1378
- )
1379
- results = self._format_output(anns, slides_data)
1380
- return results[0]
1457
+ output = [
1458
+ {
1459
+ **pred.to_json(),
1460
+ "data": pred.extra_data.get("slides_data", {}),
1461
+ }
1462
+ for pred in predictions
1463
+ ]
1464
+ return output
1381
1465
 
1382
- def _inference_batch(self, state: dict, files: List[UploadFile]):
1466
+ def _inference_images(
1467
+ self,
1468
+ images: Iterable[Union[np.ndarray, str]],
1469
+ state: dict,
1470
+ inference_request: InferenceRequest,
1471
+ ):
1383
1472
  logger.debug("Inferring batch...", extra={"state": state})
1384
1473
  settings = self._get_inference_settings(state)
1385
- images = [sly_image.read_bytes(file.file.read()) for file in files]
1386
- anns, slides_data = self._inference_auto(
1387
- images,
1388
- settings=settings,
1389
- )
1390
- return self._format_output(anns, slides_data)
1391
-
1392
- def _inference_batch_ids(self, api: Api, state: dict):
1393
- logger.debug("Inferring batch_ids...", extra={"state": state})
1394
- settings = self._get_inference_settings(state)
1395
- ids = state["batch_ids"]
1396
- infos = api.image.get_info_by_id_batch(ids)
1397
- datasets = defaultdict(list)
1398
- for info in infos:
1399
- datasets[info.dataset_id].append(info.id)
1400
- results = []
1401
- for dataset_id, ids in datasets.items():
1402
- images_np = api.image.download_nps(dataset_id, ids)
1474
+ logger.debug("Inference settings:", extra={"inference_settings": settings})
1475
+ batch_size = self._get_batch_size_from_state(state)
1476
+
1477
+ inference_request.set_stage(InferenceRequest.Stage.INFERENCE, 0, len(images))
1478
+ for batch in batched_iter(images, batch_size=batch_size):
1479
+ batch = [
1480
+ self.cache.get_image_path(image) if isinstance(image, str) else image
1481
+ for image in batch
1482
+ ]
1403
1483
  anns, slides_data = self._inference_auto(
1404
- source=images_np,
1484
+ batch,
1405
1485
  settings=settings,
1406
1486
  )
1407
- anns = self._exclude_duplicated_predictions(api, anns, settings, dataset_id, ids)
1408
- results.extend(self._format_output(anns, slides_data))
1409
- return results
1410
-
1411
- def _inference_image_id(self, api: Api, state: dict, async_inference_request_uuid: str = None):
1412
- logger.debug("Inferring image_id...", extra={"state": state})
1413
- settings = self._get_inference_settings(state)
1414
- upload = state.get("upload", False)
1415
- image_id = state["image_id"]
1416
- image_info = api.image.get_info_by_id(image_id)
1417
- image_np = api.image.download_np(image_id)
1418
- logger.debug("Inference settings:", extra=settings)
1419
- logger.debug(
1420
- "Image info:",
1421
- extra={"id": image_id, "w": image_info.width, "h": image_info.height},
1422
- )
1423
-
1424
- inference_request = {}
1425
- if async_inference_request_uuid is not None:
1426
- try:
1427
- inference_request = self._inference_requests[async_inference_request_uuid]
1428
- except Exception as ex:
1429
- import traceback
1430
-
1431
- logger.error(traceback.format_exc())
1432
- raise RuntimeError(
1433
- f"async_inference_request_uuid {async_inference_request_uuid} was given, "
1434
- f"but there is no such uuid in 'self._inference_requests' ({len(self._inference_requests)} items)"
1435
- )
1436
-
1437
- anns, slides_data = self._inference_auto(
1438
- [image_np],
1439
- settings=settings,
1440
- )
1441
- ann = anns[0]
1442
-
1443
- if upload:
1444
- ds_info = api.dataset.get_info_by_id(image_info.dataset_id, raise_error=True)
1445
- output_project_id = ds_info.project_id
1446
- output_project_meta = self.cache.get_project_meta(api, output_project_id)
1447
- logger.debug("Merging project meta...")
1448
-
1449
- output_project_meta, ann, meta_changed = update_meta_and_ann(output_project_meta, ann)
1450
- if meta_changed:
1451
- output_project_meta = api.project.update_meta(
1452
- output_project_id, output_project_meta
1453
- )
1454
- self.cache.set_project_meta(output_project_id, output_project_meta)
1455
-
1456
- ann = self._exclude_duplicated_predictions(
1457
- api, anns, settings, ds_info.id, [image_id], output_project_meta
1458
- )[0]
1459
-
1460
- logger.debug(
1461
- "Uploading annotation...",
1462
- extra={
1463
- "image_id": image_id,
1464
- "dataset_id": ds_info.id,
1465
- "project_id": output_project_id,
1466
- },
1467
- )
1468
- api.annotation.upload_ann(image_id, ann)
1469
- else:
1470
- ann = self._exclude_duplicated_predictions(
1471
- api, anns, settings, image_info.dataset_id, [image_id]
1472
- )[0]
1473
-
1474
- result = self._format_output(anns, slides_data)[0]
1475
- if async_inference_request_uuid is not None and ann is not None:
1476
- inference_request["result"] = result
1477
- return result
1478
-
1479
- def _inference_image_url(self, api: Api, state: dict):
1480
- logger.debug("Inferring image_url...", extra={"state": state})
1481
- settings = self._get_inference_settings(state)
1482
- image_url = state["image_url"]
1483
- ext = sly_fs.get_file_ext(image_url)
1484
- if ext == "":
1485
- ext = ".jpg"
1486
- image_path = os.path.join(get_data_dir(), rand_str(15) + ext)
1487
- sly_fs.download(image_url, image_path)
1488
- logger.debug("Inference settings:", extra=settings)
1489
- logger.debug(f"Downloaded path: {image_path}")
1490
- anns, slides_data = self._inference_auto(
1491
- [image_path],
1492
- settings=settings,
1493
- )
1494
- sly_fs.silent_remove(image_path)
1495
- return self._format_output(anns, slides_data)[0]
1496
-
1497
- def _inference_video_id(self, api: Api, state: dict, async_inference_request_uuid: str = None):
1498
- from supervisely.nn.inference.video_inference import InferenceVideoInterface
1499
-
1500
- logger.debug("Inferring video_id...", extra={"state": state})
1501
- video_info = api.video.get_info_by_id(state["videoId"])
1502
- n_frames = state.get("framesCount", video_info.frames_count)
1487
+ predictions = [Prediction(ann, model_meta=self.model_meta) for ann in anns]
1488
+ for pred, this_slides_data in zip(predictions, slides_data):
1489
+ pred.extra_data["slides_data"] = this_slides_data
1490
+ batch_results = self._format_output(predictions)
1491
+ inference_request.add_results(batch_results)
1492
+ inference_request.done(len(batch_results))
1493
+
1494
+ def _inference_video(
1495
+ self,
1496
+ path: str,
1497
+ state: Dict,
1498
+ inference_request: InferenceRequest,
1499
+ ):
1500
+ logger.debug("Inferring video...", extra={"path": path, "state": state})
1501
+ inference_settings = self._get_inference_settings(state)
1502
+ logger.debug(f"Inference settings:", extra=inference_settings)
1503
+ batch_size = self._get_batch_size_from_state(state)
1503
1504
  start_frame_index = state.get("startFrameIndex", 0)
1504
- direction = state.get("direction", "forward")
1505
- logger.debug(
1506
- f"Video info:",
1507
- extra=dict(
1508
- w=video_info.frame_width,
1509
- h=video_info.frame_height,
1510
- start_frame_index=start_frame_index,
1511
- n_frames=n_frames,
1512
- ),
1513
- )
1505
+ step = state.get("stride", None)
1506
+ if step is None:
1507
+ step = state.get("step", None)
1508
+ if step is None:
1509
+ step = 1
1510
+ end_frame_index = state.get("endFrameIndex", None)
1511
+ duration = state.get("duration", None)
1512
+ frames_count = state.get("framesCount", None)
1514
1513
  tracking = state.get("tracker", None)
1514
+ direction = state.get("direction", "forward")
1515
+ direction = 1 if direction == "forward" else -1
1515
1516
 
1516
- preparing_progress = {"current": 0, "total": 1}
1517
- if async_inference_request_uuid is not None:
1518
- try:
1519
- inference_request = self._inference_requests[async_inference_request_uuid]
1520
- except Exception as ex:
1521
- import traceback
1522
-
1523
- logger.error(traceback.format_exc())
1524
- raise RuntimeError(
1525
- f"async_inference_request_uuid {async_inference_request_uuid} was given, "
1526
- f"but there is no such uuid in 'self._inference_requests' ({len(self._inference_requests)} items)"
1527
- )
1528
- sly_progress: Progress = inference_request["progress"]
1529
-
1530
- sly_progress.total = n_frames
1531
- inference_request["preparing_progress"]["total"] = n_frames
1532
- preparing_progress = inference_request["preparing_progress"]
1533
-
1534
- # progress
1535
- preparing_progress["status"] = "download_video"
1536
- preparing_progress["current"] = 0
1537
- preparing_progress["total"] = int(video_info.file_meta["size"])
1538
-
1539
- def _progress_cb(chunk_size):
1540
- preparing_progress["current"] += chunk_size
1541
-
1542
- self.cache.download_video(api, video_info.id, return_images=False, progress_cb=_progress_cb)
1543
- preparing_progress["status"] = "inference"
1544
-
1545
- settings = self._get_inference_settings(state)
1546
- logger.debug(f"Inference settings:", extra=settings)
1547
-
1548
- logger.debug(f"Total frames to infer: {n_frames}")
1517
+ frames_reader = VideoFrameReader(path)
1518
+ video_height, video_witdth = frames_reader.frame_size()
1519
+ if frames_count is not None:
1520
+ n_frames = frames_count
1521
+ elif end_frame_index is not None:
1522
+ n_frames = end_frame_index - start_frame_index
1523
+ elif duration is not None:
1524
+ fps = frames_reader.fps()
1525
+ n_frames = int(duration * fps)
1526
+ else:
1527
+ n_frames = frames_reader.frames_count()
1549
1528
 
1550
1529
  if tracking == "bot":
1551
1530
  from supervisely.nn.tracker import BoTTracker
@@ -1557,444 +1536,374 @@ class Inference:
1557
1536
  tracker = DeepSortTracker(state)
1558
1537
  else:
1559
1538
  if tracking is not None:
1560
- logger.warn(f"Unknown tracking type: {tracking}. Tracking is disabled.")
1539
+ logger.warning(f"Unknown tracking type: {tracking}. Tracking is disabled.")
1561
1540
  tracker = None
1562
1541
 
1542
+ progress_total = (n_frames + step - 1) // step
1543
+ inference_request.set_stage(InferenceRequest.Stage.INFERENCE, 0, progress_total)
1544
+
1563
1545
  results = []
1564
- batch_size = state.get("batch_size", None)
1565
- if batch_size is None:
1566
- batch_size = self.get_batch_size()
1567
1546
  tracks_data = {}
1568
- direction = 1 if direction == "forward" else -1
1569
1547
  for batch in batched(
1570
- range(start_frame_index, start_frame_index + direction * n_frames, direction),
1548
+ range(start_frame_index, start_frame_index + direction * n_frames, direction * step),
1571
1549
  batch_size,
1572
1550
  ):
1573
- if (
1574
- async_inference_request_uuid is not None
1575
- and inference_request["cancel_inference"] is True
1576
- ):
1551
+ if inference_request.is_stopped():
1577
1552
  logger.debug(
1578
- f"Cancelling inference video...",
1579
- extra={"inference_request_uuid": async_inference_request_uuid},
1553
+ f"Cancelling inference...",
1554
+ extra={"inference_request_uuid": inference_request.uuid},
1580
1555
  )
1581
1556
  results = []
1582
1557
  break
1583
1558
  logger.debug(
1584
1559
  f"Inferring frames {batch[0]}-{batch[-1]}:",
1585
1560
  )
1586
- frames = self.cache.download_frames(api, video_info.id, batch, redownload_video=True)
1561
+ frames = frames_reader.read_frames(batch)
1587
1562
  anns, slides_data = self._inference_auto(
1588
1563
  source=frames,
1589
- settings=settings,
1564
+ settings=inference_settings,
1590
1565
  )
1566
+ predictions = [
1567
+ Prediction(ann, model_meta=self.model_meta, frame_index=frame_index)
1568
+ for ann, frame_index in zip(anns, batch)
1569
+ ]
1570
+ for pred, this_slides_data in zip(predictions, slides_data):
1571
+ pred.extra_data["slides_data"] = this_slides_data
1572
+ batch_results = self._format_output(predictions)
1591
1573
  if tracker is not None:
1592
1574
  for frame_index, frame, ann in zip(batch, frames, anns):
1593
1575
  tracks_data = tracker.update(frame, ann, frame_index, tracks_data)
1594
- batch_results = self._format_output(anns, slides_data)
1595
- results.extend(batch_results)
1596
- if async_inference_request_uuid is not None:
1597
- sly_progress.iters_done(len(batch))
1598
- inference_request["pending_results"].extend(batch_results)
1576
+ inference_request.add_results(batch_results)
1577
+ inference_request.done(len(batch_results))
1599
1578
  logger.debug(f"Frames {batch[0]}-{batch[-1]} done.")
1600
1579
  video_ann_json = None
1601
1580
  if tracker is not None:
1581
+ inference_request.set_stage("Postprocess...", 0, 1)
1602
1582
  video_ann_json = tracker.get_annotation(
1603
- tracks_data, (video_info.frame_height, video_info.frame_width), n_frames
1583
+ tracks_data, (video_height, video_witdth), n_frames
1604
1584
  ).to_json()
1585
+ inference_request.done()
1605
1586
  result = {"ann": results, "video_ann": video_ann_json}
1606
- if async_inference_request_uuid is not None and len(results) > 0:
1607
- inference_request["result"] = result.copy()
1608
- return result
1587
+ inference_request.final_result = result.copy()
1609
1588
 
1610
- def _inference_images_ids(
1589
+ def _inference_image_ids(
1611
1590
  self,
1612
1591
  api: Api,
1613
1592
  state: dict,
1614
- images_ids: List[int],
1615
- async_inference_request_uuid: str = None,
1593
+ inference_request: InferenceRequest,
1616
1594
  ):
1617
1595
  """Inference images by ids.
1618
1596
  If "output_project_id" in state, upload images and annotations to the output project.
1619
1597
  If "output_project_id" equal to source project id, upload annotations to the source project.
1620
1598
  If "output_project_id" is None, write annotations to inference request object.
1621
1599
  """
1622
- logger.debug("Inferring images...", extra={"state": state})
1623
- batch_size = state.get("batch_size", None)
1624
- if batch_size is None:
1625
- batch_size = self.get_batch_size()
1626
- output_project_id = state.get("output_project_id", None)
1627
- images_infos = api.image.get_info_by_id_batch(images_ids)
1600
+ logger.debug("Inferring batch_ids", extra={"state": state})
1601
+ inference_settings = self._get_inference_settings(state)
1602
+ logger.debug("Inference settings:", extra={"inference_settings": inference_settings})
1603
+ batch_size = self._get_batch_size_from_state(state)
1604
+ image_ids = get_value_for_keys(
1605
+ state, ["batch_ids", "image_ids", "images_ids", "imageIds", "image_id", "imageId"]
1606
+ )
1607
+ if image_ids is None:
1608
+ raise ValueError("Image ids are not provided")
1609
+ if not isinstance(image_ids, list):
1610
+ image_ids = [image_ids]
1611
+ upload_mode = state.get("upload_mode", None)
1612
+ iou_merge_threshold = inference_settings.get("existing_objects_iou_thresh", None)
1613
+ if upload_mode == "iou_merge" and iou_merge_threshold is None:
1614
+ iou_merge_threshold = 0.7
1615
+
1616
+ images_infos = api.image.get_info_by_id_batch(image_ids)
1628
1617
  images_infos_dict = {im_info.id: im_info for im_info in images_infos}
1618
+ inference_request.context.setdefault("image_info", {}).update(images_infos_dict)
1619
+
1629
1620
  dataset_infos_dict = {
1630
1621
  ds_id: api.dataset.get_info_by_id(ds_id)
1631
1622
  for ds_id in set([im_info.dataset_id for im_info in images_infos])
1632
1623
  }
1624
+ inference_request.context.setdefault("dataset_info", {}).update(dataset_infos_dict)
1633
1625
 
1634
- if async_inference_request_uuid is not None:
1635
- try:
1636
- inference_request = self._inference_requests[async_inference_request_uuid]
1637
- except Exception as ex:
1638
- import traceback
1639
-
1640
- logger.error(traceback.format_exc())
1641
- raise RuntimeError(
1642
- f"async_inference_request_uuid {async_inference_request_uuid} was given, "
1643
- f"but there is no such uuid in 'self._inference_requests' ({len(self._inference_requests)} items)"
1644
- )
1645
- sly_progress: Progress = inference_request["progress"]
1646
- sly_progress.total = len(images_ids)
1647
-
1648
- def _download_images(images_ids):
1649
- with ThreadPoolExecutor(max(8, min(batch_size, 64))) as executor:
1650
- for image_id in images_ids:
1651
- executor.submit(
1652
- self.cache.download_image,
1653
- api,
1654
- image_id,
1655
- )
1656
-
1657
- # start downloading in parallel
1658
- threading.Thread(target=_download_images, args=[images_ids], daemon=True).start()
1659
-
1660
- output_project_metas_dict = {}
1661
-
1662
- def _upload_results_to_source(results: List[Dict]):
1663
- nonlocal output_project_metas_dict
1664
- for result in results:
1665
- image_id = result["image_id"]
1666
- image_info: ImageInfo = images_infos_dict[image_id]
1667
- dataset_info: DatasetInfo = dataset_infos_dict[image_info.dataset_id]
1668
- project_id = dataset_info.project_id
1669
- ann = Annotation.from_json(result["annotation"], self.model_meta)
1670
- output_project_meta = output_project_metas_dict.get(project_id, None)
1671
- if output_project_meta is None:
1672
- output_project_meta = ProjectMeta.from_json(
1673
- api.project.get_meta(output_project_id)
1674
- )
1675
- output_project_meta, ann, meta_changed = update_meta_and_ann(
1676
- output_project_meta, ann
1677
- )
1678
- output_project_metas_dict[project_id] = output_project_meta
1679
- if meta_changed:
1680
- output_project_meta = api.project.update_meta(project_id, output_project_meta)
1681
- ann = update_classes(api, ann, output_project_meta, project_id)
1682
- api.annotation.append_labels(image_id, ann.labels)
1683
- if async_inference_request_uuid is not None:
1684
- sly_progress.iters_done(1)
1685
- inference_request["pending_results"].append(
1686
- {
1687
- "annotation": None, # to less response size
1688
- "data": None, # to less response size
1689
- "image_id": image_id,
1690
- "image_name": result["image_name"],
1691
- "dataset_id": result["dataset_id"],
1692
- }
1693
- )
1694
-
1695
- def _add_results_to_request(results: List[Dict]):
1696
- if async_inference_request_uuid is None:
1697
- return
1698
- inference_request["pending_results"].extend(results)
1699
- sly_progress.iters_done(len(results))
1700
-
1701
- new_dataset_id = {}
1702
-
1703
- def _get_or_create_new_dataset(output_project_id, src_dataset_id):
1704
- """Copy dataset in output project if not exists and return its id"""
1705
- if src_dataset_id in new_dataset_id:
1706
- return new_dataset_id[src_dataset_id]
1707
- dataset_info = api.dataset.get_info_by_id(src_dataset_id)
1708
-
1709
- def _create_parent_recursively(output_project_id, src_parent_id):
1710
- """Create parent datasets recursively and return the ID of the top-level parent"""
1711
- if src_parent_id in new_dataset_id:
1712
- return new_dataset_id[src_parent_id]
1713
- src_parent_info = dataset_infos_dict.get(src_parent_id)
1714
- if src_parent_info is None:
1715
- src_parent_info = api.dataset.get_info_by_id(src_parent_id)
1716
- if src_parent_info.parent_id is not None:
1717
- parent_id = _create_parent_recursively(
1718
- output_project_id, src_parent_info.parent_id
1719
- )
1720
- else:
1721
- parent_id = None
1722
- dst_parent = api.dataset.create(
1723
- output_project_id,
1724
- src_parent_info.name,
1725
- change_name_if_conflict=True,
1726
- parent_id=parent_id,
1727
- )
1728
- new_dataset_id[src_parent_info.id] = dst_parent.id
1729
- return dst_parent.id
1730
-
1731
- parent_id = None
1732
- if dataset_info.parent_id is not None:
1733
- parent_id = _create_parent_recursively(output_project_id, dataset_info.parent_id)
1734
-
1735
- output_dataset_id = api.dataset.create(
1736
- output_project_id,
1737
- dataset_info.name,
1626
+ output_project_id = state.get("output_project_id", None)
1627
+ output_dataset_id = None
1628
+ inference_request.context.setdefault("project_meta", {})
1629
+ if output_project_id is not None:
1630
+ if upload_mode is None:
1631
+ upload_mode = "append"
1632
+ if output_project_id is None and upload_mode == "create":
1633
+ image_info = images_infos[0]
1634
+ dataset_info = dataset_infos_dict[image_info.dataset_id]
1635
+ output_project_info = api.project.create(
1636
+ dataset_info.workspace_id,
1637
+ name=f"Predictions from task #{self.task_id}",
1638
+ description=f"Auto created project from inference request {inference_request.uuid}",
1738
1639
  change_name_if_conflict=True,
1739
- parent_id=parent_id,
1740
- ).id
1741
- new_dataset_id[src_dataset_id] = output_dataset_id
1742
- return output_dataset_id
1743
-
1744
- def _copy_images_to_dst(
1745
- src_dataset_id, dst_dataset_id, image_infos, dst_names
1746
- ) -> List[ImageInfo]:
1747
- return api.image.copy_batch_optimized(
1748
- src_dataset_id,
1749
- image_infos,
1750
- dst_dataset_id,
1751
- dst_names=dst_names,
1752
- with_annotations=False,
1753
- skip_validation=True,
1754
1640
  )
1755
-
1756
- def _upload_results_to_other(results: List[Dict]):
1757
- nonlocal output_project_metas_dict
1758
- if len(results) == 0:
1759
- return
1760
- src_dataset_id = results[0]["dataset_id"]
1761
- dataset_id = _get_or_create_new_dataset(output_project_id, src_dataset_id)
1762
- src_image_infos = [images_infos_dict[result["image_id"]] for result in results]
1763
- image_names = [result["image_name"] for result in results]
1764
- image_infos = _copy_images_to_dst(
1765
- src_dataset_id, dataset_id, src_image_infos, image_names
1641
+ output_project_id = output_project_info.id
1642
+ inference_request.context.setdefault("project_info", {})[
1643
+ output_project_id
1644
+ ] = output_project_info
1645
+ output_dataset_info = api.dataset.create(
1646
+ output_project_id,
1647
+ "Predictions",
1648
+ description=f"Auto created dataset from inference request {inference_request.uuid}",
1649
+ change_name_if_conflict=True,
1766
1650
  )
1767
- image_infos.sort(key=lambda x: image_names.index(x.name))
1768
- api.logger.debug(
1769
- "Uploading results to other project...",
1770
- extra={
1771
- "src_dataset_id": src_dataset_id,
1772
- "dst_project_id": output_project_id,
1773
- "dst_dataset_id": dataset_id,
1774
- "items_count": len(image_infos),
1775
- },
1651
+ output_dataset_id = output_dataset_info.id
1652
+ inference_request.context.setdefault("dataset_info", {})[
1653
+ output_dataset_id
1654
+ ] = output_dataset_info
1655
+
1656
+ # start download to cache in background
1657
+ dataset_image_infos: Dict[int, List[ImageInfo]] = defaultdict(list)
1658
+ for image_info in images_infos:
1659
+ dataset_image_infos[image_info.dataset_id].append(image_info)
1660
+ for dataset_id, ds_image_infos in dataset_image_infos.items():
1661
+ self.cache.run_cache_task_manually(
1662
+ api, [info.id for info in ds_image_infos], dataset_id=dataset_id
1776
1663
  )
1777
- meta_changed = False
1778
- anns = []
1779
- for result in results:
1780
- ann = Annotation.from_json(result["annotation"], self.model_meta)
1781
- output_project_meta = output_project_metas_dict.get(output_project_id, None)
1782
- if output_project_meta is None:
1783
- output_project_meta = ProjectMeta.from_json(
1784
- api.project.get_meta(output_project_id)
1785
- )
1786
- output_project_meta, ann, c = update_meta_and_ann(output_project_meta, ann)
1787
- output_project_metas_dict[output_project_id] = output_project_meta
1788
- meta_changed = meta_changed or c
1789
- anns.append(ann)
1790
- if meta_changed:
1791
- api.project.update_meta(output_project_id, output_project_meta)
1792
-
1793
- # upload in batches to update progress with each batch
1794
- # api.annotation.upload_anns() uploads in same batches anyways
1795
- for batch in batched(list(zip(anns, results, image_infos))):
1796
- batch_anns, batch_results, batch_image_infos = zip(*batch)
1797
- api.annotation.upload_anns(
1798
- img_ids=[info.id for info in batch_image_infos],
1799
- anns=batch_anns,
1800
- )
1801
- if async_inference_request_uuid is not None:
1802
- sly_progress.iters_done(len(batch_results))
1803
- inference_request["pending_results"].extend(
1804
- [{**result, "annotation": None, "data": None} for result in batch_results]
1805
- )
1806
1664
 
1807
- def upload_results_to_source_or_other(results: List[Dict]):
1808
- if len(results) == 0:
1809
- return
1810
- dataset_id = results[0]["dataset_id"]
1811
- dataset_info: DatasetInfo = dataset_infos_dict[dataset_id]
1812
- project_id = dataset_info.project_id
1813
- if project_id == output_project_id:
1814
- _upload_results_to_source(results)
1815
- else:
1816
- _upload_results_to_other(results)
1817
-
1818
- if output_project_id is None:
1819
- upload_f = _add_results_to_request
1820
- else:
1821
- upload_f = upload_results_to_source_or_other
1822
-
1823
- def _upload_loop(q: Queue, stop_event: threading.Event, api: Api, upload_f: Callable):
1824
- try:
1825
- while True:
1826
- items = []
1827
- while not q.empty():
1828
- items.append(q.get_nowait())
1829
- if len(items) > 0:
1830
- ds_batches = {}
1831
- for batch in items:
1832
- if len(batch) == 0:
1833
- continue
1834
- for each in batch:
1835
- ds_batches.setdefault(each["dataset_id"], []).append(each)
1836
- for _, joined_batch in ds_batches.items():
1837
- upload_f(joined_batch)
1838
- continue
1839
- if stop_event.is_set():
1840
- self._on_inference_end(None, async_inference_request_uuid)
1841
- return
1842
- time.sleep(1)
1843
- except Exception as e:
1844
- api.logger.error("Error in upload loop: %s", str(e), exc_info=True)
1845
- raise
1846
-
1847
- upload_queue = Queue()
1848
- stop_upload_event = threading.Event()
1849
- upload_thread = threading.Thread(
1850
- target=_upload_loop,
1851
- args=[upload_queue, stop_upload_event, api, upload_f],
1852
- daemon=True,
1665
+ _upload_predictions = partial(
1666
+ self.upload_predictions,
1667
+ api=api,
1668
+ upload_mode=upload_mode,
1669
+ context=inference_request.context,
1670
+ dst_dataset_id=output_dataset_id,
1671
+ dst_project_id=output_project_id,
1672
+ progress_cb=inference_request.done,
1673
+ iou_merge_threshold=iou_merge_threshold,
1674
+ inference_request=inference_request,
1853
1675
  )
1854
- upload_thread.start()
1855
1676
 
1856
- settings = self._get_inference_settings(state)
1857
- logger.debug(f"Inference settings:", extra=settings)
1677
+ _add_results_to_request = partial(
1678
+ self.add_results_to_request, inference_request=inference_request
1679
+ )
1858
1680
 
1859
- results = []
1860
- stop = False
1861
- try:
1862
- for image_ids_batch in batched(images_ids, batch_size=batch_size):
1863
- if stop:
1864
- break
1865
- if (
1866
- async_inference_request_uuid is not None
1867
- and inference_request["cancel_inference"] is True
1868
- ):
1681
+ if upload_mode is None:
1682
+ upload_f = _add_results_to_request
1683
+ else:
1684
+ upload_f = _upload_predictions
1685
+
1686
+ inference_request.set_stage(InferenceRequest.Stage.INFERENCE, 0, len(image_ids))
1687
+ with Uploader(upload_f, logger=logger) as uploader:
1688
+ for image_ids_batch in batched(image_ids, batch_size=batch_size):
1689
+ if uploader.has_exception():
1690
+ exception = uploader.exception()
1691
+ raise RuntimeError(f"Error in upload loop: {exception}") from exception
1692
+ if inference_request.is_stopped():
1869
1693
  logger.debug(
1870
1694
  f"Cancelling inference project...",
1871
- extra={"inference_request_uuid": async_inference_request_uuid},
1695
+ extra={"inference_request_uuid": inference_request.uuid},
1872
1696
  )
1873
- results = []
1874
- stop = True
1875
1697
  break
1876
1698
 
1877
1699
  images_nps = [self.cache.download_image(api, img_id) for img_id in image_ids_batch]
1878
1700
  anns, slides_data = self._inference_auto(
1879
1701
  source=images_nps,
1880
- settings=settings,
1702
+ settings=inference_settings,
1881
1703
  )
1882
- batch_results = []
1883
- for i, ann in enumerate(anns):
1884
- image_info: ImageInfo = images_infos_dict[image_ids_batch[i]]
1885
- ds_info = dataset_infos_dict[image_info.dataset_id]
1886
- meta = output_project_metas_dict.get(ds_info.project_id, None)
1887
- iou = settings.get("existing_objects_iou_thresh")
1888
- if meta is None and isinstance(iou, float) and iou > 0:
1889
- meta = ProjectMeta.from_json(api.project.get_meta(ds_info.project_id))
1890
- output_project_metas_dict[ds_info.project_id] = meta
1891
- ann = self._exclude_duplicated_predictions(
1892
- api, [ann], settings, ds_info.id, [image_info.id], meta
1893
- )[0]
1894
- batch_results.append(
1895
- {
1896
- "annotation": ann.to_json(),
1897
- "data": slides_data[i],
1898
- "image_id": image_info.id,
1899
- "image_name": image_info.name,
1900
- "dataset_id": image_info.dataset_id,
1901
- }
1704
+
1705
+ batch_predictions = []
1706
+ for image_id, ann, this_slides_data in zip(image_ids_batch, anns, slides_data):
1707
+ image_info: ImageInfo = images_infos_dict[image_id]
1708
+ dataset_info = dataset_infos_dict[image_info.dataset_id]
1709
+ prediction = Prediction(
1710
+ ann,
1711
+ model_meta=self.model_meta,
1712
+ name=image_info.name,
1713
+ image_id=image_info.id,
1714
+ dataset_id=image_info.dataset_id,
1715
+ project_id=dataset_info.project_id,
1902
1716
  )
1903
- results.extend(batch_results)
1904
- upload_queue.put(batch_results)
1905
- except Exception:
1906
- stop_upload_event.set()
1907
- upload_thread.join()
1908
- raise
1909
- if async_inference_request_uuid is not None and len(results) > 0:
1910
- inference_request["result"] = {"ann": results}
1911
- stop_upload_event.set()
1912
- upload_thread.join()
1913
- return results
1717
+ prediction.extra_data["slides_data"] = this_slides_data
1718
+ batch_predictions.append(prediction)
1914
1719
 
1915
- def _inference_project_id(
1720
+ uploader.put(batch_predictions)
1721
+
1722
+ def _inference_video_id(
1916
1723
  self,
1917
1724
  api: Api,
1918
1725
  state: dict,
1919
- project_info: ProjectInfo = None,
1920
- async_inference_request_uuid: str = None,
1726
+ inference_request: InferenceRequest,
1921
1727
  ):
1728
+ logger.debug("Inferring video_id...", extra={"state": state})
1729
+ inference_settings = self._get_inference_settings(state)
1730
+ logger.debug(f"Inference settings:", extra=inference_settings)
1731
+ batch_size = self._get_batch_size_from_state(state)
1732
+ video_id = state["videoId"]
1733
+ video_id = get_value_for_keys(state, ["videoId", "video_id"], ignore_none=True)
1734
+ if video_id is None:
1735
+ raise ValueError("Video id is not provided")
1736
+ video_info = api.video.get_info_by_id(video_id)
1737
+ start_frame_index = get_value_for_keys(
1738
+ state, ["startFrameIndex", "start_frame_index", "start_frame"], ignore_none=True
1739
+ )
1740
+ if start_frame_index is None:
1741
+ start_frame_index = 0
1742
+ step = get_value_for_keys(state, ["stride", "step"], ignore_none=True)
1743
+ if step is None:
1744
+ step = 1
1745
+ end_frame_index = get_value_for_keys(
1746
+ state, ["endFrameIndex", "end_frame_index", "end_frame"], ignore_none=True
1747
+ )
1748
+ duration = state.get("duration", None)
1749
+ frames_count = get_value_for_keys(
1750
+ state, ["framesCount", "frames_count", "num_frames"], ignore_none=True
1751
+ )
1752
+ tracking = state.get("tracker", None)
1753
+ direction = state.get("direction", "forward")
1754
+ direction = 1 if direction == "forward" else -1
1755
+
1756
+ if frames_count is not None:
1757
+ n_frames = frames_count
1758
+ elif end_frame_index is not None:
1759
+ n_frames = end_frame_index - start_frame_index
1760
+ elif duration is not None:
1761
+ fps = video_info.frames_count / video_info.duration
1762
+ n_frames = int(duration * fps)
1763
+ else:
1764
+ n_frames = video_info.frames_count
1765
+
1766
+ if tracking == "bot":
1767
+ from supervisely.nn.tracker import BoTTracker
1768
+
1769
+ tracker = BoTTracker(state)
1770
+ elif tracking == "deepsort":
1771
+ from supervisely.nn.tracker import DeepSortTracker
1772
+
1773
+ tracker = DeepSortTracker(state)
1774
+ else:
1775
+ if tracking is not None:
1776
+ logger.warning(f"Unknown tracking type: {tracking}. Tracking is disabled.")
1777
+ tracker = None
1778
+ logger.debug(
1779
+ f"Video info:",
1780
+ extra=dict(
1781
+ w=video_info.frame_width,
1782
+ h=video_info.frame_height,
1783
+ start_frame_index=start_frame_index,
1784
+ n_frames=n_frames,
1785
+ ),
1786
+ )
1787
+
1788
+ # start downloading video in background
1789
+ self.cache.run_cache_task_manually(api, None, video_id=video_id)
1790
+
1791
+ progress_total = (n_frames + step - 1) // step
1792
+ inference_request.set_stage(InferenceRequest.Stage.INFERENCE, 0, progress_total)
1793
+
1794
+ tracks_data = {}
1795
+ for batch in batched(
1796
+ range(start_frame_index, start_frame_index + direction * n_frames, direction * step),
1797
+ batch_size,
1798
+ ):
1799
+ if inference_request.is_stopped():
1800
+ logger.debug(
1801
+ f"Cancelling inference video...",
1802
+ extra={"inference_request_uuid": inference_request.uuid},
1803
+ )
1804
+ break
1805
+ logger.debug(
1806
+ f"Inferring frames {batch[0]}-{batch[-1]}:",
1807
+ )
1808
+ frames = self.cache.download_frames(api, video_info.id, batch, redownload_video=True)
1809
+ anns, slides_data = self._inference_auto(
1810
+ source=frames,
1811
+ settings=inference_settings,
1812
+ )
1813
+ predictions = [
1814
+ Prediction(
1815
+ ann,
1816
+ model_meta=self.model_meta,
1817
+ frame_index=frame_index,
1818
+ video_id=video_info.id,
1819
+ dataset_id=video_info.dataset_id,
1820
+ project_id=video_info.project_id,
1821
+ )
1822
+ for ann, frame_index in zip(anns, batch)
1823
+ ]
1824
+ for pred, this_slides_data in zip(predictions, slides_data):
1825
+ pred.extra_data["slides_data"] = this_slides_data
1826
+ batch_results = self._format_output(predictions)
1827
+ if tracker is not None:
1828
+ for frame_index, frame, ann in zip(batch, frames, anns):
1829
+ tracks_data = tracker.update(frame, ann, frame_index, tracks_data)
1830
+ inference_request.add_results(batch_results)
1831
+ inference_request.done(len(batch_results))
1832
+ logger.debug(f"Frames {batch[0]}-{batch[-1]} done.")
1833
+ video_ann_json = None
1834
+ if tracker is not None:
1835
+ inference_request.set_stage("Postprocess...", 0, 1)
1836
+ video_ann_json = tracker.get_annotation(
1837
+ tracks_data, (video_info.frame_height, video_info.frame_width), n_frames
1838
+ ).to_json()
1839
+ inference_request.done()
1840
+ inference_request.final_result = {"video_ann": video_ann_json}
1841
+
1842
+ def _inference_project_id(self, api: Api, state: dict, inference_request: InferenceRequest):
1922
1843
  """Inference project images.
1923
1844
  If "output_project_id" in state, upload images and annotations to the output project.
1924
1845
  If "output_project_id" equal to source project id, upload annotations to the source project.
1925
1846
  If "output_project_id" is None, write annotations to inference request object.
1926
1847
  """
1927
1848
  logger.debug("Inferring project...", extra={"state": state})
1928
- if project_info is None:
1929
- project_info = api.project.get_info_by_id(state["projectId"])
1930
- dataset_ids = state.get("dataset_ids", None)
1849
+ inference_settings = self._get_inference_settings(state)
1850
+ logger.debug("Inference settings:", extra={"inference_settings": inference_settings})
1851
+ batch_size = self._get_batch_size_from_state(state)
1852
+ project_id = get_value_for_keys(state, keys=["projectId", "project_id"])
1853
+ if project_id is None:
1854
+ raise ValueError("Project id is not provided")
1855
+ project_info = api.project.get_info_by_id(project_id)
1856
+ if project_info.type != str(ProjectType.IMAGES):
1857
+ raise ValueError("Only images projects are supported.")
1858
+ upload_mode = state.get("upload_mode", None)
1859
+ iou_merge_threshold = inference_settings.get("existing_objects_iou_thresh", None)
1860
+ if upload_mode == "iou_merge" and iou_merge_threshold is None:
1861
+ iou_merge_threshold = 0.7
1931
1862
  cache_project_on_model = state.get("cache_project_on_model", False)
1932
- batch_size = state.get("batch_size", None)
1933
- if batch_size is None:
1934
- batch_size = self.get_batch_size()
1935
1863
 
1864
+ project_info = api.project.get_info_by_id(project_id)
1865
+ inference_request.context.setdefault("project_info", {})[project_id] = project_info
1866
+ dataset_ids = state.get("dataset_ids", None)
1867
+ if dataset_ids is None:
1868
+ dataset_ids = state.get("datasetIds", None)
1936
1869
  datasets_infos = api.dataset.get_list(project_info.id, recursive=True)
1870
+ inference_request.context.setdefault("dataset_info", {}).update(
1871
+ {ds_info.id: ds_info for ds_info in datasets_infos}
1872
+ )
1937
1873
  if dataset_ids is not None:
1938
1874
  datasets_infos = [ds_info for ds_info in datasets_infos if ds_info.id in dataset_ids]
1939
1875
 
1940
- # progress
1941
- preparing_progress = {"current": 0, "total": 1}
1942
- preparing_progress["status"] = "download_info"
1943
- preparing_progress["current"] = 0
1944
- preparing_progress["total"] = len(datasets_infos)
1945
- progress_cb = None
1946
- if async_inference_request_uuid is not None:
1947
- try:
1948
- inference_request = self._inference_requests[async_inference_request_uuid]
1949
- except Exception as ex:
1950
- import traceback
1951
-
1952
- logger.error(traceback.format_exc())
1953
- raise RuntimeError(
1954
- f"async_inference_request_uuid {async_inference_request_uuid} was given, "
1955
- f"but there is no such uuid in 'self._inference_requests' ({len(self._inference_requests)} items)"
1956
- )
1957
- sly_progress: Progress = inference_request["progress"]
1958
- sly_progress.total = sum([ds_info.items_count for ds_info in datasets_infos])
1959
-
1960
- inference_request["preparing_progress"]["total"] = len(datasets_infos)
1961
- preparing_progress = inference_request["preparing_progress"]
1962
-
1963
- if cache_project_on_model:
1964
- progress_cb = sly_progress.iters_done
1965
- preparing_progress["total"] = sly_progress.total
1966
- preparing_progress["status"] = "download_project"
1876
+ preparing_progress_total = sum([ds_info.items_count for ds_info in datasets_infos])
1877
+ inference_progress_total = preparing_progress_total
1878
+ inference_request.set_stage(InferenceRequest.Stage.PREPARING, 0, preparing_progress_total)
1967
1879
 
1968
1880
  output_project_id = state.get("output_project_id", None)
1969
- output_project_meta = None
1881
+ inference_request.context.setdefault("project_meta", {})
1970
1882
  if output_project_id is not None:
1971
- logger.debug("Merging project meta...")
1972
- output_project_meta = ProjectMeta.from_json(api.project.get_meta(output_project_id))
1973
- changed = False
1974
- for obj_class in self.model_meta.obj_classes:
1975
- if output_project_meta.obj_classes.get(obj_class.name, None) is None:
1976
- output_project_meta = output_project_meta.add_obj_class(obj_class)
1977
- changed = True
1978
- for tag_meta in self.model_meta.tag_metas:
1979
- if output_project_meta.tag_metas.get(tag_meta.name, None) is None:
1980
- output_project_meta = output_project_meta.add_tag_meta(tag_meta)
1981
- changed = True
1982
- if changed:
1983
- output_project_meta = api.project.update_meta(
1984
- output_project_id, output_project_meta
1985
- )
1883
+ if upload_mode is None:
1884
+ upload_mode = "append"
1885
+ if output_project_id is None and upload_mode == "create":
1886
+ output_project_info = api.project.create(
1887
+ project_info.workspace_id,
1888
+ name=f"Predictions from task #{self.task_id}",
1889
+ description=f"Auto created project from inference request {inference_request.uuid}",
1890
+ change_name_if_conflict=True,
1891
+ )
1892
+ output_project_id = output_project_info.id
1893
+ inference_request.context.setdefault("project_info", {})[
1894
+ output_project_id
1895
+ ] = output_project_info
1986
1896
 
1987
1897
  if cache_project_on_model:
1988
- download_to_cache(api, project_info.id, datasets_infos, progress_cb=progress_cb)
1898
+ download_to_cache(
1899
+ api, project_info.id, datasets_infos, progress_cb=inference_request.done
1900
+ )
1989
1901
 
1990
1902
  images_infos_dict = {}
1991
1903
  for dataset_info in datasets_infos:
1992
1904
  images_infos_dict[dataset_info.id] = api.image.get_list(dataset_info.id)
1993
1905
  if not cache_project_on_model:
1994
- preparing_progress["current"] += 1
1995
-
1996
- preparing_progress["status"] = "inference"
1997
- preparing_progress["current"] = 0
1906
+ inference_request.done(dataset_info.items_count)
1998
1907
 
1999
1908
  def _download_images(datasets_infos: List[DatasetInfo]):
2000
1909
  for dataset_info in datasets_infos:
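The project-inference setup above assembles its configuration from a loosely typed state dict: the project id may arrive as "projectId" or "project_id", dataset filters as "dataset_ids" or "datasetIds", and the IoU merge threshold comes from the inference settings. A hedged sketch of such a payload follows; every concrete value, and the nesting of inference settings under a "settings" key, is an illustrative assumption rather than something taken from the package.

    # Illustrative request state for project inference; key names mirror those read above.
    state = {
        "projectId": 12345,                 # "project_id" is accepted as well
        "datasetIds": [111, 222],           # optional filter; "dataset_ids" also works
        "upload_mode": "iou_merge",         # None, "append", "iou_merge" or "create"
        "output_project_id": None,          # with "create", a "Predictions from task ..." project is made
        "cache_project_on_model": True,     # pre-download the project into the model cache
        "batch_size": 16,
        "settings": {"existing_objects_iou_thresh": 0.7},  # assumption: settings nest under "settings"
    }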
@@ -2011,166 +1920,41 @@ class Inference:
2011
1920
  # start downloading in parallel
2012
1921
  threading.Thread(target=_download_images, args=[datasets_infos], daemon=True).start()
2013
1922
 
2014
- def _upload_results_to_source(results: List[Dict]):
2015
- nonlocal output_project_meta
2016
- for result in results:
2017
- image_id = result["image_id"]
2018
- ann = Annotation.from_json(result["annotation"], self.model_meta)
2019
- output_project_meta, ann, meta_changed = update_meta_and_ann(
2020
- output_project_meta, ann
2021
- )
2022
- if meta_changed:
2023
- output_project_meta = api.project.update_meta(
2024
- project_info.id, output_project_meta
2025
- )
2026
- ann = update_classes(api, ann, output_project_meta, output_project_id)
2027
- api.annotation.append_labels(image_id, ann.labels)
2028
- if async_inference_request_uuid is not None:
2029
- sly_progress.iters_done(1)
2030
- inference_request["pending_results"].append(
2031
- {
2032
- "annotation": None, # to less response size
2033
- "data": None, # to less response size
2034
- "image_id": image_id,
2035
- "image_name": result["image_name"],
2036
- "dataset_id": result["dataset_id"],
2037
- }
2038
- )
2039
-
2040
- new_dataset_id = {}
2041
-
2042
- def _get_or_create_new_dataset(output_project_id, src_dataset_id):
2043
- """Copy dataset in output project if not exists and return its id"""
2044
- if src_dataset_id in new_dataset_id:
2045
- return new_dataset_id[src_dataset_id]
2046
- dataset_info = api.dataset.get_info_by_id(src_dataset_id)
2047
- if dataset_info.parent_id is None:
2048
- output_dataset_id = api.dataset.copy(
2049
- output_project_id,
2050
- src_dataset_id,
2051
- dataset_info.name,
2052
- change_name_if_conflict=True,
2053
- ).id
2054
- else:
2055
- parent_dataset_id = _get_or_create_new_dataset(
2056
- output_project_id, dataset_info.parent_id
2057
- )
2058
- output_dataset_info = api.dataset.create(
2059
- output_project_id, dataset_info.name, parent_id=parent_dataset_id
2060
- )
2061
- api.image.copy_batch_optimized(
2062
- dataset_info.id,
2063
- images_infos_dict[dataset_info.id],
2064
- output_dataset_info.id,
2065
- with_annotations=False,
2066
- )
2067
- output_dataset_id = output_dataset_info.id
2068
- new_dataset_id[src_dataset_id] = output_dataset_id
2069
- return output_dataset_id
2070
-
2071
- def _upload_results_to_other(results: List[Dict]):
2072
- nonlocal output_project_meta
2073
- if len(results) == 0:
2074
- return
2075
- src_dataset_id = results[0]["dataset_id"]
2076
- dataset_id = _get_or_create_new_dataset(output_project_id, src_dataset_id)
2077
- image_names = [result["image_name"] for result in results]
2078
- image_infos = api.image.get_list(
2079
- dataset_id,
2080
- filters=[{"field": "name", "operator": "in", "value": image_names}],
2081
- )
2082
- meta_changed = False
2083
- anns = []
2084
- for result in results:
2085
- ann = Annotation.from_json(result["annotation"], self.model_meta)
2086
- output_project_meta, ann, c = update_meta_and_ann(output_project_meta, ann)
2087
- meta_changed = meta_changed or c
2088
- anns.append(ann)
2089
- if meta_changed:
2090
- api.project.update_meta(output_project_id, output_project_meta)
2091
-
2092
- # upload in batches to update progress with each batch
2093
- # api.annotation.upload_anns() uploads in same batches anyways
2094
- for batch in batched(list(zip(anns, results, image_infos))):
2095
- batch_anns, batch_results, batch_image_infos = zip(*batch)
2096
- api.annotation.upload_anns(
2097
- img_ids=[info.id for info in batch_image_infos],
2098
- anns=batch_anns,
2099
- )
2100
- if async_inference_request_uuid is not None:
2101
- sly_progress.iters_done(len(batch_results))
2102
- inference_request["pending_results"].extend(
2103
- [{**result, "annotation": None, "data": None} for result in batch_results]
2104
- )
2105
-
2106
- def _add_results_to_request(results: List[Dict]):
2107
- if async_inference_request_uuid is None:
2108
- return
2109
- inference_request["pending_results"].extend(results)
2110
- sly_progress.iters_done(len(results))
1923
+ _upload_predictions = partial(
1924
+ self.upload_predictions,
1925
+ api=api,
1926
+ upload_mode=upload_mode,
1927
+ context=inference_request.context,
1928
+ dst_project_id=output_project_id,
1929
+ progress_cb=inference_request.done,
1930
+ iou_merge_threshold=iou_merge_threshold,
1931
+ inference_request=inference_request,
1932
+ )
2111
1933
 
2112
- def _upload_loop(q: Queue, stop_event: threading.Event, api: Api, upload_f: Callable):
2113
- try:
2114
- while True:
2115
- items = []
2116
- while not q.empty():
2117
- items.append(q.get_nowait())
2118
- if len(items) > 0:
2119
- ds_batches = {}
2120
- for batch in items:
2121
- if len(batch) == 0:
2122
- continue
2123
- ds_batches.setdefault(batch[0].get("dataset_id"), []).extend(batch)
2124
- for _, joined_batch in ds_batches.items():
2125
- upload_f(joined_batch)
2126
- continue
2127
- if stop_event.is_set():
2128
- self._on_inference_end(None, async_inference_request_uuid)
2129
- return
2130
- time.sleep(1)
2131
- except Exception as e:
2132
- api.logger.error("Error in upload loop: %s", str(e), exc_info=True)
2133
- raise
1934
+ _add_results_to_request = partial(
1935
+ self.add_results_to_request, inference_request=inference_request
1936
+ )
2134
1937
 
2135
- if output_project_id is None:
1938
+ if upload_mode is None:
2136
1939
  upload_f = _add_results_to_request
2137
- elif output_project_id != project_info.id:
2138
- upload_f = _upload_results_to_other
2139
1940
  else:
2140
- upload_f = _upload_results_to_source
2141
-
2142
- upload_queue = Queue()
2143
- stop_upload_event = threading.Event()
2144
- upload_thread = threading.Thread(
2145
- target=_upload_loop,
2146
- args=[upload_queue, stop_upload_event, api, upload_f],
2147
- daemon=True,
2148
- )
2149
- upload_thread.start()
1941
+ upload_f = _upload_predictions
2150
1942
 
2151
- settings = self._get_inference_settings(state)
2152
- logger.debug(f"Inference settings:", extra=settings)
2153
- results = []
2154
- data_to_return = {}
2155
- stop = False
2156
- try:
1943
+ inference_request.set_stage(InferenceRequest.Stage.INFERENCE, 0, inference_progress_total)
1944
+ with Uploader(upload_f, logger=logger) as uploader:
2157
1945
  for dataset_info in datasets_infos:
2158
- if stop:
2159
- break
2160
1946
  for images_infos_batch in batched(
2161
1947
  images_infos_dict[dataset_info.id], batch_size=batch_size
2162
1948
  ):
2163
- if (
2164
- async_inference_request_uuid is not None
2165
- and inference_request["cancel_inference"] is True
2166
- ):
1949
+ if inference_request.is_stopped():
2167
1950
  logger.debug(
2168
1951
  f"Cancelling inference project...",
2169
- extra={"inference_request_uuid": async_inference_request_uuid},
1952
+ extra={"inference_request_uuid": inference_request.uuid},
2170
1953
  )
2171
- results = []
2172
- stop = True
2173
- break
1954
+ return
1955
+ if uploader.has_exception():
1956
+ exception = uploader.exception
1957
+ raise RuntimeError(f"Error in upload loop: {exception}") from exception
2174
1958
  if cache_project_on_model:
2175
1959
  images_paths, _ = zip(
2176
1960
  *read_from_cached_project(
@@ -2189,52 +1973,36 @@ class Inference:
2189
1973
  )
2190
1974
  anns, slides_data = self._inference_auto(
2191
1975
  source=images_nps,
2192
- settings=settings,
1976
+ settings=inference_settings,
2193
1977
  )
2194
- iou = settings.get("existing_objects_iou_thresh")
2195
- if output_project_meta is None and isinstance(iou, float) and iou > 0:
2196
- output_project_meta = ProjectMeta.from_json(
2197
- api.project.get_meta(project_info.id)
1978
+ predictions = [
1979
+ Prediction(
1980
+ ann,
1981
+ model_meta=self.model_meta,
1982
+ image_id=image_info.id,
1983
+ name=image_info.name,
1984
+ dataset_id=dataset_info.id,
1985
+ project_id=dataset_info.project_id,
1986
+ image_name=image_info.name,
2198
1987
  )
2199
- anns = self._exclude_duplicated_predictions(
2200
- api,
2201
- anns,
2202
- settings,
2203
- dataset_info.id,
2204
- [ii.id for ii in images_infos_batch],
2205
- output_project_meta,
2206
- )
2207
- batch_results = []
2208
- for i, ann in enumerate(anns):
2209
- batch_results.append(
2210
- {
2211
- "annotation": ann.to_json(),
2212
- "data": slides_data[i],
2213
- "image_id": images_infos_batch[i].id,
2214
- "image_name": images_infos_batch[i].name,
2215
- "dataset_id": dataset_info.id,
2216
- }
2217
- )
2218
- results.extend(batch_results)
2219
- upload_queue.put(batch_results)
2220
- except Exception:
2221
- stop_upload_event.set()
2222
- upload_thread.join()
2223
- raise
2224
- if async_inference_request_uuid is not None and len(results) > 0:
2225
- inference_request["result"] = {"ann": results}
2226
- stop_upload_event.set()
2227
- upload_thread.join()
2228
- return results
1988
+ for ann, image_info in zip(anns, images_infos_batch)
1989
+ ]
1990
+ for pred, this_slides_data in zip(predictions, slides_data):
1991
+ pred.extra_data["slides_data"] = this_slides_data
1992
+
1993
+ uploader.put(predictions)
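The rewritten loop above delegates result uploading to the new Uploader helper instead of a hand-rolled queue and worker thread. A minimal sketch of that pattern, limited to the calls visible in this diff; the import paths and the placeholder arguments are assumptions.

    from supervisely.sly_logger import logger                   # assumption: standard sly logger
    from supervisely.nn.inference.uploader import Uploader      # assumption about the import path

    def run_with_uploader(upload_f, prediction_batches, inference_request):
        # Mirrors the loop above: queue batches for background upload, watch for errors.
        with Uploader(upload_f, logger=logger) as uploader:
            for batch in prediction_batches:               # placeholder iterable of Prediction lists
                if inference_request.is_stopped():          # cooperative cancellation
                    return
                if uploader.has_exception():                # surface background upload errors
                    raise RuntimeError("Error in upload loop") from uploader.exception
                uploader.put(batch)                         # hand the batch to the background worker
        # assumption based on usage above: leaving the block flushes pending items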
2229
1994
 
2230
1995
  def _run_speedtest(
2231
1996
  self,
2232
1997
  api: Api,
2233
1998
  state: dict,
2234
- async_inference_request_uuid: str = None,
1999
+ inference_request: InferenceRequest,
2235
2000
  ):
2236
2001
  """Run speedtest on project images."""
2237
2002
  logger.debug("Running speedtest...", extra={"state": state})
2003
+ settings = self._get_inference_settings(state)
2004
+ logger.debug(f"Inference settings:", extra=settings)
2005
+
2238
2006
  project_id = state["projectId"]
2239
2007
  batch_size = state["batch_size"]
2240
2008
  num_iterations = state["num_iterations"]
@@ -2252,49 +2020,22 @@ class Inference:
2252
2020
  if dataset_id in datasets_infos_dict
2253
2021
  ]
2254
2022
 
2255
- # progress
2256
- preparing_progress = {"current": 0, "total": 1}
2257
- if async_inference_request_uuid is not None:
2258
- try:
2259
- inference_request = self._inference_requests[async_inference_request_uuid]
2260
- except Exception as ex:
2261
- import traceback
2262
-
2263
- logger.error(traceback.format_exc())
2264
- raise RuntimeError(
2265
- f"async_inference_request_uuid {async_inference_request_uuid} was given, "
2266
- f"but there is no such uuid in 'self._inference_requests' ({len(self._inference_requests)} items)"
2267
- )
2268
- sly_progress: Progress = inference_request["progress"]
2269
- sly_progress.total = num_iterations
2270
- sly_progress.current = 0
2271
-
2272
- preparing_progress = inference_request["preparing_progress"]
2023
+ preparing_progress_total = len(datasets_infos)
2024
+ if cache_project_on_model:
2025
+ preparing_progress_total += sum(
2026
+ dataset_info.items_count for dataset_info in datasets_infos
2027
+ )
2028
+ inference_request.set_stage(InferenceRequest.Stage.PREPARING, 0, preparing_progress_total)
2273
2029
 
2274
- preparing_progress["current"] = 0
2275
- preparing_progress["total"] = len(datasets_infos)
2276
- preparing_progress["status"] = "download_info"
2277
2030
  images_infos_dict = {}
2278
2031
  for dataset_info in datasets_infos:
2279
2032
  images_infos_dict[dataset_info.id] = api.image.get_list(dataset_info.id)
2280
- if not cache_project_on_model:
2281
- preparing_progress["current"] += 1
2033
+ inference_request.done()
2282
2034
 
2283
2035
  if cache_project_on_model:
2036
+ download_to_cache(api, project_id, datasets_infos, progress_cb=inference_request.done)
2284
2037
 
2285
- def _progress_cb(count: int = 1):
2286
- preparing_progress["current"] += count
2287
-
2288
- preparing_progress["current"] = 0
2289
- preparing_progress["total"] = sum(
2290
- dataset_info.items_count for dataset_info in datasets_infos
2291
- )
2292
- preparing_progress["status"] = "download_project"
2293
- download_to_cache(api, project_id, datasets_infos, progress_cb=_progress_cb)
2294
-
2295
- preparing_progress["status"] = "warmup"
2296
- preparing_progress["current"] = 0
2297
- preparing_progress["total"] = num_warmup
2038
+ inference_request.set_stage("warmup", 0, num_warmup)
2298
2039
 
2299
2040
  images_infos: List[ImageInfo] = [
2300
2041
  image_info for infos in images_infos_dict.values() for image_info in infos
@@ -2313,44 +2054,9 @@ class Inference:
2313
2054
  # start downloading in parallel
2314
2055
  threading.Thread(target=_download_images, daemon=True).start()
2315
2056
 
2316
- def _add_results_to_request(results: List[Dict]):
2317
- if async_inference_request_uuid is None:
2318
- return
2319
- inference_request["pending_results"].append(results)
2320
- sly_progress.iters_done(1)
2321
-
2322
- def _upload_loop(q: Queue, stop_event: threading.Event, api: Api, upload_f: Callable):
2323
- try:
2324
- while True:
2325
- items = []
2326
- while not q.empty():
2327
- items.append(q.get_nowait())
2328
- if len(items) > 0:
2329
- for batch in items:
2330
- upload_f(batch)
2331
- continue
2332
- if stop_event.is_set():
2333
- self._on_inference_end(None, async_inference_request_uuid)
2334
- return
2335
- time.sleep(1)
2336
- except Exception as e:
2337
- api.logger.error("Error in upload loop: %s", str(e), exc_info=True)
2338
- raise
2339
-
2340
- upload_f = _add_results_to_request
2341
-
2342
- upload_queue = Queue()
2343
- stop_upload_event = threading.Event()
2344
- threading.Thread(
2345
- target=_upload_loop,
2346
- args=[upload_queue, stop_upload_event, api, upload_f],
2347
- daemon=True,
2348
- ).start()
2349
-
2350
- settings = self._get_inference_settings(state)
2351
- logger.debug(f"Inference settings:", extra=settings)
2352
- results = []
2353
- stop = False
2057
+ def upload_f(benchmarks: List):
2058
+ inference_request.add_results(benchmarks)
2059
+ inference_request.done(len(benchmarks))
2354
2060
 
2355
2061
  def image_batch_generator(batch_size):
2356
2062
  logger.debug(
@@ -2366,23 +2072,20 @@ class Inference:
2366
2072
  batch = []
2367
2073
 
2368
2074
  batch_generator = image_batch_generator(batch_size)
2369
- try:
2075
+
2076
+ with Uploader(upload_f=upload_f, logger=logger) as uploader:
2370
2077
  for i in range(num_iterations + num_warmup):
2371
- if stop:
2372
- break
2373
- if (
2374
- async_inference_request_uuid is not None
2375
- and inference_request["cancel_inference"] is True
2376
- ):
2078
+ if inference_request.is_stopped():
2377
2079
  logger.debug(
2378
2080
  f"Cancelling inference project...",
2379
- extra={"inference_request_uuid": async_inference_request_uuid},
2081
+ extra={"inference_request_uuid": inference_request.uuid},
2380
2082
  )
2381
- results = []
2382
- stop = True
2383
- break
2083
+ return
2084
+ if uploader.has_exception():
2085
+ exception = uploader.exception
2086
+ raise RuntimeError(f"Error in upload loop: {exception}") from exception
2384
2087
  if i == num_warmup:
2385
- preparing_progress["status"] = "inference"
2088
+ inference_request.set_stage(InferenceRequest.Stage.INFERENCE, 0, num_iterations)
2386
2089
 
2387
2090
  images_infos_batch: List[ImageInfo] = next(batch_generator)
2388
2091
 
@@ -2429,35 +2132,9 @@ class Inference:
2429
2132
  )
2430
2133
  # Collect results if warmup is done
2431
2134
  if i >= num_warmup:
2432
- results.append(benchmark)
2433
- upload_queue.put(benchmark)
2135
+ uploader.put([benchmark])
2434
2136
  else:
2435
- preparing_progress["current"] += 1
2436
- except Exception:
2437
- stop_upload_event.set()
2438
- raise
2439
- if async_inference_request_uuid is not None and len(results) > 0:
2440
- inference_request["result"] = results
2441
- stop_upload_event.set()
2442
- return results
2443
-
2444
- def _on_inference_start(self, inference_request_uuid):
2445
- inference_request = {
2446
- "progress": Progress("Inferring model...", total_cnt=1),
2447
- "is_inferring": True,
2448
- "cancel_inference": False,
2449
- "result": None,
2450
- "pending_results": [],
2451
- "preparing_progress": {"current": 0, "total": 1},
2452
- "exception": None,
2453
- }
2454
- self._inference_requests[inference_request_uuid] = inference_request
2455
-
2456
- def _on_inference_end(self, future, inference_request_uuid):
2457
- logger.debug("callback: on_inference_end()")
2458
- inference_request = self._inference_requests.get(inference_request_uuid)
2459
- if inference_request is not None:
2460
- inference_request["is_inferring"] = False
2137
+ inference_request.done()
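Both project inference and the speedtest above report progress through the InferenceRequest stage API rather than the old preparing_progress dict. A condensed sketch of the calls used in this file; the import path, the totals, and the wrapper function are assumptions.

    from supervisely.nn.inference.inference_request import InferenceRequest  # assumed import path

    def report_progress(inference_request, n_items, results):
        # Stage names and method signatures are the ones used above; totals are illustrative.
        inference_request.set_stage(InferenceRequest.Stage.PREPARING, 0, n_items)
        inference_request.done(n_items)                        # preparation finished
        inference_request.set_stage("warmup", 0, 3)            # speedtest-only warmup stage
        inference_request.set_stage(InferenceRequest.Stage.INFERENCE, 0, n_items)
        inference_request.add_results(results)                 # expose results to pop_inference_results
        inference_request.done(len(results))                   # advance the inference progress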
2461
2138
 
2462
2139
  def _check_serve_before_call(self, func):
2463
2140
  @wraps(func)
@@ -2481,6 +2158,24 @@ class Inference:
2481
2158
  def is_model_deployed(self):
2482
2159
  return self._model_served
2483
2160
 
2161
+ def _on_inference_start(self, inference_request_uuid):
2162
+ inference_request = {
2163
+ "progress": Progress("Inferring model...", total_cnt=1),
2164
+ "is_inferring": True,
2165
+ "cancel_inference": False,
2166
+ "result": None,
2167
+ "pending_results": [],
2168
+ "preparing_progress": {"current": 0, "total": 1},
2169
+ "exception": None,
2170
+ }
2171
+ self._inference_requests[inference_request_uuid] = inference_request
2172
+
2173
+ def _on_inference_end(self, future, inference_request_uuid):
2174
+ logger.debug("callback: on_inference_end()")
2175
+ inference_request = self._inference_requests.get(inference_request_uuid)
2176
+ if inference_request is not None:
2177
+ inference_request["is_inferring"] = False
2178
+
2484
2179
  def schedule_task(self, func, *args, **kwargs):
2485
2180
  inference_request_uuid = kwargs.get("inference_request_uuid", None)
2486
2181
  if inference_request_uuid is None:
@@ -2523,6 +2218,228 @@ class Inference:
2523
2218
  self.gui._success_label.hide()
2524
2219
  raise e
2525
2220
 
2221
+ def validate_inference_state(self, state: Union[Dict, str], log_error=True):
2222
+ try:
2223
+ if isinstance(state, str):
2224
+ try:
2225
+ state = json.loads(state)
2226
+ except (json.decoder.JSONDecodeError, TypeError) as e:
2227
+ raise HTTPException(
2228
+ status_code=status.HTTP_400_BAD_REQUEST,
2229
+ detail=f"Cannot decode settings: {e}",
2230
+ )
2231
+ if not isinstance(state, dict):
2232
+ raise HTTPException(
2233
+ status_code=status.HTTP_400_BAD_REQUEST, detail="Settings is not json object"
2234
+ )
2235
+ batch_size = state.get("batch_size", None)
2236
+ if batch_size is None:
2237
+ batch_size = self.get_batch_size()
2238
+ if self.max_batch_size is not None and batch_size > self.max_batch_size:
2239
+ raise HTTPException(
2240
+ status_code=status.HTTP_400_BAD_REQUEST,
2241
+ detail=f"Batch size should be less than or equal to {self.max_batch_size} for this model.",
2242
+ )
2243
+ except Exception as e:
2244
+ if log_error:
2245
+ logger.error(f"Error validating request state: {e}", exc_info=True)
2246
+ raise
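validate_inference_state above accepts either a JSON string or an already-parsed dict and rejects oversized batches before any work is scheduled. A hedged sketch of the behavior it enforces; the served instance m and its max_batch_size are hypothetical.

    def check_payloads(m):
        # Hypothetical served instance `m` with max_batch_size == 16.
        m.validate_inference_state('{"batch_size": 8}')    # ok: JSON strings are decoded
        m.validate_inference_state({"batch_size": 8})      # ok: dicts pass through
        try:
            m.validate_inference_state({"batch_size": 64})  # larger than max_batch_size
        except Exception:
            pass  # HTTPException(400, "Batch size should be less than or equal to 16 for this model.")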
2247
+
2248
+ def upload_predictions(
2249
+ self,
2250
+ predictions: List[Prediction],
2251
+ api: Api,
2252
+ upload_mode: str,
2253
+ context: Dict = None,
2254
+ dst_dataset_id: int = None,
2255
+ dst_project_id: int = None,
2256
+ progress_cb=None,
2257
+ iou_merge_threshold: float = None,
2258
+ inference_request: InferenceRequest = None,
2259
+ ):
2260
+ ds_predictions: Dict[int, List[Prediction]] = defaultdict(list)
2261
+ for prediction in predictions:
2262
+ ds_predictions[prediction.dataset_id].append(prediction)
2263
+
2264
+ def _new_name(image_info: ImageInfo):
2265
+ name = Path(image_info.name)
2266
+ stem = name.stem
2267
+ parent = name.parent
2268
+ suffix = name.suffix
2269
+ return str(parent / f"{stem}(dataset_id:{image_info.dataset_id}){suffix}")
2270
+
2271
+ def _get_or_create_dataset(src_dataset_id, dst_project_id):
2272
+ if src_dataset_id is None:
2273
+ return None
2274
+ created_dataset_id = context.setdefault("created_dataset", {}).get(src_dataset_id, None)
2275
+ if created_dataset_id is not None:
2276
+ return created_dataset_id
2277
+ src_dataset_info: DatasetInfo = context.setdefault("dataset_info", {}).get(
2278
+ src_dataset_id
2279
+ )
2280
+ if src_dataset_info is None:
2281
+ src_dataset_info = api.dataset.get_info_by_id(src_dataset_id)
2282
+ context["dataset_info"][src_dataset_id] = src_dataset_info
2283
+ src_parent_id = src_dataset_info.parent_id
2284
+ dst_parent_id = _get_or_create_dataset(src_parent_id, dst_project_id)
2285
+ created_dataset = api.dataset.create(
2286
+ dst_project_id,
2287
+ src_dataset_info.name,
2288
+ description=f"Auto created dataset from inference request {inference_request.uuid if inference_request is not None else ''}",
2289
+ change_name_if_conflict=True,
2290
+ parent_id=dst_parent_id,
2291
+ )
2292
+ context["dataset_info"][created_dataset.id] = created_dataset
2293
+ context.setdefault("created_dataset", {})[src_dataset_id] = created_dataset.id
2294
+ return created_dataset.id
2295
+
2296
+ created_names = []
2297
+ if context is None:
2298
+ context = {}
2299
+ for dataset_id, preds in ds_predictions.items():
2300
+ if dst_project_id is not None:
2301
+ # upload to the destination project
2302
+ dst_dataset_id = _get_or_create_dataset(
2303
+ src_dataset_id=dataset_id, dst_project_id=dst_project_id
2304
+ )
2305
+ if dst_dataset_id is not None:
2306
+ # upload to the destination dataset
2307
+ dataset_info = context.setdefault("dataset_info", {}).get(dst_dataset_id, None)
2308
+ if dataset_info is None:
2309
+ dataset_info = api.dataset.get_info_by_id(dst_dataset_id)
2310
+ context["dataset_info"][dst_dataset_id] = dataset_info
2311
+ project_id = dataset_info.project_id
2312
+ project_meta = context.setdefault("project_meta", {}).get(project_id, None)
2313
+ if project_meta is None:
2314
+ project_meta = ProjectMeta.from_json(api.project.get_meta(project_id))
2315
+ context["project_meta"][project_id] = project_meta
2316
+
2317
+ meta_changed = False
2318
+ for pred in preds:
2319
+ ann = pred.annotation
2320
+ project_meta, ann, meta_changed_ = update_meta_and_ann(project_meta, ann)
2321
+ meta_changed = meta_changed or meta_changed_
2322
+ pred.annotation = ann
2323
+ prediction.model_meta = project_meta
2324
+
2325
+ if meta_changed:
2326
+ project_meta = api.project.update_meta(project_id, project_meta)
2327
+ context["project_meta"][project_id] = project_meta
2328
+
2329
+ anns = _exclude_duplicated_predictions(
2330
+ api,
2331
+ [pred.annotation for pred in preds],
2332
+ dataset_id,
2333
+ [pred.image_id for pred in preds],
2334
+ iou=iou_merge_threshold,
2335
+ meta=project_meta,
2336
+ )
2337
+ for pred, ann in zip(preds, anns):
2338
+ pred.annotation = ann
2339
+
2340
+ context.setdefault("image_info", {})
2341
+ missing = [
2342
+ pred.image_id for pred in preds if pred.image_id not in context["image_info"]
2343
+ ]
2344
+ if missing:
2345
+ context["image_info"].update(
2346
+ {
2347
+ image_info.id: image_info
2348
+ for image_info in api.image.get_info_by_id_batch(missing)
2349
+ }
2350
+ )
2351
+ image_infos: List[ImageInfo] = [
2352
+ context["image_info"][pred.image_id] for pred in preds
2353
+ ]
2354
+ dst_names = [
2355
+ _new_name(image_info) if image_info.name in created_names else image_info.name
2356
+ for image_info in image_infos
2357
+ ]
2358
+ dst_image_infos = api.image.copy_batch_optimized(
2359
+ dataset_id,
2360
+ image_infos,
2361
+ dst_dataset_id,
2362
+ dst_names=dst_names,
2363
+ with_annotations=False,
2364
+ save_source_date=False,
2365
+ )
2366
+ created_names.extend([image_info.name for image_info in dst_image_infos])
2367
+ api.annotation.upload_anns([image_info.id for image_info in dst_image_infos], anns)
2368
+ else:
2369
+ # upload to the source dataset
2370
+ ds_info = context.setdefault("dataset_info", {}).get(dataset_id, None)
2371
+ if ds_info is None:
2372
+ ds_info = api.dataset.get_info_by_id(dataset_id)
2373
+ context["dataset_info"][dataset_id] = ds_info
2374
+ project_id = ds_info.project_id
2375
+
2376
+ project_meta = context.setdefault("project_meta", {}).get(project_id, None)
2377
+ if project_meta is None:
2378
+ project_meta = ProjectMeta.from_json(api.project.get_meta(project_id))
2379
+ context["project_meta"][project_id] = project_meta
2380
+
2381
+ meta_changed = False
2382
+ for pred in preds:
2383
+ ann = pred.annotation
2384
+ project_meta, ann, meta_changed_ = update_meta_and_ann(project_meta, ann)
2385
+ meta_changed = meta_changed or meta_changed_
2386
+ pred.annotation = ann
2387
+ prediction.model_meta = project_meta
2388
+
2389
+ if meta_changed:
2390
+ project_meta = api.project.update_meta(project_id, project_meta)
2391
+ context["project_meta"][project_id] = project_meta
2392
+
2393
+ anns = _exclude_duplicated_predictions(
2394
+ api,
2395
+ [pred.annotation for pred in preds],
2396
+ dataset_id,
2397
+ [pred.image_id for pred in preds],
2398
+ iou=iou_merge_threshold,
2399
+ meta=project_meta,
2400
+ )
2401
+ for pred, ann in zip(preds, anns):
2402
+ pred.annotation = ann
2403
+
2404
+ if upload_mode in ["iou_merge", "append"]:
2405
+ context.setdefault("annotation", {})
2406
+ missing = []
2407
+ for pred in preds:
2408
+ if pred.image_id not in context["annotation"]:
2409
+ missing.append(pred.image_id)
2410
+ for image_id, ann_info in zip(
2411
+ missing, api.annotation.download_batch(dataset_id, missing)
2412
+ ):
2413
+ context["annotation"][image_id] = Annotation.from_json(
2414
+ ann_info.annotation, project_meta
2415
+ )
2416
+ for pred in preds:
2417
+ pred.annotation = context["annotation"][pred.image_id].merge(
2418
+ pred.annotation
2419
+ )
2420
+
2421
+ api.annotation.upload_anns(
2422
+ [pred.image_id for pred in preds],
2423
+ [pred.annotation for pred in preds],
2424
+ )
2425
+
2426
+ if progress_cb is not None:
2427
+ progress_cb(len(preds))
2428
+
2429
+ if inference_request is not None:
2430
+ results = self._format_output(predictions)
2431
+ for result in results:
2432
+ result["annotation"] = None
2433
+ result["data"] = None
2434
+ inference_request.add_results(results)
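upload_predictions above memoizes every lookup in the mutable context dict so that consecutive batches reuse project metas, dataset infos, and downloaded annotations instead of re-querying the API. The keys it maintains, shown as an illustrative skeleton with empty values.

    # Shape of the `context` cache populated above; the mappings start empty and fill lazily.
    context = {
        "project_info": {},      # project_id -> ProjectInfo
        "project_meta": {},      # project_id -> ProjectMeta, refreshed whenever the meta changes
        "dataset_info": {},      # dataset_id -> DatasetInfo
        "created_dataset": {},   # src dataset_id -> dataset id created in the destination project
        "image_info": {},        # image_id -> ImageInfo for destination copies
        "annotation": {},        # image_id -> Annotation, used by "append"/"iou_merge" merging
    }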
2435
+
2436
+ def add_results_to_request(
2437
+ self, predictions: List[Prediction], inference_request: InferenceRequest
2438
+ ):
2439
+ results = self._format_output(predictions)
2440
+ inference_request.add_results(results)
2441
+ inference_request.done(len(results))
2442
+
2526
2443
  def serve(self):
2527
2444
  if not self._use_gui and not self._is_local_deploy:
2528
2445
  Progress("Deploying model ...", 1)
@@ -2583,28 +2500,46 @@ class Inference:
2583
2500
  server = self._app.get_server()
2584
2501
  self._app.set_ready_check_function(self.is_model_deployed)
2585
2502
 
2586
- @call_on_autostart()
2587
- def autostart_func():
2588
- gpu_count = get_gpu_count()
2589
- if gpu_count > 1:
2590
- # run autostart after 5 min
2591
- def delayed_autostart():
2592
- logger.debug("Found more than one GPU, autostart will be delayed.")
2593
- time.sleep(self._autostart_delay_time)
2594
- if not self._model_served:
2595
- logger.debug("Deploying the model via autostart...")
2596
- self.gui.deploy_with_current_params()
2597
-
2598
- self._executor.submit(delayed_autostart)
2599
- else:
2600
- # run autostart immediately
2601
- self.gui.deploy_with_current_params()
2503
+ if self.api is not None:
2504
+
2505
+ @call_on_autostart()
2506
+ def autostart_func():
2507
+ gpu_count = get_gpu_count()
2508
+ if gpu_count > 1:
2509
+ # run autostart after 5 min
2510
+ def delayed_autostart():
2511
+ logger.debug("Found more than one GPU, autostart will be delayed.")
2512
+ time.sleep(self._autostart_delay_time)
2513
+ if not self._model_served:
2514
+ logger.debug("Deploying the model via autostart...")
2515
+ self.gui.deploy_with_current_params()
2516
+
2517
+ self._executor.submit(delayed_autostart)
2518
+ else:
2519
+ # run autostart immediately
2520
+ self.gui.deploy_with_current_params()
2602
2521
 
2603
2522
  if not self._use_gui:
2604
2523
  Progress("Model deployed", 1).iter_done_report()
2605
2524
  else:
2606
2525
  autostart_func()
2607
2526
 
2527
+ @server.exception_handler(HTTPException)
2528
+ def http_exception_handler(request: Request, exc: HTTPException):
2529
+ response_content = {
2530
+ "detail": exc.detail,
2531
+ "success": False,
2532
+ }
2533
+ if isinstance(exc.detail, dict):
2534
+ if "message" in exc.detail:
2535
+ response_content["message"] = exc.detail["message"]
2536
+ if "success" in exc.detail:
2537
+ response_content["success"] = exc.detail["success"]
2538
+ elif isinstance(exc.detail, str):
2539
+ response_content["message"] = exc.detail
2540
+
2541
+ return JSONResponse(status_code=exc.status_code, content=response_content)
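The new exception handler above normalizes error responses so that clients always receive a JSON body with detail, success, and, when available, message. Illustrative bodies for the two detail shapes; the messages are placeholders and the status code is whatever the endpoint raised.

    # detail passed as a string, e.g. HTTPException(400, detail="Cannot decode settings: ...")
    {"detail": "Cannot decode settings: ...", "success": False, "message": "Cannot decode settings: ..."}

    # detail passed as a dict, e.g. HTTPException(400, detail={"message": "...", "success": False})
    {"detail": {"message": "...", "success": False}, "success": False, "message": "..."}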
2542
+
2608
2543
  self.cache.add_cache_endpoint(server)
2609
2544
  self.cache.add_cache_files_endpoint(server)
2610
2545
 
@@ -2617,311 +2552,353 @@ class Inference:
2617
2552
  def get_custom_inference_settings():
2618
2553
  return {"settings": self.custom_inference_settings}
2619
2554
 
2555
+ @server.post("/get_model_meta")
2620
2556
  @server.post("/get_output_classes_and_tags")
2621
2557
  def get_output_classes_and_tags():
2622
2558
  return self.model_meta.to_json()
2623
2559
 
2624
2560
  @server.post("/inference_image_id")
2625
2561
  def inference_image_id(request: Request):
2626
- logger.debug(f"'inference_image_id' request in json format:{request.state.state}")
2627
- return self._inference_image_id(request.state.api, request.state.state)
2562
+ state = request.state.state
2563
+ logger.debug("Received a request to '/inference_image_id'", extra={"state": state})
2564
+ self.validate_inference_state(state)
2565
+ api = self.api_from_request(request)
2566
+ return self.inference_requests_manager.run(self._inference_image_ids, api, state)[0]
2567
+
2568
+ @server.post("/inference_image_id_async")
2569
+ def inference_image_id_async(request: Request):
2570
+ state = request.state.state
2571
+ logger.debug(
2572
+ "Received a request to 'inference_image_id_async'",
2573
+ extra={"state": state},
2574
+ )
2575
+ self.validate_inference_state(state)
2576
+ api = self.api_from_request(request)
2577
+ inference_request, _ = self.inference_requests_manager.schedule_task(
2578
+ self._inference_image_ids,
2579
+ api,
2580
+ state,
2581
+ )
2582
+ return {
2583
+ "message": "Scheduled inference task.",
2584
+ "inference_request_uuid": inference_request.uuid,
2585
+ }
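A client-side sketch of scheduling inference through the async endpoint above. Wrapping the payload in a "state" key and the image_id field name are assumptions about how the serving middleware fills request.state.state; the URL and ids are placeholders.

    import requests

    resp = requests.post(
        "http://localhost:8000/inference_image_id_async",    # hypothetical serving app address
        json={"state": {"image_id": 123456, "settings": {}}},
    )
    request_uuid = resp.json()["inference_request_uuid"]      # used later for polling and cleanup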
2586
+
2587
+ @server.post("/inference_image")
2588
+ def inference_image(
2589
+ files: List[UploadFile], settings: str = Form("{}"), state: str = Form("{}")
2590
+ ):
2591
+ if state == "{}" or not state:
2592
+ state = settings
2593
+ state = str(state)
2594
+ logger.debug("Received a request to 'inference_image'", extra={"state": state})
2595
+ self.validate_inference_state(state)
2596
+ state = json.loads(state)
2597
+ if len(files) != 1:
2598
+ raise HTTPException(
2599
+ status_code=status.HTTP_400_BAD_REQUEST,
2600
+ detail=f"Only one file expected but got {len(files)}",
2601
+ )
2602
+ try:
2603
+ file = files[0]
2604
+ inference_request = self.inference_requests_manager.create()
2605
+ inference_request.set_stage(InferenceRequest.Stage.PREPARING, 0, file.size)
2606
+
2607
+ img_bytes = b""
2608
+ while buf := file.read(64 * 1024 * 1024):
2609
+ img_bytes += buf
2610
+ inference_request.done(len(buf))
2611
+
2612
+ image = sly_image.read_bytes(img_bytes)
2613
+ inference_request, future = self.inference_requests_manager.schedule_task(
2614
+ self._inference_images, [image], state, inference_request=inference_request
2615
+ )
2616
+ future.result()
2617
+ return inference_request.pop_pending_results()[0]
2618
+ except sly_image.UnsupportedImageFormat:
2619
+ raise HTTPException(
2620
+ status_code=status.HTTP_400_BAD_REQUEST,
2621
+ detail=f"File has unsupported format. Supported formats: {sly_image.SUPPORTED_IMG_EXTS}",
2622
+ )
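The synchronous /inference_image endpoint above takes the image as a multipart upload plus a form-encoded state. A hedged client sketch using requests; the URL, file name, and settings are placeholders, while the field names follow the endpoint signature.

    import json
    import requests

    with open("image.jpg", "rb") as f:                        # hypothetical local image
        resp = requests.post(
            "http://localhost:8000/inference_image",
            files=[("files", ("image.jpg", f, "image/jpeg"))],
            data={"state": json.dumps({"settings": {}})},
        )
    prediction = resp.json()                                  # a single annotation result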
2628
2623
 
2629
2624
  @server.post("/inference_image_url")
2630
2625
  def inference_image_url(request: Request):
2631
- logger.debug(f"'inference_image_url' request in json format:{request.state.state}")
2632
- return self._inference_image_url(request.state.api, request.state.state)
2626
+ state = request.state.state
2627
+ logger.debug("Received a request to 'inference_image_url'", extra={"state": state})
2628
+ self.validate_inference_state(state)
2629
+ image_url = state["image_url"]
2630
+ ext = sly_fs.get_file_ext(image_url)
2631
+ if ext == "":
2632
+ ext = ".jpg"
2633
+ with requests.get(image_url, stream=True) as response:
2634
+ response.raise_for_status()
2635
+ response.raw.decode_content = True
2636
+ image = self.cache.add_image_to_cache(image_url, response.raw, ext=ext)
2637
+ return self.inference_requests_manager.run(self._inference_images, [image], state)[0]
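For the URL-based endpoint above, the image reference travels inside the state itself. A minimal payload sketch; "image_url" is the only key read explicitly above, and the URL and empty settings are placeholders.

    # Minimal state for /inference_image_url.
    state = {"image_url": "https://example.com/image.jpg", "settings": {}}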
2633
2638
 
2634
2639
  @server.post("/inference_batch_ids")
2635
- def inference_batch_ids(response: Response, request: Request):
2636
- # check batch size
2637
- batch_size = len(request.state.state["batch_ids"])
2638
- if self.max_batch_size is not None and batch_size > self.max_batch_size:
2639
- response.status_code = status.HTTP_400_BAD_REQUEST
2640
- return {
2641
- "message": f"Batch size should be less than or equal to {self.max_batch_size} for this model.",
2642
- "success": False,
2643
- }
2644
- logger.debug(f"'inference_batch_ids' request in json format:{request.state.state}")
2645
- return self._inference_batch_ids(request.state.api, request.state.state)
2640
+ def inference_batch_ids(request: Request):
2641
+ state = request.state.state
2642
+ logger.debug("Received a request to 'inference_batch_ids'", extra={"state": state})
2643
+ self.validate_inference_state(state)
2644
+ api = self.api_from_request(request)
2645
+ return self.inference_requests_manager.run(self._inference_image_ids, api, state)
2646
2646
 
2647
2647
  @server.post("/inference_batch_ids_async")
2648
- def inference_batch_ids_async(response: Response, request: Request):
2648
+ def inference_batch_ids_async(request: Request):
2649
+ state = request.state.state
2649
2650
  logger.debug(
2650
- f"'inference_batch_ids_async' request in json format:{request.state.state}"
2651
+ f"Received a request to 'inference_batch_ids_async'", extra={"state": state}
2651
2652
  )
2652
- images_ids = request.state.state["images_ids"]
2653
- # check batch size
2654
- batch_size = request.state.state.get("batch_size", None)
2655
- if batch_size is None:
2656
- batch_size = self.get_batch_size()
2657
- if self.max_batch_size is not None and batch_size > self.max_batch_size:
2658
- response.status_code = status.HTTP_400_BAD_REQUEST
2659
- return {
2660
- "message": f"Batch size should be less than or equal to {self.max_batch_size} for this model.",
2661
- "success": False,
2662
- }
2663
- inference_request_uuid = uuid.uuid5(
2664
- namespace=uuid.NAMESPACE_URL, name=f"{time.time()}"
2665
- ).hex
2666
- self._on_inference_start(inference_request_uuid)
2667
- future = self._executor.submit(
2668
- self._handle_error_in_async,
2669
- inference_request_uuid,
2670
- self._inference_images_ids,
2671
- request.state.api,
2672
- request.state.state,
2673
- images_ids,
2674
- inference_request_uuid,
2675
- )
2676
- end_callback = partial(
2677
- self._on_inference_end, inference_request_uuid=inference_request_uuid
2678
- )
2679
- future.add_done_callback(end_callback)
2680
- logger.debug(
2681
- "Inference has scheduled from 'inference_batch_ids_async' endpoint",
2682
- extra={"inference_request_uuid": inference_request_uuid},
2653
+ self.validate_inference_state(state)
2654
+ api = self.api_from_request(request)
2655
+ inference_request, _ = self.inference_requests_manager.schedule_task(
2656
+ self._inference_image_ids, api, state
2683
2657
  )
2684
2658
  return {
2685
- "message": "Inference has started.",
2686
- "inference_request_uuid": inference_request_uuid,
2659
+ "message": "Scheduled inference task.",
2660
+ "inference_request_uuid": inference_request.uuid,
2687
2661
  }
2688
2662
 
2689
- @server.post("/inference_video_id")
2690
- def inference_video_id(response: Response, request: Request):
2691
- logger.debug(f"'inference_video_id' request in json format:{request.state.state}")
2692
- # check batch size
2693
- batch_size = request.state.state.get("batch_size", None)
2694
- if batch_size is None:
2695
- batch_size = self.get_batch_size()
2696
- if self.max_batch_size is not None and batch_size > self.max_batch_size:
2697
- response.status_code = status.HTTP_400_BAD_REQUEST
2698
- return {
2699
- "message": f"Batch size should be less than or equal to {self.max_batch_size} for this model.",
2700
- "success": False,
2701
- }
2702
- return self._inference_video_id(request.state.api, request.state.state)
2703
-
2704
- @server.post("/inference_image")
2705
- def inference_image(
2706
- response: Response, files: List[UploadFile], settings: str = Form("{}")
2663
+ @server.post("/inference_batch")
2664
+ def inference_batch(
2665
+ response: Response,
2666
+ files: List[UploadFile],
2667
+ settings: str = Form("{}"),
2668
+ state: str = Form("{}"),
2707
2669
  ):
2708
- if len(files) != 1:
2709
- response.status_code = status.HTTP_400_BAD_REQUEST
2710
- return f"Only one file expected but got {len(files)}"
2670
+ if state == "{}" or not state:
2671
+ state = settings
2672
+ state = str(state)
2673
+ logger.debug("Received a request to 'inference_batch'", extra={"state": state})
2674
+ self.validate_inference_state(state)
2675
+ state = json.loads(state)
2676
+ if len(files) == 0:
2677
+ raise HTTPException(
2678
+ status_code=status.HTTP_400_BAD_REQUEST,
2679
+ detail=f"At least one file is expected but got {len(files)}",
2680
+ )
2711
2681
  try:
2712
- state = json.loads(settings)
2713
- if type(state) != dict:
2714
- response.status_code = status.HTTP_400_BAD_REQUEST
2715
- return "Settings is not json object"
2716
- return self._inference_image(state, files[0])
2717
- except (json.decoder.JSONDecodeError, TypeError) as e:
2718
- response.status_code = status.HTTP_400_BAD_REQUEST
2719
- return f"Cannot decode settings: {e}"
2682
+ inference_request = self.inference_requests_manager.create()
2683
+ inference_request.set_stage(
2684
+ InferenceRequest.Stage.PREPARING, 0, sum([file.size for file in files])
2685
+ )
2686
+
2687
+ names = []
2688
+ for file in files:
2689
+ name = file.filename
2690
+ if name is None or name == "":
2691
+ name = rand_str(10)
2692
+ ext = Path(name).suffix
2693
+ img_bytes = b""
2694
+ while buf := file.file.read(64 * 1024 * 1024):
2695
+ img_bytes += buf
2696
+ inference_request.done(len(buf))
2697
+ self.cache.add_image_to_cache(name, img_bytes, ext=ext)
2698
+ names.append(name)
2699
+
2700
+ inference_request, future = self.inference_requests_manager.schedule_task(
2701
+ self._inference_images, names, state, inference_request=inference_request
2702
+ )
2703
+ future.result()
2704
+ return inference_request.pop_pending_results()
2720
2705
  except sly_image.UnsupportedImageFormat:
2721
2706
  response.status_code = status.HTTP_400_BAD_REQUEST
2722
2707
  return f"File has unsupported format. Supported formats: {sly_image.SUPPORTED_IMG_EXTS}"
2723
2708
 
2724
- @server.post("/inference_batch")
2725
- def inference_batch(
2726
- response: Response, files: List[UploadFile], settings: str = Form("{}")
2709
+ @server.post("/inference_batch_async")
2710
+ def inference_batch_async(
2711
+ response: Response,
2712
+ files: List[UploadFile],
2713
+ settings: str = Form("{}"),
2714
+ state: str = Form("{}"),
2727
2715
  ):
2716
+ if state == "{}" or not state:
2717
+ state = settings
2718
+ state = str(state)
2719
+ logger.debug("Received a request to 'inference_batch'", extra={"state": state})
2720
+ self.validate_inference_state(state)
2721
+ state = json.loads(state)
2722
+ if len(files) == 0:
2723
+ raise HTTPException(
2724
+ status_code=status.HTTP_400_BAD_REQUEST,
2725
+ detail=f"At least one file is expected but got {len(files)}",
2726
+ )
2728
2727
  try:
2729
- state = json.loads(settings)
2730
- if type(state) != dict:
2731
- response.status_code = status.HTTP_400_BAD_REQUEST
2732
- return "Settings is not json object"
2733
- # check batch size
2734
- batch_size = len(files)
2735
- if self.max_batch_size is not None and batch_size > self.max_batch_size:
2736
- response.status_code = status.HTTP_400_BAD_REQUEST
2737
- return {
2738
- "message": f"Batch size should be less than or equal to {self.max_batch_size} for this model.",
2739
- "success": False,
2740
- }
2741
- return self._inference_batch(state, files)
2742
- except (json.decoder.JSONDecodeError, TypeError) as e:
2743
- response.status_code = status.HTTP_400_BAD_REQUEST
2744
- return f"Cannot decode settings: {e}"
2728
+ inference_request = self.inference_requests_manager.create()
2729
+ inference_request.set_stage(
2730
+ InferenceRequest.Stage.PREPARING, 0, sum([file.size for file in files])
2731
+ )
2732
+
2733
+ names = []
2734
+ for file in files:
2735
+ name = file.filename
2736
+ if name is None or name == "":
2737
+ name = rand_str(10)
2738
+ ext = Path(name).suffix
2739
+ img_bytes = b""
2740
+ while buf := file.file.read(64 * 1024 * 1024):
2741
+ img_bytes += buf
2742
+ inference_request.done(len(buf))
2743
+ self.cache.add_image_to_cache(name, img_bytes, ext=ext)
2744
+ names.append(name)
2745
+
2746
+ inference_request, _ = self.inference_requests_manager.schedule_task(
2747
+ self._inference_images, names, state, inference_request=inference_request
2748
+ )
2749
+ return {
2750
+ "message": "Scheduled inference task.",
2751
+ "inference_request_uuid": inference_request.uuid,
2752
+ }
2745
2753
  except sly_image.UnsupportedImageFormat:
2746
2754
  response.status_code = status.HTTP_400_BAD_REQUEST
2747
2755
  return f"File has unsupported format. Supported formats: {sly_image.SUPPORTED_IMG_EXTS}"
2748
2756
 
2749
- @server.post("/inference_image_id_async")
2750
- def inference_image_id_async(request: Request):
2751
- logger.debug(f"'inference_image_id_async' request in json format:{request.state.state}")
2752
- inference_request_uuid = uuid.uuid5(
2753
- namespace=uuid.NAMESPACE_URL, name=f"{time.time()}"
2754
- ).hex
2755
- self._on_inference_start(inference_request_uuid)
2756
- future = self._executor.submit(
2757
- self._handle_error_in_async,
2758
- inference_request_uuid,
2759
- self._inference_image_id,
2760
- request.state.api,
2761
- request.state.state,
2762
- inference_request_uuid,
2757
+ @server.post("/inference_video_id")
2758
+ def inference_video_id(request: Request):
2759
+ state = request.state.state
2760
+ logger.debug(f"Received a request to 'inference_video_id'", extra={"state": state})
2761
+ self.validate_inference_state(state)
2762
+ api = self.api_from_request(request)
2763
+ inference_request, future = self.inference_requests_manager.schedule_task(
2764
+ self._inference_video_id, api, state
2763
2765
  )
2764
- end_callback = partial(
2765
- self._on_inference_end, inference_request_uuid=inference_request_uuid
2766
+ future.result()
2767
+ results = {"ann": inference_request.pop_pending_results()}
2768
+ final_result = inference_request.final_result
2769
+ if final_result is not None:
2770
+ results.update(final_result)
2771
+ return results
2772
+
2773
+ @server.post("/inference_video_async")
2774
+ def inference_video_async(
2775
+ files: List[UploadFile],
2776
+ settings: str = Form("{}"),
2777
+ state: str = Form("{}"),
2778
+ ):
2779
+ if state == "{}" or not state:
2780
+ state = settings
2781
+ state = str(state)
2782
+ logger.debug("Received a request to 'inference_video_async'", extra={"state": state})
2783
+ self.validate_inference_state(state)
2784
+ state = json.loads(state)
2785
+
2786
+ file = files[0]
2787
+ video_name = files[0].filename
2788
+ video_source = files[0].file
2789
+ file_size = file.size
2790
+
2791
+ inference_request = self.inference_requests_manager.create()
2792
+ inference_request.set_stage(InferenceRequest.Stage.PREPARING, 0, file_size)
2793
+
2794
+ video_source.read = progress_wrapper(
2795
+ video_source.read, inference_request.progress.iters_done_report
2766
2796
  )
2767
- future.add_done_callback(end_callback)
2768
- logger.debug(
2769
- "Inference has scheduled from 'inference_image_id_async' endpoint",
2770
- extra={"inference_request_uuid": inference_request_uuid},
2797
+
2798
+ if self.cache.is_persistent:
2799
+ self.cache.add_video_to_cache(video_name, video_source)
2800
+ video_path = self.cache.get_video_path(video_name)
2801
+ else:
2802
+ video_path = os.path.join(tempfile.gettempdir(), video_name)
2803
+ with open(video_path, "wb") as video_file:
2804
+ shutil.copyfileobj(
2805
+ video_source, open(video_path, "wb"), length=(64 * 1024 * 1024)
2806
+ )
2807
+
2808
+ inference_request, _ = self.inference_requests_manager.schedule_task(
2809
+ self._inference_video,
2810
+ path=video_path,
2811
+ state=state,
2812
+ inference_request=inference_request,
2771
2813
  )
2814
+
2772
2815
  return {
2773
- "message": "Inference has started.",
2774
- "inference_request_uuid": inference_request_uuid,
2816
+ "message": "Scheduled inference task.",
2817
+ "inference_request_uuid": inference_request.uuid,
2775
2818
  }
2776
2819
 
2777
2820
  @server.post("/inference_video_id_async")
2778
2821
  def inference_video_id_async(response: Response, request: Request):
2779
- logger.debug(f"'inference_video_id_async' request in json format:{request.state.state}")
2780
- # check batch size
2781
- batch_size = request.state.state.get("batch_size", None)
2782
- if batch_size is None:
2783
- batch_size = self.get_batch_size()
2784
- if self.max_batch_size is not None and batch_size > self.max_batch_size:
2785
- response.status_code = status.HTTP_400_BAD_REQUEST
2786
- return {
2787
- "message": f"Batch size should be less than or equal to {self.max_batch_size} for this model.",
2788
- "success": False,
2789
- }
2790
- inference_request_uuid = uuid.uuid5(
2791
- namespace=uuid.NAMESPACE_URL, name=f"{time.time()}"
2792
- ).hex
2793
- self._on_inference_start(inference_request_uuid)
2794
- future = self._executor.submit(
2795
- self._handle_error_in_async,
2796
- inference_request_uuid,
2797
- self._inference_video_id,
2798
- request.state.api,
2799
- request.state.state,
2800
- inference_request_uuid,
2801
- )
2802
- end_callback = partial(
2803
- self._on_inference_end, inference_request_uuid=inference_request_uuid
2804
- )
2805
- future.add_done_callback(end_callback)
2806
- logger.debug(
2807
- "Inference has scheduled from 'inference_video_id_async' endpoint",
2808
- extra={"inference_request_uuid": inference_request_uuid},
2822
+ state = request.state.state
2823
+ logger.debug("Received a request to 'inference_video_id_async'", extra={"state": state})
2824
+ self.validate_inference_state(state)
2825
+ api = self.api_from_request(request)
2826
+ inference_request, _ = self.inference_requests_manager.schedule_task(
2827
+ self._inference_video_id, api, state
2809
2828
  )
2810
2829
  return {
2811
2830
  "message": "Inference has started.",
2812
- "inference_request_uuid": inference_request_uuid,
2831
+ "inference_request_uuid": inference_request.uuid,
2813
2832
  }
2814
2833
 
2815
2834
  @server.post("/inference_project_id_async")
2816
2835
  def inference_project_id_async(response: Response, request: Request):
2836
+ state = request.state.state
2817
2837
  logger.debug(
2818
- f"'inference_project_id_async' request in json format:{request.state.state}"
2838
+ "Received a request to 'inference_project_id_async'", extra={"state": state}
2819
2839
  )
2820
- project_id = request.state.state["projectId"]
2821
- project_info = request.state.api.project.get_info_by_id(project_id)
2822
- if project_info.type != str(ProjectType.IMAGES):
2823
- raise ValueError("Only images projects are supported.")
2824
- # check batch size
2825
- batch_size = request.state.state.get("batch_size", None)
2826
- if batch_size is None:
2827
- batch_size = self.get_batch_size()
2828
- if self.max_batch_size is not None and batch_size > self.max_batch_size:
2829
- response.status_code = status.HTTP_400_BAD_REQUEST
2830
- return {
2831
- "message": f"Batch size should be less than or equal to {self.max_batch_size} for this model.",
2832
- "success": False,
2833
- }
2834
- inference_request_uuid = uuid.uuid5(
2835
- namespace=uuid.NAMESPACE_URL, name=f"{time.time()}"
2836
- ).hex
2837
- self._on_inference_start(inference_request_uuid)
2838
- future = self._executor.submit(
2839
- self._handle_error_in_async,
2840
- inference_request_uuid,
2841
- self._inference_project_id,
2842
- request.state.api,
2843
- request.state.state,
2844
- project_info,
2845
- inference_request_uuid,
2846
- )
2847
- logger.debug(
2848
- "Inference has scheduled from 'inference_project_id_async' endpoint",
2849
- extra={"inference_request_uuid": inference_request_uuid},
2840
+ self.validate_inference_state(state)
2841
+ api = self.api_from_request(request)
2842
+ inference_request, _ = self.inference_requests_manager.schedule_task(
2843
+ self._inference_project_id, api, state
2850
2844
  )
2851
2845
  return {
2852
2846
  "message": "Inference has started.",
2853
- "inference_request_uuid": inference_request_uuid,
2847
+ "inference_request_uuid": inference_request.uuid,
2854
2848
  }
2855
2849
 
2856
2850
  @server.post("/run_speedtest")
2857
2851
  def run_speedtest(response: Response, request: Request):
2858
- logger.debug(f"'run_speedtest' request in json format:{request.state.state}")
2859
- project_id = request.state.state["projectId"]
2860
- project_info = request.state.api.project.get_info_by_id(project_id)
2861
- if project_info.type != str(ProjectType.IMAGES):
2862
- response.status_code = status.HTTP_400_BAD_REQUEST
2863
- response.body = {"message": "Only images projects are supported."}
2864
- raise ValueError("Only images projects are supported.")
2865
- batch_size = request.state.state["batch_size"]
2852
+ state = request.state.state
2853
+ logger.debug(f"'run_speedtest' request in json format:{state}")
2854
+
2855
+ batch_size = state["batch_size"]
2866
2856
  if batch_size > 1 and not self.is_batch_inference_supported():
2867
2857
  response.status_code = status.HTTP_501_NOT_IMPLEMENTED
2868
2858
  return {
2869
2859
  "message": "Batch inference is not implemented for this model.",
2870
2860
  "success": False,
2871
2861
  }
2872
- # check batch size
2873
- if self.max_batch_size is not None and batch_size > self.max_batch_size:
2862
+
2863
+ self.validate_inference_state(state)
2864
+ api = self.api_from_request(request)
2865
+
2866
+ project_id = state["projectId"]
2867
+ project_info = api.project.get_info_by_id(project_id)
2868
+ if project_info.type != str(ProjectType.IMAGES):
2874
2869
  response.status_code = status.HTTP_400_BAD_REQUEST
2875
- return {
2876
- "message": f"Batch size should be less than or equal to {self.max_batch_size} for this model.",
2877
- "success": False,
2878
- }
2879
- inference_request_uuid = uuid.uuid5(
2880
- namespace=uuid.NAMESPACE_URL, name=f"{time.time()}"
2881
- ).hex
2882
- self._on_inference_start(inference_request_uuid)
2883
- future = self._executor.submit(
2884
- self._handle_error_in_async,
2885
- inference_request_uuid,
2886
- self._run_speedtest,
2887
- request.state.api,
2888
- request.state.state,
2889
- inference_request_uuid,
2890
- )
2891
- logger.debug(
2892
- "Speedtest has scheduled from 'run_speedtest' endpoint",
2893
- extra={"inference_request_uuid": inference_request_uuid},
2870
+ response.body = {"message": "Only images projects are supported."}
2871
+ raise ValueError("Only images projects are supported.")
2872
+
2873
+ inference_request, _ = self.inference_requests_manager.schedule_task(
2874
+ self._run_speedtest, api, state
2894
2875
  )
2895
2876
  return {
2896
2877
  "message": "Inference has started.",
2897
- "inference_request_uuid": inference_request_uuid,
2878
+ "inference_request_uuid": inference_request.uuid,
2898
2879
  }
2899
2880
 
2900
2881
  @server.post(f"/get_inference_progress")
2901
2882
  def get_inference_progress(response: Response, request: Request):
2902
- inference_request_uuid = request.state.state.get("inference_request_uuid")
2883
+ state = request.state.state
2884
+ logger.debug("Received a request to '/get_inference_progress'", extra={"state": state})
2885
+ inference_request_uuid = state.get("inference_request_uuid")
2903
2886
  if inference_request_uuid is None:
2904
2887
  response.status_code = status.HTTP_400_BAD_REQUEST
2905
2888
  return {"message": "Error: 'inference_request_uuid' is required."}
2906
2889
 
2907
- inference_request = self._inference_requests[inference_request_uuid].copy()
2908
- inference_request["progress"] = _convert_sly_progress_to_dict(
2909
- inference_request["progress"]
2910
- )
2911
-
2912
- # Logging
2890
+ inference_request = self.inference_requests_manager.get(inference_request_uuid)
2913
2891
  log_extra = _get_log_extra_for_inference_request(
2914
- inference_request_uuid, inference_request
2892
+ inference_request.uuid, inference_request
2915
2893
  )
2894
+ data = {**inference_request.to_json(), **log_extra}
2895
+ if inference_request.stage != InferenceRequest.Stage.INFERENCE:
2896
+ data["progress"] = {"current": 0, "total": 1}
2916
2897
  logger.debug(
2917
2898
  f"Sending inference progress with uuid:",
2918
- extra=log_extra,
2899
+ extra=data,
2919
2900
  )
2920
-
2921
- # Ger rid of `pending_results` to less response size
2922
- inference_request["pending_results"] = []
2923
- inference_request.pop("lock", None)
2924
- return inference_request
2901
+ return data
2925
2902
 
2926
2903
  @server.post(f"/pop_inference_results")
2927
2904
  def pop_inference_results(response: Response, request: Request):
@@ -2930,23 +2907,34 @@ class Inference:
                 response.status_code = status.HTTP_400_BAD_REQUEST
                 return {"message": "Error: 'inference_request_uuid' is required."}
 
-            # Copy results
-            inference_request = self._inference_requests[inference_request_uuid].copy()
-            inference_request["pending_results"] = inference_request["pending_results"].copy()
+            if inference_request_uuid in self._inference_requests:
+                inference_request = self._inference_requests[inference_request_uuid].copy()
+                inference_request["pending_results"] = inference_request["pending_results"].copy()
 
-            # Clear the queue `pending_results`
-            self._inference_requests[inference_request_uuid]["pending_results"].clear()
+                # Clear the queue `pending_results`
+                self._inference_requests[inference_request_uuid]["pending_results"].clear()
 
-            inference_request["progress"] = _convert_sly_progress_to_dict(
-                inference_request["progress"]
-            )
+                inference_request["progress"] = _convert_sly_progress_to_dict(
+                    inference_request["progress"]
+                )
+                log_extra = _get_log_extra_for_inference_request(
+                    inference_request_uuid, inference_request
+                )
+                logger.debug(f"Sending inference delta results with uuid:", extra=log_extra)
+                return inference_request
 
-            # Logging
+            inference_request = self.inference_requests_manager.get(inference_request_uuid)
             log_extra = _get_log_extra_for_inference_request(
-                inference_request_uuid, inference_request
+                inference_request.uuid, inference_request
             )
+            data = {
+                **inference_request.to_json(),
+                **log_extra,
+                "pending_results": inference_request.pop_pending_results(),
+            }
+
             logger.debug(f"Sending inference delta results with uuid:", extra=log_extra)
-            return inference_request
+            return data
 
         @server.post(f"/get_inference_result")
         def get_inference_result(response: Response, request: Request):
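The updated `pop_inference_results` handler drains `pending_results` through `pop_pending_results()` for manager-tracked requests while keeping the legacy dict-based path. A hedged sketch of a client loop that consumes those delta results, under the same assumptions as the previous example:

```python
import time

import requests

SERVER = "http://localhost:8000"  # assumed local serving app, not part of the diff


def iter_delta_results(request_uuid: str, poll_interval: float = 2.0):
    """Yield batches of pending results until the request finishes and the queue is empty."""
    while True:
        resp = requests.post(
            f"{SERVER}/pop_inference_results",
            json={"state": {"inference_request_uuid": request_uuid}},
        )
        resp.raise_for_status()
        data = resp.json()
        batch = data.get("pending_results", [])
        if batch:
            yield batch
        elif not data.get("is_inferring", False):
            break
        time.sleep(poll_interval)
```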
@@ -2955,22 +2943,34 @@ class Inference:
                 response.status_code = status.HTTP_400_BAD_REQUEST
                 return {"message": "Error: 'inference_request_uuid' is required."}
 
-            inference_request = self._inference_requests[inference_request_uuid].copy()
+            if inference_request_uuid in self._inference_requests:
+                inference_request = self._inference_requests[inference_request_uuid].copy()
 
-            inference_request["progress"] = _convert_sly_progress_to_dict(
-                inference_request["progress"]
-            )
+                inference_request["progress"] = _convert_sly_progress_to_dict(
+                    inference_request["progress"]
+                )
 
-            # Logging
+                # Logging
+                log_extra = _get_log_extra_for_inference_request(
+                    inference_request_uuid, inference_request
+                )
+
+                logger.debug(
+                    f"Sending inference result with uuid:",
+                    extra=log_extra,
+                )
+                return inference_request["result"]
+
+            inference_request = self.inference_requests_manager.get(inference_request_uuid)
             log_extra = _get_log_extra_for_inference_request(
-                inference_request_uuid, inference_request
+                inference_request.uuid, inference_request
             )
             logger.debug(
                 f"Sending inference result with uuid:",
                 extra=log_extra,
             )
 
-            return inference_request["result"]
+            return inference_request.final_result
 
         @server.post(f"/stop_inference")
         def stop_inference(response: Response, request: Request):
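After polling reports that the request has finished, `/get_inference_result` returns the stored `result` on the legacy path or `final_result` on the manager path. A small illustrative call, under the same assumptions as above:

```python
import requests

SERVER = "http://localhost:8000"  # assumed local serving app, not part of the diff
request_uuid = "..."  # uuid returned by the endpoint that started inference

resp = requests.post(
    f"{SERVER}/get_inference_result",
    json={"state": {"inference_request_uuid": request_uuid}},
)
resp.raise_for_status()
result = resp.json()  # legacy path returns the stored "result"; new path returns final_result
```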
@@ -2981,8 +2981,12 @@ class Inference:
                     "message": "Error: 'inference_request_uuid' is required.",
                     "success": False,
                 }
-            inference_request = self._inference_requests[inference_request_uuid]
-            inference_request["cancel_inference"] = True
+            if inference_request_uuid in self._inference_requests:
+                inference_request = self._inference_requests[inference_request_uuid]
+                inference_request["cancel_inference"] = True
+            else:
+                inference_request = self.inference_requests_manager.get(inference_request_uuid)
+                inference_request.stop()
             return {"message": "Inference will be stopped.", "success": True}
 
         @server.post(f"/clear_inference_request")
@@ -2994,7 +2998,10 @@ class Inference:
                     "message": "Error: 'inference_request_uuid' is required.",
                     "success": False,
                 }
-            del self._inference_requests[inference_request_uuid]
+            if inference_request_uuid in self._inference_requests:
+                del self._inference_requests[inference_request_uuid]
+            else:
+                self.inference_requests_manager.remove_after(inference_request_uuid, 60)
             logger.debug("Removed an inference request:", extra={"uuid": inference_request_uuid})
             return {"success": True}
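Both `stop_inference` and `clear_inference_request` now branch between the legacy `_inference_requests` dict and the new `inference_requests_manager`. A hedged client-side sketch of cancelling a request and then releasing its state (helper name and URL are illustrative):

```python
import requests

SERVER = "http://localhost:8000"  # assumed local serving app, not part of the diff


def cancel_and_cleanup(request_uuid: str) -> None:
    """Ask the app to stop a running request, then release its stored state."""
    payload = {"state": {"inference_request_uuid": request_uuid}}
    stopped = requests.post(f"{SERVER}/stop_inference", json=payload).json()
    if stopped.get("success"):
        # Legacy requests are deleted immediately; manager-tracked ones are
        # scheduled for removal via remove_after(), as in the handler above.
        requests.post(f"{SERVER}/clear_inference_request", json=payload)
```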
 
@@ -3005,8 +3012,13 @@ class Inference:
                 response.status_code = status.HTTP_400_BAD_REQUEST
                 return {"message": "Error: 'inference_request_uuid' is required."}
 
-            inference_request = self._inference_requests[inference_request_uuid].copy()
-            return inference_request["preparing_progress"]
+            if inference_request_uuid in self._inference_requests:
+                inference_request = self._inference_requests[inference_request_uuid].copy()
+                return inference_request["preparing_progress"]
+            inference_request = self.inference_requests_manager.get(inference_request_uuid)
+            return _get_log_extra_for_inference_request(inference_request.uuid, inference_request)[
+                "preparing_progress"
+            ]
 
         @server.post("/get_deploy_settings")
         def _get_deploy_settings(response: Response, request: Request):
  def _get_deploy_settings(response: Response, request: Request):
@@ -3052,22 +3064,84 @@ class Inference:
3052
3064
  self.shutdown_model()
3053
3065
  state = request.state.state
3054
3066
  deploy_params = state["deploy_params"]
3067
+ model_name = state.get("model_name", None)
3055
3068
  if isinstance(self.gui, GUI.ServingGUITemplate):
3069
+ if deploy_params["model_source"] == ModelSource.PRETRAINED and model_name:
3070
+ deploy_params = self._build_deploy_params_from_api(
3071
+ model_name, deploy_params
3072
+ )
3056
3073
  model_files = self._download_model_files(deploy_params)
3057
3074
  deploy_params["model_files"] = model_files
3075
+ deploy_params = self._set_common_deploy_params(deploy_params)
3058
3076
  self._load_model_headless(**deploy_params)
3059
3077
  elif isinstance(self.gui, GUI.ServingGUI):
3078
+ if deploy_params["model_source"] == ModelSource.PRETRAINED and model_name:
3079
+ deploy_params = self._build_legacy_deploy_params_from_api(model_name)
3080
+ deploy_params = self._set_common_deploy_params(deploy_params)
3060
3081
  self._load_model(deploy_params)
3082
+ elif self.gui is None and self.api is None:
3083
+ if deploy_params["model_source"] == ModelSource.PRETRAINED and model_name:
3084
+ deploy_params = self._build_deploy_params_from_api(
3085
+ model_name, deploy_params
3086
+ )
3087
+ model_files = self._download_model_files(deploy_params)
3088
+ deploy_params["model_files"] = model_files
3089
+
3090
+ deploy_params = self._set_common_deploy_params(deploy_params)
3091
+ self._load_model_headless(**deploy_params)
3092
+ logger.info(
3093
+ f"Model has been successfully loaded on {deploy_params['device']} device"
3094
+ )
3095
+ return {"result": "model was successfully deployed"}
3061
3096
 
3062
- self.set_params_to_gui(deploy_params)
3063
- # update to set correct device
3064
- device = deploy_params.get("device", "cpu")
3065
- self.gui.set_deployed(device)
3097
+ else:
3098
+ raise ValueError("Unknown GUI type")
3099
+ if self.gui is not None:
3100
+ self.set_params_to_gui(deploy_params)
3101
+ # update to set correct device
3102
+ device = deploy_params.get("device", "cpu")
3103
+ self.gui.set_deployed(device)
3066
3104
  return {"result": "model was successfully deployed"}
3067
3105
  except Exception as e:
3068
- self.gui._success_label.hide()
3106
+ if self.gui is not None:
3107
+ self.gui._success_label.hide()
3069
3108
  raise e
3070
3109
 
3110
+ @server.post("/list_pretrained_models")
3111
+ def _list_pretrained_models():
3112
+ if isinstance(self.gui, GUI.ServingGUITemplate):
3113
+ return [
3114
+ _get_model_name(model) for model in self._gui.pretrained_models_table._models
3115
+ ]
3116
+ elif hasattr(self, "pretrained_models"):
3117
+ return [_get_model_name(model) for model in self.pretrained_models]
3118
+ else:
3119
+ if hasattr(self, "pretrained_models_table"):
3120
+ return [
3121
+ _get_model_name(model)
3122
+ for model in self.pretrained_models_table._models # pylint: disable=no-member
3123
+ ]
3124
+ else:
3125
+ raise HTTPException(
3126
+ status_code=400,
3127
+ detail="Pretrained models table is not available in this app.",
3128
+ )
3129
+
3130
+ @server.post("/list_pretrained_model_infos")
3131
+ def _list_pretrained_model_infos():
3132
+ if isinstance(self.gui, GUI.ServingGUITemplate):
3133
+ return self._gui.pretrained_models_table._models
3134
+ elif hasattr(self, "pretrained_models"):
3135
+ return self.pretrained_models
3136
+ else:
3137
+ if hasattr(self, "pretrained_models_table"):
3138
+ return self.pretrained_models_table._models
3139
+ else:
3140
+ raise HTTPException(
3141
+ status_code=400,
3142
+ detail="Pretrained models table is not available in this app.",
3143
+ )
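The two new listing endpoints expose either plain model names or the raw rows of the pretrained-models table. An illustrative call, assuming the same local serving app as in the earlier examples:

```python
import requests

SERVER = "http://localhost:8000"  # assumed local serving app, not part of the diff

names = requests.post(f"{SERVER}/list_pretrained_models", json={}).json()
infos = requests.post(f"{SERVER}/list_pretrained_model_infos", json={}).json()
print(names[:3])                                           # a few checkpoint names
print(infos[0] if infos else "no model infos available")   # full table row for the first model
```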
3144
+
3071
3145
  @server.post("/is_deployed")
3072
3146
  def _is_deployed(response: Response, request: Request):
3073
3147
  return {
@@ -3080,6 +3154,37 @@ class Inference:
3080
3154
  def _get_deploy_info():
3081
3155
  return asdict(self._get_deploy_info())
3082
3156
 
3157
+ @server.post("/get_inference_status")
3158
+ def _get_inference_status(request: Request, response: Response):
3159
+ state = request.state.state
3160
+ inference_request_uuid = state.get("inference_request_uuid")
3161
+ if inference_request_uuid is None:
3162
+ response.status_code = status.HTTP_400_BAD_REQUEST
3163
+ return {"message": "Error: 'inference_request_uuid' is required."}
3164
+ inference_request = self.inference_requests_manager.get(inference_request_uuid)
3165
+ if inference_request is None:
3166
+ response.status_code = status.HTTP_404_NOT_FOUND
3167
+ return {"message": "Error: 'inference_request_uuid' is not found."}
3168
+ return inference_request.status()
3169
+
3170
+ @server.post("/get_status")
3171
+ def _get_status(request: Request):
3172
+ progress = self.inference_requests_manager.global_progress.to_json()
3173
+ ram_allocated, ram_total = get_ram_usage()
3174
+ gpu_allocated, gpu_total = get_gpu_usage()
3175
+ return {
3176
+ "is_deployed": self.is_model_deployed(),
3177
+ "progress": progress,
3178
+ "gpu_memory": {
3179
+ "allocated": gpu_allocated,
3180
+ "total": gpu_total,
3181
+ },
3182
+ "ram_memory": {
3183
+ "allocated": ram_allocated,
3184
+ "total": ram_total,
3185
+ },
3186
+ }
3187
+
3083
3188
  # Local deploy without predict args
3084
3189
  if self._is_local_deploy:
3085
3190
  self._run_server()
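The new `/get_status` endpoint aggregates the manager's global progress with RAM and GPU usage. A small monitoring sketch under the same assumptions as the earlier examples:

```python
import requests

SERVER = "http://localhost:8000"  # assumed local serving app, not part of the diff

info = requests.post(f"{SERVER}/get_status", json={}).json()
gpu, ram = info["gpu_memory"], info["ram_memory"]
print(
    f"deployed={info['is_deployed']} "
    f"gpu={gpu['allocated']}/{gpu['total']} "
    f"ram={ram['allocated']}/{ram['total']}"
)
```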
@@ -3433,7 +3538,7 @@ class Inference:
             change_name_if_conflict=True,
         )
         state["output_project_id"] = output_project.id
-        results = self._inference_project_id(api=self.api, state=state)
+        results = self.inference_requests_manager.run(self._inference_project_id, api, state)
 
         dataset_infos = api.dataset.get_list(project_id)
         datasets_map = {dataset_info.id: dataset_info.name for dataset_info in dataset_infos}
@@ -3617,136 +3722,157 @@ class Inference:
                 f"Checkpoint {checkpoint_url} not found in Team Files. Cannot set workflow input"
             )
 
-    def _exclude_duplicated_predictions(
-        self,
-        api: Api,
-        pred_anns: List[Annotation],
-        settings: dict,
-        dataset_id: int,
-        gt_image_ids: List[int],
-        meta: Optional[ProjectMeta] = None,
-    ):
-        """
-        Filter out predictions that significantly overlap with ground truth (GT) objects.
-
-        This is a wrapper around the `_filter_duplicated_predictions_from_ann` method that does the following:
-        - Checks inference settings for the IoU threshold (`existing_objects_iou_thresh`)
-        - Gets ProjectMeta object if not provided
-        - Downloads GT annotations for the specified image IDs
-        - Filters out predictions that have an IoU greater than or equal to the specified threshold with any GT object
-
-        :param api: Supervisely API object
-        :type api: Api
-        :param pred_anns: List of Annotation objects containing predictions
-        :type pred_anns: List[Annotation]
-        :param settings: Inference settings
-        :type settings: dict
-        :param dataset_id: ID of the dataset containing the images
-        :type dataset_id: int
-        :param gt_image_ids: List of image IDs to filter predictions. All images should belong to the same dataset
-        :type gt_image_ids: List[int]
-        :param meta: ProjectMeta object
-        :type meta: Optional[ProjectMeta]
-        :return: List of Annotation objects containing filtered predictions
-        :rtype: List[Annotation]
-
-        Notes:
-        ------
-        - Requires PyTorch and torchvision for IoU calculations
-        - This method is useful for identifying new objects that aren't already annotated in the ground truth
-        """
-        iou = settings.get("existing_objects_iou_thresh")
-        if isinstance(iou, float) and 0 < iou <= 1:
-            if meta is None:
-                ds = api.dataset.get_info_by_id(dataset_id)
-                meta = ProjectMeta.from_json(api.project.get_meta(ds.project_id))
-            gt_anns = api.annotation.download_json_batch(dataset_id, gt_image_ids)
-            gt_anns = [Annotation.from_json(ann, meta) for ann in gt_anns]
-            for i in range(0, len(pred_anns)):
-                before = len(pred_anns[i].labels)
-                with Timer() as timer:
-                    pred_anns[i] = self._filter_duplicated_predictions_from_ann(
-                        gt_anns[i], pred_anns[i], iou
-                    )
-                after = len(pred_anns[i].labels)
-                logger.debug(
-                    f"{[i]}: applied NMS with IoU={iou}. Before: {before}, After: {after}. Time: {timer.get_time():.3f}ms"
-                )
-        return pred_anns
-
-    def _filter_duplicated_predictions_from_ann(
-        self, gt_ann: Annotation, pred_ann: Annotation, iou_threshold: float
-    ) -> Annotation:
-        """
-        Filter out predictions that significantly overlap with ground truth annotations.
-
-        This function compares each prediction with ground truth annotations of the same class
-        and removes predictions that have an IoU (Intersection over Union) greater than or equal
-        to the specified threshold with any ground truth annotation. This is useful for identifying
-        new objects that aren't already annotated in the ground truth.
-
-        :param gt_ann: Annotation object containing ground truth labels
-        :type gt_ann: Annotation
-        :param pred_ann: Annotation object containing prediction labels to be filtered
-        :type pred_ann: Annotation
-        :param iou_threshold: IoU threshold (0.0-1.0). Predictions with IoU >= threshold with any
-            ground truth box of the same class will be removed
-        :type iou_threshold: float
-        :return: A new annotation object containing only predictions that don't significantly
-            overlap with ground truth annotations
-        :rtype: Annotation
-
-
-        Notes:
-        ------
-        - Predictions with classes not present in ground truth will be kept
-        - Requires PyTorch and torchvision for IoU calculations
-        """
 
-        try:
-            import torch
-            from torchvision.ops import box_iou
-
-        except ImportError:
-            raise ImportError("Please install PyTorch and torchvision to use this feature.")
+def _exclude_duplicated_predictions(
+    api: Api,
+    pred_anns: List[Annotation],
+    dataset_id: int,
+    gt_image_ids: List[int],
+    iou: float = None,
+    meta: Optional[ProjectMeta] = None,
+):
+    """
+    Filter out predictions that significantly overlap with ground truth (GT) objects.
+
+    This is a wrapper around the `_filter_duplicated_predictions_from_ann` method that does the following:
+    - Checks inference settings for the IoU threshold (`existing_objects_iou_thresh`)
+    - Gets ProjectMeta object if not provided
+    - Downloads GT annotations for the specified image IDs
+    - Filters out predictions that have an IoU greater than or equal to the specified threshold with any GT object
+
+    :param api: Supervisely API object
+    :type api: Api
+    :param pred_anns: List of Annotation objects containing predictions
+    :type pred_anns: List[Annotation]
+    :param dataset_id: ID of the dataset containing the images
+    :type dataset_id: int
+    :param gt_image_ids: List of image IDs to filter predictions. All images should belong to the same dataset
+    :type gt_image_ids: List[int]
+    :param iou: IoU threshold (0.0-1.0). Predictions with IoU >= threshold with any
+        ground truth box of the same class will be removed. None if no filtering is needed
+    :type iou: Optional[float]
+    :param meta: ProjectMeta object
+    :type meta: Optional[ProjectMeta]
+    :return: List of Annotation objects containing filtered predictions
+    :rtype: List[Annotation]
+
+    Notes:
+    ------
+    - Requires PyTorch and torchvision for IoU calculations
+    - This method is useful for identifying new objects that aren't already annotated in the ground truth
+    """
+    if isinstance(iou, float) and 0 < iou <= 1:
+        if meta is None:
+            ds = api.dataset.get_info_by_id(dataset_id)
+            meta = ProjectMeta.from_json(api.project.get_meta(ds.project_id))
+        gt_anns = api.annotation.download_json_batch(dataset_id, gt_image_ids)
+        gt_anns = [Annotation.from_json(ann, meta) for ann in gt_anns]
+        for i in range(0, len(pred_anns)):
+            before = len(pred_anns[i].labels)
+            with Timer() as timer:
+                pred_anns[i] = _filter_duplicated_predictions_from_ann(
+                    gt_anns[i], pred_anns[i], iou
+                )
+            after = len(pred_anns[i].labels)
+            logger.debug(
+                f"{[i]}: applied NMS with IoU={iou}. Before: {before}, After: {after}. Time: {timer.get_time():.3f}ms"
+            )
+    return pred_anns
 
-        def _to_tensor(geom):
-            return torch.tensor([geom.left, geom.top, geom.right, geom.bottom]).float()
 
-        new_labels = []
-        pred_cls_bboxes = defaultdict(list)
-        for label in pred_ann.labels:
-            pred_cls_bboxes[label.obj_class.name].append(label)
+def _filter_duplicated_predictions_from_ann(
+    gt_ann: Annotation, pred_ann: Annotation, iou_threshold: float
+) -> Annotation:
+    """
+    Filter out predictions that significantly overlap with ground truth annotations.
+
+    This function compares each prediction with ground truth annotations of the same class
+    and removes predictions that have an IoU (Intersection over Union) greater than or equal
+    to the specified threshold with any ground truth annotation. This is useful for identifying
+    new objects that aren't already annotated in the ground truth.
+
+    :param gt_ann: Annotation object containing ground truth labels
+    :type gt_ann: Annotation
+    :param pred_ann: Annotation object containing prediction labels to be filtered
+    :type pred_ann: Annotation
+    :param iou_threshold: IoU threshold (0.0-1.0). Predictions with IoU >= threshold with any
+        ground truth box of the same class will be removed
+    :type iou_threshold: float
+    :return: A new annotation object containing only predictions that don't significantly
+        overlap with ground truth annotations
+    :rtype: Annotation
+
+
+    Notes:
+    ------
+    - Predictions with classes not present in ground truth will be kept
+    - Requires PyTorch and torchvision for IoU calculations
+    """
 
-        gt_cls_bboxes = defaultdict(list)
-        for label in gt_ann.labels:
-            if label.obj_class.name not in pred_cls_bboxes:
-                continue
-            gt_cls_bboxes[label.obj_class.name].append(label)
+    try:
+        import torch
+        from torchvision.ops import box_iou
 
-        for name, pred in pred_cls_bboxes.items():
-            gt = gt_cls_bboxes[name]
-            if len(gt) == 0:
-                new_labels.extend(pred)
-                continue
-            pred_bboxes = torch.stack([_to_tensor(l.geometry.to_bbox()) for l in pred]).float()
-            gt_bboxes = torch.stack([_to_tensor(l.geometry.to_bbox()) for l in gt]).float()
-            iou_matrix = box_iou(pred_bboxes, gt_bboxes)
-            iou_matrix = iou_matrix.cpu().numpy()
-            keep_indices = np.where(np.all(iou_matrix < iou_threshold, axis=1))[0]
-            new_labels.extend([pred[i] for i in keep_indices])
+    except ImportError:
+        raise ImportError("Please install PyTorch and torchvision to use this feature.")
 
-        return pred_ann.clone(labels=new_labels)
+    def _to_tensor(geom):
+        return torch.tensor([geom.left, geom.top, geom.right, geom.bottom]).float()
 
+    new_labels = []
+    pred_cls_bboxes = defaultdict(list)
+    for label in pred_ann.labels:
+        pred_cls_bboxes[label.obj_class.name].append(label)
+
+    gt_cls_bboxes = defaultdict(list)
+    for label in gt_ann.labels:
+        if label.obj_class.name not in pred_cls_bboxes:
+            continue
+        gt_cls_bboxes[label.obj_class.name].append(label)
+
+    for name, pred in pred_cls_bboxes.items():
+        gt = gt_cls_bboxes[name]
+        if len(gt) == 0:
+            new_labels.extend(pred)
+            continue
+        pred_bboxes = torch.stack([_to_tensor(l.geometry.to_bbox()) for l in pred]).float()
+        gt_bboxes = torch.stack([_to_tensor(l.geometry.to_bbox()) for l in gt]).float()
+        iou_matrix = box_iou(pred_bboxes, gt_bboxes)
+        iou_matrix = iou_matrix.cpu().numpy()
+        keep_indices = np.where(np.all(iou_matrix < iou_threshold, axis=1))[0]
+        new_labels.extend([pred[i] for i in keep_indices])
+
+    return pred_ann.clone(labels=new_labels)
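The docstrings above state the filtering rule: a prediction is dropped when its bounding box overlaps a ground-truth box of the same class with IoU at or above the threshold. A self-contained numeric sketch of that rule using `torchvision.ops.box_iou` (box coordinates are made up for illustration; this is not code from the package):

```python
import numpy as np
import torch
from torchvision.ops import box_iou

iou_threshold = 0.5

# One GT box and two predicted boxes of the same class, in xyxy format.
gt_boxes = torch.tensor([[10.0, 10.0, 50.0, 50.0]])
pred_boxes = torch.tensor(
    [
        [12.0, 11.0, 49.0, 52.0],      # heavy overlap with GT -> dropped
        [100.0, 100.0, 140.0, 140.0],  # no overlap -> kept
    ]
)

iou_matrix = box_iou(pred_boxes, gt_boxes).cpu().numpy()  # shape (num_pred, num_gt)
keep = np.where(np.all(iou_matrix < iou_threshold, axis=1))[0]
print(iou_matrix.round(2), "keep indices:", keep.tolist())  # expect only index 1 kept
```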
+
+
+def _get_log_extra_for_inference_request(
+    inference_request_uuid, inference_request: Union[InferenceRequest, dict]
+):
+    if isinstance(inference_request, dict):
+        log_extra = {
+            "uuid": inference_request_uuid,
+            "progress": inference_request["progress"],
+            "is_inferring": inference_request["is_inferring"],
+            "cancel_inference": inference_request["cancel_inference"],
+            "has_result": inference_request["result"] is not None,
+            "pending_results": len(inference_request["pending_results"]),
+        }
+        return log_extra
 
-def _get_log_extra_for_inference_request(inference_request_uuid, inference_request: dict):
+    progress = inference_request.progress_json()
+    del progress["message"]
     log_extra = {
-        "uuid": inference_request_uuid,
-        "progress": inference_request["progress"],
-        "is_inferring": inference_request["is_inferring"],
-        "cancel_inference": inference_request["cancel_inference"],
-        "has_result": inference_request["result"] is not None,
-        "pending_results": len(inference_request["pending_results"]),
+        "uuid": inference_request.uuid,
+        "progress": progress,
+        "is_inferring": inference_request.is_inferring(),
+        "stopped": inference_request.is_stopped(),
+        "finished": inference_request.is_finished(),
+        "cancel_inference": inference_request.is_stopped(),
+        "has_result": inference_request.final_result is not None,
+        "pending_results": inference_request.pending_num(),
+        "exception": inference_request.exception_json(),
+        "result": inference_request._final_result,
+        "preparing_progress": progress,
     }
     return log_extra
 
@@ -4059,3 +4185,33 @@ def get_hardware_info(device: str) -> str:
     except Exception as e:
         logger.error("Error while getting hardware info", exc_info=True)
         return "Unknown"
+
+
+def progress_wrapper(func, progress_cb):
+    @wraps(func)
+    def wrapped_func(*args, **kwargs):
+        result = func(*args, **kwargs)
+        progress_cb(len(result))
+        return result
+
+    return wrapped_func
+
+
+def batched_iter(iterable, batch_size):
+    batch = []
+    for item in iterable:
+        batch.append(item)
+        if len(batch) == batch_size:
+            yield batch
+            batch = []
+    if batch:
+        yield batch
+
+
+def get_value_for_keys(data: dict, keys: List, ignore_none: bool = False):
+    for key in keys:
+        if key in data:
+            if ignore_none and data[key] is None:
+                continue
+            return data[key]
+    return None
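Quick illustrative calls for the two small helpers added above, assuming they land as module-level functions in supervisely/nn/inference/inference.py as this hunk suggests (outputs shown as comments):

```python
# Assumed import path for the helpers introduced in this diff.
from supervisely.nn.inference.inference import batched_iter, get_value_for_keys

print(list(batched_iter(range(7), batch_size=3)))
# [[0, 1, 2], [3, 4, 5], [6]]

settings = {"conf": None, "confidence": 0.4}
print(get_value_for_keys(settings, ["conf", "confidence"], ignore_none=True))
# 0.4
```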