synapse-sdk 1.0.0a31__py3-none-any.whl → 1.0.0a33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synapse-sdk might be problematic.
- synapse_sdk/clients/backend/__init__.py +2 -0
- synapse_sdk/clients/backend/annotation.py +4 -4
- synapse_sdk/clients/backend/dataset.py +57 -5
- synapse_sdk/clients/backend/hitl.py +17 -0
- synapse_sdk/clients/backend/integration.py +3 -1
- synapse_sdk/clients/backend/models.py +44 -0
- synapse_sdk/clients/base.py +61 -16
- synapse_sdk/plugins/categories/base.py +40 -0
- synapse_sdk/plugins/categories/export/actions/export.py +168 -28
- synapse_sdk/plugins/categories/export/templates/plugin/export.py +43 -33
- synapse_sdk/plugins/categories/smart_tool/templates/config.yaml +2 -0
- synapse_sdk/plugins/categories/upload/actions/upload.py +292 -0
- synapse_sdk/plugins/categories/upload/templates/config.yaml +6 -0
- synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py +0 -0
- synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +44 -0
- synapse_sdk/plugins/enums.py +3 -1
- synapse_sdk/plugins/models.py +16 -0
- synapse_sdk/utils/storage/__init__.py +20 -2
- {synapse_sdk-1.0.0a31.dist-info → synapse_sdk-1.0.0a33.dist-info}/METADATA +3 -2
- {synapse_sdk-1.0.0a31.dist-info → synapse_sdk-1.0.0a33.dist-info}/RECORD +26 -22
- {synapse_sdk-1.0.0a31.dist-info → synapse_sdk-1.0.0a33.dist-info}/WHEEL +1 -1
- synapse_sdk/plugins/categories/export/actions/utils.py +0 -5
- synapse_sdk/plugins/categories/import/actions/import.py +0 -10
- /synapse_sdk/plugins/categories/{import → upload}/__init__.py +0 -0
- /synapse_sdk/plugins/categories/{import → upload}/actions/__init__.py +0 -0
- {synapse_sdk-1.0.0a31.dist-info → synapse_sdk-1.0.0a33.dist-info}/entry_points.txt +0 -0
- {synapse_sdk-1.0.0a31.dist-info → synapse_sdk-1.0.0a33.dist-info/licenses}/LICENSE +0 -0
- {synapse_sdk-1.0.0a31.dist-info → synapse_sdk-1.0.0a33.dist-info}/top_level.txt +0 -0
synapse_sdk/clients/backend/__init__.py CHANGED

@@ -1,6 +1,7 @@
 from synapse_sdk.clients.backend.annotation import AnnotationClientMixin
 from synapse_sdk.clients.backend.core import CoreClientMixin
 from synapse_sdk.clients.backend.dataset import DatasetClientMixin
+from synapse_sdk.clients.backend.hitl import HITLClientMixin
 from synapse_sdk.clients.backend.integration import IntegrationClientMixin
 from synapse_sdk.clients.backend.ml import MLClientMixin

@@ -11,6 +12,7 @@ class BackendClient(
     DatasetClientMixin,
     IntegrationClientMixin,
     MLClientMixin,
+    HITLClientMixin,
 ):
     name = 'Backend'
     token = None
synapse_sdk/clients/backend/annotation.py CHANGED

@@ -11,14 +11,14 @@ class AnnotationClientMixin(BaseClient):
         path = f'task_tags/{pk}/'
         return self._get(path)

-    def list_task_tags(self,
+    def list_task_tags(self, params):
         path = 'task_tags/'
-        return self._list(path,
+        return self._list(path, params=params)

-    def list_tasks(self,
+    def list_tasks(self, params=None, url_conversion=None, list_all=False):
         path = 'tasks/'
         url_conversion = get_default_url_conversion(url_conversion, files_fields=['files'])
-        return self._list(path,
+        return self._list(path, params=params, url_conversion=url_conversion, list_all=list_all)

     def create_tasks(self, data):
         path = 'tasks/'
synapse_sdk/clients/backend/dataset.py CHANGED

@@ -1,4 +1,6 @@
 from multiprocessing import Pool
+from pathlib import Path
+from typing import Dict, Optional

 from tqdm import tqdm
@@ -11,21 +13,59 @@ class DatasetClientMixin(BaseClient):
         path = 'datasets/'
         return self._list(path)

-    def
+    def get_dataset(self, dataset_id):
+        """Get dataset from synapse-backend.
+
+        Args:
+            dataset_id: The dataset id to get.
+        """
+        path = f'datasets/{dataset_id}/?expand=file_specifications'
+        return self._get(path)
+
+    def create_data_file(self, file_path: Path):
+        """Create data file to synapse-backend.
+
+        Args:
+            file_path: The file pathlib object to upload.
+        """
         path = 'data_files/'
         return self._post(path, files={'file': file_path})

     def create_data_units(self, data):
+        """Create data units to synapse-backend.
+
+        Args:
+            data: The data bindings to upload from the create_data_file interface.
+        """
         path = 'data_units/'
         return self._post(path, data=data)

-    def
+    def upload_dataset(
+        self,
+        dataset_id: int,
+        dataset: Dict,
+        project_id: Optional[int] = None,
+        batch_size: int = 1000,
+        process_pool: int = 10,
+    ):
+        """Upload dataset to synapse-backend.
+
+        Args:
+            dataset_id: The dataset id to upload the data to.
+            dataset: The dataset to upload.
+                * structure:
+                    - files: The files to upload. (key: file name, value: file pathlib object)
+                    - meta: The meta data to upload.
+            project_id: The project id to upload the data to.
+            batch_size: The batch size to upload the data in.
+            process_pool: The process pool size to upload the data with.
+        """
         # TODO validate dataset with schema

         params = [(data, dataset_id) for data in dataset]

         with Pool(processes=process_pool) as pool:
-            dataset = pool.starmap(self.
+            dataset = pool.starmap(self.upload_data_file, tqdm(params))

         batches = get_batched_list(dataset, batch_size)

@@ -36,13 +76,25 @@ class DatasetClientMixin(BaseClient):
             tasks_data = []
             for data, data_unit in zip(batch, data_units):
                 task_data = {'project': project_id, 'data_unit': data_unit['id']}
-                # TODO:
+                # TODO: Additional logic needed here if task data storage is required during import.

                 tasks_data.append(task_data)

             self.create_tasks(tasks_data)

-    def
+    def upload_data_file(self, data: Dict, dataset_id: int) -> Dict:
+        """Upload files to synapse-backend.
+
+        Args:
+            data: The data to upload.
+                * structure:
+                    - files: The files to upload. (key: file name, value: file pathlib object)
+                    - meta: The meta data to upload.
+            dataset_id: The dataset id to upload the data to.
+
+        Returns:
+            Dict: The result of the upload.
+        """
         for name, path in data['files'].items():
             data_file = self.create_data_file(path)
             data['dataset'] = dataset_id
synapse_sdk/clients/backend/hitl.py ADDED

@@ -0,0 +1,17 @@
+from synapse_sdk.clients.base import BaseClient
+from synapse_sdk.clients.utils import get_default_url_conversion
+
+
+class HITLClientMixin(BaseClient):
+    def get_assignment(self, pk):
+        path = f'assignments/{pk}/'
+        return self._get(path)
+
+    def list_assignments(self, params=None, url_conversion=None, list_all=False):
+        path = 'assignments/'
+        url_conversion = get_default_url_conversion(url_conversion, files_fields=['files'])
+        return self._list(path, params=params, url_conversion=url_conversion, list_all=list_all)
+
+    def set_tags_assignments(self, data, params=None):
+        path = 'assignments/set_tags/'
+        return self._post(path, payload=data, params=params)
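Since HITLClientMixin is mixed into BackendClient (first hunk of this diff), the assignment APIs ride on the same client object. A sketch; the set_tags payload shape is an assumption for illustration:

# client: a configured BackendClient.
assignments, count = client.list_assignments(params={'project': 42}, list_all=True)

# The payload shape here is assumed, not confirmed by this diff.
client.set_tags_assignments(data={'tags': ['reviewed']}, params={'project': 42})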
synapse_sdk/clients/backend/integration.py CHANGED

@@ -1,3 +1,4 @@
+from synapse_sdk.clients.backend.models import Storage
 from synapse_sdk.clients.base import BaseClient
 from synapse_sdk.utils.file import convert_file_to_base64

@@ -79,5 +80,6 @@ class IntegrationClientMixin(BaseClient):
         return self._list(path, params=params, list_all=list_all)

     def get_storage(self, pk):
+        """Get specific storage data from synapse backend."""
         path = f'storages/{pk}/'
-        return self._get(path)
+        return self._get(path, pydantic_model=Storage)
synapse_sdk/clients/backend/models.py ADDED

@@ -0,0 +1,44 @@
+from enum import Enum
+from typing import Dict
+
+from pydantic import BaseModel
+
+
+class StorageCategory(str, Enum):
+    """Synapse Backend Storage Category Enum."""
+
+    INTERNAL = 'internal'
+    EXTERNAL = 'external'
+
+
+class StorageProvider(str, Enum):
+    """Synapse Backend Storage Provider Enum."""
+
+    AMAZON_S3 = 'amazon_s3'
+    AZURE = 'azure'
+    DIGITAL_OCEAN = 'digital_ocean'
+    FILE_SYSTEM = 'file_system'
+    FTP = 'ftp'
+    SFTP = 'sftp'
+    MINIO = 'minio'
+    GCP = 'gcp'
+
+
+class Storage(BaseModel):
+    """Synapse Backend Storage Model.
+
+    Attrs:
+        id (int): The storage pk.
+        name (str): The storage name.
+        category (str): The storage category. (ex: internal, external)
+        provider (str): The storage provider. (ex: s3, gcp)
+        configuration (Dict): The storage configuration.
+        is_default (bool): Whether the storage is the default for the Synapse backend workspace.
+    """
+
+    id: int
+    name: str
+    category: StorageCategory
+    provider: StorageProvider
+    configuration: Dict
+    is_default: bool
synapse_sdk/clients/base.py CHANGED
@@ -47,15 +47,12 @@ class BaseClient:
         # If files are included in the request, open them as binary files
         if kwargs.get('files') is not None:
             for name, file in kwargs['files'].items():
-                #
+                # Handle both string and Path object cases
                 if isinstance(file, str):
-
-
-                    opened_files.append(opened_file)
-                # If file is a Path object, open it directly
-                elif isinstance(file, Path):
+                    file = Path(file)
+                if isinstance(file, Path):
                     opened_file = file.open(mode='rb')
-                    kwargs['files'][name] = opened_file
+                    kwargs['files'][name] = (file.name, opened_file)
                     opened_files.append(opened_file)
         if 'data' in kwargs:
             for name, value in kwargs['data'].items():
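The rewritten branch normalizes str to Path and hands requests a (filename, fileobj) tuple, the multipart convention that preserves the original filename. A standalone sketch of the same normalization:

from pathlib import Path

def normalize_upload(file):
    # Mirrors the new files handling: str -> Path, then (name, handle).
    if isinstance(file, str):
        file = Path(file)
    if isinstance(file, Path):
        opened_file = file.open(mode='rb')
        # requests uses the tuple's first element as the uploaded filename.
        return (file.name, opened_file)
    return file  # already an open file object or tuple

files = {'file': normalize_upload('/tmp/report.csv')}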
@@ -67,6 +64,7 @@ class BaseClient:
             kwargs['data'] = json.dumps(kwargs['data'])

         try:
+            # Send request
             response = getattr(self.requests_session, method)(url, headers=headers, **kwargs)
             if not response.ok:
                 raise ClientError(
@@ -87,26 +85,59 @@ class BaseClient:
         except ValueError:
             return response.text

-    def _get(self, path, url_conversion=None, **kwargs):
+    def _get(self, path, url_conversion=None, pydantic_model=None, **kwargs):
+        """
+        Perform a GET request and optionally convert the response to a pydantic model.
+
+        Args:
+            path (str): URL path to request.
+            url_conversion (dict, optional): Configuration for URL to path conversion.
+            pydantic_model (Type, optional): Pydantic model to convert the response to.
+            **kwargs: Additional keyword arguments to pass to the request.
+
+        Returns:
+            The response data, optionally converted to a pydantic model.
+        """
         response = self._request('get', path, **kwargs)
+
         if url_conversion:
             if url_conversion['is_list']:
                 files_url_to_path_from_objs(response['results'], **url_conversion, is_async=True)
             else:
                 files_url_to_path_from_objs(response, **url_conversion)
+
+        if pydantic_model:
+            return self._validate_response_with_pydantic_model(response, pydantic_model)
+
         return response

-    def _post(self, path, **kwargs):
-
+    def _post(self, path, pydantic_model=None, **kwargs):
+        response = self._request('post', path, **kwargs)
+        if pydantic_model:
+            return self._validate_response_with_pydantic_model(response, pydantic_model)
+        else:
+            return response

-    def _put(self, path, **kwargs):
-
+    def _put(self, path, pydantic_model=None, **kwargs):
+        response = self._request('put', path, **kwargs)
+        if pydantic_model:
+            return self._validate_response_with_pydantic_model(response, pydantic_model)
+        else:
+            return response

-    def _patch(self, path, **kwargs):
-
+    def _patch(self, path, pydantic_model=None, **kwargs):
+        response = self._request('patch', path, **kwargs)
+        if pydantic_model:
+            return self._validate_response_with_pydantic_model(response, pydantic_model)
+        else:
+            return response

-    def _delete(self, path, **kwargs):
-
+    def _delete(self, path, pydantic_model=None, **kwargs):
+        response = self._request('delete', path, **kwargs)
+        if pydantic_model:
+            return self._validate_response_with_pydantic_model(response, pydantic_model)
+        else:
+            return response

     def _list(self, path, url_conversion=None, list_all=False, **kwargs):
         response = self._get(path, **kwargs)
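Every verb helper now accepts pydantic_model; get_storage in integration.py above is the first caller. The helpers are underscore-prefixed internals, so this is illustrative rather than public API:

from synapse_sdk.clients.backend.models import Storage

# Illustrative only: _get is internal; get_storage(pk) is the public wrapper.
data = client._get('storages/1/', pydantic_model=Storage)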
@@ -123,3 +154,17 @@ class BaseClient:

     def exists(self, api, *args, **kwargs):
         return getattr(self, api)(*args, **kwargs)['count'] > 0
+
+    def _validate_response_with_pydantic_model(self, response, pydantic_model):
+        """Validate a response with a pydantic model."""
+        # Check if model is a pydantic model (has the __pydantic_model__ attribute)
+        if (
+            hasattr(pydantic_model, '__pydantic_model__')
+            or hasattr(pydantic_model, 'model_validate')
+            or hasattr(pydantic_model, 'parse_obj')
+        ):
+            pydantic_model.model_validate(response)
+            return response
+        else:
+            # Not a pydantic model
+            raise TypeError('The provided model is not a pydantic model')
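Note the semantics: the helper raises on invalid data but returns the raw response unchanged, so callers such as get_storage still receive a dict rather than a model instance:

# client: a configured BackendClient; Storage from synapse_sdk.clients.backend.models.
data = client.get_storage(1)            # validated against Storage, returned as a dict
storage = Storage.model_validate(data)  # convert explicitly if a model object is needed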
synapse_sdk/plugins/categories/base.py CHANGED

@@ -17,6 +17,30 @@ from synapse_sdk.utils.pydantic.errors import pydantic_to_drf_error


 class Action:
+    """Base class for all plugin actions.
+
+    Attrs:
+        name (str): The name of the action.
+        category (PluginCategory): The category of the action.
+        method (RunMethod): The method used to run the action.
+        run_class (Run): The class used to run the action.
+        params_model (BaseModel): The model used to validate the params.
+        progress_categories (List[str]): The categories used for progress updates.
+        params (Dict): The params to run the action with.
+        plugin_config (Dict): The plugin config.
+        plugin_release (PluginRelease): The plugin release.
+        config (Dict): The action config.
+        requirements (List[str]): The requirements to install.
+        job_id (str): The job id.
+        direct (bool): Whether to run the action directly.
+        debug (bool): Whether to run the action in debug mode.
+        envs (Dict): The runtime envs.
+        run (Run): The run instance.
+
+    Raises:
+        ActionError: If the action fails.
+    """
+
     # class variables
     name = None
     category = None
@@ -159,11 +183,19 @@
         return getattr(self, f'start_by_{self.method.value}')()

     def start(self):
+        """Start the action.
+
+        TODO: Specify the return type of start for overridden methods.
+        """
         if self.method == RunMethod.JOB:
             return self.entrypoint(self.run, **self.params)
         return self.entrypoint(**self.params)

     def start_by_task(self):
+        """Ray Task based execution.
+
+        * A task method that simply executes the entrypoint without job management functionality.
+        """
         import ray
         from ray.exceptions import RayTaskError

@@ -195,6 +227,10 @@
             raise ActionError(e.cause)

     def start_by_job(self):
+        """Ray Job based execution.
+
+        * Executes the entrypoint with a Ray job; the job manages the entrypoint execution and stores the results.
+        """
         main_options = []
         options = ['run', '--direct']
         arguments = [self.name, f'{json.dumps(json.dumps(self.params))}']
@@ -215,6 +251,10 @@
         )

     def start_by_restapi(self):
+        """Ray Serve based execution.
+
+        * Executes a FastAPI endpoint defined within the plugin.
+        """
         path = self.params.pop('path', '')
         method = self.params.pop('method')

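The three start_by_* docstrings map onto the dispatch shown earlier, getattr(self, f'start_by_{self.method.value}')(). A simplified stand-in for that routing; the enum values are assumed from the method names (the real enum lives in synapse_sdk/plugins/enums.py):

from enum import Enum

class RunMethod(Enum):
    TASK = 'task'        # assumed value
    JOB = 'job'          # assumed value
    RESTAPI = 'restapi'  # assumed value

method = RunMethod.JOB
handler_name = f'start_by_{method.value}'  # -> 'start_by_job'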
synapse_sdk/plugins/categories/export/actions/export.py CHANGED

@@ -1,3 +1,6 @@
+from abc import ABC, abstractmethod
+from typing import Any, Literal
+
 from pydantic import BaseModel, field_validator
 from pydantic_core import PydanticCustomError
@@ -9,11 +12,158 @@ from synapse_sdk.plugins.enums import PluginCategory, RunMethod
 from synapse_sdk.utils.storage import get_pathlib


+class ExportTargetHandler(ABC):
+    """
+    Abstract base class for handling export targets.
+
+    This class defines the blueprint for export target handlers, requiring the implementation
+    of methods to validate filters, retrieve results, and process collections of results.
+    """
+
+    @abstractmethod
+    def validate_filter(self, value: dict, client: Any):
+        """
+        Validate filter query params used to request original data from the API.
+
+        Args:
+            value (dict): The filter criteria to validate.
+            client (Any): The client used to validate the filter.
+
+        Raises:
+            PydanticCustomError: If the filter criteria are invalid.
+
+        Returns:
+            dict: The validated filter criteria.
+        """
+        pass
+
+    @abstractmethod
+    def get_results(self, client: Any, filters: dict):
+        """
+        Retrieve original data from target sources.
+
+        Args:
+            client (Any): The client used to retrieve the results.
+            filters (dict): The filter criteria to apply.
+
+        Returns:
+            tuple: A tuple containing the results and the total count of results.
+        """
+        pass
+
+    @abstractmethod
+    def get_export_item(self, results):
+        """
+        Provide elements used to build export data.
+
+        Args:
+            results (list): The results to process.
+
+        Yields:
+            generator: A generator that yields processed data items.
+        """
+        pass
+
+
+class AssignmentExportTargetHandler(ExportTargetHandler):
+    def validate_filter(self, value: dict, client: Any):
+        if 'project' not in value:
+            raise PydanticCustomError('missing_field', _('Project is required for Assignment.'))
+        try:
+            client.list_assignments(params=value)
+        except ClientError:
+            raise PydanticCustomError('client_error', _('Unable to get Assignment.'))
+        return value
+
+    def get_results(self, client: Any, filters: dict):
+        return client.list_assignments(params=filters, list_all=True)
+
+    def get_export_item(self, results):
+        for result in results:
+            yield {
+                'data': result['data'],
+                'files': result['file'],
+                'id': result['id'],
+            }
+
+
+class GroundTruthExportTargetHandler(ExportTargetHandler):
+    def validate_filter(self, value: dict, client: Any):
+        if 'ground_truth_dataset_version' not in value:
+            raise PydanticCustomError('missing_field', _('Ground Truth dataset version is required.'))
+        try:
+            client.get_ground_truth_version(value['ground_truth_dataset_version'])
+        except ClientError:
+            raise PydanticCustomError('client_error', _('Unable to get Ground Truth dataset version.'))
+        return value
+
+    def get_results(self, client: Any, filters: dict):
+        filters['ground_truth_dataset_versions'] = filters.pop('ground_truth_dataset_version')
+        return client.list_ground_truth_events(params=filters, list_all=True)
+
+    def get_export_item(self, results):
+        for result in results:
+            files_key = next(iter(result['data_unit']['files']))
+            yield {
+                'data': result['data'],
+                'files': result['data_unit']['files'][files_key],
+                'id': result['ground_truth'],
+            }
+
+
+class TaskExportTargetHandler(ExportTargetHandler):
+    def validate_filter(self, value: dict, client: Any):
+        if 'project' not in value:
+            raise PydanticCustomError('missing_field', _('Project is required for Task.'))
+        try:
+            client.list_tasks(params=value)
+        except ClientError:
+            raise PydanticCustomError('client_error', _('Unable to get Task.'))
+        return value
+
+    def get_results(self, client: Any, filters: dict):
+        filters['expand'] = 'data_unit'
+        return client.list_tasks(params=filters, list_all=True)
+
+    def get_export_item(self, results):
+        for result in results:
+            files_key = next(iter(result['data_unit']['files']))
+            yield {
+                'data': result['data'],
+                'files': result['data_unit']['files'][files_key],
+                'id': result['id'],
+            }
+
+
+class TargetHandlerFactory:
+    @staticmethod
+    def get_handler(target: str) -> ExportTargetHandler:
+        if target == 'assignment':
+            return AssignmentExportTargetHandler()
+        elif target == 'ground_truth':
+            return GroundTruthExportTargetHandler()
+        elif target == 'task':
+            return TaskExportTargetHandler()
+        else:
+            raise ValueError(f'Unknown target: {target}')
+
+
 class ExportParams(BaseModel):
+    """
+    Parameters for the export action.
+
+    Attributes:
+        storage (int): The storage ID to save the exported data.
+        save_original_file (bool): Whether to save the original file.
+        path (str): The path to save the exported data.
+        target (str): The target source to export data from. (ex. ground_truth, assignment, task)
+        filter (dict): The filter criteria to apply.
+    """
+
     storage: int
     save_original_file: bool = True
     path: str
-
+    target: Literal['assignment', 'ground_truth', 'task']
     filter: dict

     @field_validator('storage')
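A sketch of resolving a handler and walking export items, mirroring what ExportAction.start does at the end of this diff (client is a configured BackendClient):

handler = TargetHandlerFactory.get_handler('task')
results, count = handler.get_results(client, {'project': 42, 'expand': 'data_unit'})
for item in handler.get_export_item(results):
    print(item['id'], item['files'])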
@@ -27,16 +177,14 @@ class ExportParams(BaseModel):
             raise PydanticCustomError('client_error', _('Unable to get storage from Synapse backend.'))
         return value

-    @field_validator('
+    @field_validator('filter')
     @staticmethod
-    def
+    def check_filter_by_target(value, info):
         action = info.context['action']
         client = action.client
-
-
-
-        raise PydanticCustomError('client_error', _('Unable to get Ground Truth dataset version.'))
-        return value
+        target = action.params['target']
+        handler = TargetHandlerFactory.get_handler(target)
+        return handler.validate_filter(value, client)


 @register_action
@@ -51,32 +199,24 @@ class ExportAction(Action):
         }
     }

-    def
-        """Get
-        for result in results:
-            yield {
-                'data': result['data'],
-                'files': result['data_unit']['files'],
-                'id': result['ground_truth'],
-            }
-
-    def get_filtered_results(self):
-        """Get filtered ground truth events."""
-        self.params['filter']['ground_truth_dataset_versions'] = self.params['ground_truth_dataset_version']
-        filters = {'expand': 'data', **self.params['filter']}
-
+    def get_filtered_results(self, filters, handler):
+        """Get filtered target results."""
         try:
-
-            results =
-            count =
+            result_list = handler.get_results(self.client, filters)
+            results = result_list[0]
+            count = result_list[1]
         except ClientError:
             raise PydanticCustomError('client_error', _('Unable to get Ground Truth dataset.'))
         return results, count

     def start(self):
-
-
+        filters = {'expand': 'data', **self.params['filter']}
+        target = self.params['target']
+        handler = TargetHandlerFactory.get_handler(target)
+
+        self.params['results'], self.params['count'] = self.get_filtered_results(filters, handler)
+        export_items = handler.get_export_item(self.params['results'])

         storage = self.client.get_storage(self.params['storage'])
         pathlib_cwd = get_pathlib(storage, self.params['path'])
-        return self.entrypoint(self.run,
+        return self.entrypoint(self.run, export_items, pathlib_cwd, **self.params)