synapse-sdk 1.0.0b5__py3-none-any.whl → 2025.12.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167)
  1. synapse_sdk/__init__.py +24 -0
  2. synapse_sdk/cli/code_server.py +305 -33
  3. synapse_sdk/clients/agent/__init__.py +2 -1
  4. synapse_sdk/clients/agent/container.py +143 -0
  5. synapse_sdk/clients/agent/ray.py +296 -38
  6. synapse_sdk/clients/backend/annotation.py +1 -1
  7. synapse_sdk/clients/backend/core.py +31 -4
  8. synapse_sdk/clients/backend/data_collection.py +82 -7
  9. synapse_sdk/clients/backend/hitl.py +1 -1
  10. synapse_sdk/clients/backend/ml.py +1 -1
  11. synapse_sdk/clients/base.py +211 -61
  12. synapse_sdk/loggers.py +46 -0
  13. synapse_sdk/plugins/README.md +1340 -0
  14. synapse_sdk/plugins/categories/base.py +59 -9
  15. synapse_sdk/plugins/categories/export/actions/__init__.py +3 -0
  16. synapse_sdk/plugins/categories/export/actions/export/__init__.py +28 -0
  17. synapse_sdk/plugins/categories/export/actions/export/action.py +165 -0
  18. synapse_sdk/plugins/categories/export/actions/export/enums.py +113 -0
  19. synapse_sdk/plugins/categories/export/actions/export/exceptions.py +53 -0
  20. synapse_sdk/plugins/categories/export/actions/export/models.py +74 -0
  21. synapse_sdk/plugins/categories/export/actions/export/run.py +195 -0
  22. synapse_sdk/plugins/categories/export/actions/export/utils.py +187 -0
  23. synapse_sdk/plugins/categories/export/templates/config.yaml +19 -1
  24. synapse_sdk/plugins/categories/export/templates/plugin/__init__.py +390 -0
  25. synapse_sdk/plugins/categories/export/templates/plugin/export.py +153 -177
  26. synapse_sdk/plugins/categories/neural_net/actions/train.py +1130 -32
  27. synapse_sdk/plugins/categories/neural_net/actions/tune.py +157 -4
  28. synapse_sdk/plugins/categories/neural_net/templates/config.yaml +7 -4
  29. synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +4 -0
  30. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/__init__.py +3 -0
  31. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/action.py +10 -0
  32. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/__init__.py +28 -0
  33. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/action.py +148 -0
  34. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/enums.py +269 -0
  35. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/exceptions.py +14 -0
  36. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/factory.py +76 -0
  37. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/models.py +100 -0
  38. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/orchestrator.py +248 -0
  39. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/run.py +64 -0
  40. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/__init__.py +17 -0
  41. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/annotation.py +265 -0
  42. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/base.py +170 -0
  43. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/extraction.py +83 -0
  44. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/metrics.py +92 -0
  45. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py +243 -0
  46. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py +143 -0
  47. synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +19 -0
  48. synapse_sdk/plugins/categories/upload/actions/upload/action.py +236 -0
  49. synapse_sdk/plugins/categories/upload/actions/upload/context.py +185 -0
  50. synapse_sdk/plugins/categories/upload/actions/upload/enums.py +493 -0
  51. synapse_sdk/plugins/categories/upload/actions/upload/exceptions.py +36 -0
  52. synapse_sdk/plugins/categories/upload/actions/upload/factory.py +138 -0
  53. synapse_sdk/plugins/categories/upload/actions/upload/models.py +214 -0
  54. synapse_sdk/plugins/categories/upload/actions/upload/orchestrator.py +183 -0
  55. synapse_sdk/plugins/categories/upload/actions/upload/registry.py +113 -0
  56. synapse_sdk/plugins/categories/upload/actions/upload/run.py +179 -0
  57. synapse_sdk/plugins/categories/upload/actions/upload/steps/__init__.py +1 -0
  58. synapse_sdk/plugins/categories/upload/actions/upload/steps/base.py +107 -0
  59. synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +62 -0
  60. synapse_sdk/plugins/categories/upload/actions/upload/steps/collection.py +63 -0
  61. synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py +91 -0
  62. synapse_sdk/plugins/categories/upload/actions/upload/steps/initialize.py +82 -0
  63. synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +235 -0
  64. synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +201 -0
  65. synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py +104 -0
  66. synapse_sdk/plugins/categories/upload/actions/upload/steps/validate.py +71 -0
  67. synapse_sdk/plugins/categories/upload/actions/upload/strategies/__init__.py +1 -0
  68. synapse_sdk/plugins/categories/upload/actions/upload/strategies/base.py +82 -0
  69. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/__init__.py +1 -0
  70. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/batch.py +39 -0
  71. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/single.py +29 -0
  72. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/__init__.py +1 -0
  73. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/flat.py +300 -0
  74. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py +287 -0
  75. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/__init__.py +1 -0
  76. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/excel.py +174 -0
  77. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/none.py +16 -0
  78. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/__init__.py +1 -0
  79. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/sync.py +84 -0
  80. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/__init__.py +1 -0
  81. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py +60 -0
  82. synapse_sdk/plugins/categories/upload/actions/upload/utils.py +250 -0
  83. synapse_sdk/plugins/categories/upload/templates/README.md +470 -0
  84. synapse_sdk/plugins/categories/upload/templates/config.yaml +28 -2
  85. synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py +310 -0
  86. synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +82 -20
  87. synapse_sdk/plugins/models.py +111 -9
  88. synapse_sdk/plugins/templates/plugin-config-schema.json +7 -0
  89. synapse_sdk/plugins/templates/schema.json +7 -0
  90. synapse_sdk/plugins/utils/__init__.py +3 -0
  91. synapse_sdk/plugins/utils/ray_gcs.py +66 -0
  92. synapse_sdk/shared/__init__.py +25 -0
  93. synapse_sdk/utils/converters/dm/__init__.py +42 -41
  94. synapse_sdk/utils/converters/dm/base.py +137 -0
  95. synapse_sdk/utils/converters/dm/from_v1.py +208 -562
  96. synapse_sdk/utils/converters/dm/to_v1.py +258 -304
  97. synapse_sdk/utils/converters/dm/tools/__init__.py +214 -0
  98. synapse_sdk/utils/converters/dm/tools/answer.py +95 -0
  99. synapse_sdk/utils/converters/dm/tools/bounding_box.py +132 -0
  100. synapse_sdk/utils/converters/dm/tools/bounding_box_3d.py +121 -0
  101. synapse_sdk/utils/converters/dm/tools/classification.py +75 -0
  102. synapse_sdk/utils/converters/dm/tools/keypoint.py +117 -0
  103. synapse_sdk/utils/converters/dm/tools/named_entity.py +111 -0
  104. synapse_sdk/utils/converters/dm/tools/polygon.py +122 -0
  105. synapse_sdk/utils/converters/dm/tools/polyline.py +124 -0
  106. synapse_sdk/utils/converters/dm/tools/prompt.py +94 -0
  107. synapse_sdk/utils/converters/dm/tools/relation.py +86 -0
  108. synapse_sdk/utils/converters/dm/tools/segmentation.py +141 -0
  109. synapse_sdk/utils/converters/dm/tools/segmentation_3d.py +83 -0
  110. synapse_sdk/utils/converters/dm/types.py +168 -0
  111. synapse_sdk/utils/converters/dm/utils.py +162 -0
  112. synapse_sdk/utils/converters/dm_legacy/__init__.py +56 -0
  113. synapse_sdk/utils/converters/dm_legacy/from_v1.py +627 -0
  114. synapse_sdk/utils/converters/dm_legacy/to_v1.py +367 -0
  115. synapse_sdk/utils/file/__init__.py +58 -0
  116. synapse_sdk/utils/file/archive.py +32 -0
  117. synapse_sdk/utils/file/checksum.py +56 -0
  118. synapse_sdk/utils/file/chunking.py +31 -0
  119. synapse_sdk/utils/file/download.py +385 -0
  120. synapse_sdk/utils/file/encoding.py +40 -0
  121. synapse_sdk/utils/file/io.py +22 -0
  122. synapse_sdk/utils/file/upload.py +165 -0
  123. synapse_sdk/utils/file/video/__init__.py +29 -0
  124. synapse_sdk/utils/file/video/transcode.py +307 -0
  125. synapse_sdk/utils/{file.py → file.py.backup} +77 -0
  126. synapse_sdk/utils/network.py +272 -0
  127. synapse_sdk/utils/storage/__init__.py +6 -2
  128. synapse_sdk/utils/storage/providers/file_system.py +6 -0
  129. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/METADATA +19 -2
  130. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/RECORD +134 -74
  131. synapse_sdk/devtools/docs/.gitignore +0 -20
  132. synapse_sdk/devtools/docs/README.md +0 -41
  133. synapse_sdk/devtools/docs/blog/2019-05-28-first-blog-post.md +0 -12
  134. synapse_sdk/devtools/docs/blog/2019-05-29-long-blog-post.md +0 -44
  135. synapse_sdk/devtools/docs/blog/2021-08-01-mdx-blog-post.mdx +0 -24
  136. synapse_sdk/devtools/docs/blog/2021-08-26-welcome/docusaurus-plushie-banner.jpeg +0 -0
  137. synapse_sdk/devtools/docs/blog/2021-08-26-welcome/index.md +0 -29
  138. synapse_sdk/devtools/docs/blog/authors.yml +0 -25
  139. synapse_sdk/devtools/docs/blog/tags.yml +0 -19
  140. synapse_sdk/devtools/docs/docusaurus.config.ts +0 -138
  141. synapse_sdk/devtools/docs/package-lock.json +0 -17455
  142. synapse_sdk/devtools/docs/package.json +0 -47
  143. synapse_sdk/devtools/docs/sidebars.ts +0 -44
  144. synapse_sdk/devtools/docs/src/components/HomepageFeatures/index.tsx +0 -71
  145. synapse_sdk/devtools/docs/src/components/HomepageFeatures/styles.module.css +0 -11
  146. synapse_sdk/devtools/docs/src/css/custom.css +0 -30
  147. synapse_sdk/devtools/docs/src/pages/index.module.css +0 -23
  148. synapse_sdk/devtools/docs/src/pages/index.tsx +0 -21
  149. synapse_sdk/devtools/docs/src/pages/markdown-page.md +0 -7
  150. synapse_sdk/devtools/docs/static/.nojekyll +0 -0
  151. synapse_sdk/devtools/docs/static/img/docusaurus-social-card.jpg +0 -0
  152. synapse_sdk/devtools/docs/static/img/docusaurus.png +0 -0
  153. synapse_sdk/devtools/docs/static/img/favicon.ico +0 -0
  154. synapse_sdk/devtools/docs/static/img/logo.png +0 -0
  155. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_mountain.svg +0 -171
  156. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_react.svg +0 -170
  157. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_tree.svg +0 -40
  158. synapse_sdk/devtools/docs/tsconfig.json +0 -8
  159. synapse_sdk/plugins/categories/export/actions/export.py +0 -346
  160. synapse_sdk/plugins/categories/export/enums.py +0 -7
  161. synapse_sdk/plugins/categories/neural_net/actions/gradio.py +0 -151
  162. synapse_sdk/plugins/categories/pre_annotation/actions/to_task.py +0 -943
  163. synapse_sdk/plugins/categories/upload/actions/upload.py +0 -954
  164. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/WHEEL +0 -0
  165. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/entry_points.txt +0 -0
  166. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/licenses/LICENSE +0 -0
  167. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/top_level.txt +0 -0
synapse_sdk/utils/converters/dm/to_v1.py
@@ -1,367 +1,321 @@
-import random
-import string
+"""
+DM Schema V2 → V1 Converter

-from . import BaseDMConverter
+Created: 2025-12-11

+V2→V1 conversion:
+- If both annotation_data and annotation_meta exist, complete V1 restoration
+- If only annotation_data exists, convert to V1 using defaults
+"""

-class DMV2ToV1Converter(BaseDMConverter):
-    """DM v2 to v1 format converter class."""
+from typing import Any

-    def __init__(self, new_dm_data={}, file_type=None):
-        """Initialize the converter.
+from .base import BaseDMConverter
+from .types import (
+    MEDIA_TYPE_REVERSE_MAP,
+    AnnotationMeta,
+    V2ConversionResult,
+)

-        Args:
-            new_dm_data (dict): DM v2 format data to be converted
-            file_type (str, optional): Type of file being converted (image, video, pcd, text, audio)
-        """
-        # Auto-detect file type if not provided
-        if file_type is None:
-            file_type = self._detect_file_type(new_dm_data)

-        super().__init__(file_type=file_type)
-        self.new_dm_data = new_dm_data
-        self.annotations = {}
-        self.annotations_data = {}
-        self.extra = {}
-        self.relations = {}
-        self.annotation_groups = {}
+class DMV2ToV1Converter(BaseDMConverter):
+    """Converter from DM Schema V2 to V1

-    def _detect_file_type(self, data):
-        """Auto-detect file type from the data structure.
+    V2→V1 conversion:
+    - If both annotation_data and annotation_meta exist, complete V1 restoration
+    - If only annotation_data exists, convert to V1 using defaults

-        Args:
-            data (dict): DM v2 format data
+    Example:
+        >>> converter = DMV2ToV1Converter()
+        >>> # Complete conversion
+        >>> v1_data = converter.convert(v2_result)
+        >>> # Convert with annotation_data only
+        >>> v1_data = converter.convert({"annotation_data": annotation_data})
+    """

-        Returns:
-            str: Detected file type (image, video, pcd, text, audio)
-        """
-        if not data:
-            return None
-
-        # Check for media type keys (plural forms)
-        if 'images' in data:
-            return 'image'
-        elif 'videos' in data:
-            return 'video'
-        elif 'pcds' in data:
-            return 'pcd'
-        elif 'texts' in data:
-            return 'text'
-        elif 'audios' in data:
-            return 'audio'
-
-        return None
-
-    def convert(self):
-        """Convert DM v2 data to v1 format.
+    def _setup_tool_processors(self) -> None:
+        """Register tool processors"""
+        from .tools.bounding_box import BoundingBoxProcessor

-        Returns:
-            dict: Converted data in DM v1 format
-        """
-        # Reset state
-        new_dm_data = self.new_dm_data
-        self.annotations = {}
-        self.annotations_data = {}
-        self.extra = {}
-        self.relations = {}
-        self.annotation_groups = {}
-
-        # Process each media type (images, videos, etc.)
-        for media_type_plural, media_items in new_dm_data.items():
-            if media_type_plural == 'classification':
-                continue
+        self.register_processor(BoundingBoxProcessor())

-            media_type = self._singularize_media_type(media_type_plural)
-
-            for index, media_item in enumerate(media_items, 1):
-                media_id = f'{media_type}_{index}'
-
-                # Initialize structures for this media
-                self.annotations[media_id] = []
-                self.annotations_data[media_id] = []
-                self.extra[media_id] = {}
-                self.relations[media_id] = []
-                self.annotation_groups[media_id] = []
-
-                # Process each tool type in the media item
-                for tool_type, tool_data in media_item.items():
-                    self._process_tool_data(media_id, tool_type, tool_data)
-
-        # Build final result
-        result = {
-            'extra': self.extra,
-            'relations': self.relations,
-            'annotations': self.annotations,
-            'annotationsData': self.annotations_data,
-            'annotationGroups': self.annotation_groups,
-        }
+        # polygon to be added later
+        try:
+            from .tools.polygon import PolygonProcessor

-        return result
+            self.register_processor(PolygonProcessor())
+        except ImportError:
+            pass

-    def _process_tool_data(self, media_id, tool_type, tool_data):
-        """Process tool data for a specific media item.
+        try:
+            from .tools.polyline import PolylineProcessor

-        Args:
-            media_id (str): ID of the media item
-            tool_type (str): Type of annotation tool
-            tool_data (list): List of annotation data for this tool
-        """
-        for annotation in tool_data:
-            annotation_id = annotation['id']
-            classification = annotation['classification']
-            attrs = annotation.get('attrs', [])
-            data = annotation.get('data', {})
-
-            # Create annotation entry
-            annotation_entry = {
-                'id': annotation_id,
-                'tool': tool_type,
-                'isLocked': False,
-                'isVisible': True,
-                'classification': {'class': classification},
-            }
-
-            # Add additional classification attributes from attrs
-            for attr in attrs:
-                attr_name = attr.get('name')
-                attr_value = attr.get('value')
-                if attr_name and attr_value is not None:
-                    annotation_entry['classification'][attr_name] = attr_value
-
-            # Add special attributes for specific tools
-            if tool_type == 'keypoint':
-                annotation_entry['shape'] = 'circle'
-
-            self.annotations[media_id].append(annotation_entry)
-
-            # Create annotations data entry using tool processor
-            processor = self.tool_processors.get(tool_type)
-            if processor:
-                processor(annotation_id, data, self.annotations_data[media_id])
-            else:
-                self._handle_unknown_tool(tool_type, annotation_id)
-
-    def _convert_bounding_box(self, annotation_id, data, annotations_data):
-        """Process bounding box annotation data.
+            self.register_processor(PolylineProcessor())
+        except ImportError:
+            pass

-        Args:
-            annotation_id (str): ID of the annotation
-            data (list): Bounding box data [x1, y1, x2, y2]
-            annotations_data (list): List to append the processed data
-        """
-        if len(data) >= 4:
-            x1, y1, width, height = data[:4]
-            coordinate = {'x': x1, 'y': y1, 'width': width, 'height': height}
+        try:
+            from .tools.keypoint import KeypointProcessor

-            annotations_data.append({'id': annotation_id, 'coordinate': coordinate})
+            self.register_processor(KeypointProcessor())
+        except ImportError:
+            pass

-    def _convert_named_entity(self, annotation_id, data, annotations_data):
-        """Process named entity annotation data.
+        try:
+            from .tools.bounding_box_3d import BoundingBox3DProcessor

-        Args:
-            annotation_id (str): ID of the annotation
-            data (dict): Named entity data with ranges and content
-            annotations_data (list): List to append the processed data
-        """
-        entity_data = {'id': annotation_id}
+            self.register_processor(BoundingBox3DProcessor())
+        except ImportError:
+            pass

-        if 'ranges' in data:
-            entity_data['ranges'] = data['ranges']
+        try:
+            from .tools.segmentation import SegmentationProcessor

-        if 'content' in data:
-            entity_data['content'] = data['content']
+            self.register_processor(SegmentationProcessor())
+        except ImportError:
+            pass

-        annotations_data.append(entity_data)
+        try:
+            from .tools.named_entity import NamedEntityProcessor

-    def _convert_classification(self, annotation_id, data, annotations_data):
-        """Process classification annotation data.
+            self.register_processor(NamedEntityProcessor())
+        except ImportError:
+            pass

-        Args:
-            annotation_id (str): ID of the annotation
-            data (dict): Classification data (usually empty)
-            annotations_data (list): List to append the processed data
-        """
-        # Classification data is typically empty in v2, so we just add the ID
-        annotations_data.append({'id': annotation_id})
+        try:
+            from .tools.segmentation_3d import Segmentation3DProcessor

-    def _convert_polyline(self, annotation_id, data, annotations_data):
-        """Process polyline annotation data.
+            self.register_processor(Segmentation3DProcessor())
+        except ImportError:
+            pass

-        Args:
-            annotation_id (str): ID of the annotation
-            data (list): Polyline data - can be flat [x1, y1, x2, y2, ...] or nested [[x1, y1], [x2, y2], ...]
-            annotations_data (list): List to append the processed data
-        """
-        coordinates = []
+        try:
+            from .tools.classification import ClassificationProcessor

-        if data and isinstance(data[0], list):
-            # Nested format: [[x1, y1], [x2, y2], ...]
-            for point in data:
-                if len(point) >= 2:
-                    coordinates.append({'x': point[0], 'y': point[1], 'id': self._generate_random_id()})
-        else:
-            # Flat format: [x1, y1, x2, y2, ...]
-            for i in range(0, len(data), 2):
-                if i + 1 < len(data):
-                    coordinates.append({'x': data[i], 'y': data[i + 1], 'id': self._generate_random_id()})
+            self.register_processor(ClassificationProcessor())
+        except ImportError:
+            pass

-        annotations_data.append({'id': annotation_id, 'coordinate': coordinates})
+        try:
+            from .tools.relation import RelationProcessor

-    def _convert_keypoint(self, annotation_id, data, annotations_data):
-        """Process keypoint annotation data.
+            self.register_processor(RelationProcessor())
+        except ImportError:
+            pass

-        Args:
-            annotation_id (str): ID of the annotation
-            data (list): Keypoint data [x, y]
-            annotations_data (list): List to append the processed data
-        """
-        if len(data) >= 2:
-            coordinate = {'x': data[0], 'y': data[1]}
+        try:
+            from .tools.prompt import PromptProcessor

-            annotations_data.append({'id': annotation_id, 'coordinate': coordinate})
+            self.register_processor(PromptProcessor())
+        except ImportError:
+            pass

-    def _convert_3d_bounding_box(self, annotation_id, data, annotations_data):
-        """Process 3D bounding box annotation data.
+        try:
+            from .tools.answer import AnswerProcessor

-        Args:
-            annotation_id (str): ID of the annotation
-            data (dict): 3D bounding box PSR data
-            annotations_data (list): List to append the processed data
-        """
-        annotations_data.append({'id': annotation_id, 'psr': data})
+            self.register_processor(AnswerProcessor())
+        except ImportError:
+            pass

-    def _convert_image_segmentation(self, annotation_id, data, annotations_data):
-        """Process segmentation annotation data.
+    def convert(
+        self,
+        v2_data: V2ConversionResult | dict[str, Any],
+        annotation_meta: AnnotationMeta | None = None,
+    ) -> dict[str, Any]:
+        """Convert V2 data to V1 format

         Args:
-            annotation_id (str): ID of the annotation
-            data (list or dict): Segmentation data (pixel_indices or section)
-            annotations_data (list): List to append the processed data
-        """
-        annotation_data = {'id': annotation_id}
-
-        if isinstance(data, list):
-            # Pixel-based segmentation
-            annotation_data['pixel_indices'] = data
-        elif isinstance(data, dict):
-            # Section-based segmentation (video)
-            annotation_data['section'] = data
+            v2_data: DM Schema V2 format data
+            annotation_meta: Optional V1 top-level structure passed separately

-        annotations_data.append(annotation_data)
-
-    def _convert_video_segmentation(self, annotation_id, data, annotations_data):
-        """Process video segmentation annotation data.
+        Returns:
+            DM Schema V1 format data

-        Args:
-            annotation_id (str): ID of the annotation
-            data (list or dict): Segmentation data (pixel_indices or section)
-            annotations_data (list): List to append the processed data
+        Raises:
+            ValueError: Missing required fields or invalid format
         """
-        annotation_data = {'id': annotation_id}
+        # Extract annotation_data
+        if 'annotation_data' in v2_data:
+            annotation_data = v2_data['annotation_data']
+            # Extract annotation_meta (use from v2_data if present, else use parameter)
+            meta = v2_data.get('annotation_meta') or annotation_meta
+        else:
+            # annotation_data passed directly
+            annotation_data = v2_data
+            meta = annotation_meta

-        if isinstance(data, list):
-            # Pixel-based segmentation
-            annotation_data['pixel_indices'] = data
-        elif isinstance(data, dict):
-            # Section-based segmentation (video)
-            annotation_data['section'] = data
+        # Input validation
+        if not annotation_data:
+            raise ValueError("V2 data requires 'annotation_data'")

-        annotations_data.append(annotation_data)
+        # Build V1 data
+        return self._merge_data_and_meta(annotation_data, meta)

-    def _convert_3d_segmentation(self, annotation_id, data, annotations_data):
-        """Process 3D segmentation annotation data.
+    def _merge_data_and_meta(
+        self,
+        annotation_data: dict[str, Any],
+        annotation_meta: AnnotationMeta | None,
+    ) -> dict[str, Any]:
+        """Merge annotation_data and annotation_meta to create V1 format

         Args:
-            annotation_id (str): ID of the annotation
-            data (list or dict): 3D segmentation data
-            annotations_data (list): List to append the processed data
-        """
-        annotation_data = {'id': annotation_id}
+            annotation_data: V2 common annotation structure
+            annotation_meta: V1 top-level structure (restores meta info if present)

-        if isinstance(data, list):
-            # Pixel-based segmentation
-            annotation_data['pixel_indices'] = data
-        elif isinstance(data, dict):
-            # Section-based segmentation
-            annotation_data['section'] = data
-
-        annotations_data.append(annotation_data)
-
-    def _convert_prompt(self, annotation_id, data, annotations_data):
-        """Process prompt annotation data.
-
-        Args:
-            annotation_id (str): ID of the annotation
-            data (dict): Prompt data
-            annotations_data (list): List to append the processed data
+        Returns:
+            Merged V1 format data
         """
-        annotation_data = {'id': annotation_id}
+        annotations: dict[str, list[dict[str, Any]]] = {}
+        annotations_data: dict[str, list[dict[str, Any]]] = {}

-        if isinstance(data, dict):
-            annotation_data.update(data)
+        # Process by media type
+        media_index_by_type: dict[str, int] = {}

-        annotations_data.append(annotation_data)
+        for plural_type in ['images', 'videos', 'pcds', 'texts', 'audios', 'prompts']:
+            if plural_type not in annotation_data:
+                continue

-    def _convert_answer(self, annotation_id, data, annotations_data):
-        """Process answer annotation data.
+            singular_type = MEDIA_TYPE_REVERSE_MAP.get(plural_type, plural_type.rstrip('s'))
+            media_index_by_type[singular_type] = 0

-        Args:
-            annotation_id (str): ID of the annotation
-            data (dict): Answer data
-            annotations_data (list): List to append the processed data
-        """
-        annotation_data = {'id': annotation_id}
-
-        if isinstance(data, dict):
-            annotation_data.update(data)
+            for media_item in annotation_data[plural_type]:
+                # Generate media ID
+                media_index_by_type[singular_type] += 1
+                media_id = f'{singular_type}_{media_index_by_type[singular_type]}'

-        annotations_data.append(annotation_data)
+                # Convert by tool
+                ann_list, data_list = self._convert_media_item(media_item, media_id, annotation_meta)

-    def _convert_polygon(self, annotation_id, data, annotations_data):
-        """Process polygon annotation data.
+                if ann_list:
+                    annotations[media_id] = ann_list
+                if data_list:
+                    annotations_data[media_id] = data_list

-        Args:
-            annotation_id (str): ID of the annotation
-            data (list): Polygon data - can be flat [x1, y1, x2, y2, ...] or nested [[x1, y1], [x2, y2], ...]
-            annotations_data (list): List to append the processed data
-        """
-        coordinates = []
+        # Build V1 result
+        result: dict[str, Any] = {
+            'annotations': annotations,
+            'annotationsData': annotations_data,
+        }

-        if data and isinstance(data[0], list):
-            # Nested format: [[x1, y1], [x2, y2], ...]
-            for point in data:
-                if len(point) >= 2:
-                    coordinates.append({'x': point[0], 'y': point[1], 'id': self._generate_random_id()})
+        # Restore additional fields if annotation_meta exists
+        if annotation_meta:
+            result['extra'] = annotation_meta.get('extra', {})
+            result['relations'] = annotation_meta.get('relations', {})
+            result['annotationGroups'] = annotation_meta.get('annotationGroups', {})
+            result['assignmentId'] = annotation_meta.get('assignmentId')
         else:
-            # Flat format: [x1, y1, x2, y2, ...]
-            for i in range(0, len(data), 2):
-                if i + 1 < len(data):
-                    coordinates.append({'x': data[i], 'y': data[i + 1], 'id': self._generate_random_id()})
+            # Default values
+            result['extra'] = {}
+            result['relations'] = {}
+            result['annotationGroups'] = {}
+            result['assignmentId'] = None

-        annotations_data.append({'id': annotation_id, 'coordinate': coordinates})
+        return result

-    def _convert_relation(self, annotation_id, data, annotations_data):
-        """Process relation annotation data.
+    def _convert_media_item(
+        self,
+        media_item: dict[str, Any],
+        media_id: str,
+        annotation_meta: AnnotationMeta | None,
+    ) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
+        """Convert V2 media item to V1 annotations/annotationsData

         Args:
-            annotation_id (str): ID of the annotation
-            data (list): Relation data
-            annotations_data (list): List to append the processed data
+            media_item: V2 media item
+            media_id: Media ID to generate
+            annotation_meta: V1 top-level structure (for meta info restoration)
+
+        Returns:
+            (V1 annotations list, V1 annotationsData list)
         """
-        annotations_data.append({'id': annotation_id, 'data': data})
+        annotations: list[dict[str, Any]] = []
+        annotations_data: list[dict[str, Any]] = []
+
+        # Process by tool
+        for tool_name, v2_annotations in media_item.items():
+            processor = self.get_processor(tool_name)
+            if not processor:
+                continue

-    def _convert_group(self, annotation_id, data, annotations_data):
-        """Process group annotation data.
+            for v2_ann in v2_annotations:
+                # Convert to V1
+                v1_ann, v1_data = processor.to_v1(v2_ann)
+
+                # Restore meta info from annotation_meta
+                if annotation_meta:
+                    v1_ann = self._restore_meta_fields(v1_ann, annotation_meta, v2_ann.get('id', ''), media_id)
+                else:
+                    # Set default values
+                    v1_ann.setdefault('isLocked', False)
+                    v1_ann.setdefault('isVisible', True)
+                    v1_ann.setdefault('isValid', False)
+                    v1_ann.setdefault('isDrawCompleted', True)
+                    v1_ann.setdefault('label', [])
+
+                annotations.append(v1_ann)
+                annotations_data.append(v1_data)
+
+        return annotations, annotations_data
+
+    def _restore_meta_fields(
+        self,
+        v1_annotation: dict[str, Any],
+        annotation_meta: AnnotationMeta,
+        annotation_id: str,
+        media_id: str,
+    ) -> dict[str, Any]:
+        """Restore V1 annotation meta fields from annotation_meta

         Args:
-            annotation_id (str): ID of the annotation
-            data (list): Group data
-            annotations_data (list): List to append the processed data
-        """
-        annotations_data.append({'id': annotation_id, 'data': data})
+            v1_annotation: Base converted V1 annotation
+            annotation_meta: V1 top-level structure
+            annotation_id: Annotation ID
+            media_id: Media ID

-    def _generate_random_id(self):
-        """Generate a random ID similar to the original format."""
-        # Generate 10-character random string with letters, numbers, and symbols
-        chars = string.ascii_letters + string.digits + '-_'
-        return ''.join(random.choices(chars, k=10))
+        Returns:
+            V1 annotation with restored meta fields
+        """
+        # Find annotation in annotation_meta
+        meta_annotations = annotation_meta.get('annotations', {})
+
+        # Try to find by media_id
+        source_media_id = None
+        for mid in meta_annotations:
+            for ann in meta_annotations[mid]:
+                if ann.get('id') == annotation_id:
+                    source_media_id = mid
+                    break
+            if source_media_id:
+                break
+
+        if not source_media_id:
+            # Use defaults if not found
+            v1_annotation.setdefault('isLocked', False)
+            v1_annotation.setdefault('isVisible', True)
+            v1_annotation.setdefault('isValid', False)
+            v1_annotation.setdefault('isDrawCompleted', True)
+            v1_annotation.setdefault('label', [])
+            return v1_annotation
+
+        # Restore meta info from the found annotation
+        for meta_ann in meta_annotations[source_media_id]:
+            if meta_ann.get('id') == annotation_id:
+                # Restore meta fields
+                v1_annotation['isLocked'] = meta_ann.get('isLocked', False)
+                v1_annotation['isVisible'] = meta_ann.get('isVisible', True)
+                v1_annotation['isValid'] = meta_ann.get('isValid', False)
+                v1_annotation['isDrawCompleted'] = meta_ann.get('isDrawCompleted', True)
+                v1_annotation['label'] = meta_ann.get('label', [])
+
+                # Merge classification if present in meta
+                meta_classification = meta_ann.get('classification')
+                if meta_classification:
+                    # Keep existing class from classification and merge other fields
+                    current_class = v1_annotation.get('classification', {}).get('class')
+                    v1_annotation['classification'] = meta_classification.copy()
+                    if current_class:
+                        v1_annotation['classification']['class'] = current_class
+
+                break
+
+        return v1_annotation
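
For orientation, here is a minimal usage sketch of the new convert() entry point, assembled only from the docstring and signatures in the diff above. The import path, the 'bounding_box' tool key, and the payload layout are assumptions made for illustration; they are not taken from SDK fixtures.

from synapse_sdk.utils.converters.dm.to_v1 import DMV2ToV1Converter  # assumed import path

# Hypothetical V2 payload: one image carrying a single bounding-box annotation.
# The tool key and the per-annotation fields are illustrative guesses, not real data.
v2_payload = {
    'annotation_data': {
        'images': [
            {
                'bounding_box': [
                    {'id': 'ann_1', 'classification': 'car', 'data': [10, 20, 100, 50]},
                ],
            },
        ],
    },
    # 'annotation_meta': {...},  # optional; when present, fields such as isLocked,
    #                            # isVisible, label and classification are restored
}

converter = DMV2ToV1Converter()
v1_data = converter.convert(v2_payload)

# Per _merge_data_and_meta, the result always carries the V1 top-level keys
# 'annotations', 'annotationsData', 'extra', 'relations', 'annotationGroups', 'assignmentId'.
print(sorted(v1_data.keys()))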