synapse-sdk 1.0.0a23__py3-none-any.whl → 2025.12.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synapse_sdk/__init__.py +24 -0
- synapse_sdk/cli/__init__.py +310 -5
- synapse_sdk/cli/alias/__init__.py +22 -0
- synapse_sdk/cli/alias/create.py +36 -0
- synapse_sdk/cli/alias/dataclass.py +31 -0
- synapse_sdk/cli/alias/default.py +16 -0
- synapse_sdk/cli/alias/delete.py +15 -0
- synapse_sdk/cli/alias/list.py +19 -0
- synapse_sdk/cli/alias/read.py +15 -0
- synapse_sdk/cli/alias/update.py +17 -0
- synapse_sdk/cli/alias/utils.py +61 -0
- synapse_sdk/cli/code_server.py +687 -0
- synapse_sdk/cli/config.py +440 -0
- synapse_sdk/cli/devtools.py +90 -0
- synapse_sdk/cli/plugin/__init__.py +33 -0
- synapse_sdk/cli/{create_plugin.py → plugin/create.py} +2 -2
- synapse_sdk/{plugins/cli → cli/plugin}/publish.py +23 -15
- synapse_sdk/clients/agent/__init__.py +9 -3
- synapse_sdk/clients/agent/container.py +143 -0
- synapse_sdk/clients/agent/core.py +19 -0
- synapse_sdk/clients/agent/ray.py +298 -9
- synapse_sdk/clients/backend/__init__.py +30 -12
- synapse_sdk/clients/backend/annotation.py +13 -5
- synapse_sdk/clients/backend/core.py +31 -4
- synapse_sdk/clients/backend/data_collection.py +186 -0
- synapse_sdk/clients/backend/hitl.py +17 -0
- synapse_sdk/clients/backend/integration.py +16 -1
- synapse_sdk/clients/backend/ml.py +5 -1
- synapse_sdk/clients/backend/models.py +78 -0
- synapse_sdk/clients/base.py +384 -41
- synapse_sdk/clients/ray/serve.py +2 -0
- synapse_sdk/clients/validators/collections.py +31 -0
- synapse_sdk/devtools/config.py +94 -0
- synapse_sdk/devtools/server.py +41 -0
- synapse_sdk/devtools/streamlit_app/__init__.py +5 -0
- synapse_sdk/devtools/streamlit_app/app.py +128 -0
- synapse_sdk/devtools/streamlit_app/services/__init__.py +11 -0
- synapse_sdk/devtools/streamlit_app/services/job_service.py +233 -0
- synapse_sdk/devtools/streamlit_app/services/plugin_service.py +236 -0
- synapse_sdk/devtools/streamlit_app/services/serve_service.py +95 -0
- synapse_sdk/devtools/streamlit_app/ui/__init__.py +15 -0
- synapse_sdk/devtools/streamlit_app/ui/config_tab.py +76 -0
- synapse_sdk/devtools/streamlit_app/ui/deployment_tab.py +66 -0
- synapse_sdk/devtools/streamlit_app/ui/http_tab.py +125 -0
- synapse_sdk/devtools/streamlit_app/ui/jobs_tab.py +573 -0
- synapse_sdk/devtools/streamlit_app/ui/serve_tab.py +346 -0
- synapse_sdk/devtools/streamlit_app/ui/status_bar.py +118 -0
- synapse_sdk/devtools/streamlit_app/utils/__init__.py +40 -0
- synapse_sdk/devtools/streamlit_app/utils/json_viewer.py +197 -0
- synapse_sdk/devtools/streamlit_app/utils/log_formatter.py +38 -0
- synapse_sdk/devtools/streamlit_app/utils/styles.py +241 -0
- synapse_sdk/devtools/streamlit_app/utils/ui_components.py +289 -0
- synapse_sdk/devtools/streamlit_app.py +10 -0
- synapse_sdk/loggers.py +120 -9
- synapse_sdk/plugins/README.md +1340 -0
- synapse_sdk/plugins/__init__.py +0 -13
- synapse_sdk/plugins/categories/base.py +117 -11
- synapse_sdk/plugins/categories/data_validation/actions/validation.py +72 -0
- synapse_sdk/plugins/categories/data_validation/templates/plugin/validation.py +33 -5
- synapse_sdk/plugins/categories/export/actions/__init__.py +3 -0
- synapse_sdk/plugins/categories/export/actions/export/__init__.py +28 -0
- synapse_sdk/plugins/categories/export/actions/export/action.py +165 -0
- synapse_sdk/plugins/categories/export/actions/export/enums.py +113 -0
- synapse_sdk/plugins/categories/export/actions/export/exceptions.py +53 -0
- synapse_sdk/plugins/categories/export/actions/export/models.py +74 -0
- synapse_sdk/plugins/categories/export/actions/export/run.py +195 -0
- synapse_sdk/plugins/categories/export/actions/export/utils.py +187 -0
- synapse_sdk/plugins/categories/export/templates/config.yaml +21 -0
- synapse_sdk/plugins/categories/export/templates/plugin/__init__.py +390 -0
- synapse_sdk/plugins/categories/export/templates/plugin/export.py +160 -0
- synapse_sdk/plugins/categories/neural_net/actions/deployment.py +13 -12
- synapse_sdk/plugins/categories/neural_net/actions/train.py +1134 -31
- synapse_sdk/plugins/categories/neural_net/actions/tune.py +534 -0
- synapse_sdk/plugins/categories/neural_net/base/inference.py +1 -1
- synapse_sdk/plugins/categories/neural_net/templates/config.yaml +32 -4
- synapse_sdk/plugins/categories/neural_net/templates/plugin/inference.py +26 -10
- synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +4 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/__init__.py +3 -0
- synapse_sdk/plugins/categories/{export/actions/export.py → pre_annotation/actions/pre_annotation/action.py} +4 -4
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/__init__.py +28 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/action.py +148 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/enums.py +269 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/exceptions.py +14 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/factory.py +76 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/models.py +100 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/orchestrator.py +248 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/run.py +64 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/__init__.py +17 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/annotation.py +265 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/base.py +170 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/extraction.py +83 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/metrics.py +92 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py +243 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py +143 -0
- synapse_sdk/plugins/categories/pre_annotation/templates/config.yaml +19 -0
- synapse_sdk/plugins/categories/pre_annotation/templates/plugin/to_task.py +40 -0
- synapse_sdk/plugins/categories/smart_tool/templates/config.yaml +2 -0
- synapse_sdk/plugins/categories/upload/__init__.py +0 -0
- synapse_sdk/plugins/categories/upload/actions/__init__.py +0 -0
- synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +19 -0
- synapse_sdk/plugins/categories/upload/actions/upload/action.py +236 -0
- synapse_sdk/plugins/categories/upload/actions/upload/context.py +185 -0
- synapse_sdk/plugins/categories/upload/actions/upload/enums.py +493 -0
- synapse_sdk/plugins/categories/upload/actions/upload/exceptions.py +36 -0
- synapse_sdk/plugins/categories/upload/actions/upload/factory.py +138 -0
- synapse_sdk/plugins/categories/upload/actions/upload/models.py +214 -0
- synapse_sdk/plugins/categories/upload/actions/upload/orchestrator.py +183 -0
- synapse_sdk/plugins/categories/upload/actions/upload/registry.py +113 -0
- synapse_sdk/plugins/categories/upload/actions/upload/run.py +179 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/base.py +107 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +62 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/collection.py +63 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py +91 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/initialize.py +82 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +235 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +201 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py +104 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/validate.py +71 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/base.py +82 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/batch.py +39 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/single.py +29 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/flat.py +300 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py +287 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/excel.py +174 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/none.py +16 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/sync.py +84 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py +60 -0
- synapse_sdk/plugins/categories/upload/actions/upload/utils.py +250 -0
- synapse_sdk/plugins/categories/upload/templates/README.md +470 -0
- synapse_sdk/plugins/categories/upload/templates/config.yaml +33 -0
- synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py +310 -0
- synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +102 -0
- synapse_sdk/plugins/enums.py +3 -1
- synapse_sdk/plugins/models.py +148 -11
- synapse_sdk/plugins/templates/plugin-config-schema.json +406 -0
- synapse_sdk/plugins/templates/schema.json +491 -0
- synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/config.yaml +1 -0
- synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/requirements.txt +1 -1
- synapse_sdk/plugins/utils/__init__.py +46 -0
- synapse_sdk/plugins/utils/actions.py +119 -0
- synapse_sdk/plugins/utils/config.py +203 -0
- synapse_sdk/plugins/{utils.py → utils/legacy.py} +26 -46
- synapse_sdk/plugins/utils/ray_gcs.py +66 -0
- synapse_sdk/plugins/utils/registry.py +58 -0
- synapse_sdk/shared/__init__.py +25 -0
- synapse_sdk/shared/enums.py +93 -0
- synapse_sdk/types.py +19 -0
- synapse_sdk/utils/converters/__init__.py +240 -0
- synapse_sdk/utils/converters/coco/__init__.py +0 -0
- synapse_sdk/utils/converters/coco/from_dm.py +322 -0
- synapse_sdk/utils/converters/coco/to_dm.py +215 -0
- synapse_sdk/utils/converters/dm/__init__.py +57 -0
- synapse_sdk/utils/converters/dm/base.py +137 -0
- synapse_sdk/utils/converters/dm/from_v1.py +273 -0
- synapse_sdk/utils/converters/dm/to_v1.py +321 -0
- synapse_sdk/utils/converters/dm/tools/__init__.py +214 -0
- synapse_sdk/utils/converters/dm/tools/answer.py +95 -0
- synapse_sdk/utils/converters/dm/tools/bounding_box.py +132 -0
- synapse_sdk/utils/converters/dm/tools/bounding_box_3d.py +121 -0
- synapse_sdk/utils/converters/dm/tools/classification.py +75 -0
- synapse_sdk/utils/converters/dm/tools/keypoint.py +117 -0
- synapse_sdk/utils/converters/dm/tools/named_entity.py +111 -0
- synapse_sdk/utils/converters/dm/tools/polygon.py +122 -0
- synapse_sdk/utils/converters/dm/tools/polyline.py +124 -0
- synapse_sdk/utils/converters/dm/tools/prompt.py +94 -0
- synapse_sdk/utils/converters/dm/tools/relation.py +86 -0
- synapse_sdk/utils/converters/dm/tools/segmentation.py +141 -0
- synapse_sdk/utils/converters/dm/tools/segmentation_3d.py +83 -0
- synapse_sdk/utils/converters/dm/types.py +168 -0
- synapse_sdk/utils/converters/dm/utils.py +162 -0
- synapse_sdk/utils/converters/dm_legacy/__init__.py +56 -0
- synapse_sdk/utils/converters/dm_legacy/from_v1.py +627 -0
- synapse_sdk/utils/converters/dm_legacy/to_v1.py +367 -0
- synapse_sdk/utils/converters/pascal/__init__.py +0 -0
- synapse_sdk/utils/converters/pascal/from_dm.py +244 -0
- synapse_sdk/utils/converters/pascal/to_dm.py +214 -0
- synapse_sdk/utils/converters/yolo/__init__.py +0 -0
- synapse_sdk/utils/converters/yolo/from_dm.py +384 -0
- synapse_sdk/utils/converters/yolo/to_dm.py +267 -0
- synapse_sdk/utils/dataset.py +46 -0
- synapse_sdk/utils/encryption.py +158 -0
- synapse_sdk/utils/file/__init__.py +58 -0
- synapse_sdk/utils/file/archive.py +32 -0
- synapse_sdk/utils/file/checksum.py +56 -0
- synapse_sdk/utils/file/chunking.py +31 -0
- synapse_sdk/utils/file/download.py +385 -0
- synapse_sdk/utils/file/encoding.py +40 -0
- synapse_sdk/utils/file/io.py +22 -0
- synapse_sdk/utils/file/upload.py +165 -0
- synapse_sdk/utils/file/video/__init__.py +29 -0
- synapse_sdk/utils/file/video/transcode.py +307 -0
- synapse_sdk/utils/file.py.backup +301 -0
- synapse_sdk/utils/http.py +138 -0
- synapse_sdk/utils/network.py +309 -0
- synapse_sdk/utils/storage/__init__.py +72 -0
- synapse_sdk/utils/storage/providers/__init__.py +183 -0
- synapse_sdk/utils/storage/providers/file_system.py +134 -0
- synapse_sdk/utils/storage/providers/gcp.py +13 -0
- synapse_sdk/utils/storage/providers/http.py +190 -0
- synapse_sdk/utils/storage/providers/s3.py +91 -0
- synapse_sdk/utils/storage/providers/sftp.py +47 -0
- synapse_sdk/utils/storage/registry.py +17 -0
- synapse_sdk-2025.12.3.dist-info/METADATA +123 -0
- synapse_sdk-2025.12.3.dist-info/RECORD +279 -0
- {synapse_sdk-1.0.0a23.dist-info → synapse_sdk-2025.12.3.dist-info}/WHEEL +1 -1
- synapse_sdk/clients/backend/dataset.py +0 -51
- synapse_sdk/plugins/categories/import/actions/import.py +0 -10
- synapse_sdk/plugins/cli/__init__.py +0 -21
- synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/.env +0 -24
- synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/.env.dist +0 -24
- synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/main.py +0 -4
- synapse_sdk/utils/file.py +0 -168
- synapse_sdk/utils/storage.py +0 -91
- synapse_sdk-1.0.0a23.dist-info/METADATA +0 -44
- synapse_sdk-1.0.0a23.dist-info/RECORD +0 -114
- /synapse_sdk/{plugins/cli → cli/plugin}/run.py +0 -0
- /synapse_sdk/{plugins/categories/import → clients/validators}/__init__.py +0 -0
- /synapse_sdk/{plugins/categories/import/actions → devtools}/__init__.py +0 -0
- {synapse_sdk-1.0.0a23.dist-info → synapse_sdk-2025.12.3.dist-info}/entry_points.txt +0 -0
- {synapse_sdk-1.0.0a23.dist-info → synapse_sdk-2025.12.3.dist-info/licenses}/LICENSE +0 -0
- {synapse_sdk-1.0.0a23.dist-info → synapse_sdk-2025.12.3.dist-info}/top_level.txt +0 -0

@@ -0,0 +1,215 @@
+import json
+import os
+from typing import IO, Any, Dict
+
+from synapse_sdk.utils.converters import ToDMConverter
+
+
+class COCOToDMConverter(ToDMConverter):
+    """Convert COCO format annotations to DM (Data Manager) format."""
+
+    def __init__(self, root_dir: str = None, is_categorized_dataset: bool = False, is_single_conversion: bool = False):
+        super().__init__(root_dir, is_categorized_dataset, is_single_conversion)
+
+    def convert(self):
+        if self.is_categorized_dataset:
+            splits = self._validate_splits(['train', 'valid'], ['test'])
+            all_split_data = {}
+            for split, split_dir in splits.items():
+                annotation_path = os.path.join(split_dir, 'annotations.json')
+                if not os.path.exists(annotation_path):
+                    raise FileNotFoundError(f'annotations.json not found in {split_dir}')
+                with open(annotation_path, 'r', encoding='utf-8') as f:
+                    coco_data = json.load(f)
+                split_data = self._convert_coco_ann_to_dm(coco_data, split_dir)
+                all_split_data[split] = split_data
+            self.converted_data = all_split_data
+            return all_split_data
+        else:
+            annotation_path = os.path.join(self.root_dir, 'annotations.json')
+            if not os.path.exists(annotation_path):
+                raise FileNotFoundError(f'annotations.json not found in {self.root_dir}')
+            with open(annotation_path, 'r', encoding='utf-8') as f:
+                coco_data = json.load(f)
+            converted_data = self._convert_coco_ann_to_dm(coco_data, self.root_dir)
+            self.converted_data = converted_data
+            return converted_data
+
+    def _convert_coco_ann_to_dm(self, coco_data, base_dir):
+        """Convert COCO annotations to DM format."""
+        dataset_type = coco_data.get('type', 'image')  # Default to 'image' if type is not specified
+        if dataset_type == 'image':
+            return self._process_image_data(coco_data, base_dir)
+        else:
+            raise ValueError(f'Unsupported dataset type: {dataset_type}')
+
+    def _process_image_data(self, coco_data, img_base_dir):
+        """Process COCO image data and convert to DM format."""
+        images = coco_data.get('images', [])
+        annotations = coco_data.get('annotations', [])
+        categories = coco_data.get('categories', [])
+        cat_map = {cat['id']: cat for cat in categories}
+
+        # Build image_id -> annotation list
+        ann_by_img_id = {}
+        for ann in annotations:
+            img_id = ann['image_id']
+            ann_by_img_id.setdefault(img_id, []).append(ann)
+
+        result = {}
+        for img in images:
+            img_id = img['id']
+            img_filename = img['file_name']
+            img_path = os.path.join(img_base_dir, img_filename)
+            anns = ann_by_img_id.get(img_id, [])
+
+            # DM image structure
+            dm_img = {
+                'bounding_box': [],
+                'keypoint': [],
+                'relation': [],
+                'group': [],
+            }
+
+            # Handle bounding_box
+            bbox_ids = []
+            for ann in anns:
+                cat = cat_map.get(ann['category_id'], {})
+                if 'bbox' in ann and ann['bbox']:
+                    bbox_id = self._generate_unique_id()
+                    bbox_ids.append(bbox_id)
+                    dm_img['bounding_box'].append({
+                        'id': bbox_id,
+                        'classification': cat.get('name', str(ann['category_id'])),
+                        'attrs': ann.get('attrs', []),
+                        'data': list(ann['bbox']),
+                    })
+
+            # Handle keypoints
+            for ann in anns:
+                cat = cat_map.get(ann['category_id'], {})
+                attrs = ann.get('attrs', [])
+                if 'keypoints' in ann and ann['keypoints']:
+                    kp_names = cat.get('keypoints', [])
+                    kps = ann['keypoints']
+                    keypoint_ids = []
+                    for idx in range(min(len(kps) // 3, len(kp_names))):
+                        x, y, v = kps[idx * 3 : idx * 3 + 3]
+                        kp_id = self._generate_unique_id()
+                        keypoint_ids.append(kp_id)
+                        dm_img['keypoint'].append({
+                            'id': kp_id,
+                            'classification': kp_names[idx] if idx < len(kp_names) else f'keypoint_{idx}',
+                            'attrs': attrs,
+                            'data': [x, y],
+                        })
+                    group_ids = bbox_ids + keypoint_ids
+                    if group_ids:
+                        dm_img['group'].append({
+                            'id': self._generate_unique_id(),
+                            'classification': cat.get('name', str(ann['category_id'])),
+                            'attrs': attrs,
+                            'data': group_ids,
+                        })
+
+            dm_json = {'images': [dm_img]}
+            result[img_filename] = (dm_json, img_path)
+        return result
+
+    def convert_single_file(self, data: Dict[str, Any], original_file: IO, original_image_name: str) -> Dict[str, Any]:
+        """Convert a single COCO annotation data and corresponding image to DM format.
+
+        Args:
+            data: COCO format data dictionary (JSON content)
+            original_file: File object for the corresponding original image
+            original_image_name: Original image name
+
+        Returns:
+            Dictionary containing DM format data for the single file
+        """
+        if not self.is_single_conversion:
+            raise RuntimeError('convert_single_file is only available when is_single_conversion=True')
+
+        images = data.get('images', [])
+        annotations = data.get('annotations', [])
+        categories = data.get('categories', [])
+
+        if not images:
+            raise ValueError('No images found in COCO data')
+
+        # Get file name from original_file
+        img_path = getattr(original_file, 'name', None)
+        if not img_path:
+            raise ValueError('original_file must have a "name" attribute representing its path or filename.')
+        img_basename = os.path.basename(img_path)
+
+        # Find the matching image info in COCO 'images' section by comparing file name
+        # COCO image dicts might use 'file_name', 'filename', or similar
+        matched_img = None
+        for img in images:
+            for key in ['file_name', 'filename', 'name']:
+                if key in img and os.path.basename(img[key]) == original_image_name:
+                    matched_img = img
+                    break
+            if matched_img:
+                break
+
+        if not matched_img:
+            raise ValueError(f'No matching image found in COCO data for file: {img_basename}')
+
+        img_id = matched_img['id']
+        cat_map = {cat['id']: cat for cat in categories}
+        anns = [ann for ann in annotations if ann['image_id'] == img_id]
+
+        dm_img = {
+            'bounding_box': [],
+            'keypoint': [],
+            'relation': [],
+            'group': [],
+        }
+
+        bbox_ids = []
+        for ann in anns:
+            cat = cat_map.get(ann['category_id'], {})
+            if 'bbox' in ann and ann['bbox']:
+                bbox_id = self._generate_unique_id()
+                bbox_ids.append(bbox_id)
+                dm_img['bounding_box'].append({
+                    'id': bbox_id,
+                    'classification': cat.get('name', str(ann['category_id'])),
+                    'attrs': ann.get('attrs', []),
+                    'data': list(ann['bbox']),
+                })
+
+        for ann in anns:
+            cat = cat_map.get(ann['category_id'], {})
+            attrs = ann.get('attrs', [])
+            if 'keypoints' in ann and ann['keypoints']:
+                kp_names = cat.get('keypoints', [])
+                kps = ann['keypoints']
+                keypoint_ids = []
+                for idx in range(min(len(kps) // 3, len(kp_names))):
+                    x, y, _ = kps[idx * 3 : idx * 3 + 3]
+                    kp_id = self._generate_unique_id()
+                    keypoint_ids.append(kp_id)
+                    dm_img['keypoint'].append({
+                        'id': kp_id,
+                        'classification': kp_names[idx] if idx < len(kp_names) else f'keypoint_{idx}',
+                        'attrs': attrs,
+                        'data': [x, y],
+                    })
+                group_ids = bbox_ids + keypoint_ids
+                if group_ids:
+                    dm_img['group'].append({
+                        'id': self._generate_unique_id(),
+                        'classification': cat.get('name', str(ann['category_id'])),
+                        'attrs': attrs,
+                        'data': group_ids,
+                    })
+
+        dm_json = {'images': [dm_img]}
+        return {
+            'dm_json': dm_json,
+            'image_path': img_path,
+            'image_name': img_basename,
+        }
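
The hunk above matches the +215 entry for synapse_sdk/utils/converters/coco/to_dm.py in the file list. A minimal usage sketch, assuming a dataset directory that contains annotations.json next to the images it references; the path below is hypothetical, and only the constructor arguments and the {filename: (dm_json, image_path)} return shape come from the code above:

    from synapse_sdk.utils.converters.coco.to_dm import COCOToDMConverter

    # Hypothetical dataset root: annotations.json plus the images it lists.
    converter = COCOToDMConverter(root_dir='/data/coco_export')
    converted = converter.convert()  # {filename: (dm_json, image_path), ...}

    for filename, (dm_json, image_path) in converted.items():
        dm_image = dm_json['images'][0]
        print(filename, len(dm_image['bounding_box']), len(dm_image['keypoint']))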

@@ -0,0 +1,57 @@
+"""
+DM Schema V1/V2 Bidirectional Converter
+
+"""
+
+from typing import Any
+
+from .types import (
+    AnnotationMeta,
+    V2AnnotationData,
+    V2ConversionResult,
+)
+
+
+def convert_v1_to_v2(v1_data: dict[str, Any]) -> V2ConversionResult:
+    """Convert DM Schema V1 data to V2 (separated result)
+
+    Args:
+        v1_data: DM Schema V1 format data
+
+    Returns:
+        V2ConversionResult: Separated conversion result
+            - annotation_data: V2 common annotation structure
+            - annotation_meta: Preserved V1 top-level structure
+    """
+    from .from_v1 import DMV1ToV2Converter
+
+    converter = DMV1ToV2Converter()
+    return converter.convert(v1_data)
+
+
+def convert_v2_to_v1(
+    v2_data: V2ConversionResult | dict[str, Any],
+    annotation_meta: AnnotationMeta | None = None,
+) -> dict[str, Any]:
+    """Convert DM Schema V2 data to V1
+
+    Args:
+        v2_data: DM Schema V2 format data
+        annotation_meta: Optional V1 top-level structure passed separately
+
+    Returns:
+        DM Schema V1 format data
+    """
+    from .to_v1 import DMV2ToV1Converter
+
+    converter = DMV2ToV1Converter()
+    return converter.convert(v2_data, annotation_meta)
+
+
+__all__ = [
+    'convert_v1_to_v2',
+    'convert_v2_to_v1',
+    'V2ConversionResult',
+    'V2AnnotationData',
+    'AnnotationMeta',
+]
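
This +57 hunk lines up with synapse_sdk/utils/converters/dm/__init__.py. A short sketch of the documented entry point, using a deliberately minimal V1 payload (both required top-level fields present, no annotations) so that only code shown in this diff is exercised:

    from synapse_sdk.utils.converters.dm import convert_v1_to_v2

    # Smallest payload the V1 -> V2 converter accepts: both required fields present.
    v1_data = {'annotations': {}, 'annotationsData': {}}

    result = convert_v1_to_v2(v1_data)
    annotation_data = result['annotation_data']  # V2 common structure; here just {'classification': {}}
    annotation_meta = result['annotation_meta']  # V1 top-level structure preserved as-is

convert_v2_to_v1 is the reverse entry point; its DMV2ToV1Converter lives in dm/to_v1.py (+321 in the file list) and is not shown in this excerpt.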

@@ -0,0 +1,137 @@
+"""
+DM Schema V1/V2 Converter Base Class
+
+Created: 2025-12-11
+"""
+
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING, Any
+
+from .types import MEDIA_TYPE_MAP, SUPPORTED_FILE_TYPES
+from .utils import detect_file_type, extract_media_type_info
+
+if TYPE_CHECKING:
+    from .tools import ToolProcessor
+
+
+class BaseDMConverter(ABC):
+    """DM Schema Converter Base Class
+
+    Abstract base class for all DM converters.
+
+    Attributes:
+        file_type: File type to process (None for auto-detection)
+        SUPPORTED_FILE_TYPES: Tuple of supported file types
+        MEDIA_TYPE_MAP: Media type mapping dictionary
+
+    Example:
+        >>> class MyConverter(BaseDMConverter):
+        ...     def convert(self, data):
+        ...         # implementation
+        ...         pass
+    """
+
+    SUPPORTED_FILE_TYPES = SUPPORTED_FILE_TYPES
+    MEDIA_TYPE_MAP = MEDIA_TYPE_MAP
+
+    def __init__(self, file_type: str | None = None) -> None:
+        """
+        Args:
+            file_type: File type to process (None for auto-detection)
+
+        Raises:
+            ValueError: Unsupported file type
+        """
+        if file_type is not None and file_type not in self.SUPPORTED_FILE_TYPES:
+            raise ValueError(
+                f'Unsupported file type: {file_type}. Supported types: {", ".join(self.SUPPORTED_FILE_TYPES)}'
+            )
+        self.file_type = file_type
+        self._tool_processors: dict[str, 'ToolProcessor'] = {}
+        self._setup_tool_processors()
+
+    @abstractmethod
+    def _setup_tool_processors(self) -> None:
+        """Register tool processors
+
+        Subclasses implement this to register supported tool processors.
+
+        Example:
+            >>> def _setup_tool_processors(self):
+            ...     from .tools import BoundingBoxProcessor, PolygonProcessor
+            ...     self.register_processor(BoundingBoxProcessor())
+            ...     self.register_processor(PolygonProcessor())
+        """
+        ...
+
+    def register_processor(self, processor: 'ToolProcessor') -> None:
+        """Register a tool processor
+
+        Use this method to register processors when adding new tool support.
+        Allows extension without modifying existing code (AR-001).
+
+        Args:
+            processor: ToolProcessor implementation
+
+        Example:
+            >>> class KeypointProcessor:
+            ...     tool_name = "keypoint"
+            ...     def to_v2(self, v1_annotation, v1_data): ...
+            ...     def to_v1(self, v2_annotation): ...
+            >>> converter.register_processor(KeypointProcessor())
+        """
+        self._tool_processors[processor.tool_name] = processor
+
+    def get_processor(self, tool_name: str) -> 'ToolProcessor | None':
+        """Get a registered tool processor
+
+        Args:
+            tool_name: Tool name (e.g., 'bounding_box', 'polygon')
+
+        Returns:
+            Registered processor or None
+        """
+        return self._tool_processors.get(tool_name)
+
+    @abstractmethod
+    def convert(self, data: dict[str, Any]) -> dict[str, Any]:
+        """Perform data conversion
+
+        Args:
+            data: Input data (V1 or V2)
+
+        Returns:
+            Converted data (V2 or V1)
+
+        Raises:
+            ValueError: Data cannot be converted
+        """
+        ...
+
+    def _detect_file_type(self, data: dict[str, Any], is_v2: bool = False) -> str:
+        """Auto-detect file type from data
+
+        Args:
+            data: Input data
+            is_v2: Whether the format is V2
+
+        Returns:
+            Detected file type ('image', 'video', etc.)
+
+        Raises:
+            ValueError: Unable to detect file type
+        """
+        if self.file_type:
+            return self.file_type
+        return detect_file_type(data, is_v2)
+
+    def _extract_media_type_info(self, media_id: str) -> tuple[str, str]:
+        """Extract type information from media ID
+
+        Args:
+            media_id: Media ID (e.g., 'image_1', 'video_2')
+
+        Returns:
+            (singular, plural) tuple (e.g., ('image', 'images'))
+        """
+        return extract_media_type_info(media_id)
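
The +137 hunk corresponds to synapse_sdk/utils/converters/dm/base.py. A minimal subclass sketch following the docstrings above; EchoProcessor and EchoConverter are hypothetical names, and the only processor contract assumed is the duck-typed tool_name/to_v2/to_v1 surface that register_processor's docstring describes:

    from typing import Any

    from synapse_sdk.utils.converters.dm.base import BaseDMConverter


    class EchoProcessor:
        # Hypothetical processor: just the tool_name/to_v2/to_v1 surface BaseDMConverter relies on.
        tool_name = 'bounding_box'

        def to_v2(self, v1_annotation: dict[str, Any], v1_data: dict[str, Any]) -> dict[str, Any]:
            return {'annotation': v1_annotation, 'data': v1_data}

        def to_v1(self, v2_annotation: dict[str, Any]) -> dict[str, Any]:
            return v2_annotation['annotation']


    class EchoConverter(BaseDMConverter):
        # Hypothetical converter: registers one processor and passes data through unchanged.
        def _setup_tool_processors(self) -> None:
            self.register_processor(EchoProcessor())

        def convert(self, data: dict[str, Any]) -> dict[str, Any]:
            return data


    converter = EchoConverter()  # file_type=None -> _detect_file_type falls back to auto-detection
    assert converter.get_processor('bounding_box') is not None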

@@ -0,0 +1,273 @@
+"""
+DM Schema V1 → V2 Converter
+
+Created: 2025-12-11
+
+V1→V2 conversion separates the result into annotation_data and annotation_meta.
+"""
+
+from typing import Any
+
+from .base import BaseDMConverter
+from .types import (
+    AnnotationMeta,
+    V2AnnotationData,
+    V2ConversionResult,
+)
+
+
+class DMV1ToV2Converter(BaseDMConverter):
+    """Converter from DM Schema V1 to V2
+
+    V1→V2 conversion separates the result into annotation_data and annotation_meta.
+
+    Example:
+        >>> converter = DMV1ToV2Converter()
+        >>> result = converter.convert(v1_data)
+        >>> annotation_data = result["annotation_data"]
+        >>> annotation_meta = result["annotation_meta"]
+    """
+
+    def _setup_tool_processors(self) -> None:
+        """Register tool processors"""
+        from .tools.bounding_box import BoundingBoxProcessor
+
+        self.register_processor(BoundingBoxProcessor())
+
+        # polygon to be added later
+        try:
+            from .tools.polygon import PolygonProcessor
+
+            self.register_processor(PolygonProcessor())
+        except ImportError:
+            pass
+
+        try:
+            from .tools.polyline import PolylineProcessor
+
+            self.register_processor(PolylineProcessor())
+        except ImportError:
+            pass
+
+        try:
+            from .tools.keypoint import KeypointProcessor
+
+            self.register_processor(KeypointProcessor())
+        except ImportError:
+            pass
+
+        try:
+            from .tools.bounding_box_3d import BoundingBox3DProcessor
+
+            self.register_processor(BoundingBox3DProcessor())
+        except ImportError:
+            pass
+
+        try:
+            from .tools.segmentation import SegmentationProcessor
+
+            self.register_processor(SegmentationProcessor())
+        except ImportError:
+            pass
+
+        try:
+            from .tools.named_entity import NamedEntityProcessor
+
+            self.register_processor(NamedEntityProcessor())
+        except ImportError:
+            pass
+
+        try:
+            from .tools.segmentation_3d import Segmentation3DProcessor
+
+            self.register_processor(Segmentation3DProcessor())
+        except ImportError:
+            pass
+
+        try:
+            from .tools.classification import ClassificationProcessor
+
+            self.register_processor(ClassificationProcessor())
+        except ImportError:
+            pass
+
+        try:
+            from .tools.relation import RelationProcessor
+
+            self.register_processor(RelationProcessor())
+        except ImportError:
+            pass
+
+        try:
+            from .tools.prompt import PromptProcessor
+
+            self.register_processor(PromptProcessor())
+        except ImportError:
+            pass
+
+        try:
+            from .tools.answer import AnswerProcessor
+
+            self.register_processor(AnswerProcessor())
+        except ImportError:
+            pass
+
+    def convert(self, v1_data: dict[str, Any]) -> V2ConversionResult:
+        """Convert V1 data to V2 format (separated result)
+
+        Args:
+            v1_data: DM Schema V1 format data
+
+        Returns:
+            V2ConversionResult: Separated conversion result
+                - annotation_data: V2 common annotation structure
+                - annotation_meta: Preserved V1 top-level structure
+
+        Raises:
+            ValueError: Missing required fields or invalid format
+        """
+        # Input validation
+        if 'annotations' not in v1_data:
+            raise ValueError("V1 data requires 'annotations' field")
+        if 'annotationsData' not in v1_data:
+            raise ValueError("V1 data requires 'annotationsData' field")
+
+        # Create annotation_data
+        annotation_data = self._build_annotation_data(v1_data)
+
+        # Create annotation_meta (preserve V1 top-level structure)
+        annotation_meta = self._build_annotation_meta(v1_data)
+
+        return {
+            'annotation_data': annotation_data,
+            'annotation_meta': annotation_meta,
+        }
+
+    def _build_annotation_data(self, v1_data: dict[str, Any]) -> V2AnnotationData:
+        """Create annotation_data (V2 common structure) from V1 data
+
+        Args:
+            v1_data: V1 data
+
+        Returns:
+            V2 common annotation structure
+        """
+        annotations = v1_data.get('annotations', {})
+        annotations_data = v1_data.get('annotationsData', {})
+
+        # Build classification map
+        classification_map = self._build_classification_map(annotations)
+
+        # Convert annotations by media type
+        result: V2AnnotationData = {
+            'classification': classification_map,
+        }
+
+        # Process by media ID
+        for media_id, ann_list in annotations.items():
+            # Detect media type
+            singular_type, plural_type = self._extract_media_type_info(media_id)
+
+            # Initialize media type array
+            if plural_type not in result:
+                result[plural_type] = []
+
+            # Convert media item
+            media_item = self._convert_media_item(media_id, ann_list, annotations_data.get(media_id, []))
+
+            result[plural_type].append(media_item)
+
+        return result
+
+    def _build_annotation_meta(self, v1_data: dict[str, Any]) -> AnnotationMeta:
+        """Create annotation_meta (V1 top-level structure) from V1 data
+
+        Args:
+            v1_data: Complete V1 data
+
+        Returns:
+            V1 top-level structure (preserved as-is)
+        """
+        return {
+            'extra': v1_data.get('extra', {}),
+            'annotations': v1_data.get('annotations', {}),
+            'annotationsData': v1_data.get('annotationsData', {}),
+            'relations': v1_data.get('relations', {}),
+            'annotationGroups': v1_data.get('annotationGroups', {}),
+            'assignmentId': v1_data.get('assignmentId'),
+        }
+
+    def _build_classification_map(self, annotations: dict[str, list[dict[str, Any]]]) -> dict[str, list[str]]:
+        """Build classification map from annotations
+
+        Args:
+            annotations: V1 annotations data
+
+        Returns:
+            Class label map by tool
+            e.g., {"bounding_box": ["person", "car"], "polygon": ["road"]}
+        """
+        classification_map: dict[str, set[str]] = {}
+
+        for media_id, ann_list in annotations.items():
+            for ann in ann_list:
+                tool = ann.get('tool', '')
+                classification_obj = ann.get('classification') or {}
+                class_label = classification_obj.get('class', '')
+
+                if tool and class_label:
+                    if tool not in classification_map:
+                        classification_map[tool] = set()
+                    classification_map[tool].add(class_label)
+
+        # Convert set to list
+        return {tool: sorted(list(labels)) for tool, labels in classification_map.items()}
+
+    def _convert_media_item(
+        self,
+        media_id: str,
+        annotations: list[dict[str, Any]],
+        annotations_data: list[dict[str, Any]],
+    ) -> dict[str, list[dict[str, Any]]]:
+        """Convert annotations for a single media item
+
+        Args:
+            media_id: Media ID
+            annotations: V1 annotations for this media
+            annotations_data: V1 annotationsData for this media
+
+        Returns:
+            V2 annotations grouped by tool
+        """
+        # Create ID → annotationData mapping
+        data_by_id = {item['id']: item for item in annotations_data if 'id' in item}
+
+        # Group by tool
+        result: dict[str, list[dict[str, Any]]] = {}
+
+        for ann in annotations:
+            ann_id = ann.get('id', '')
+            tool = ann.get('tool', '')
+
+            if not tool:
+                continue
+
+            # Get processor
+            processor = self.get_processor(tool)
+            if not processor:
+                # Raise error for unsupported tool
+                supported_tools = list(self._tool_processors.keys())
+                raise ValueError(f"Unsupported tool: '{tool}'. Supported tools: {', '.join(sorted(supported_tools))}")
+
+            # Find annotationData for this ID
+            ann_data = data_by_id.get(ann_id, {})
+
+            # Convert to V2
+            v2_annotation = processor.to_v2(ann, ann_data)
+
+            # Group by tool
+            if tool not in result:
+                result[tool] = []
+            result[tool].append(v2_annotation)
+
+        return result
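
This +273 hunk matches synapse_sdk/utils/converters/dm/from_v1.py. A sketch of the behaviour that is fully visible in the hunk itself: the two required top-level fields, and the strict unsupported-tool error. The payload and the tool name 'not_a_tool' are illustrative; 'image_1' follows the media-ID format documented in base.py.

    from synapse_sdk.utils.converters.dm.from_v1 import DMV1ToV2Converter

    converter = DMV1ToV2Converter()

    # Missing 'annotationsData' is rejected before any conversion happens.
    try:
        converter.convert({'annotations': {}})
    except ValueError as exc:
        print(exc)  # V1 data requires 'annotationsData' field

    # Annotations whose tool has no registered processor raise instead of being silently dropped.
    v1_data = {
        'annotations': {'image_1': [{'id': 'a1', 'tool': 'not_a_tool', 'classification': {'class': 'person'}}]},
        'annotationsData': {'image_1': [{'id': 'a1'}]},
    }
    try:
        converter.convert(v1_data)
    except ValueError as exc:
        print(exc)  # Unsupported tool: 'not_a_tool'. Supported tools: ...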