synapse-sdk 1.0.0b5__py3-none-any.whl → 2025.12.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synapse_sdk/__init__.py +24 -0
- synapse_sdk/cli/code_server.py +305 -33
- synapse_sdk/clients/agent/__init__.py +2 -1
- synapse_sdk/clients/agent/container.py +143 -0
- synapse_sdk/clients/agent/ray.py +296 -38
- synapse_sdk/clients/backend/annotation.py +1 -1
- synapse_sdk/clients/backend/core.py +31 -4
- synapse_sdk/clients/backend/data_collection.py +82 -7
- synapse_sdk/clients/backend/hitl.py +1 -1
- synapse_sdk/clients/backend/ml.py +1 -1
- synapse_sdk/clients/base.py +211 -61
- synapse_sdk/loggers.py +46 -0
- synapse_sdk/plugins/README.md +1340 -0
- synapse_sdk/plugins/categories/base.py +59 -9
- synapse_sdk/plugins/categories/export/actions/__init__.py +3 -0
- synapse_sdk/plugins/categories/export/actions/export/__init__.py +28 -0
- synapse_sdk/plugins/categories/export/actions/export/action.py +165 -0
- synapse_sdk/plugins/categories/export/actions/export/enums.py +113 -0
- synapse_sdk/plugins/categories/export/actions/export/exceptions.py +53 -0
- synapse_sdk/plugins/categories/export/actions/export/models.py +74 -0
- synapse_sdk/plugins/categories/export/actions/export/run.py +195 -0
- synapse_sdk/plugins/categories/export/actions/export/utils.py +187 -0
- synapse_sdk/plugins/categories/export/templates/config.yaml +19 -1
- synapse_sdk/plugins/categories/export/templates/plugin/__init__.py +390 -0
- synapse_sdk/plugins/categories/export/templates/plugin/export.py +153 -177
- synapse_sdk/plugins/categories/neural_net/actions/train.py +1130 -32
- synapse_sdk/plugins/categories/neural_net/actions/tune.py +157 -4
- synapse_sdk/plugins/categories/neural_net/templates/config.yaml +7 -4
- synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +4 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/__init__.py +3 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/action.py +10 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/__init__.py +28 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/action.py +148 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/enums.py +269 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/exceptions.py +14 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/factory.py +76 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/models.py +100 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/orchestrator.py +248 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/run.py +64 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/__init__.py +17 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/annotation.py +265 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/base.py +170 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/extraction.py +83 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/metrics.py +92 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py +243 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py +143 -0
- synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +19 -0
- synapse_sdk/plugins/categories/upload/actions/upload/action.py +236 -0
- synapse_sdk/plugins/categories/upload/actions/upload/context.py +185 -0
- synapse_sdk/plugins/categories/upload/actions/upload/enums.py +493 -0
- synapse_sdk/plugins/categories/upload/actions/upload/exceptions.py +36 -0
- synapse_sdk/plugins/categories/upload/actions/upload/factory.py +138 -0
- synapse_sdk/plugins/categories/upload/actions/upload/models.py +214 -0
- synapse_sdk/plugins/categories/upload/actions/upload/orchestrator.py +183 -0
- synapse_sdk/plugins/categories/upload/actions/upload/registry.py +113 -0
- synapse_sdk/plugins/categories/upload/actions/upload/run.py +179 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/base.py +107 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +62 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/collection.py +63 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py +91 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/initialize.py +82 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +235 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +201 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py +104 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/validate.py +71 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/base.py +82 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/batch.py +39 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/single.py +29 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/flat.py +300 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py +287 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/excel.py +174 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/none.py +16 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/sync.py +84 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py +60 -0
- synapse_sdk/plugins/categories/upload/actions/upload/utils.py +250 -0
- synapse_sdk/plugins/categories/upload/templates/README.md +470 -0
- synapse_sdk/plugins/categories/upload/templates/config.yaml +28 -2
- synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py +310 -0
- synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +82 -20
- synapse_sdk/plugins/models.py +111 -9
- synapse_sdk/plugins/templates/plugin-config-schema.json +7 -0
- synapse_sdk/plugins/templates/schema.json +7 -0
- synapse_sdk/plugins/utils/__init__.py +3 -0
- synapse_sdk/plugins/utils/ray_gcs.py +66 -0
- synapse_sdk/shared/__init__.py +25 -0
- synapse_sdk/utils/converters/dm/__init__.py +42 -41
- synapse_sdk/utils/converters/dm/base.py +137 -0
- synapse_sdk/utils/converters/dm/from_v1.py +208 -562
- synapse_sdk/utils/converters/dm/to_v1.py +258 -304
- synapse_sdk/utils/converters/dm/tools/__init__.py +214 -0
- synapse_sdk/utils/converters/dm/tools/answer.py +95 -0
- synapse_sdk/utils/converters/dm/tools/bounding_box.py +132 -0
- synapse_sdk/utils/converters/dm/tools/bounding_box_3d.py +121 -0
- synapse_sdk/utils/converters/dm/tools/classification.py +75 -0
- synapse_sdk/utils/converters/dm/tools/keypoint.py +117 -0
- synapse_sdk/utils/converters/dm/tools/named_entity.py +111 -0
- synapse_sdk/utils/converters/dm/tools/polygon.py +122 -0
- synapse_sdk/utils/converters/dm/tools/polyline.py +124 -0
- synapse_sdk/utils/converters/dm/tools/prompt.py +94 -0
- synapse_sdk/utils/converters/dm/tools/relation.py +86 -0
- synapse_sdk/utils/converters/dm/tools/segmentation.py +141 -0
- synapse_sdk/utils/converters/dm/tools/segmentation_3d.py +83 -0
- synapse_sdk/utils/converters/dm/types.py +168 -0
- synapse_sdk/utils/converters/dm/utils.py +162 -0
- synapse_sdk/utils/converters/dm_legacy/__init__.py +56 -0
- synapse_sdk/utils/converters/dm_legacy/from_v1.py +627 -0
- synapse_sdk/utils/converters/dm_legacy/to_v1.py +367 -0
- synapse_sdk/utils/file/__init__.py +58 -0
- synapse_sdk/utils/file/archive.py +32 -0
- synapse_sdk/utils/file/checksum.py +56 -0
- synapse_sdk/utils/file/chunking.py +31 -0
- synapse_sdk/utils/file/download.py +385 -0
- synapse_sdk/utils/file/encoding.py +40 -0
- synapse_sdk/utils/file/io.py +22 -0
- synapse_sdk/utils/file/upload.py +165 -0
- synapse_sdk/utils/file/video/__init__.py +29 -0
- synapse_sdk/utils/file/video/transcode.py +307 -0
- synapse_sdk/utils/{file.py → file.py.backup} +77 -0
- synapse_sdk/utils/network.py +272 -0
- synapse_sdk/utils/storage/__init__.py +6 -2
- synapse_sdk/utils/storage/providers/file_system.py +6 -0
- {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/METADATA +19 -2
- {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/RECORD +134 -74
- synapse_sdk/devtools/docs/.gitignore +0 -20
- synapse_sdk/devtools/docs/README.md +0 -41
- synapse_sdk/devtools/docs/blog/2019-05-28-first-blog-post.md +0 -12
- synapse_sdk/devtools/docs/blog/2019-05-29-long-blog-post.md +0 -44
- synapse_sdk/devtools/docs/blog/2021-08-01-mdx-blog-post.mdx +0 -24
- synapse_sdk/devtools/docs/blog/2021-08-26-welcome/docusaurus-plushie-banner.jpeg +0 -0
- synapse_sdk/devtools/docs/blog/2021-08-26-welcome/index.md +0 -29
- synapse_sdk/devtools/docs/blog/authors.yml +0 -25
- synapse_sdk/devtools/docs/blog/tags.yml +0 -19
- synapse_sdk/devtools/docs/docusaurus.config.ts +0 -138
- synapse_sdk/devtools/docs/package-lock.json +0 -17455
- synapse_sdk/devtools/docs/package.json +0 -47
- synapse_sdk/devtools/docs/sidebars.ts +0 -44
- synapse_sdk/devtools/docs/src/components/HomepageFeatures/index.tsx +0 -71
- synapse_sdk/devtools/docs/src/components/HomepageFeatures/styles.module.css +0 -11
- synapse_sdk/devtools/docs/src/css/custom.css +0 -30
- synapse_sdk/devtools/docs/src/pages/index.module.css +0 -23
- synapse_sdk/devtools/docs/src/pages/index.tsx +0 -21
- synapse_sdk/devtools/docs/src/pages/markdown-page.md +0 -7
- synapse_sdk/devtools/docs/static/.nojekyll +0 -0
- synapse_sdk/devtools/docs/static/img/docusaurus-social-card.jpg +0 -0
- synapse_sdk/devtools/docs/static/img/docusaurus.png +0 -0
- synapse_sdk/devtools/docs/static/img/favicon.ico +0 -0
- synapse_sdk/devtools/docs/static/img/logo.png +0 -0
- synapse_sdk/devtools/docs/static/img/undraw_docusaurus_mountain.svg +0 -171
- synapse_sdk/devtools/docs/static/img/undraw_docusaurus_react.svg +0 -170
- synapse_sdk/devtools/docs/static/img/undraw_docusaurus_tree.svg +0 -40
- synapse_sdk/devtools/docs/tsconfig.json +0 -8
- synapse_sdk/plugins/categories/export/actions/export.py +0 -346
- synapse_sdk/plugins/categories/export/enums.py +0 -7
- synapse_sdk/plugins/categories/neural_net/actions/gradio.py +0 -151
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task.py +0 -943
- synapse_sdk/plugins/categories/upload/actions/upload.py +0 -954
- {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/WHEEL +0 -0
- {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/entry_points.txt +0 -0
- {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/licenses/LICENSE +0 -0
- {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,367 @@
|
|
|
1
|
+
import random
|
|
2
|
+
import string
|
|
3
|
+
|
|
4
|
+
from . import BaseDMConverter
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class DMV2ToV1Converter(BaseDMConverter):
    """DM v2 to v1 format converter class."""

    def __init__(self, new_dm_data=None, file_type=None):
        """Initialize the converter.

        Args:
            new_dm_data (dict, optional): DM v2 format data to be converted.
                Defaults to an empty dict when omitted.
            file_type (str, optional): Type of file being converted
                (image, video, pcd, text, audio). Auto-detected when omitted.
        """
        # Fix: the previous signature used a mutable default ({}), which is
        # shared across every call and can leak state between instances.
        if new_dm_data is None:
            new_dm_data = {}

        # Auto-detect file type if not provided
        if file_type is None:
            file_type = self._detect_file_type(new_dm_data)

        super().__init__(file_type=file_type)
        self.new_dm_data = new_dm_data
        self.annotations = {}
        self.annotations_data = {}
        self.extra = {}
        self.relations = {}
        self.annotation_groups = {}

    def _detect_file_type(self, data):
        """Auto-detect file type from the data structure.

        Args:
            data (dict): DM v2 format data.

        Returns:
            str | None: Detected file type (image, video, pcd, text, audio),
                or None when no known media key is present.
        """
        if not data:
            return None

        # Media content lives under plural keys; map them back to the
        # singular type. Order matches the original precedence.
        for plural, singular in (
            ('images', 'image'),
            ('videos', 'video'),
            ('pcds', 'pcd'),
            ('texts', 'text'),
            ('audios', 'audio'),
        ):
            if plural in data:
                return singular

        return None

    def convert(self):
        """Convert DM v2 data to v1 format.

        Returns:
            dict: Converted data in DM v1 format with ``extra``, ``relations``,
                ``annotations``, ``annotationsData`` and ``annotationGroups``
                keyed by generated media ids (e.g. ``image_1``).
        """
        # Reset state so repeated convert() calls do not accumulate results.
        new_dm_data = self.new_dm_data
        self.annotations = {}
        self.annotations_data = {}
        self.extra = {}
        self.relations = {}
        self.annotation_groups = {}

        # Process each media type (images, videos, etc.)
        for media_type_plural, media_items in new_dm_data.items():
            # Top-level classification is not media content; skip it here.
            if media_type_plural == 'classification':
                continue

            media_type = self._singularize_media_type(media_type_plural)

            # Media ids are 1-based: image_1, image_2, ...
            for index, media_item in enumerate(media_items, 1):
                media_id = f'{media_type}_{index}'

                # Initialize structures for this media
                self.annotations[media_id] = []
                self.annotations_data[media_id] = []
                self.extra[media_id] = {}
                self.relations[media_id] = []
                self.annotation_groups[media_id] = []

                # Process each tool type in the media item
                for tool_type, tool_data in media_item.items():
                    self._process_tool_data(media_id, tool_type, tool_data)

        # Build final result
        return {
            'extra': self.extra,
            'relations': self.relations,
            'annotations': self.annotations,
            'annotationsData': self.annotations_data,
            'annotationGroups': self.annotation_groups,
        }

    def _process_tool_data(self, media_id, tool_type, tool_data):
        """Process tool data for a specific media item.

        Args:
            media_id (str): ID of the media item.
            tool_type (str): Type of annotation tool.
            tool_data (list): List of annotation data for this tool.
        """
        for annotation in tool_data:
            annotation_id = annotation['id']
            classification = annotation['classification']
            attrs = annotation.get('attrs', [])
            data = annotation.get('data', {})

            # Create annotation entry
            annotation_entry = {
                'id': annotation_id,
                'tool': tool_type,
                'isLocked': False,
                'isVisible': True,
                'classification': {'class': classification},
            }

            # Fold additional attrs into the classification payload; only
            # named attrs with a non-None value are kept.
            for attr in attrs:
                attr_name = attr.get('name')
                attr_value = attr.get('value')
                if attr_name and attr_value is not None:
                    annotation_entry['classification'][attr_name] = attr_value

            # Keypoints carry a default shape in the v1 format.
            if tool_type == 'keypoint':
                annotation_entry['shape'] = 'circle'

            self.annotations[media_id].append(annotation_entry)

            # Delegate data conversion to the registered tool processor
            # (tool_processors is provided by BaseDMConverter).
            processor = self.tool_processors.get(tool_type)
            if processor:
                processor(annotation_id, data, self.annotations_data[media_id])
            else:
                self._handle_unknown_tool(tool_type, annotation_id)

    def _points_to_coordinates(self, data):
        """Normalize 2D point data into v1 coordinate dicts.

        Accepts either nested points ``[[x1, y1], [x2, y2], ...]`` or a flat
        list ``[x1, y1, x2, y2, ...]``; each coordinate gets a random id.

        Args:
            data (list): Point data in either supported layout.

        Returns:
            list[dict]: Coordinates of the form ``{'x', 'y', 'id'}``.
        """
        coordinates = []
        if data and isinstance(data[0], list):
            # Nested format: [[x1, y1], [x2, y2], ...]
            for point in data:
                if len(point) >= 2:
                    coordinates.append({'x': point[0], 'y': point[1], 'id': self._generate_random_id()})
        else:
            # Flat format: [x1, y1, x2, y2, ...]; trailing odd value is dropped.
            for i in range(0, len(data), 2):
                if i + 1 < len(data):
                    coordinates.append({'x': data[i], 'y': data[i + 1], 'id': self._generate_random_id()})
        return coordinates

    def _append_segmentation_data(self, annotation_id, data, annotations_data):
        """Append a segmentation entry, keyed by the data's structural form.

        Lists are treated as pixel-index masks; dicts as section payloads.
        Shared by the image/video/3d segmentation converters, which all
        applied identical logic.
        """
        annotation_data = {'id': annotation_id}

        if isinstance(data, list):
            # Pixel-based segmentation
            annotation_data['pixel_indices'] = data
        elif isinstance(data, dict):
            # Section-based segmentation
            annotation_data['section'] = data

        annotations_data.append(annotation_data)

    def _append_merged_data(self, annotation_id, data, annotations_data):
        """Append an entry that merges a dict payload over the annotation id.

        Shared by the prompt and answer converters, which applied identical
        logic. Non-dict payloads are ignored.
        """
        annotation_data = {'id': annotation_id}

        if isinstance(data, dict):
            annotation_data.update(data)

        annotations_data.append(annotation_data)

    def _convert_bounding_box(self, annotation_id, data, annotations_data):
        """Process bounding box annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (list): Bounding box data [x, y, width, height]
                (docstring fix: previously documented as [x1, y1, x2, y2],
                but the values are consumed as width/height)
            annotations_data (list): List to append the processed data
        """
        if len(data) >= 4:
            x, y, width, height = data[:4]
            coordinate = {'x': x, 'y': y, 'width': width, 'height': height}
            annotations_data.append({'id': annotation_id, 'coordinate': coordinate})

    def _convert_named_entity(self, annotation_id, data, annotations_data):
        """Process named entity annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (dict): Named entity data with ranges and content
            annotations_data (list): List to append the processed data
        """
        entity_data = {'id': annotation_id}

        if 'ranges' in data:
            entity_data['ranges'] = data['ranges']

        if 'content' in data:
            entity_data['content'] = data['content']

        annotations_data.append(entity_data)

    def _convert_classification(self, annotation_id, data, annotations_data):
        """Process classification annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (dict): Classification data (usually empty)
            annotations_data (list): List to append the processed data
        """
        # Classification data is typically empty in v2, so we just add the ID
        annotations_data.append({'id': annotation_id})

    def _convert_polyline(self, annotation_id, data, annotations_data):
        """Process polyline annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (list): Polyline data - flat [x1, y1, ...] or nested [[x1, y1], ...]
            annotations_data (list): List to append the processed data
        """
        annotations_data.append({'id': annotation_id, 'coordinate': self._points_to_coordinates(data)})

    def _convert_keypoint(self, annotation_id, data, annotations_data):
        """Process keypoint annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (list): Keypoint data [x, y]
            annotations_data (list): List to append the processed data
        """
        if len(data) >= 2:
            coordinate = {'x': data[0], 'y': data[1]}
            annotations_data.append({'id': annotation_id, 'coordinate': coordinate})

    def _convert_3d_bounding_box(self, annotation_id, data, annotations_data):
        """Process 3D bounding box annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (dict): 3D bounding box PSR data
            annotations_data (list): List to append the processed data
        """
        annotations_data.append({'id': annotation_id, 'psr': data})

    def _convert_image_segmentation(self, annotation_id, data, annotations_data):
        """Process image segmentation annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (list or dict): Segmentation data (pixel_indices or section)
            annotations_data (list): List to append the processed data
        """
        self._append_segmentation_data(annotation_id, data, annotations_data)

    def _convert_video_segmentation(self, annotation_id, data, annotations_data):
        """Process video segmentation annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (list or dict): Segmentation data (pixel_indices or section)
            annotations_data (list): List to append the processed data
        """
        self._append_segmentation_data(annotation_id, data, annotations_data)

    def _convert_3d_segmentation(self, annotation_id, data, annotations_data):
        """Process 3D segmentation annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (list or dict): 3D segmentation data
            annotations_data (list): List to append the processed data
        """
        self._append_segmentation_data(annotation_id, data, annotations_data)

    def _convert_prompt(self, annotation_id, data, annotations_data):
        """Process prompt annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (dict): Prompt data
            annotations_data (list): List to append the processed data
        """
        self._append_merged_data(annotation_id, data, annotations_data)

    def _convert_answer(self, annotation_id, data, annotations_data):
        """Process answer annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (dict): Answer data
            annotations_data (list): List to append the processed data
        """
        self._append_merged_data(annotation_id, data, annotations_data)

    def _convert_polygon(self, annotation_id, data, annotations_data):
        """Process polygon annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (list): Polygon data - flat [x1, y1, ...] or nested [[x1, y1], ...]
            annotations_data (list): List to append the processed data
        """
        annotations_data.append({'id': annotation_id, 'coordinate': self._points_to_coordinates(data)})

    def _convert_relation(self, annotation_id, data, annotations_data):
        """Process relation annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (list): Relation data
            annotations_data (list): List to append the processed data
        """
        annotations_data.append({'id': annotation_id, 'data': data})

    def _convert_group(self, annotation_id, data, annotations_data):
        """Process group annotation data.

        Args:
            annotation_id (str): ID of the annotation
            data (list): Group data
            annotations_data (list): List to append the processed data
        """
        annotations_data.append({'id': annotation_id, 'data': data})

    def _generate_random_id(self):
        """Generate a random ID similar to the original format."""
        # 10-character random string from letters, digits, '-' and '_'.
        chars = string.ascii_letters + string.digits + '-_'
        return ''.join(random.choices(chars, k=10))
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# File utilities module
|
|
2
|
+
# Maintains backward compatibility by re-exporting all functions
|
|
3
|
+
|
|
4
|
+
from .archive import archive, unarchive
|
|
5
|
+
from .checksum import calculate_checksum, get_checksum_from_file
|
|
6
|
+
from .chunking import read_file_in_chunks
|
|
7
|
+
from .download import (
|
|
8
|
+
adownload_file,
|
|
9
|
+
afiles_url_to_path,
|
|
10
|
+
afiles_url_to_path_from_objs,
|
|
11
|
+
download_file,
|
|
12
|
+
files_url_to_path,
|
|
13
|
+
files_url_to_path_from_objs,
|
|
14
|
+
)
|
|
15
|
+
from .encoding import convert_file_to_base64
|
|
16
|
+
from .io import get_dict_from_file, get_temp_path
|
|
17
|
+
from .upload import (
|
|
18
|
+
FilesDict,
|
|
19
|
+
FileProcessingError,
|
|
20
|
+
FileTuple,
|
|
21
|
+
FileUploadError,
|
|
22
|
+
FileValidationError,
|
|
23
|
+
RequestsFile,
|
|
24
|
+
close_file_handles,
|
|
25
|
+
process_files_for_upload,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
__all__ = [
|
|
29
|
+
# Chunking
|
|
30
|
+
'read_file_in_chunks',
|
|
31
|
+
# Download
|
|
32
|
+
'download_file',
|
|
33
|
+
'adownload_file',
|
|
34
|
+
'files_url_to_path',
|
|
35
|
+
'afiles_url_to_path',
|
|
36
|
+
'files_url_to_path_from_objs',
|
|
37
|
+
'afiles_url_to_path_from_objs',
|
|
38
|
+
# Checksum
|
|
39
|
+
'calculate_checksum',
|
|
40
|
+
'get_checksum_from_file',
|
|
41
|
+
# Archive
|
|
42
|
+
'archive',
|
|
43
|
+
'unarchive',
|
|
44
|
+
# Encoding
|
|
45
|
+
'convert_file_to_base64',
|
|
46
|
+
# I/O
|
|
47
|
+
'get_dict_from_file',
|
|
48
|
+
'get_temp_path',
|
|
49
|
+
# Upload
|
|
50
|
+
'process_files_for_upload',
|
|
51
|
+
'close_file_handles',
|
|
52
|
+
'FileUploadError',
|
|
53
|
+
'FileValidationError',
|
|
54
|
+
'FileProcessingError',
|
|
55
|
+
'FileTuple',
|
|
56
|
+
'FilesDict',
|
|
57
|
+
'RequestsFile',
|
|
58
|
+
]
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import zipfile
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def archive(input_path, output_path, append=False):
    """Create (or extend) a ZIP archive from a file or directory.

    Args:
        input_path (str | Path): File or directory to archive.
        output_path (str | Path): Destination ZIP file path.
        append (bool): When True and the archive already exists, add entries
            to it instead of overwriting it.
    """
    source = Path(input_path)
    target = Path(output_path)

    # Appending only makes sense when an archive already exists to extend.
    open_mode = 'a' if append and target.exists() else 'w'
    with zipfile.ZipFile(target, mode=open_mode, compression=zipfile.ZIP_DEFLATED) as bundle:
        if source.is_file():
            bundle.write(source, source.name)
            return
        # Directories are implied by member paths, so only regular files are
        # added. Entries are stored relative to the source's parent, which
        # keeps the top-level directory name inside the archive.
        for member in (p for p in source.rglob('*') if p.is_file()):
            bundle.write(member, member.relative_to(source.parent))
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def unarchive(file_path, output_path):
    """
    Unarchives a ZIP file to a given directory.

    The destination directory (including parents) is created when missing.

    Parameters:
        file_path (str | Path): The path to the ZIP file.
        output_path (str): The directory where the files will be extracted.
    """
    destination = Path(output_path)
    destination.mkdir(parents=True, exist_ok=True)

    with zipfile.ZipFile(str(file_path), 'r') as archive_file:
        archive_file.extractall(destination)
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
from typing import IO, Any, Callable
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def calculate_checksum(file_path, prefix=''):
    """Compute the MD5 hex digest of a file, optionally with a prefix.

    Args:
        file_path (str | Path): Path of the file to hash.
        prefix (str): Optional string prepended to the hex digest.

    Returns:
        str: ``prefix + hexdigest`` when a prefix is given, otherwise the
            bare hex digest.
    """
    md5_hash = hashlib.md5()
    with open(file_path, 'rb') as f:
        # Hash in 4 KiB chunks so large files are not loaded into memory.
        for byte_block in iter(lambda: f.read(4096), b''):
            md5_hash.update(byte_block)
    checksum = md5_hash.hexdigest()
    if prefix:
        # Bug fix: previously any truthy prefix produced a hard-coded
        # 'dev-' prefix; the caller-supplied value was ignored.
        return f'{prefix}{checksum}'
    return checksum
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def get_checksum_from_file(file: IO[Any], digest_mod: Callable[[], Any] = hashlib.sha1) -> str:
    """
    Calculate checksum for a file-like object.

    Args:
        file (IO[Any]): File-like object with read() method that supports reading in chunks
        digest_mod (Callable[[], Any]): Hash algorithm from hashlib (defaults to hashlib.sha1)

    Returns:
        str: Hexadecimal digest of the file contents

    Example:
        ```python
        import hashlib
        from io import BytesIO
        from synapse_sdk.utils.file import get_checksum_from_file

        # With BytesIO
        data = BytesIO(b'Hello, world!')
        checksum = get_checksum_from_file(data)

        # With different hash algorithm
        checksum = get_checksum_from_file(data, digest_mod=hashlib.sha256)
        ```
    """
    hasher = digest_mod()

    # Rewind seekable objects so the whole content is hashed even when the
    # caller has already read from the stream.
    if hasattr(file, 'seek'):
        file.seek(0)

    # Consume the stream in 4 KiB chunks; text-mode streams yield str, so
    # encode those chunks as UTF-8 before hashing.
    while chunk := file.read(4096):
        if isinstance(chunk, str):
            chunk = chunk.encode('utf-8')
        hasher.update(chunk)

    return hasher.hexdigest()
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
def read_file_in_chunks(file_path, chunk_size=1024 * 1024 * 50):
    """
    Read a file in chunks for efficient memory usage during file processing.

    This function is particularly useful for large files or when you need to process
    files in chunks, such as for uploading or hashing.

    Args:
        file_path (str | Path): Path to the file to read
        chunk_size (int, optional): Size of each chunk in bytes. Defaults to 50MB (1024 * 1024 * 50)

    Yields:
        bytes: File content chunks

    Raises:
        FileNotFoundError: If the file doesn't exist
        PermissionError: If the file can't be read due to permissions
        OSError: If there's an OS-level error reading the file

    Example:
        ```python
        from synapse_sdk.utils.file import read_file_in_chunks

        # Read a file in 10MB chunks
        for chunk in read_file_in_chunks('large_file.bin', chunk_size=1024*1024*10):
            process_chunk(chunk)
        ```
    """
    with open(file_path, 'rb') as stream:
        # Yield successive fixed-size chunks; the final chunk may be shorter.
        chunk = stream.read(chunk_size)
        while chunk:
            yield chunk
            chunk = stream.read(chunk_size)