synapse-sdk 1.0.0b5__py3-none-any.whl → 2025.12.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167)
  1. synapse_sdk/__init__.py +24 -0
  2. synapse_sdk/cli/code_server.py +305 -33
  3. synapse_sdk/clients/agent/__init__.py +2 -1
  4. synapse_sdk/clients/agent/container.py +143 -0
  5. synapse_sdk/clients/agent/ray.py +296 -38
  6. synapse_sdk/clients/backend/annotation.py +1 -1
  7. synapse_sdk/clients/backend/core.py +31 -4
  8. synapse_sdk/clients/backend/data_collection.py +82 -7
  9. synapse_sdk/clients/backend/hitl.py +1 -1
  10. synapse_sdk/clients/backend/ml.py +1 -1
  11. synapse_sdk/clients/base.py +211 -61
  12. synapse_sdk/loggers.py +46 -0
  13. synapse_sdk/plugins/README.md +1340 -0
  14. synapse_sdk/plugins/categories/base.py +59 -9
  15. synapse_sdk/plugins/categories/export/actions/__init__.py +3 -0
  16. synapse_sdk/plugins/categories/export/actions/export/__init__.py +28 -0
  17. synapse_sdk/plugins/categories/export/actions/export/action.py +165 -0
  18. synapse_sdk/plugins/categories/export/actions/export/enums.py +113 -0
  19. synapse_sdk/plugins/categories/export/actions/export/exceptions.py +53 -0
  20. synapse_sdk/plugins/categories/export/actions/export/models.py +74 -0
  21. synapse_sdk/plugins/categories/export/actions/export/run.py +195 -0
  22. synapse_sdk/plugins/categories/export/actions/export/utils.py +187 -0
  23. synapse_sdk/plugins/categories/export/templates/config.yaml +19 -1
  24. synapse_sdk/plugins/categories/export/templates/plugin/__init__.py +390 -0
  25. synapse_sdk/plugins/categories/export/templates/plugin/export.py +153 -177
  26. synapse_sdk/plugins/categories/neural_net/actions/train.py +1130 -32
  27. synapse_sdk/plugins/categories/neural_net/actions/tune.py +157 -4
  28. synapse_sdk/plugins/categories/neural_net/templates/config.yaml +7 -4
  29. synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +4 -0
  30. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/__init__.py +3 -0
  31. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/action.py +10 -0
  32. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/__init__.py +28 -0
  33. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/action.py +148 -0
  34. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/enums.py +269 -0
  35. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/exceptions.py +14 -0
  36. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/factory.py +76 -0
  37. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/models.py +100 -0
  38. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/orchestrator.py +248 -0
  39. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/run.py +64 -0
  40. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/__init__.py +17 -0
  41. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/annotation.py +265 -0
  42. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/base.py +170 -0
  43. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/extraction.py +83 -0
  44. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/metrics.py +92 -0
  45. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py +243 -0
  46. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py +143 -0
  47. synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +19 -0
  48. synapse_sdk/plugins/categories/upload/actions/upload/action.py +236 -0
  49. synapse_sdk/plugins/categories/upload/actions/upload/context.py +185 -0
  50. synapse_sdk/plugins/categories/upload/actions/upload/enums.py +493 -0
  51. synapse_sdk/plugins/categories/upload/actions/upload/exceptions.py +36 -0
  52. synapse_sdk/plugins/categories/upload/actions/upload/factory.py +138 -0
  53. synapse_sdk/plugins/categories/upload/actions/upload/models.py +214 -0
  54. synapse_sdk/plugins/categories/upload/actions/upload/orchestrator.py +183 -0
  55. synapse_sdk/plugins/categories/upload/actions/upload/registry.py +113 -0
  56. synapse_sdk/plugins/categories/upload/actions/upload/run.py +179 -0
  57. synapse_sdk/plugins/categories/upload/actions/upload/steps/__init__.py +1 -0
  58. synapse_sdk/plugins/categories/upload/actions/upload/steps/base.py +107 -0
  59. synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +62 -0
  60. synapse_sdk/plugins/categories/upload/actions/upload/steps/collection.py +63 -0
  61. synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py +91 -0
  62. synapse_sdk/plugins/categories/upload/actions/upload/steps/initialize.py +82 -0
  63. synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +235 -0
  64. synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +201 -0
  65. synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py +104 -0
  66. synapse_sdk/plugins/categories/upload/actions/upload/steps/validate.py +71 -0
  67. synapse_sdk/plugins/categories/upload/actions/upload/strategies/__init__.py +1 -0
  68. synapse_sdk/plugins/categories/upload/actions/upload/strategies/base.py +82 -0
  69. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/__init__.py +1 -0
  70. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/batch.py +39 -0
  71. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/single.py +29 -0
  72. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/__init__.py +1 -0
  73. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/flat.py +300 -0
  74. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py +287 -0
  75. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/__init__.py +1 -0
  76. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/excel.py +174 -0
  77. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/none.py +16 -0
  78. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/__init__.py +1 -0
  79. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/sync.py +84 -0
  80. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/__init__.py +1 -0
  81. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py +60 -0
  82. synapse_sdk/plugins/categories/upload/actions/upload/utils.py +250 -0
  83. synapse_sdk/plugins/categories/upload/templates/README.md +470 -0
  84. synapse_sdk/plugins/categories/upload/templates/config.yaml +28 -2
  85. synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py +310 -0
  86. synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +82 -20
  87. synapse_sdk/plugins/models.py +111 -9
  88. synapse_sdk/plugins/templates/plugin-config-schema.json +7 -0
  89. synapse_sdk/plugins/templates/schema.json +7 -0
  90. synapse_sdk/plugins/utils/__init__.py +3 -0
  91. synapse_sdk/plugins/utils/ray_gcs.py +66 -0
  92. synapse_sdk/shared/__init__.py +25 -0
  93. synapse_sdk/utils/converters/dm/__init__.py +42 -41
  94. synapse_sdk/utils/converters/dm/base.py +137 -0
  95. synapse_sdk/utils/converters/dm/from_v1.py +208 -562
  96. synapse_sdk/utils/converters/dm/to_v1.py +258 -304
  97. synapse_sdk/utils/converters/dm/tools/__init__.py +214 -0
  98. synapse_sdk/utils/converters/dm/tools/answer.py +95 -0
  99. synapse_sdk/utils/converters/dm/tools/bounding_box.py +132 -0
  100. synapse_sdk/utils/converters/dm/tools/bounding_box_3d.py +121 -0
  101. synapse_sdk/utils/converters/dm/tools/classification.py +75 -0
  102. synapse_sdk/utils/converters/dm/tools/keypoint.py +117 -0
  103. synapse_sdk/utils/converters/dm/tools/named_entity.py +111 -0
  104. synapse_sdk/utils/converters/dm/tools/polygon.py +122 -0
  105. synapse_sdk/utils/converters/dm/tools/polyline.py +124 -0
  106. synapse_sdk/utils/converters/dm/tools/prompt.py +94 -0
  107. synapse_sdk/utils/converters/dm/tools/relation.py +86 -0
  108. synapse_sdk/utils/converters/dm/tools/segmentation.py +141 -0
  109. synapse_sdk/utils/converters/dm/tools/segmentation_3d.py +83 -0
  110. synapse_sdk/utils/converters/dm/types.py +168 -0
  111. synapse_sdk/utils/converters/dm/utils.py +162 -0
  112. synapse_sdk/utils/converters/dm_legacy/__init__.py +56 -0
  113. synapse_sdk/utils/converters/dm_legacy/from_v1.py +627 -0
  114. synapse_sdk/utils/converters/dm_legacy/to_v1.py +367 -0
  115. synapse_sdk/utils/file/__init__.py +58 -0
  116. synapse_sdk/utils/file/archive.py +32 -0
  117. synapse_sdk/utils/file/checksum.py +56 -0
  118. synapse_sdk/utils/file/chunking.py +31 -0
  119. synapse_sdk/utils/file/download.py +385 -0
  120. synapse_sdk/utils/file/encoding.py +40 -0
  121. synapse_sdk/utils/file/io.py +22 -0
  122. synapse_sdk/utils/file/upload.py +165 -0
  123. synapse_sdk/utils/file/video/__init__.py +29 -0
  124. synapse_sdk/utils/file/video/transcode.py +307 -0
  125. synapse_sdk/utils/{file.py → file.py.backup} +77 -0
  126. synapse_sdk/utils/network.py +272 -0
  127. synapse_sdk/utils/storage/__init__.py +6 -2
  128. synapse_sdk/utils/storage/providers/file_system.py +6 -0
  129. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/METADATA +19 -2
  130. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/RECORD +134 -74
  131. synapse_sdk/devtools/docs/.gitignore +0 -20
  132. synapse_sdk/devtools/docs/README.md +0 -41
  133. synapse_sdk/devtools/docs/blog/2019-05-28-first-blog-post.md +0 -12
  134. synapse_sdk/devtools/docs/blog/2019-05-29-long-blog-post.md +0 -44
  135. synapse_sdk/devtools/docs/blog/2021-08-01-mdx-blog-post.mdx +0 -24
  136. synapse_sdk/devtools/docs/blog/2021-08-26-welcome/docusaurus-plushie-banner.jpeg +0 -0
  137. synapse_sdk/devtools/docs/blog/2021-08-26-welcome/index.md +0 -29
  138. synapse_sdk/devtools/docs/blog/authors.yml +0 -25
  139. synapse_sdk/devtools/docs/blog/tags.yml +0 -19
  140. synapse_sdk/devtools/docs/docusaurus.config.ts +0 -138
  141. synapse_sdk/devtools/docs/package-lock.json +0 -17455
  142. synapse_sdk/devtools/docs/package.json +0 -47
  143. synapse_sdk/devtools/docs/sidebars.ts +0 -44
  144. synapse_sdk/devtools/docs/src/components/HomepageFeatures/index.tsx +0 -71
  145. synapse_sdk/devtools/docs/src/components/HomepageFeatures/styles.module.css +0 -11
  146. synapse_sdk/devtools/docs/src/css/custom.css +0 -30
  147. synapse_sdk/devtools/docs/src/pages/index.module.css +0 -23
  148. synapse_sdk/devtools/docs/src/pages/index.tsx +0 -21
  149. synapse_sdk/devtools/docs/src/pages/markdown-page.md +0 -7
  150. synapse_sdk/devtools/docs/static/.nojekyll +0 -0
  151. synapse_sdk/devtools/docs/static/img/docusaurus-social-card.jpg +0 -0
  152. synapse_sdk/devtools/docs/static/img/docusaurus.png +0 -0
  153. synapse_sdk/devtools/docs/static/img/favicon.ico +0 -0
  154. synapse_sdk/devtools/docs/static/img/logo.png +0 -0
  155. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_mountain.svg +0 -171
  156. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_react.svg +0 -170
  157. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_tree.svg +0 -40
  158. synapse_sdk/devtools/docs/tsconfig.json +0 -8
  159. synapse_sdk/plugins/categories/export/actions/export.py +0 -346
  160. synapse_sdk/plugins/categories/export/enums.py +0 -7
  161. synapse_sdk/plugins/categories/neural_net/actions/gradio.py +0 -151
  162. synapse_sdk/plugins/categories/pre_annotation/actions/to_task.py +0 -943
  163. synapse_sdk/plugins/categories/upload/actions/upload.py +0 -954
  164. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/WHEEL +0 -0
  165. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/entry_points.txt +0 -0
  166. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/licenses/LICENSE +0 -0
  167. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/top_level.txt +0 -0
synapse_sdk/clients/agent/ray.py
@@ -1,10 +1,86 @@
-import requests
+import weakref
+from concurrent.futures import ThreadPoolExecutor
 
 from synapse_sdk.clients.base import BaseClient
 from synapse_sdk.clients.exceptions import ClientError
+from synapse_sdk.utils.network import (
+    HTTPStreamManager,
+    StreamLimits,
+    WebSocketStreamManager,
+    http_to_websocket_url,
+    sanitize_error_message,
+    validate_resource_id,
+    validate_timeout,
+)
 
 
 class RayClientMixin(BaseClient):
+    """Mixin class providing Ray cluster management and monitoring functionality.
+
+    This mixin extends BaseClient with Ray-specific operations for interacting with
+    Apache Ray distributed computing clusters. It provides comprehensive job management,
+    node monitoring, task tracking, and Ray Serve application control capabilities.
+
+    Key Features:
+        - Job lifecycle management (list, get, monitor)
+        - Real-time log streaming via WebSocket and HTTP protocols
+        - Node and task monitoring
+        - Ray Serve application deployment and management
+        - Robust error handling with input validation
+        - Resource management with automatic cleanup
+
+    Streaming Capabilities:
+        - WebSocket streaming for real-time log tailing
+        - HTTP streaming as fallback protocol
+        - Configurable timeouts and stream limits
+        - Automatic protocol validation and error recovery
+
+    Resource Management:
+        - Thread pool for concurrent operations (5 workers)
+        - WeakSet for tracking active connections
+        - Automatic cleanup on object destruction
+        - Stream limits to prevent resource exhaustion
+
+    Usage Examples:
+        Basic job operations:
+            >>> client = RayClient(base_url="http://ray-head:8265")
+            >>> jobs = client.list_jobs()
+            >>> job = client.get_job('job-12345')
+
+        Real-time log streaming:
+            >>> # WebSocket streaming (preferred)
+            >>> for log_line in client.tail_job_logs('job-12345', protocol='websocket'):
+            ...     print(log_line)
+
+            >>> # HTTP streaming (fallback)
+            >>> for log_line in client.tail_job_logs('job-12345', protocol='stream'):
+            ...     print(log_line)
+
+        Node and task monitoring:
+            >>> nodes = client.list_nodes()
+            >>> tasks = client.list_tasks()
+            >>> node_details = client.get_node('node-id')
+
+        Ray Serve management:
+            >>> apps = client.list_serve_applications()
+            >>> client.delete_serve_application('app-id')
+
+    Note:
+        This class is designed as a mixin and should be combined with other
+        client classes that provide authentication and base functionality.
+        It requires the BaseClient foundation for HTTP operations.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._thread_pool = ThreadPoolExecutor(max_workers=5, thread_name_prefix='ray_client_')
+        self._active_connections = weakref.WeakSet()
+
+        # Initialize stream managers
+        stream_limits = StreamLimits()
+        self._websocket_manager = WebSocketStreamManager(self._thread_pool, stream_limits)
+        self._http_manager = HTTPStreamManager(self.requests_session, stream_limits)
+
     def get_job(self, pk):
         path = f'jobs/{pk}/'
         return self._get(path)
@@ -17,48 +93,180 @@ class RayClientMixin(BaseClient):
         path = f'jobs/{pk}/logs/'
         return self._get(path)
 
-    def tail_job_logs(self, pk, stream_timeout=10):
-        if self.long_poll_handler:
-            raise ClientError(400, '"tail_job_logs" does not support long polling')
+    def websocket_tail_job_logs(self, pk, stream_timeout=10):
+        """Stream job logs in real-time using WebSocket protocol.
+
+        Establishes a WebSocket connection to stream job logs as they are generated.
+        This method provides the lowest latency for real-time log monitoring and is
+        the preferred protocol when available.
 
-        path = f'jobs/{pk}/tail_logs/'
-        url = self._get_url(path)
+        Args:
+            pk (str): Job primary key or identifier. Must be alphanumeric with
+                optional hyphens/underscores, max 100 characters.
+            stream_timeout (float, optional): Maximum time in seconds to wait for
+                log data. Defaults to 10. Must be positive and cannot exceed
+                300 seconds.
+
+        Returns:
+            Generator[str, None, None]: A generator yielding log lines as strings.
+                Each line includes a newline character.
+
+        Raises:
+            ClientError:
+                - 400: If long polling is enabled (incompatible)
+                - 400: If pk is empty, contains invalid characters, or too long
+                - 400: If stream_timeout is not positive or exceeds maximum
+                - 500: If WebSocket library is unavailable
+                - 503: If connection to Ray cluster fails
+                - 408: If connection timeout occurs
+                - 429: If stream limits are exceeded (lines, size, messages)
+
+        Usage:
+            >>> # Basic log streaming
+            >>> for log_line in client.websocket_tail_job_logs('job-12345'):
+            ...     print(log_line.strip())
+
+            >>> # With custom timeout
+            >>> for log_line in client.websocket_tail_job_logs('job-12345', stream_timeout=30):
+            ...     if 'ERROR' in log_line:
+            ...         break
+
+        Technical Notes:
+            - Uses WebSocketStreamManager for connection management
+            - Automatic input validation and sanitization
+            - Resource cleanup handled by WeakSet tracking
+            - Stream limits prevent memory exhaustion
+            - Thread pool manages WebSocket operations
+
+        See Also:
+            stream_tail_job_logs: HTTP-based alternative
+            tail_job_logs: Protocol-agnostic wrapper method
+        """
+        if hasattr(self, 'long_poll_handler') and self.long_poll_handler:
+            raise ClientError(400, '"websocket_tail_job_logs" does not support long polling')
+
+        # Validate inputs using network utilities
+        validated_pk = validate_resource_id(pk, 'job')
+        validated_timeout = validate_timeout(stream_timeout)
+
+        # Build WebSocket URL
+        path = f'ray/jobs/{validated_pk}/logs/ws/'
+        url = self._get_url(path, trailing_slash=True)
+        ws_url = http_to_websocket_url(url)
+
+        # Get headers and use WebSocket manager
         headers = self._get_headers()
+        headers['Agent-Token'] = f'Token {self.agent_token}'
+        context = f'job {validated_pk}'
+
+        return self._websocket_manager.stream_logs(ws_url, headers, validated_timeout, context)
+
+    def stream_tail_job_logs(self, pk, stream_timeout=10):
+        """Stream job logs in real-time using HTTP chunked transfer encoding.
+
+        Establishes an HTTP connection with chunked transfer encoding to stream
+        job logs as they are generated. This method serves as a reliable fallback
+        when WebSocket connections are not available or suitable.
+
+        Args:
+            pk (str): Job primary key or identifier. Must be alphanumeric with
+                optional hyphens/underscores, max 100 characters.
+            stream_timeout (float, optional): Maximum time in seconds to wait for
+                log data. Defaults to 10. Must be positive and cannot exceed
+                300 seconds.
+
+        Returns:
+            Generator[str, None, None]: A generator yielding log lines as strings.
+                Each line includes a newline character.
+
+        Raises:
+            ClientError:
+                - 400: If long polling is enabled (incompatible)
+                - 400: If pk is empty, contains invalid characters, or too long
+                - 400: If stream_timeout is not positive or exceeds maximum
+                - 503: If connection to Ray cluster fails
+                - 408: If connection or read timeout occurs
+                - 404: If job is not found
+                - 429: If stream limits are exceeded (lines, size, messages)
+                - 500: If unexpected streaming error occurs
+
+        Usage:
+            >>> # Basic HTTP log streaming
+            >>> for log_line in client.stream_tail_job_logs('job-12345'):
+            ...     print(log_line.strip())
+
+            >>> # With error handling and custom timeout
+            >>> try:
+            ...     for log_line in client.stream_tail_job_logs('job-12345', stream_timeout=60):
+            ...         if 'COMPLETED' in log_line:
+            ...             break
+            ... except ClientError as e:
+            ...     print(f"Streaming failed: {e}")
+
+        Technical Notes:
+            - Uses HTTPStreamManager for connection management
+            - Automatic input validation and sanitization
+            - Proper HTTP response cleanup on completion/error
+            - Stream limits prevent memory exhaustion
+            - Filters out oversized lines (>10KB) automatically
+            - Connection reuse through requests session
+
+        See Also:
+            websocket_tail_job_logs: WebSocket-based alternative (preferred)
+            tail_job_logs: Protocol-agnostic wrapper method
+        """
+        if hasattr(self, 'long_poll_handler') and self.long_poll_handler:
+            raise ClientError(400, '"stream_tail_job_logs" does not support long polling')
+
+        # Validate inputs using network utilities
+        validated_pk = validate_resource_id(pk, 'job')
+        validated_timeout = validate_timeout(stream_timeout)
+
+        # Build HTTP URL and prepare request
+        path = f'ray/jobs/{validated_pk}/logs/stream/'
+        url = self._get_url(path, trailing_slash=True)
+        headers = self._get_headers()
+        headers['Agent-Token'] = f'Token {self.agent_token}'
+        timeout = (self.timeout['connect'], validated_timeout)
+        context = f'job {validated_pk}'
+
+        return self._http_manager.stream_logs(url, headers, timeout, context)
+
+    def tail_job_logs(self, pk, stream_timeout=10, protocol='stream'):
+        """Tail job logs using either WebSocket or HTTP streaming.
+
+        Args:
+            pk: Job primary key
+            stream_timeout: Timeout for streaming operations
+            protocol: 'websocket' or 'stream' (default: 'stream')
+        """
+        # Validate protocol first
+        if protocol not in ('websocket', 'stream'):
+            raise ClientError(400, f'Unsupported protocol: {protocol}. Use "websocket" or "stream"')
+
+        # Pre-validate common inputs using network utilities
+        validate_resource_id(pk, 'job')
+        validate_timeout(stream_timeout)
 
         try:
-            # Use shorter timeout for streaming to prevent hanging
-            response = self.requests_session.get(
-                url, headers=headers, stream=True, timeout=(self.timeout['connect'], stream_timeout)
-            )
-            response.raise_for_status()
-
-            # Set up streaming with timeout handling
-            try:
-                for line in response.iter_lines(decode_unicode=True, chunk_size=1024):
-                    if line:
-                        yield f'{line}\n'
-            except requests.exceptions.ChunkedEncodingError:
-                # Connection was interrupted during streaming
-                raise ClientError(503, f'Log stream for job {pk} was interrupted')
-            except requests.exceptions.ReadTimeout:
-                # Read timeout during streaming
-                raise ClientError(408, f'Log stream for job {pk} timed out after {stream_timeout}s')
-
-        except requests.exceptions.ConnectTimeout:
-            raise ClientError(
-                408, f'Failed to connect to log stream for job {pk} (timeout: {self.timeout["connect"]}s)'
-            )
-        except requests.exceptions.ReadTimeout:
-            raise ClientError(408, f'Log stream for job {pk} read timeout ({stream_timeout}s)')
-        except requests.exceptions.ConnectionError as e:
-            if 'Connection refused' in str(e):
-                raise ClientError(503, f'Agent connection refused for job {pk}')
-            else:
-                raise ClientError(503, f'Agent connection error for job {pk}: {str(e)[:100]}')
-        except requests.exceptions.HTTPError as e:
-            raise ClientError(e.response.status_code, f'HTTP error streaming logs for job {pk}: {e}')
+            if protocol == 'websocket':
+                return self.websocket_tail_job_logs(pk, stream_timeout)
+            else:  # protocol == 'stream'
+                return self.stream_tail_job_logs(pk, stream_timeout)
+        except ClientError:
+            raise
         except Exception as e:
-            raise ClientError(500, f'Unexpected error streaming logs for job {pk}: {str(e)[:100]}')
+            # Fallback error handling using network utility
+            sanitized_error = sanitize_error_message(str(e), f'job {pk}')
+            raise ClientError(500, f'Protocol {protocol} failed: {sanitized_error}')
+
+    def __del__(self):
+        """Cleanup resources when object is destroyed."""
+        try:
+            if hasattr(self, '_thread_pool'):
+                self._thread_pool.shutdown(wait=False)
+        except Exception:
+            pass  # Ignore cleanup errors during destruction
 
     def get_node(self, pk):
         path = f'nodes/{pk}/'
@@ -87,3 +295,53 @@ class RayClientMixin(BaseClient):
     def delete_serve_application(self, pk):
         path = f'serve_applications/{pk}/'
         return self._delete(path)
+
+    def stop_job(self, pk):
+        """Stop a running job gracefully.
+
+        Uses Ray's stop_job() API to request graceful termination of the job.
+        This preserves job state and allows for potential resubmission later.
+
+        Args:
+            pk (str): Job primary key or identifier. Must be alphanumeric with
+                optional hyphens/underscores, max 100 characters.
+
+        Returns:
+            dict: Response containing job status and stop details.
+
+        Raises:
+            ClientError:
+                - 400: If pk is empty, contains invalid characters, or too long
+                - 400: If job is already in terminal state (STOPPED, FAILED, etc.)
+                - 404: If job is not found
+                - 503: If connection to Ray cluster fails
+                - 500: If unexpected error occurs during stop
+
+        Usage:
+            >>> # Stop a running job
+            >>> result = client.stop_job('job-12345')
+            >>> print(result['status'])  # Should show 'STOPPING' or similar
+
+            >>> # Handle stop errors
+            >>> try:
+            ...     client.stop_job('job-12345')
+            ... except ClientError as e:
+            ...     print(f"Stop failed: {e}")
+
+        Technical Notes:
+            - Uses Ray's stop_job() API for graceful termination
+            - Validates job state before attempting stop
+            - Maintains consistency with existing SDK patterns
+            - Provides detailed error messages for debugging
+
+        See Also:
+            resume_job: Method for restarting stopped jobs
+        """
+        # Validate inputs using network utilities
+        validated_pk = validate_resource_id(pk, 'job')
+
+        # Build API path for job stop
+        path = f'jobs/{validated_pk}/stop/'
+
+        # Use _post method with empty data to match Ray's API pattern
+        return self._post(path)
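
The new `tail_job_logs` wrapper makes the protocol switch explicit, so a caller can prefer WebSocket and fall back to HTTP streaming when it fails. A minimal sketch of that pattern (not part of the diff), assuming `client` is any instance composing `RayClientMixin`, such as the `RayClient` named in the mixin's own docstring:

```python
from synapse_sdk.clients.exceptions import ClientError


def tail_with_fallback(client, job_id, stream_timeout=30):
    """Yield log lines, preferring WebSocket and falling back to HTTP streaming."""
    try:
        # Lowest-latency path: WebSocket streaming.
        yield from client.tail_job_logs(job_id, stream_timeout=stream_timeout, protocol='websocket')
    except ClientError:
        # WebSocket unavailable, refused, or over its stream limits:
        # retry once over HTTP chunked transfer encoding.
        yield from client.tail_job_logs(job_id, stream_timeout=stream_timeout, protocol='stream')


# Hypothetical usage:
# for line in tail_with_fallback(client, 'job-12345'):
#     print(line.rstrip())
```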
synapse_sdk/clients/backend/annotation.py
@@ -24,7 +24,7 @@ class AnnotationClientMixin(BaseClient):
         return self._list(path, params=params)
 
     def list_tasks(self, params=None, url_conversion=None, list_all=False):
-        path = 'tasks/'
+        path = 'sdk/tasks/'
        url_conversion = get_default_url_conversion(url_conversion, files_fields=['files'])
         return self._list(path, params=params, url_conversion=url_conversion, list_all=list_all)
 
synapse_sdk/clients/backend/core.py
@@ -3,15 +3,42 @@ import os
 from pathlib import Path
 
 from synapse_sdk.clients.base import BaseClient
+from synapse_sdk.utils.file import read_file_in_chunks
 
 
 class CoreClientMixin(BaseClient):
     def create_chunked_upload(self, file_path):
-        def read_file_in_chunks(file_path, chunk_size=1024 * 1024 * 50):
-            with open(file_path, 'rb') as file:
-                while chunk := file.read(chunk_size):
-                    yield chunk
+        """
+        Upload a file using chunked upload for efficient handling of large files.
 
+        This method breaks the file into chunks and uploads them sequentially to the server.
+        It calculates an MD5 hash of the entire file to ensure data integrity during upload.
+
+        Args:
+            file_path (str | Path): Path to the file to upload
+
+        Returns:
+            dict: Response from the server after successful upload completion,
+                typically containing upload confirmation and file metadata
+
+        Raises:
+            FileNotFoundError: If the specified file doesn't exist
+            PermissionError: If the file can't be read due to permissions
+            ClientError: If there's an error during the upload process
+            OSError: If there's an OS-level error accessing the file
+
+        Example:
+            ```python
+            client = CoreClientMixin(base_url='https://api.example.com')
+            result = client.create_chunked_upload('/path/to/large_file.zip')
+            print(f"Upload completed: {result}")
+            ```
+
+        Note:
+            - Uses 50MB chunks by default for optimal upload performance
+            - Automatically resumes from the last successfully uploaded chunk
+            - Verifies upload integrity using MD5 checksum
+        """
         file_path = Path(file_path)
         size = os.path.getsize(file_path)
         hash_md5 = hashlib.md5()
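
For reference, the chunk reader now imported from `synapse_sdk.utils.file` behaved like the inline helper it replaces, and the surrounding context lines show the MD5 accumulation. A self-contained sketch of that combination; the chunk size and walrus loop come straight from the removed lines, while the actual per-chunk upload call is not shown in this hunk:

```python
import hashlib


def read_file_in_chunks(file_path, chunk_size=1024 * 1024 * 50):
    """Yield 50MB chunks, as the removed inline helper did."""
    with open(file_path, 'rb') as file:
        while chunk := file.read(chunk_size):
            yield chunk


# MD5 over the whole file, mirroring `hash_md5 = hashlib.md5()` in the method
# body; in the real method each chunk is also sent to the chunked-upload
# endpoint (assumed, not shown in this hunk).
hash_md5 = hashlib.md5()
for chunk in read_file_in_chunks('/path/to/large_file.zip'):
    hash_md5.update(chunk)
print(hash_md5.hexdigest())
```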
synapse_sdk/clients/backend/data_collection.py
@@ -1,6 +1,6 @@
 from multiprocessing import Pool
 from pathlib import Path
-from typing import Dict, Optional
+from typing import Dict, Optional, Union
 
 from tqdm import tqdm
 
@@ -9,6 +9,17 @@ from synapse_sdk.clients.utils import get_batched_list, get_default_url_conversi
 
 
 class DataCollectionClientMixin(BaseClient):
+    """Mixin class for data collection operations.
+
+    Provides methods for managing data collections, files, and units
+    in the Synapse backend. Supports both regular file uploads and
+    chunked uploads for large files.
+
+    This mixin extends BaseClient with data collection-specific functionality
+    including file upload capabilities, data unit management, and batch processing
+    operations for efficient data collection workflows.
+    """
+
     def list_data_collection(self):
         path = 'data_collections/'
         return self._list(path)
@@ -22,14 +33,66 @@ class DataCollectionClientMixin(BaseClient):
         path = f'data_collections/{data_collection_id}/?expand=file_specifications'
         return self._get(path)
 
-    def create_data_file(self, file_path: Path):
-        """Create data file to synapse-backend.
+    def create_data_file(
+        self, file_path: Path, use_chunked_upload: bool = False
+    ) -> Union[Dict[str, Union[str, int]], str]:
+        """Create and upload a data file to the Synapse backend.
+
+        This method supports two upload strategies:
+        1. Direct file upload for smaller files (default)
+        2. Chunked upload for large files (automatic when flag is enabled)
 
         Args:
-            file_path: The file pathlib object to upload.
+            file_path: Path object pointing to the file to upload.
+                Must be a valid file path that exists on the filesystem.
+            use_chunked_upload: Boolean flag to enable chunked upload for the file.
+                When True, automatically creates a chunked upload for the file
+                instead of uploading it directly. Defaults to False.
+
+        Returns:
+            Dictionary containing the created data file information including:
+                - id: The unique identifier of the created data file
+                - checksum: The MD5 checksum of the uploaded file
+                - size: The file size in bytes
+                - created_at: Timestamp of creation
+                - Additional metadata fields from the backend
+            Or a string response in case of a non-JSON response.
+
+        Raises:
+            FileNotFoundError: If the specified file doesn't exist (for direct upload)
+            PermissionError: If the file can't be read due to permissions
+            ClientError: If the backend returns an error response
+            ValueError: If file_path is not a valid Path object
+
+        Examples:
+            Direct file upload for small files:
+            ```python
+            client = DataCollectionClientMixin(base_url='https://api.example.com')
+            file_path = Path('/path/to/small_file.csv')
+            result = client.create_data_file(file_path)
+            print(f"File uploaded with ID: {result['id']}")
+            ```
+
+            Using chunked upload for large files:
+            ```python
+            # Automatically create chunked upload for large file
+            file_path = Path('/path/to/large_file.zip')
+            result = client.create_data_file(file_path, use_chunked_upload=True)
+            print(f"Large file uploaded with ID: {result['id']}")
+            ```
+
+        Note:
+            - For files larger than 100MB, consider using chunked upload
+            - The chunked upload will be automatically created when the flag is enabled
+            - Chunked uploads provide better reliability for large files
         """
         path = 'data_files/'
-        return self._post(path, files={'file': file_path})
+        if use_chunked_upload:
+            chunked_upload = self.create_chunked_upload(file_path)
+            data = {'chunked_upload': chunked_upload['id'], 'meta': {'filename': file_path.name}}
+            return self._post(path, data=data)
+        else:
+            return self._post(path, files={'file': file_path})
 
     def get_data_unit(self, data_unit_id: int, params=None):
         path = f'data_units/{data_unit_id}/'
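
The docstring above leaves the chunked/direct decision to the caller. A hedged sketch of a size-based wrapper, using the 100MB figure from the Note (an assumption drawn from that text; the actual threshold lives in upload-plugin configuration, not the SDK):

```python
from pathlib import Path

# Assumed threshold, taken from the "files larger than 100MB" note above.
CHUNKED_UPLOAD_THRESHOLD = 100 * 1024 * 1024


def create_data_file_auto(client, file_path: Path):
    """Pick direct vs. chunked upload based on file size."""
    use_chunked = file_path.stat().st_size > CHUNKED_UPLOAD_THRESHOLD
    return client.create_data_file(file_path, use_chunked_upload=use_chunked)
```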
@@ -49,6 +112,16 @@ class DataCollectionClientMixin(BaseClient):
         url_conversion = get_default_url_conversion(url_conversion, files_fields=['files'])
         return self._list(path, params=params, url_conversion=url_conversion, list_all=list_all)
 
+    def data_files_verify_checksums(self, checksums: list[str]):
+        """Check whether files with the given checksums already exist.
+
+        Args:
+            checksums: A list of MD5 checksums to verify.
+        """
+        path = 'data_files/verify_checksums/'
+        data = {'checksums': checksums}
+        return self._post(path, data=data)
+
     def upload_data_collection(
         self,
         data_collection_id: int,
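
`data_files_verify_checksums` pairs naturally with client-side MD5 computation, for example to skip re-uploading files the backend already has. A sketch under that assumption (the response shape is not shown in this diff, so it is returned opaquely):

```python
import hashlib
from pathlib import Path


def md5_checksum(path: Path) -> str:
    """Stream the file through MD5 in 1MB chunks."""
    digest = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(1024 * 1024), b''):
            digest.update(chunk)
    return digest.hexdigest()


def verify_local_files(client, paths):
    """Ask the backend which of these files' checksums it already knows."""
    checksums = [md5_checksum(p) for p in paths]
    return client.data_files_verify_checksums(checksums)
```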
@@ -91,7 +164,7 @@ class DataCollectionClientMixin(BaseClient):
 
         self.create_tasks(tasks_data)
 
-    def upload_data_file(self, data: Dict, data_collection_id: int) -> Dict:
+    def upload_data_file(self, data: Dict, data_collection_id: int, use_chunked_upload: bool = False) -> Dict:
         """Upload files to synapse-backend.
 
         Args:
@@ -100,12 +173,14 @@ class DataCollectionClientMixin(BaseClient):
                 - files: The files to upload. (key: file name, value: file pathlib object)
                 - meta: The meta data to upload.
             data_collection_id: The data_collection id to upload the data to.
+            use_chunked_upload: Whether to use chunked upload for large files (default: False).
+                Automatically determined based on the file size threshold in the upload plugin (default 50MB).
 
         Returns:
             Dict: The result of the upload.
         """
         for name, path in data['files'].items():
-            data_file = self.create_data_file(path)
+            data_file = self.create_data_file(path, use_chunked_upload)
             data['data_collection'] = data_collection_id
             data['files'][name] = {'checksum': data_file['checksum'], 'path': str(path)}
         return data
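
The `data` dict shape `upload_data_file` expects is spelled out in its Args section. A short usage sketch with placeholder values; the 50MB threshold mentioned in the docstring is decided by the upload plugin, so the flag is passed explicitly here:

```python
from pathlib import Path

data = {
    'files': {'image': Path('/data/sample_0001.jpg')},  # key: file name, value: Path
    'meta': {'source': 'camera-3'},                      # arbitrary metadata
}

# `client` is any instance composing DataCollectionClientMixin (hypothetical).
result = client.upload_data_file(data, data_collection_id=42, use_chunked_upload=False)
# result['files']['image'] -> {'checksum': '<md5>', 'path': '/data/sample_0001.jpg'}
```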
synapse_sdk/clients/backend/hitl.py
@@ -8,7 +8,7 @@ class HITLClientMixin(BaseClient):
         return self._get(path)
 
     def list_assignments(self, params=None, url_conversion=None, list_all=False):
-        path = 'assignments/'
+        path = 'sdk/assignments/'
         url_conversion = get_default_url_conversion(url_conversion, files_fields=['files'])
         return self._list(path, params=params, url_conversion=url_conversion, list_all=list_all)
 
synapse_sdk/clients/backend/ml.py
@@ -19,7 +19,7 @@ class MLClientMixin(BaseClient):
         return self._post(path, data=data)
 
     def list_ground_truth_events(self, params=None, url_conversion=None, list_all=False):
-        path = 'ground_truth_events/'
+        path = 'sdk/ground_truth_events/'
         url_conversion = get_default_url_conversion(url_conversion, files_fields=['files'])
         return self._list(path, params=params, url_conversion=url_conversion, list_all=list_all)
 