synapse-sdk 1.0.0b5__py3-none-any.whl → 2025.12.3__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (167)
  1. synapse_sdk/__init__.py +24 -0
  2. synapse_sdk/cli/code_server.py +305 -33
  3. synapse_sdk/clients/agent/__init__.py +2 -1
  4. synapse_sdk/clients/agent/container.py +143 -0
  5. synapse_sdk/clients/agent/ray.py +296 -38
  6. synapse_sdk/clients/backend/annotation.py +1 -1
  7. synapse_sdk/clients/backend/core.py +31 -4
  8. synapse_sdk/clients/backend/data_collection.py +82 -7
  9. synapse_sdk/clients/backend/hitl.py +1 -1
  10. synapse_sdk/clients/backend/ml.py +1 -1
  11. synapse_sdk/clients/base.py +211 -61
  12. synapse_sdk/loggers.py +46 -0
  13. synapse_sdk/plugins/README.md +1340 -0
  14. synapse_sdk/plugins/categories/base.py +59 -9
  15. synapse_sdk/plugins/categories/export/actions/__init__.py +3 -0
  16. synapse_sdk/plugins/categories/export/actions/export/__init__.py +28 -0
  17. synapse_sdk/plugins/categories/export/actions/export/action.py +165 -0
  18. synapse_sdk/plugins/categories/export/actions/export/enums.py +113 -0
  19. synapse_sdk/plugins/categories/export/actions/export/exceptions.py +53 -0
  20. synapse_sdk/plugins/categories/export/actions/export/models.py +74 -0
  21. synapse_sdk/plugins/categories/export/actions/export/run.py +195 -0
  22. synapse_sdk/plugins/categories/export/actions/export/utils.py +187 -0
  23. synapse_sdk/plugins/categories/export/templates/config.yaml +19 -1
  24. synapse_sdk/plugins/categories/export/templates/plugin/__init__.py +390 -0
  25. synapse_sdk/plugins/categories/export/templates/plugin/export.py +153 -177
  26. synapse_sdk/plugins/categories/neural_net/actions/train.py +1130 -32
  27. synapse_sdk/plugins/categories/neural_net/actions/tune.py +157 -4
  28. synapse_sdk/plugins/categories/neural_net/templates/config.yaml +7 -4
  29. synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +4 -0
  30. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/__init__.py +3 -0
  31. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/action.py +10 -0
  32. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/__init__.py +28 -0
  33. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/action.py +148 -0
  34. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/enums.py +269 -0
  35. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/exceptions.py +14 -0
  36. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/factory.py +76 -0
  37. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/models.py +100 -0
  38. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/orchestrator.py +248 -0
  39. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/run.py +64 -0
  40. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/__init__.py +17 -0
  41. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/annotation.py +265 -0
  42. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/base.py +170 -0
  43. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/extraction.py +83 -0
  44. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/metrics.py +92 -0
  45. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py +243 -0
  46. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py +143 -0
  47. synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +19 -0
  48. synapse_sdk/plugins/categories/upload/actions/upload/action.py +236 -0
  49. synapse_sdk/plugins/categories/upload/actions/upload/context.py +185 -0
  50. synapse_sdk/plugins/categories/upload/actions/upload/enums.py +493 -0
  51. synapse_sdk/plugins/categories/upload/actions/upload/exceptions.py +36 -0
  52. synapse_sdk/plugins/categories/upload/actions/upload/factory.py +138 -0
  53. synapse_sdk/plugins/categories/upload/actions/upload/models.py +214 -0
  54. synapse_sdk/plugins/categories/upload/actions/upload/orchestrator.py +183 -0
  55. synapse_sdk/plugins/categories/upload/actions/upload/registry.py +113 -0
  56. synapse_sdk/plugins/categories/upload/actions/upload/run.py +179 -0
  57. synapse_sdk/plugins/categories/upload/actions/upload/steps/__init__.py +1 -0
  58. synapse_sdk/plugins/categories/upload/actions/upload/steps/base.py +107 -0
  59. synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +62 -0
  60. synapse_sdk/plugins/categories/upload/actions/upload/steps/collection.py +63 -0
  61. synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py +91 -0
  62. synapse_sdk/plugins/categories/upload/actions/upload/steps/initialize.py +82 -0
  63. synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +235 -0
  64. synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +201 -0
  65. synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py +104 -0
  66. synapse_sdk/plugins/categories/upload/actions/upload/steps/validate.py +71 -0
  67. synapse_sdk/plugins/categories/upload/actions/upload/strategies/__init__.py +1 -0
  68. synapse_sdk/plugins/categories/upload/actions/upload/strategies/base.py +82 -0
  69. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/__init__.py +1 -0
  70. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/batch.py +39 -0
  71. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/single.py +29 -0
  72. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/__init__.py +1 -0
  73. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/flat.py +300 -0
  74. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py +287 -0
  75. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/__init__.py +1 -0
  76. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/excel.py +174 -0
  77. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/none.py +16 -0
  78. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/__init__.py +1 -0
  79. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/sync.py +84 -0
  80. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/__init__.py +1 -0
  81. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py +60 -0
  82. synapse_sdk/plugins/categories/upload/actions/upload/utils.py +250 -0
  83. synapse_sdk/plugins/categories/upload/templates/README.md +470 -0
  84. synapse_sdk/plugins/categories/upload/templates/config.yaml +28 -2
  85. synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py +310 -0
  86. synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +82 -20
  87. synapse_sdk/plugins/models.py +111 -9
  88. synapse_sdk/plugins/templates/plugin-config-schema.json +7 -0
  89. synapse_sdk/plugins/templates/schema.json +7 -0
  90. synapse_sdk/plugins/utils/__init__.py +3 -0
  91. synapse_sdk/plugins/utils/ray_gcs.py +66 -0
  92. synapse_sdk/shared/__init__.py +25 -0
  93. synapse_sdk/utils/converters/dm/__init__.py +42 -41
  94. synapse_sdk/utils/converters/dm/base.py +137 -0
  95. synapse_sdk/utils/converters/dm/from_v1.py +208 -562
  96. synapse_sdk/utils/converters/dm/to_v1.py +258 -304
  97. synapse_sdk/utils/converters/dm/tools/__init__.py +214 -0
  98. synapse_sdk/utils/converters/dm/tools/answer.py +95 -0
  99. synapse_sdk/utils/converters/dm/tools/bounding_box.py +132 -0
  100. synapse_sdk/utils/converters/dm/tools/bounding_box_3d.py +121 -0
  101. synapse_sdk/utils/converters/dm/tools/classification.py +75 -0
  102. synapse_sdk/utils/converters/dm/tools/keypoint.py +117 -0
  103. synapse_sdk/utils/converters/dm/tools/named_entity.py +111 -0
  104. synapse_sdk/utils/converters/dm/tools/polygon.py +122 -0
  105. synapse_sdk/utils/converters/dm/tools/polyline.py +124 -0
  106. synapse_sdk/utils/converters/dm/tools/prompt.py +94 -0
  107. synapse_sdk/utils/converters/dm/tools/relation.py +86 -0
  108. synapse_sdk/utils/converters/dm/tools/segmentation.py +141 -0
  109. synapse_sdk/utils/converters/dm/tools/segmentation_3d.py +83 -0
  110. synapse_sdk/utils/converters/dm/types.py +168 -0
  111. synapse_sdk/utils/converters/dm/utils.py +162 -0
  112. synapse_sdk/utils/converters/dm_legacy/__init__.py +56 -0
  113. synapse_sdk/utils/converters/dm_legacy/from_v1.py +627 -0
  114. synapse_sdk/utils/converters/dm_legacy/to_v1.py +367 -0
  115. synapse_sdk/utils/file/__init__.py +58 -0
  116. synapse_sdk/utils/file/archive.py +32 -0
  117. synapse_sdk/utils/file/checksum.py +56 -0
  118. synapse_sdk/utils/file/chunking.py +31 -0
  119. synapse_sdk/utils/file/download.py +385 -0
  120. synapse_sdk/utils/file/encoding.py +40 -0
  121. synapse_sdk/utils/file/io.py +22 -0
  122. synapse_sdk/utils/file/upload.py +165 -0
  123. synapse_sdk/utils/file/video/__init__.py +29 -0
  124. synapse_sdk/utils/file/video/transcode.py +307 -0
  125. synapse_sdk/utils/{file.py → file.py.backup} +77 -0
  126. synapse_sdk/utils/network.py +272 -0
  127. synapse_sdk/utils/storage/__init__.py +6 -2
  128. synapse_sdk/utils/storage/providers/file_system.py +6 -0
  129. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/METADATA +19 -2
  130. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/RECORD +134 -74
  131. synapse_sdk/devtools/docs/.gitignore +0 -20
  132. synapse_sdk/devtools/docs/README.md +0 -41
  133. synapse_sdk/devtools/docs/blog/2019-05-28-first-blog-post.md +0 -12
  134. synapse_sdk/devtools/docs/blog/2019-05-29-long-blog-post.md +0 -44
  135. synapse_sdk/devtools/docs/blog/2021-08-01-mdx-blog-post.mdx +0 -24
  136. synapse_sdk/devtools/docs/blog/2021-08-26-welcome/docusaurus-plushie-banner.jpeg +0 -0
  137. synapse_sdk/devtools/docs/blog/2021-08-26-welcome/index.md +0 -29
  138. synapse_sdk/devtools/docs/blog/authors.yml +0 -25
  139. synapse_sdk/devtools/docs/blog/tags.yml +0 -19
  140. synapse_sdk/devtools/docs/docusaurus.config.ts +0 -138
  141. synapse_sdk/devtools/docs/package-lock.json +0 -17455
  142. synapse_sdk/devtools/docs/package.json +0 -47
  143. synapse_sdk/devtools/docs/sidebars.ts +0 -44
  144. synapse_sdk/devtools/docs/src/components/HomepageFeatures/index.tsx +0 -71
  145. synapse_sdk/devtools/docs/src/components/HomepageFeatures/styles.module.css +0 -11
  146. synapse_sdk/devtools/docs/src/css/custom.css +0 -30
  147. synapse_sdk/devtools/docs/src/pages/index.module.css +0 -23
  148. synapse_sdk/devtools/docs/src/pages/index.tsx +0 -21
  149. synapse_sdk/devtools/docs/src/pages/markdown-page.md +0 -7
  150. synapse_sdk/devtools/docs/static/.nojekyll +0 -0
  151. synapse_sdk/devtools/docs/static/img/docusaurus-social-card.jpg +0 -0
  152. synapse_sdk/devtools/docs/static/img/docusaurus.png +0 -0
  153. synapse_sdk/devtools/docs/static/img/favicon.ico +0 -0
  154. synapse_sdk/devtools/docs/static/img/logo.png +0 -0
  155. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_mountain.svg +0 -171
  156. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_react.svg +0 -170
  157. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_tree.svg +0 -40
  158. synapse_sdk/devtools/docs/tsconfig.json +0 -8
  159. synapse_sdk/plugins/categories/export/actions/export.py +0 -346
  160. synapse_sdk/plugins/categories/export/enums.py +0 -7
  161. synapse_sdk/plugins/categories/neural_net/actions/gradio.py +0 -151
  162. synapse_sdk/plugins/categories/pre_annotation/actions/to_task.py +0 -943
  163. synapse_sdk/plugins/categories/upload/actions/upload.py +0 -954
  164. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/WHEEL +0 -0
  165. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/entry_points.txt +0 -0
  166. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/licenses/LICENSE +0 -0
  167. {synapse_sdk-1.0.0b5.dist-info → synapse_sdk-2025.12.3.dist-info}/top_level.txt +0 -0
synapse_sdk/utils/file/video/transcode.py
@@ -0,0 +1,307 @@
+import asyncio
+import shutil
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Callable, Optional
+
+import ffmpeg
+
+
+# Exception classes
+class VideoTranscodeError(Exception):
+    """Base exception for video transcoding errors."""
+
+    pass
+
+
+class UnsupportedFormatError(VideoTranscodeError):
+    """Raised when input format is not supported."""
+
+    pass
+
+
+class FFmpegNotFoundError(VideoTranscodeError):
+    """Raised when FFmpeg is not installed or not in PATH."""
+
+    pass
+
+
+class TranscodingFailedError(VideoTranscodeError):
+    """Raised when FFmpeg transcoding process fails."""
+
+    pass
+
+
+@dataclass
+class TranscodeConfig:
+    """Video transcoding configuration."""
+
+    vcodec: str = 'libx264'  # Video codec
+    preset: str = 'medium'  # Encoding preset (ultrafast to veryslow)
+    crf: int = 28  # Constant Rate Factor (0-51, lower=better quality)
+    acodec: str = 'aac'  # Audio codec
+    audio_bitrate: str = '128k'  # Audio bitrate
+    movflags: str = '+faststart'  # MP4 optimization flags
+    resolution: Optional[str] = None  # Target resolution (e.g., '1920x1080')
+    fps: Optional[int] = None  # Target frame rate
+    start_time: Optional[float] = None  # Trim start time in seconds
+    duration: Optional[float] = None  # Trim duration in seconds
+
+
+# Supported input formats
+SUPPORTED_FORMATS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv', '.wmv', '.mpeg', '.mpg', '.m4v', '.3gp', '.ogv'}
+
+
+def _check_ffmpeg_available():
+    """Check if FFmpeg is available in PATH."""
+    if not shutil.which('ffmpeg'):
+        raise FFmpegNotFoundError(
+            'FFmpeg is not installed or not found in PATH. Please install FFmpeg to use video transcoding features.'
+        )
+
+
+def validate_video_format(video_path: str | Path) -> bool:
+    """
+    Check if video format is supported for transcoding.
+
+    Args:
+        video_path (str | Path): Path to the video file
+
+    Returns:
+        bool: True if format is supported, False otherwise
+    """
+    path = Path(video_path)
+    return path.suffix.lower() in SUPPORTED_FORMATS
+
+
+def get_video_info(video_path: str | Path) -> dict:
+    """
+    Extract video metadata (resolution, duration, codecs, etc.).
+
+    Args:
+        video_path (str | Path): Path to the video file
+
+    Returns:
+        dict: Video metadata information
+
+    Raises:
+        VideoTranscodeError: If unable to probe video file
+    """
+    _check_ffmpeg_available()
+
+    try:
+        probe = ffmpeg.probe(str(video_path))
+
+        video_info = {}
+
+        # Get format information
+        if 'format' in probe:
+            format_info = probe['format']
+            video_info['duration'] = float(format_info.get('duration', 0))
+            video_info['size'] = int(format_info.get('size', 0))
+            video_info['bitrate'] = int(format_info.get('bit_rate', 0))
+
+        # Get stream information
+        video_streams = [stream for stream in probe['streams'] if stream['codec_type'] == 'video']
+        audio_streams = [stream for stream in probe['streams'] if stream['codec_type'] == 'audio']
+
+        if video_streams:
+            video_stream = video_streams[0]
+            video_info['width'] = int(video_stream.get('width', 0))
+            video_info['height'] = int(video_stream.get('height', 0))
+            video_info['video_codec'] = video_stream.get('codec_name', '')
+            video_info['fps'] = eval(video_stream.get('r_frame_rate', '0/1'))
+
+        if audio_streams:
+            audio_stream = audio_streams[0]
+            video_info['audio_codec'] = audio_stream.get('codec_name', '')
+            video_info['channels'] = int(audio_stream.get('channels', 0))
+            video_info['sample_rate'] = int(audio_stream.get('sample_rate', 0))
+
+        return video_info
+
+    except Exception as e:
+        raise VideoTranscodeError(f'Failed to probe video file: {str(e)}')
+
+
+def _build_ffmpeg_stream(input_path: str | Path, output_path: str | Path, config: TranscodeConfig):
+    """Build FFmpeg stream with configuration."""
+    stream = ffmpeg.input(str(input_path))
+
+    # Apply start time and duration trimming
+    if config.start_time is not None or config.duration is not None:
+        kwargs = {}
+        if config.start_time is not None:
+            kwargs['ss'] = config.start_time
+        if config.duration is not None:
+            kwargs['t'] = config.duration
+        stream = ffmpeg.input(str(input_path), **kwargs)
+
+    # Apply video filters
+    if config.resolution or config.fps:
+        if config.resolution:
+            width, height = config.resolution.split('x')
+            stream = ffmpeg.filter(stream, 'scale', width, height)
+        if config.fps:
+            stream = ffmpeg.filter(stream, 'fps', fps=config.fps)
+
+    # Build output with encoding parameters
+    output_kwargs = {
+        'vcodec': config.vcodec,
+        'preset': config.preset,
+        'crf': config.crf,
+        'acodec': config.acodec,
+        'audio_bitrate': config.audio_bitrate,
+        'movflags': config.movflags,
+    }
+
+    return ffmpeg.output(stream, str(output_path), **output_kwargs)
+
+
+def transcode_video(
+    input_path: str | Path,
+    output_path: str | Path,
+    config: Optional[TranscodeConfig] = None,
+    progress_callback: Optional[Callable[[float], None]] = None,
+) -> Path:
+    """
+    Transcode video with specified configuration.
+
+    Args:
+        input_path (str | Path): Path to input video file
+        output_path (str | Path): Path to output video file
+        config (Optional[TranscodeConfig]): Transcoding configuration
+        progress_callback (Optional[Callable[[float], None]]): Progress callback function
+
+    Returns:
+        Path: Path to the transcoded video file
+
+    Raises:
+        UnsupportedFormatError: If input format is not supported
+        FFmpegNotFoundError: If FFmpeg is not available
+        TranscodingFailedError: If transcoding fails
+    """
+    _check_ffmpeg_available()
+
+    input_path = Path(input_path)
+    output_path = Path(output_path)
+
+    if not validate_video_format(input_path):
+        raise UnsupportedFormatError(f'Unsupported video format: {input_path.suffix}')
+
+    if config is None:
+        config = TranscodeConfig()
+
+    # Ensure output directory exists
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    try:
+        # Build FFmpeg command
+        stream = _build_ffmpeg_stream(input_path, output_path, config)
+
+        # Run FFmpeg
+        if progress_callback:
+            # Get video duration for progress calculation
+            video_info = get_video_info(input_path)
+            total_duration = video_info.get('duration', 0)
+
+            # Run with progress monitoring
+            process = ffmpeg.run_async(stream, pipe_stderr=True, overwrite_output=True)
+
+            while True:
+                output = process.stderr.readline()
+                if output == b'' and process.poll() is not None:
+                    break
+                if output:
+                    line = output.decode('utf-8')
+                    # Parse progress from FFmpeg output
+                    if 'time=' in line and total_duration > 0:
+                        try:
+                            time_str = line.split('time=')[1].split()[0]
+                            hours, minutes, seconds = time_str.split(':')
+                            current_time = int(hours) * 3600 + int(minutes) * 60 + float(seconds)
+                            progress = min(current_time / total_duration, 1.0)
+                            progress_callback(progress)
+                        except (ValueError, IndexError):
+                            pass
+
+            if process.returncode != 0:
+                raise TranscodingFailedError('FFmpeg process failed')
+        else:
+            # Run without progress monitoring
+            ffmpeg.run(stream, overwrite_output=True, quiet=True)
+
+        return output_path
+
+    except ffmpeg.Error as e:
+        error_message = e.stderr.decode('utf-8') if e.stderr else str(e)
+        raise TranscodingFailedError(f'Transcoding failed: {error_message}')
+    except Exception as e:
+        raise VideoTranscodeError(f'Unexpected error during transcoding: {str(e)}')
+
+
+def optimize_for_web(video_path: str | Path, output_path: str | Path) -> Path:
+    """
+    Quick optimization for web streaming with default settings.
+
+    Args:
+        video_path (str | Path): Path to input video file
+        output_path (str | Path): Path to output video file
+
+    Returns:
+        Path: Path to the optimized video file
+    """
+    config = TranscodeConfig(
+        preset='fast',  # Faster encoding for web optimization
+        crf=23,  # Better quality for web
+        movflags='+faststart+frag_keyframe+empty_moov',  # Advanced web optimization
+    )
+    return transcode_video(video_path, output_path, config)
+
+
+async def atranscode_video(
+    input_path: str | Path, output_path: str | Path, config: Optional[TranscodeConfig] = None
+) -> Path:
+    """
+    Async version of transcode_video.
+
+    Args:
+        input_path (str | Path): Path to input video file
+        output_path (str | Path): Path to output video file
+        config (Optional[TranscodeConfig]): Transcoding configuration
+
+    Returns:
+        Path: Path to the transcoded video file
+    """
+    loop = asyncio.get_event_loop()
+    return await loop.run_in_executor(None, transcode_video, input_path, output_path, config)
+
+
+def transcode_batch(
+    video_paths: list[Path], output_dir: Path, config: Optional[TranscodeConfig] = None, max_workers: int = 4
+) -> list[Path]:
+    """
+    Process multiple videos concurrently.
+
+    Args:
+        video_paths (list[Path]): List of input video file paths
+        output_dir (Path): Directory for output files
+        config (Optional[TranscodeConfig]): Transcoding configuration
+        max_workers (int): Maximum number of concurrent workers
+
+    Returns:
+        list[Path]: List of paths to transcoded video files
+    """
+    import concurrent.futures
+
+    output_dir = Path(output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    def process_video(video_path):
+        output_path = output_dir / f'{video_path.stem}_transcoded.mp4'
+        return transcode_video(video_path, output_path, config)
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+        results = list(executor.map(process_video, video_paths))
+
+    return results
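For orientation, here is a minimal usage sketch of the new transcoding helpers; the module path follows the `synapse_sdk/utils/file/video/transcode.py` entry in the file list, and the input/output paths and resolution value are hypothetical. It requires FFmpeg on PATH and the `ffmpeg-python` package.

```python
from synapse_sdk.utils.file.video.transcode import TranscodeConfig, get_video_info, transcode_video

# Probe a (hypothetical) input file for its basic metadata.
info = get_video_info('input.avi')
print(info.get('width'), info.get('height'), info.get('duration'))

# Re-encode to H.264/AAC at 720p, trimming the first 5 seconds,
# and print progress as a fraction between 0.0 and 1.0.
config = TranscodeConfig(resolution='1280x720', crf=23, start_time=5.0)
transcode_video(
    'input.avi',
    'output.mp4',
    config=config,
    progress_callback=lambda p: print(f'{p:.0%}'),
)
```

`optimize_for_web` and `transcode_batch` wrap the same `transcode_video` call with web-friendly defaults and a thread pool, respectively.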
synapse_sdk/utils/{file.py → file.py.backup}
@@ -7,6 +7,7 @@ import operator
 import zipfile
 from functools import reduce
 from pathlib import Path
+from typing import IO, Any, Callable
 
 import aiohttp
 import requests
@@ -16,6 +17,39 @@ from synapse_sdk.utils.network import clean_url
 from synapse_sdk.utils.string import hash_text
 
 
+def read_file_in_chunks(file_path, chunk_size=1024 * 1024 * 50):
+    """
+    Read a file in chunks for efficient memory usage during file processing.
+
+    This function is particularly useful for large files or when you need to process
+    files in chunks, such as for uploading or hashing.
+
+    Args:
+        file_path (str | Path): Path to the file to read
+        chunk_size (int, optional): Size of each chunk in bytes. Defaults to 50MB (1024 * 1024 * 50)
+
+    Yields:
+        bytes: File content chunks
+
+    Raises:
+        FileNotFoundError: If the file doesn't exist
+        PermissionError: If the file can't be read due to permissions
+        OSError: If there's an OS-level error reading the file
+
+    Example:
+        ```python
+        from synapse_sdk.utils.file import read_file_in_chunks
+
+        # Read a file in 10MB chunks
+        for chunk in read_file_in_chunks('large_file.bin', chunk_size=1024*1024*10):
+            process_chunk(chunk)
+        ```
+    """
+    with open(file_path, 'rb') as file:
+        while chunk := file.read(chunk_size):
+            yield chunk
+
+
 def download_file(url, path_download, name=None, coerce=None, use_cached=True):
     chunk_size = 1024 * 1024 * 50
     cleaned_url = clean_url(url)  # remove query params and fragment
@@ -150,6 +184,49 @@ def calculate_checksum(file_path, prefix=''):
     return checksum
 
 
+def get_checksum_from_file(file: IO[Any], digest_mod: Callable[[], Any] = hashlib.sha1) -> str:
+    """
+    Calculate checksum for a file-like object.
+
+    Args:
+        file (IO[Any]): File-like object with read() method that supports reading in chunks
+        digest_mod (Callable[[], Any]): Hash algorithm from hashlib (defaults to hashlib.sha1)
+
+    Returns:
+        str: Hexadecimal digest of the file contents
+
+    Example:
+        ```python
+        import hashlib
+        from io import BytesIO
+        from synapse_sdk.utils.file import get_checksum_from_file
+
+        # With BytesIO
+        data = BytesIO(b'Hello, world!')
+        checksum = get_checksum_from_file(data)
+
+        # With different hash algorithm
+        checksum = get_checksum_from_file(data, digest_mod=hashlib.sha256)
+        ```
+    """
+    digest = digest_mod()
+    chunk_size = 4096
+
+    # Reset file pointer to beginning if possible
+    if hasattr(file, 'seek'):
+        file.seek(0)
+
+    while True:
+        chunk = file.read(chunk_size)
+        if not chunk:
+            break
+        if isinstance(chunk, str):
+            chunk = chunk.encode('utf-8')
+        digest.update(chunk)
+
+    return digest.hexdigest()
+
+
 def archive(input_path, output_path, append=False):
     input_path = Path(input_path)
     output_path = Path(output_path)
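The docstrings above already carry small examples; the sketch below simply ties the two new helpers together. The archive name is hypothetical, and the `synapse_sdk.utils.file` import path is the one the docstrings themselves use (presumably re-exported by the new `synapse_sdk/utils/file/__init__.py` package).

```python
import hashlib

from synapse_sdk.utils.file import get_checksum_from_file, read_file_in_chunks

# Stream a (hypothetical) large archive in 10 MB chunks, e.g. to feed an uploader.
total_bytes = 0
for chunk in read_file_in_chunks('dataset.tar', chunk_size=1024 * 1024 * 10):
    total_bytes += len(chunk)

# Checksum an already-open file object, swapping the default SHA-1 for SHA-256.
with open('dataset.tar', 'rb') as fp:
    digest = get_checksum_from_file(fp, digest_mod=hashlib.sha256)

print(total_bytes, digest)
```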
synapse_sdk/utils/network.py
@@ -1,5 +1,277 @@
+import asyncio
+import queue as queue_module
+import re
+import ssl
+from concurrent.futures import ThreadPoolExecutor
+from dataclasses import dataclass
+from typing import Any, Dict, Generator, Optional
 from urllib.parse import urlparse, urlunparse
 
+import requests
+
+from synapse_sdk.clients.exceptions import ClientError
+
+
+@dataclass
+class StreamLimits:
+    """Configuration for streaming limits."""
+
+    max_messages: int = 10000
+    max_lines: int = 50000
+    max_bytes: int = 50 * 1024 * 1024  # 50MB
+    max_message_size: int = 10240  # 10KB
+    queue_size: int = 1000
+    exception_queue_size: int = 10
+
+
+def validate_resource_id(resource_id: Any, resource_name: str = 'resource') -> str:
+    """Validate resource ID to prevent injection attacks."""
+    if not resource_id:
+        raise ClientError(400, f'{resource_name} ID cannot be empty')
+
+    # Allow numeric IDs and UUID formats
+    id_str = str(resource_id)
+    if not re.match(r'^[a-zA-Z0-9\-_]+$', id_str):
+        raise ClientError(400, f'Invalid {resource_name} ID format')
+
+    if len(id_str) > 100:
+        raise ClientError(400, f'{resource_name} ID too long')
+
+    return id_str
+
+
+def validate_timeout(timeout: Any, max_timeout: int = 300) -> float:
+    """Validate timeout value with bounds checking."""
+    if not isinstance(timeout, (int, float)) or timeout <= 0:
+        raise ClientError(400, 'Timeout must be a positive number')
+
+    if timeout > max_timeout:
+        raise ClientError(400, f'Timeout cannot exceed {max_timeout} seconds')
+
+    return float(timeout)
+
+
+def sanitize_error_message(error_msg: str, context: str = '') -> str:
+    """Sanitize error messages to prevent information disclosure."""
+    sanitized = str(error_msg)[:100]
+    # Remove any potential sensitive information
+    sanitized = re.sub(r'["\']([^"\']*)["\']', '"[REDACTED]"', sanitized)
+
+    if context:
+        return f'{context}: {sanitized}'
+    return sanitized
+
+
+def http_to_websocket_url(url: str) -> str:
+    """Convert HTTP/HTTPS URL to WebSocket URL safely."""
+    try:
+        parsed = urlparse(url)
+        if parsed.scheme == 'http':
+            ws_scheme = 'ws'
+        elif parsed.scheme == 'https':
+            ws_scheme = 'wss'
+        else:
+            raise ClientError(400, f'Invalid URL scheme: {parsed.scheme}')
+
+        ws_url = urlunparse((ws_scheme, parsed.netloc, parsed.path, parsed.params, parsed.query, parsed.fragment))
+        return ws_url
+    except Exception as e:
+        raise ClientError(400, f'Invalid URL format: {str(e)[:50]}')
+
+
+def check_library_available(library_name: str) -> bool:
+    """Check if optional library is available."""
+    try:
+        __import__(library_name)
+        return True
+    except ImportError:
+        return False
+
+
+class WebSocketStreamManager:
+    """Manages secure WebSocket streaming with rate limiting and error handling."""
+
+    def __init__(self, thread_pool: ThreadPoolExecutor, limits: Optional[StreamLimits] = None):
+        self.thread_pool = thread_pool
+        self.limits = limits or StreamLimits()
+
+    def stream_logs(
+        self, ws_url: str, headers: Dict[str, str], timeout: float, context: str
+    ) -> Generator[str, None, None]:
+        """Stream logs from WebSocket with proper error handling and cleanup."""
+        if not check_library_available('websockets'):
+            raise ClientError(500, 'websockets library not available for WebSocket connections')
+
+        try:
+            import websockets
+
+            # Use bounded queues to prevent memory exhaustion
+            message_queue = queue_module.Queue(maxsize=self.limits.queue_size)
+            exception_queue = queue_module.Queue(maxsize=self.limits.exception_queue_size)
+
+            async def websocket_client():
+                try:
+                    # Add SSL verification and proper timeouts
+                    connect_kwargs = {
+                        'extra_headers': headers,
+                        'close_timeout': timeout,
+                        'ping_timeout': timeout,
+                        'ping_interval': timeout // 2,
+                    }
+
+                    # For secure connections, add SSL context
+                    if ws_url.startswith('wss://'):
+                        ssl_context = ssl.create_default_context()
+                        ssl_context.check_hostname = True
+                        ssl_context.verify_mode = ssl.CERT_REQUIRED
+                        connect_kwargs['ssl'] = ssl_context
+
+                    async with websockets.connect(ws_url, **connect_kwargs) as websocket:
+                        message_count = 0
+
+                        async for message in websocket:
+                            message_count += 1
+                            if message_count > self.limits.max_messages:
+                                exception_queue.put_nowait(ClientError(429, f'Message limit exceeded for {context}'))
+                                break
+
+                            # Validate message size
+                            if len(str(message)) > self.limits.max_message_size:
+                                continue
+
+                            try:
+                                message_queue.put_nowait(f'{message}\n')
+                            except queue_module.Full:
+                                exception_queue.put_nowait(ClientError(429, f'Message queue full for {context}'))
+                                break
+
+                    message_queue.put_nowait(None)  # Signal end
+
+                except websockets.exceptions.ConnectionClosed:
+                    exception_queue.put_nowait(ClientError(503, f'WebSocket connection closed for {context}'))
+                except asyncio.TimeoutError:
+                    exception_queue.put_nowait(ClientError(408, f'WebSocket timed out for {context}'))
+                except Exception as e:
+                    sanitized_error = sanitize_error_message(str(e), context)
+                    exception_queue.put_nowait(ClientError(500, sanitized_error))
+
+            # Use thread pool instead of raw threading
+            future = self.thread_pool.submit(lambda: asyncio.run(websocket_client()))
+
+            # Yield messages with proper cleanup
+            try:
+                while True:
+                    # Check for exceptions first
+                    try:
+                        exception = exception_queue.get_nowait()
+                        raise exception
+                    except queue_module.Empty:
+                        pass
+
+                    # Get message with timeout
+                    try:
+                        message = message_queue.get(timeout=1.0)
+                        if message is None:  # End signal
+                            break
+                        yield message
+                    except queue_module.Empty:
+                        # Check if future is done
+                        if future.done():
+                            try:
+                                future.result()  # This will raise any exception
+                                break  # Normal completion
+                            except Exception:
+                                break  # Error already in queue
+                        continue
+
+            finally:
+                # Cleanup: cancel future if still running
+                if not future.done():
+                    future.cancel()
+
+        except ImportError:
+            raise ClientError(500, 'websockets library not available for WebSocket connections')
+        except Exception as e:
+            if isinstance(e, ClientError):
+                raise
+            sanitized_error = sanitize_error_message(str(e), context)
+            raise ClientError(500, sanitized_error)
+
+
+class HTTPStreamManager:
+    """Manages HTTP streaming with rate limiting and proper resource cleanup."""
+
+    def __init__(self, requests_session: requests.Session, limits: Optional[StreamLimits] = None):
+        self.requests_session = requests_session
+        self.limits = limits or StreamLimits()
+
+    def stream_logs(
+        self, url: str, headers: Dict[str, str], timeout: tuple, context: str
+    ) -> Generator[str, None, None]:
+        """Stream logs from HTTP endpoint with proper error handling and cleanup."""
+        response = None
+        try:
+            # Use timeout for streaming to prevent hanging
+            response = self.requests_session.get(url, headers=headers, stream=True, timeout=timeout)
+            response.raise_for_status()
+
+            # Set up streaming with timeout and size limits
+            line_count = 0
+            total_bytes = 0
+
+            try:
+                for line in response.iter_lines(decode_unicode=True, chunk_size=1024):
+                    if line:
+                        line_count += 1
+                        total_bytes += len(line.encode('utf-8'))
+
+                        # Rate limiting checks
+                        if line_count > self.limits.max_lines:
+                            raise ClientError(429, f'Line limit exceeded for {context}')
+
+                        if total_bytes > self.limits.max_bytes:
+                            raise ClientError(429, f'Size limit exceeded for {context}')
+
+                        # Validate line size
+                        if len(line) > self.limits.max_message_size:
+                            continue
+
+                        yield f'{line}\n'
+
+            except requests.exceptions.ChunkedEncodingError:
+                raise ClientError(503, f'Log stream interrupted for {context}')
+            except requests.exceptions.ReadTimeout:
+                raise ClientError(408, f'Log stream timed out for {context}')
+
+        except requests.exceptions.ConnectTimeout:
+            raise ClientError(408, f'Failed to connect to log stream for {context}')
+        except requests.exceptions.ReadTimeout:
+            raise ClientError(408, f'Log stream read timeout for {context}')
+        except requests.exceptions.ConnectionError as e:
+            if 'Connection refused' in str(e):
+                raise ClientError(503, f'Agent connection refused for {context}')
+            else:
+                sanitized_error = sanitize_error_message(str(e), context)
+                raise ClientError(503, f'Agent connection error: {sanitized_error}')
+        except requests.exceptions.HTTPError as e:
+            if hasattr(e.response, 'status_code'):
+                status_code = e.response.status_code
+            else:
+                status_code = 500
+            raise ClientError(status_code, f'HTTP error streaming logs for {context}')
+        except Exception as e:
+            if isinstance(e, ClientError):
+                raise
+            sanitized_error = sanitize_error_message(str(e), context)
+            raise ClientError(500, sanitized_error)
+        finally:
+            # Ensure response is properly closed
+            if response is not None:
+                try:
+                    response.close()
+                except Exception:
+                    pass  # Ignore cleanup errors
+
 
 def clean_url(url, remove_query_params=True, remove_fragment=True):
     parsed = urlparse(url)
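A minimal sketch of how the new HTTP streaming pieces fit together, using `HTTPStreamManager`, `StreamLimits`, and `validate_resource_id` from the hunk above; the agent URL, token, and job id are hypothetical.

```python
import requests

from synapse_sdk.utils.network import HTTPStreamManager, StreamLimits, validate_resource_id

session = requests.Session()
manager = HTTPStreamManager(session, limits=StreamLimits(max_lines=1000))

# Hypothetical log endpoint and credentials.
job_id = validate_resource_id('job-42', resource_name='job')
url = f'https://agent.example.com/jobs/{job_id}/logs'
headers = {'Authorization': 'Token <redacted>'}

# stream_logs yields newline-terminated log lines and enforces the
# line/byte/message-size limits configured in StreamLimits.
for line in manager.stream_logs(url, headers, timeout=(5, 30), context=f'job {job_id}'):
    print(line, end='')
```

`WebSocketStreamManager.stream_logs` follows the same pattern over a `ws://` or `wss://` URL (see `http_to_websocket_url`), but requires the optional `websockets` dependency and a `ThreadPoolExecutor`.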
synapse_sdk/utils/storage/__init__.py
@@ -20,7 +20,11 @@ def get_storage(connection_param: str | dict):
     else:
         storage_scheme = urlparse(connection_param).scheme
 
-    assert storage_scheme in STORAGE_PROVIDERS.keys(), _('Storage provider not supported.')
+    assert storage_scheme in STORAGE_PROVIDERS.keys(), _(
+        f'Storage provider not supported. Got scheme: {storage_scheme}. '
+        f'Valid schemes: {", ".join(STORAGE_PROVIDERS.keys())}. '
+        f'Full connection_param: {connection_param}'
+    )
     return STORAGE_PROVIDERS[storage_scheme](connection_param)
 
 
@@ -29,7 +33,7 @@ def get_pathlib(storage_config: str | dict, path_root: str) -> Path:
 
     Args:
         storage_config (str | dict): The storage config by synapse-backend storage api.
-        path_root (str): The path root.
+        path_root (str): The path root.
 
     Returns:
         pathlib.Path: The pathlib object.
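The reworked assertion above replaces a bare 'Storage provider not supported.' with a message that names the offending scheme, the registered schemes, and the full connection parameter. A hedged sketch of what that looks like to a caller, assuming 'bogus' is not a scheme registered in STORAGE_PROVIDERS:

```python
from synapse_sdk.utils.storage import get_storage

try:
    get_storage('bogus://bucket/exports')  # hypothetical, unsupported scheme
except AssertionError as error:
    # With the new message, the error reports the scheme that failed,
    # the valid schemes, and the connection_param that was passed in.
    print(error)
```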