synapse-sdk 1.0.0a23__py3-none-any.whl → 2025.12.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synapse_sdk/__init__.py +24 -0
- synapse_sdk/cli/__init__.py +310 -5
- synapse_sdk/cli/alias/__init__.py +22 -0
- synapse_sdk/cli/alias/create.py +36 -0
- synapse_sdk/cli/alias/dataclass.py +31 -0
- synapse_sdk/cli/alias/default.py +16 -0
- synapse_sdk/cli/alias/delete.py +15 -0
- synapse_sdk/cli/alias/list.py +19 -0
- synapse_sdk/cli/alias/read.py +15 -0
- synapse_sdk/cli/alias/update.py +17 -0
- synapse_sdk/cli/alias/utils.py +61 -0
- synapse_sdk/cli/code_server.py +687 -0
- synapse_sdk/cli/config.py +440 -0
- synapse_sdk/cli/devtools.py +90 -0
- synapse_sdk/cli/plugin/__init__.py +33 -0
- synapse_sdk/cli/{create_plugin.py → plugin/create.py} +2 -2
- synapse_sdk/{plugins/cli → cli/plugin}/publish.py +23 -15
- synapse_sdk/clients/agent/__init__.py +9 -3
- synapse_sdk/clients/agent/container.py +143 -0
- synapse_sdk/clients/agent/core.py +19 -0
- synapse_sdk/clients/agent/ray.py +298 -9
- synapse_sdk/clients/backend/__init__.py +30 -12
- synapse_sdk/clients/backend/annotation.py +13 -5
- synapse_sdk/clients/backend/core.py +31 -4
- synapse_sdk/clients/backend/data_collection.py +186 -0
- synapse_sdk/clients/backend/hitl.py +17 -0
- synapse_sdk/clients/backend/integration.py +16 -1
- synapse_sdk/clients/backend/ml.py +5 -1
- synapse_sdk/clients/backend/models.py +78 -0
- synapse_sdk/clients/base.py +384 -41
- synapse_sdk/clients/ray/serve.py +2 -0
- synapse_sdk/clients/validators/collections.py +31 -0
- synapse_sdk/devtools/config.py +94 -0
- synapse_sdk/devtools/server.py +41 -0
- synapse_sdk/devtools/streamlit_app/__init__.py +5 -0
- synapse_sdk/devtools/streamlit_app/app.py +128 -0
- synapse_sdk/devtools/streamlit_app/services/__init__.py +11 -0
- synapse_sdk/devtools/streamlit_app/services/job_service.py +233 -0
- synapse_sdk/devtools/streamlit_app/services/plugin_service.py +236 -0
- synapse_sdk/devtools/streamlit_app/services/serve_service.py +95 -0
- synapse_sdk/devtools/streamlit_app/ui/__init__.py +15 -0
- synapse_sdk/devtools/streamlit_app/ui/config_tab.py +76 -0
- synapse_sdk/devtools/streamlit_app/ui/deployment_tab.py +66 -0
- synapse_sdk/devtools/streamlit_app/ui/http_tab.py +125 -0
- synapse_sdk/devtools/streamlit_app/ui/jobs_tab.py +573 -0
- synapse_sdk/devtools/streamlit_app/ui/serve_tab.py +346 -0
- synapse_sdk/devtools/streamlit_app/ui/status_bar.py +118 -0
- synapse_sdk/devtools/streamlit_app/utils/__init__.py +40 -0
- synapse_sdk/devtools/streamlit_app/utils/json_viewer.py +197 -0
- synapse_sdk/devtools/streamlit_app/utils/log_formatter.py +38 -0
- synapse_sdk/devtools/streamlit_app/utils/styles.py +241 -0
- synapse_sdk/devtools/streamlit_app/utils/ui_components.py +289 -0
- synapse_sdk/devtools/streamlit_app.py +10 -0
- synapse_sdk/loggers.py +120 -9
- synapse_sdk/plugins/README.md +1340 -0
- synapse_sdk/plugins/__init__.py +0 -13
- synapse_sdk/plugins/categories/base.py +117 -11
- synapse_sdk/plugins/categories/data_validation/actions/validation.py +72 -0
- synapse_sdk/plugins/categories/data_validation/templates/plugin/validation.py +33 -5
- synapse_sdk/plugins/categories/export/actions/__init__.py +3 -0
- synapse_sdk/plugins/categories/export/actions/export/__init__.py +28 -0
- synapse_sdk/plugins/categories/export/actions/export/action.py +165 -0
- synapse_sdk/plugins/categories/export/actions/export/enums.py +113 -0
- synapse_sdk/plugins/categories/export/actions/export/exceptions.py +53 -0
- synapse_sdk/plugins/categories/export/actions/export/models.py +74 -0
- synapse_sdk/plugins/categories/export/actions/export/run.py +195 -0
- synapse_sdk/plugins/categories/export/actions/export/utils.py +187 -0
- synapse_sdk/plugins/categories/export/templates/config.yaml +21 -0
- synapse_sdk/plugins/categories/export/templates/plugin/__init__.py +390 -0
- synapse_sdk/plugins/categories/export/templates/plugin/export.py +160 -0
- synapse_sdk/plugins/categories/neural_net/actions/deployment.py +13 -12
- synapse_sdk/plugins/categories/neural_net/actions/train.py +1134 -31
- synapse_sdk/plugins/categories/neural_net/actions/tune.py +534 -0
- synapse_sdk/plugins/categories/neural_net/base/inference.py +1 -1
- synapse_sdk/plugins/categories/neural_net/templates/config.yaml +32 -4
- synapse_sdk/plugins/categories/neural_net/templates/plugin/inference.py +26 -10
- synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +4 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/__init__.py +3 -0
- synapse_sdk/plugins/categories/{export/actions/export.py → pre_annotation/actions/pre_annotation/action.py} +4 -4
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/__init__.py +28 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/action.py +148 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/enums.py +269 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/exceptions.py +14 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/factory.py +76 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/models.py +100 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/orchestrator.py +248 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/run.py +64 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/__init__.py +17 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/annotation.py +265 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/base.py +170 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/extraction.py +83 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/metrics.py +92 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py +243 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py +143 -0
- synapse_sdk/plugins/categories/pre_annotation/templates/config.yaml +19 -0
- synapse_sdk/plugins/categories/pre_annotation/templates/plugin/to_task.py +40 -0
- synapse_sdk/plugins/categories/smart_tool/templates/config.yaml +2 -0
- synapse_sdk/plugins/categories/upload/__init__.py +0 -0
- synapse_sdk/plugins/categories/upload/actions/__init__.py +0 -0
- synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +19 -0
- synapse_sdk/plugins/categories/upload/actions/upload/action.py +236 -0
- synapse_sdk/plugins/categories/upload/actions/upload/context.py +185 -0
- synapse_sdk/plugins/categories/upload/actions/upload/enums.py +493 -0
- synapse_sdk/plugins/categories/upload/actions/upload/exceptions.py +36 -0
- synapse_sdk/plugins/categories/upload/actions/upload/factory.py +138 -0
- synapse_sdk/plugins/categories/upload/actions/upload/models.py +214 -0
- synapse_sdk/plugins/categories/upload/actions/upload/orchestrator.py +183 -0
- synapse_sdk/plugins/categories/upload/actions/upload/registry.py +113 -0
- synapse_sdk/plugins/categories/upload/actions/upload/run.py +179 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/base.py +107 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +62 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/collection.py +63 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py +91 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/initialize.py +82 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +235 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +201 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py +104 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/validate.py +71 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/base.py +82 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/batch.py +39 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/single.py +29 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/flat.py +300 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py +287 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/excel.py +174 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/none.py +16 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/sync.py +84 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py +60 -0
- synapse_sdk/plugins/categories/upload/actions/upload/utils.py +250 -0
- synapse_sdk/plugins/categories/upload/templates/README.md +470 -0
- synapse_sdk/plugins/categories/upload/templates/config.yaml +33 -0
- synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py +310 -0
- synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +102 -0
- synapse_sdk/plugins/enums.py +3 -1
- synapse_sdk/plugins/models.py +148 -11
- synapse_sdk/plugins/templates/plugin-config-schema.json +406 -0
- synapse_sdk/plugins/templates/schema.json +491 -0
- synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/config.yaml +1 -0
- synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/requirements.txt +1 -1
- synapse_sdk/plugins/utils/__init__.py +46 -0
- synapse_sdk/plugins/utils/actions.py +119 -0
- synapse_sdk/plugins/utils/config.py +203 -0
- synapse_sdk/plugins/{utils.py → utils/legacy.py} +26 -46
- synapse_sdk/plugins/utils/ray_gcs.py +66 -0
- synapse_sdk/plugins/utils/registry.py +58 -0
- synapse_sdk/shared/__init__.py +25 -0
- synapse_sdk/shared/enums.py +93 -0
- synapse_sdk/types.py +19 -0
- synapse_sdk/utils/converters/__init__.py +240 -0
- synapse_sdk/utils/converters/coco/__init__.py +0 -0
- synapse_sdk/utils/converters/coco/from_dm.py +322 -0
- synapse_sdk/utils/converters/coco/to_dm.py +215 -0
- synapse_sdk/utils/converters/dm/__init__.py +57 -0
- synapse_sdk/utils/converters/dm/base.py +137 -0
- synapse_sdk/utils/converters/dm/from_v1.py +273 -0
- synapse_sdk/utils/converters/dm/to_v1.py +321 -0
- synapse_sdk/utils/converters/dm/tools/__init__.py +214 -0
- synapse_sdk/utils/converters/dm/tools/answer.py +95 -0
- synapse_sdk/utils/converters/dm/tools/bounding_box.py +132 -0
- synapse_sdk/utils/converters/dm/tools/bounding_box_3d.py +121 -0
- synapse_sdk/utils/converters/dm/tools/classification.py +75 -0
- synapse_sdk/utils/converters/dm/tools/keypoint.py +117 -0
- synapse_sdk/utils/converters/dm/tools/named_entity.py +111 -0
- synapse_sdk/utils/converters/dm/tools/polygon.py +122 -0
- synapse_sdk/utils/converters/dm/tools/polyline.py +124 -0
- synapse_sdk/utils/converters/dm/tools/prompt.py +94 -0
- synapse_sdk/utils/converters/dm/tools/relation.py +86 -0
- synapse_sdk/utils/converters/dm/tools/segmentation.py +141 -0
- synapse_sdk/utils/converters/dm/tools/segmentation_3d.py +83 -0
- synapse_sdk/utils/converters/dm/types.py +168 -0
- synapse_sdk/utils/converters/dm/utils.py +162 -0
- synapse_sdk/utils/converters/dm_legacy/__init__.py +56 -0
- synapse_sdk/utils/converters/dm_legacy/from_v1.py +627 -0
- synapse_sdk/utils/converters/dm_legacy/to_v1.py +367 -0
- synapse_sdk/utils/converters/pascal/__init__.py +0 -0
- synapse_sdk/utils/converters/pascal/from_dm.py +244 -0
- synapse_sdk/utils/converters/pascal/to_dm.py +214 -0
- synapse_sdk/utils/converters/yolo/__init__.py +0 -0
- synapse_sdk/utils/converters/yolo/from_dm.py +384 -0
- synapse_sdk/utils/converters/yolo/to_dm.py +267 -0
- synapse_sdk/utils/dataset.py +46 -0
- synapse_sdk/utils/encryption.py +158 -0
- synapse_sdk/utils/file/__init__.py +58 -0
- synapse_sdk/utils/file/archive.py +32 -0
- synapse_sdk/utils/file/checksum.py +56 -0
- synapse_sdk/utils/file/chunking.py +31 -0
- synapse_sdk/utils/file/download.py +385 -0
- synapse_sdk/utils/file/encoding.py +40 -0
- synapse_sdk/utils/file/io.py +22 -0
- synapse_sdk/utils/file/upload.py +165 -0
- synapse_sdk/utils/file/video/__init__.py +29 -0
- synapse_sdk/utils/file/video/transcode.py +307 -0
- synapse_sdk/utils/file.py.backup +301 -0
- synapse_sdk/utils/http.py +138 -0
- synapse_sdk/utils/network.py +309 -0
- synapse_sdk/utils/storage/__init__.py +72 -0
- synapse_sdk/utils/storage/providers/__init__.py +183 -0
- synapse_sdk/utils/storage/providers/file_system.py +134 -0
- synapse_sdk/utils/storage/providers/gcp.py +13 -0
- synapse_sdk/utils/storage/providers/http.py +190 -0
- synapse_sdk/utils/storage/providers/s3.py +91 -0
- synapse_sdk/utils/storage/providers/sftp.py +47 -0
- synapse_sdk/utils/storage/registry.py +17 -0
- synapse_sdk-2025.12.3.dist-info/METADATA +123 -0
- synapse_sdk-2025.12.3.dist-info/RECORD +279 -0
- {synapse_sdk-1.0.0a23.dist-info → synapse_sdk-2025.12.3.dist-info}/WHEEL +1 -1
- synapse_sdk/clients/backend/dataset.py +0 -51
- synapse_sdk/plugins/categories/import/actions/import.py +0 -10
- synapse_sdk/plugins/cli/__init__.py +0 -21
- synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/.env +0 -24
- synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/.env.dist +0 -24
- synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/main.py +0 -4
- synapse_sdk/utils/file.py +0 -168
- synapse_sdk/utils/storage.py +0 -91
- synapse_sdk-1.0.0a23.dist-info/METADATA +0 -44
- synapse_sdk-1.0.0a23.dist-info/RECORD +0 -114
- /synapse_sdk/{plugins/cli → cli/plugin}/run.py +0 -0
- /synapse_sdk/{plugins/categories/import → clients/validators}/__init__.py +0 -0
- /synapse_sdk/{plugins/categories/import/actions → devtools}/__init__.py +0 -0
- {synapse_sdk-1.0.0a23.dist-info → synapse_sdk-2025.12.3.dist-info}/entry_points.txt +0 -0
- {synapse_sdk-1.0.0a23.dist-info → synapse_sdk-2025.12.3.dist-info/licenses}/LICENSE +0 -0
- {synapse_sdk-1.0.0a23.dist-info → synapse_sdk-2025.12.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import operator
|
|
3
|
+
from functools import reduce
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import aiohttp
|
|
7
|
+
import requests
|
|
8
|
+
|
|
9
|
+
from synapse_sdk.utils.network import clean_url
|
|
10
|
+
from synapse_sdk.utils.string import hash_text
|
|
11
|
+
|
|
12
|
+
from .io import get_temp_path
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def download_file(url, path_download, name=None, coerce=None, use_cached=True):
    """Download a file from a URL to a specified directory.

    Downloads are streamed in 50MB chunks for memory efficiency. The payload
    is first written to a sibling ``.part`` file and only moved into place
    once the download completes, so an interrupted download can never leave a
    corrupt file that a later ``use_cached=True`` call would silently reuse.

    Args:
        url (str): The URL to download from. Query parameters and fragments
            are cleaned before generating the cached filename.
        path_download (str | Path): Directory path where the file will be saved.
        name (str, optional): Custom filename for the downloaded file (without
            extension). If provided, caching is disabled. If None, a hash of
            the cleaned URL is used as the name.
        coerce (callable, optional): A function to transform the downloaded
            file path, called with the Path object after download completes.
            Example: ``coerce=str`` to convert the Path to a string.
        use_cached (bool): If True (default), skip download if the file already
            exists. Automatically set to False when a custom name is provided.

    Returns:
        Path | Any: Path object pointing to the downloaded file, or the result
        of ``coerce(path)`` if a coerce function was provided.

    Raises:
        requests.HTTPError: If the HTTP request fails (e.g., 404, 500 errors).
        IOError: If the file write fails due to permissions or disk space.

    Examples:
        Basic download with caching:
        >>> path = download_file('https://example.com/image.jpg', '/tmp/downloads')
        >>> print(path)  # /tmp/downloads/abc123def456.jpg (hash-based name)

        Custom filename without caching:
        >>> path = download_file('https://example.com/data.json', '/tmp/downloads', name='my_data')
        >>> print(path)  # /tmp/downloads/my_data.json

    Note:
        - URL is cleaned (query params removed) before generating the cached filename
        - File extension is preserved from the cleaned URL
        - Existing files are reused when use_cached=True
    """
    chunk_size = 1024 * 1024 * 50
    cleaned_url = clean_url(url)  # remove query params and fragment

    if name:
        use_cached = False
    else:
        name = hash_text(cleaned_url)

    name += Path(cleaned_url).suffix

    path = Path(path_download) / name

    if not use_cached or not path.is_file():
        partial_path = path.with_name(path.name + '.part')
        # Context manager guarantees the streamed connection is released even
        # if an error occurs mid-download.
        with requests.get(url, allow_redirects=True, stream=True) as response:
            response.raise_for_status()

            with partial_path.open('wb') as file:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    file.write(chunk)

        # Move into place only after a complete download, so a failed or
        # interrupted transfer never poisons the cache at the final path.
        partial_path.replace(path)

    if coerce:
        path = coerce(path)

    return path
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def files_url_to_path(files, coerce=None, file_field=None):
    """Download file URLs in *files* and replace them with local paths, in place.

    Two value shapes are supported per entry:
    - a plain string: treated as a URL and replaced by the downloaded path;
    - a dict with a ``'url'`` key: ``'url'`` is popped and a ``'path'`` key
      holding the downloaded path is added.

    Args:
        files (dict): Dictionary containing file URLs or file objects.
        coerce (callable, optional): Applied to each downloaded path
            (e.g. ``str`` to get plain string paths).
        file_field (str, optional): If given, only this key of *files* is
            processed (its value must be a URL string); otherwise every
            entry is processed.

    Returns:
        None: *files* is modified in place.

    Note:
        - Files are saved under the temporary media directory
          (``get_temp_path('media')``), which is created if missing.
        - Caching is in effect via download_file's defaults.
    """
    path_download = get_temp_path('media')
    path_download.mkdir(parents=True, exist_ok=True)

    if file_field:
        files[file_field] = download_file(files[file_field], path_download, coerce=coerce)
        return

    for key, value in files.items():
        if isinstance(value, str):
            files[key] = download_file(value, path_download, coerce=coerce)
        else:
            value['path'] = download_file(value.pop('url'), path_download, coerce=coerce)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def files_url_to_path_from_objs(objs, files_fields, coerce=None, is_list=False, is_async=False):
    """Download file URLs found in one or more objects, replacing them in place.

    For each object and each entry of *files_fields* (dot notation supported,
    e.g. ``'data.files'``), the referenced value is resolved and handed to
    :func:`files_url_to_path`. Missing fields are skipped silently.

    Args:
        objs (dict | list): A single object, or a list of objects when
            ``is_list=True``.
        files_fields (list[str]): Field paths to process; supports dot
            notation for nested access.
        coerce (callable, optional): Applied to each downloaded path.
        is_list (bool): Treat *objs* as a list when True. Default False.
        is_async (bool): When True, delegates to the concurrent
            afiles_url_to_path_from_objs via ``asyncio.run``. Default False.

    Returns:
        None: Objects are modified in place, URLs replaced with local paths.

    Note:
        - KeyError from a missing field (or from the per-field processing)
          is caught and ignored.
        - NOTE(review): when the resolved value is a plain string, the field
          is re-looked-up as ``obj[files_field]`` inside files_url_to_path;
          for a dotted path this uses the full dotted string as a literal
          top-level key — confirm that is the intended behavior.
    """
    if is_async:
        asyncio.run(afiles_url_to_path_from_objs(objs, files_fields, coerce=coerce, is_list=is_list))
        return

    targets = objs if is_list else [objs]

    for target in targets:
        for field_path in files_fields:
            try:
                resolved = reduce(operator.getitem, field_path.split('.'), target)
                if isinstance(resolved, str):
                    files_url_to_path(target, coerce=coerce, file_field=field_path)
                else:
                    files_url_to_path(resolved, coerce=coerce)
            except KeyError:
                # Field absent on this object: skip silently.
                continue
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
async def adownload_file(url, path_download, name=None, coerce=None, use_cached=True):
    """Asynchronously download a file from a URL to a specified directory.

    Async version of download_file() using aiohttp for concurrent downloads.
    Parameters and behavior mirror download_file(): HTTP error statuses raise,
    and the payload is streamed to a ``.part`` file that is moved into place
    only after the download completes.

    Args:
        url (str): The URL to download from.
        path_download (str | Path): Directory path where the file will be saved.
        name (str, optional): Custom filename (without extension); disables caching.
        coerce (callable, optional): Function to transform the downloaded file path.
        use_cached (bool): If True (default), skip download if file exists.

    Returns:
        Path | Any: Path to downloaded file, or coerce(path) if provided.

    Raises:
        aiohttp.ClientResponseError: If the server responds with an error status.

    Examples:
        Multiple concurrent downloads:
        >>> urls = ['https://ex.com/1.jpg', 'https://ex.com/2.jpg']
        >>> paths = await asyncio.gather(*[adownload_file(url, '/tmp') for url in urls])

    Note:
        - Downloads in 50MB chunks for memory efficiency
        - Recommended for downloading multiple files concurrently
    """
    chunk_size = 1024 * 1024 * 50
    cleaned_url = clean_url(url)  # remove query params and fragment

    if name:
        use_cached = False
    else:
        name = hash_text(cleaned_url)

    name += Path(cleaned_url).suffix

    path = Path(path_download) / name

    if not use_cached or not path.is_file():
        partial_path = path.with_name(path.name + '.part')
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as response:
                # The sync version raises on HTTP errors; mirror that here so
                # server error pages are never written to disk as file content.
                response.raise_for_status()

                with partial_path.open('wb') as file:
                    while chunk := await response.content.read(chunk_size):
                        file.write(chunk)

        # Publish the file only after a complete download so an interrupted
        # transfer cannot leave a corrupt cached file at the final path.
        partial_path.replace(path)

    if coerce:
        path = coerce(path)

    return path
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
async def afiles_url_to_path(files, coerce=None):
    """Asynchronously convert file URLs to local paths by downloading them.

    Async version of files_url_to_path(). All entries of *files* are
    downloaded concurrently via ``asyncio.gather``.

    Args:
        files (dict): Dictionary containing file URLs (string values) or file
            objects (dicts with a ``'url'`` key, replaced by ``'path'``).
        coerce (callable, optional): Function to transform downloaded paths.

    Returns:
        None: Modifies the files dictionary in-place.

    Examples:
        Download multiple files concurrently:
        >>> files = {'image1': 'https://ex.com/1.jpg', 'image2': 'https://ex.com/2.jpg'}
        >>> await afiles_url_to_path(files)

    Note:
        - Downloads to the temporary media directory: get_temp_path('media')
        - Does not support the file_field parameter (processes all fields)
    """
    path_download = get_temp_path('media')
    path_download.mkdir(parents=True, exist_ok=True)

    async def _download_entry(file_name):
        # Each entry is either a bare URL string or a dict with a 'url' key.
        entry = files[file_name]
        if isinstance(entry, str):
            files[file_name] = await adownload_file(entry, path_download, coerce=coerce)
        else:
            entry['path'] = await adownload_file(entry.pop('url'), path_download, coerce=coerce)

    # Previously each entry was awaited sequentially, so nothing actually ran
    # in parallel; gather makes the downloads genuinely concurrent, matching
    # the documented behavior. Snapshot the keys since values are reassigned.
    await asyncio.gather(*(_download_entry(file_name) for file_name in list(files)))
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
async def afiles_url_to_path_from_objs(objs, files_fields, coerce=None, is_list=False):
    """Asynchronously download file URLs found in one or more objects.

    Async version of files_url_to_path_from_objs(): resolves each field path
    (dot notation supported) on each object and schedules its files for
    download, then awaits everything at once with ``asyncio.gather``.

    Args:
        objs (dict | list): Single object, or a list of objects when
            ``is_list=True``.
        files_fields (list[str]): Field paths to process (dot notation
            supported, e.g. ``'data.files'``).
        coerce (callable, optional): Function to transform downloaded paths.
        is_list (bool): Treat *objs* as a list when True. Default False.

    Returns:
        None: Objects are modified in place, URLs replaced with local paths.

    Examples:
        >>> objs = [{'files': {'img': 'https://ex.com/1.jpg'}},
        ...         {'files': {'img': 'https://ex.com/2.jpg'}}]
        >>> await afiles_url_to_path_from_objs(objs, ['files'], is_list=True)

    Note:
        - Missing fields are skipped silently (KeyError is swallowed).
        - All downloads across all objects run concurrently.
    """
    targets = objs if is_list else [objs]

    pending = []
    for target in targets:
        for field_path in files_fields:
            try:
                resolved = reduce(operator.getitem, field_path.split('.'), target)
            except KeyError:
                # Field absent on this object: skip silently.
                continue
            pending.append(afiles_url_to_path(resolved, coerce=coerce))

    await asyncio.gather(*pending)
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import mimetypes
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def convert_file_to_base64(file_path):
    """
    Convert a file to a base64 data URI using pathlib.

    Args:
        file_path (str | Path): Path to the file to convert. A value that is
            already a ``data:`` URI string is returned unchanged.

    Returns:
        str: ``data:<mime-type>;base64,<payload>`` string for the file contents.

    Raises:
        FileNotFoundError: If the file does not exist.
        Exception: If the MIME type cannot be guessed or reading/encoding fails.
    """
    # FIXME base64 is sent sometimes.
    # Only str values can already be data URIs; guard so Path inputs don't
    # crash on the missing `startswith` attribute.
    if isinstance(file_path, str) and file_path.startswith('data:'):
        return file_path

    # Convert to Path object (no-op for Path-like inputs).
    path = Path(file_path)

    try:
        # Read binary content of the file
        binary_content = path.read_bytes()

        # Convert to base64
        base64_encoded = base64.b64encode(binary_content).decode('utf-8')

        # Get the MIME type of the file
        mime_type, _ = mimetypes.guess_type(path)
        # Explicit check instead of `assert`: asserts are stripped under -O,
        # which would silently emit 'data:None;...'.
        if mime_type is None:
            raise ValueError('MIME type cannot be guessed')

        # Assemble the readable data-URI string
        return f'data:{mime_type};base64,{base64_encoded}'

    except FileNotFoundError:
        raise FileNotFoundError(f'File not found: {file_path}')
    except Exception as e:
        raise Exception(f'Error converting file to base64: {str(e)}')
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
import yaml
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def get_dict_from_file(file_path):
    """Load a dictionary from a JSON or YAML file.

    Args:
        file_path (str | Path): Path to a ``.yaml``/``.yml`` file (parsed with
            ``yaml.safe_load``) or any other file (parsed as JSON).

    Returns:
        The parsed file contents (typically a dict).

    Raises:
        FileNotFoundError: If the file does not exist.
        ValueError / yaml.YAMLError: If the contents cannot be parsed.
    """
    # Normalize unconditionally so `.suffix` works for str and Path alike.
    file_path = Path(file_path)

    with open(file_path) as f:
        # Accept both common YAML extensions; everything else is JSON.
        if file_path.suffix in ('.yaml', '.yml'):
            return yaml.safe_load(f)
        return json.load(f)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def get_temp_path(sub_path=None):
    """Return the SDK scratch directory, optionally extended by *sub_path*.

    Args:
        sub_path (str | Path, optional): Relative path appended to the base
            scratch directory when truthy.

    Returns:
        Path: ``/tmp/datamaker`` or ``/tmp/datamaker/<sub_path>``.
    """
    base = Path('/tmp/datamaker')
    return base / sub_path if sub_path else base
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"""File upload utilities for HTTP requests.
|
|
2
|
+
|
|
3
|
+
This module provides utilities for processing files for upload in HTTP requests,
|
|
4
|
+
with proper type definitions for file objects used in the requests library.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from io import BufferedReader
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import BinaryIO, TypeAlias, Union
|
|
10
|
+
|
|
11
|
+
# Type definitions for HTTP request file objects
# Based on requests library file parameter structure

# File tuple format: (filename, file_content)
# This is what we actually use when processing Path objects
FileTuple: TypeAlias = tuple[str, BinaryIO]

# Combined file type for requests library
# - bytes: Raw content (for chunked uploads)
# - FileTuple: (filename, file_handle) for Path objects
RequestsFile: TypeAlias = Union[bytes, FileTuple]

# Files dictionary mapping field names to file objects
# (multipart form-field name -> RequestsFile value)
FilesDict: TypeAlias = dict[str, RequestsFile]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class FileUploadError(Exception):
    """Base exception for file upload errors."""
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class FileValidationError(FileUploadError):
    """Raised when file validation fails."""
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class FileProcessingError(FileUploadError):
    """Raised when file processing fails."""
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def process_files_for_upload(files: dict[str, Union[str, Path, bytes, object]]) -> tuple[FilesDict, list[BinaryIO]]:
    """Normalize a files mapping into requests-compatible upload objects.

    Each entry is converted as follows:
    - bytes values pass through untouched (e.g. chunked-upload payloads),
    - str values are wrapped in pathlib.Path,
    - anything path-like (exposes ``open`` and ``name`` — pathlib.Path or
      cloud-storage upath.UPath) is opened in binary mode and paired with
      its filename as ``(filename, handle)``.

    Args:
        files: Mapping of form-field names to file sources. Supported value
            types: str (file path), pathlib.Path, upath.UPath, or bytes.
            Example: {'document': Path('/tmp/report.pdf'), 'data': b'raw'}

    Returns:
        tuple[FilesDict, list[BinaryIO]]: A tuple of:
            - the processed dict ready for the requests library, and
            - the list of file handles opened here; the CALLER must close
              every handle once the HTTP request completes (e.g. via
              close_file_handles or a try/finally).

    Raises:
        FileValidationError: If a value is None or has an unsupported type.
        FileProcessingError: If a file cannot be opened; any handles opened
            for earlier fields are closed before the error propagates.

    Example:
        >>> processed, handles = process_files_for_upload({'f': Path('/tmp/a.txt')})
        >>> try:
        ...     response = requests.post(url, files=processed)
        ... finally:
        ...     for handle in handles:
        ...         handle.close()
    """
    prepared: FilesDict = {}
    open_handles: list[BinaryIO] = []

    for field, source in files.items():
        # None is never a valid upload source — fail loudly with guidance.
        if source is None:
            raise FileValidationError(
                f"File field '{field}' cannot be None. "
                f'Provide a valid file path (str or Path), UPath object, or bytes.'
            )

        # Raw bytes go through unchanged (chunked uploads / raw content).
        if isinstance(source, bytes):
            prepared[field] = source
            continue

        # Plain strings become local paths for uniform handling below.
        if isinstance(source, str):
            source = Path(source)

        # Duck typing covers both pathlib.Path and cloud-storage UPath.
        if not (hasattr(source, 'open') and hasattr(source, 'name')):
            # Unsupported type — report exactly what was received.
            raise FileValidationError(
                f"File field '{field}' has unsupported type '{type(source).__name__}'. "
                f'Supported types: str (file path), pathlib.Path, upath.UPath, or bytes. '
                f'Got: {source!r}'
            )

        try:
            # Open in binary read mode; track the handle for caller cleanup.
            handle: BinaryIO = source.open(mode='rb')
        except Exception as e:
            # Don't leak handles opened for earlier fields before raising.
            for previous in open_handles:
                try:
                    previous.close()
                except Exception:
                    pass  # Ignore errors during cleanup
            raise FileProcessingError(f"Failed to open file '{source}' for field '{field}': {e}") from e

        # (filename, handle) tuple; 'file' is the fallback for empty names.
        prepared[field] = (source.name if source.name else 'file', handle)
        open_handles.append(handle)

    return prepared, open_handles
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def close_file_handles(handles: list[BinaryIO]) -> None:
    """Best-effort close of every handle in *handles*.

    Args:
        handles: File handles to close.

    Note:
        A failure while closing one handle never prevents the remaining
        handles from being closed; such errors are silently discarded.
    """
    for fh in handles:
        try:
            fh.close()
        except Exception:
            # Swallow close errors so every handle still gets attempted.
            pass
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Video processing utilities
|
|
2
|
+
|
|
3
|
+
from .transcode import (
|
|
4
|
+
FFmpegNotFoundError,
|
|
5
|
+
TranscodeConfig,
|
|
6
|
+
TranscodingFailedError,
|
|
7
|
+
UnsupportedFormatError,
|
|
8
|
+
VideoTranscodeError,
|
|
9
|
+
atranscode_video,
|
|
10
|
+
get_video_info,
|
|
11
|
+
optimize_for_web,
|
|
12
|
+
transcode_batch,
|
|
13
|
+
transcode_video,
|
|
14
|
+
validate_video_format,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
# Public API of the video package; mirrors the names re-exported from
# .transcode above so `from <pkg> import *` stays explicit.
__all__ = [
    'TranscodeConfig',
    'VideoTranscodeError',
    'UnsupportedFormatError',
    'FFmpegNotFoundError',
    'TranscodingFailedError',
    'transcode_video',
    'atranscode_video',
    'get_video_info',
    'validate_video_format',
    'optimize_for_web',
    'transcode_batch',
]
|