synapse-sdk 1.0.0b22__py3-none-any.whl → 1.0.0b24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of synapse-sdk might be problematic.

Files changed (49)
  1. synapse_sdk/devtools/docs/docs/api/clients/ray.md +1 -1
  2. synapse_sdk/devtools/docs/docs/api/index.md +5 -5
  3. synapse_sdk/devtools/docs/docs/features/utils/file.md +415 -0
  4. synapse_sdk/devtools/docs/docs/{api → features}/utils/network.md +1 -1
  5. synapse_sdk/devtools/docs/docs/plugins/export-plugins.md +140 -0
  6. synapse_sdk/devtools/docs/docs/plugins/upload-plugins.md +680 -0
  7. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/ray.md +1 -1
  8. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/index.md +5 -5
  9. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/features/index.md +5 -5
  10. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/features/utils/file.md +415 -0
  11. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/{api → features}/utils/network.md +1 -1
  12. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/export-plugins.md +138 -0
  13. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/plugins.md +48 -2
  14. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/upload-plugins.md +897 -0
  15. synapse_sdk/devtools/docs/sidebars.ts +11 -10
  16. synapse_sdk/plugins/README.md +934 -0
  17. synapse_sdk/plugins/categories/export/templates/plugin/__init__.py +17 -2
  18. synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +20 -0
  19. synapse_sdk/plugins/categories/upload/actions/upload/action.py +623 -0
  20. synapse_sdk/plugins/categories/upload/actions/upload/enums.py +221 -0
  21. synapse_sdk/plugins/categories/upload/actions/upload/exceptions.py +36 -0
  22. synapse_sdk/plugins/categories/upload/actions/upload/models.py +149 -0
  23. synapse_sdk/plugins/categories/upload/actions/upload/run.py +178 -0
  24. synapse_sdk/plugins/categories/upload/actions/upload/utils.py +139 -0
  25. synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +6 -1
  26. synapse_sdk/plugins/models.py +13 -7
  27. synapse_sdk/utils/file/__init__.py +39 -0
  28. synapse_sdk/utils/file/archive.py +32 -0
  29. synapse_sdk/utils/file/checksum.py +56 -0
  30. synapse_sdk/utils/file/chunking.py +31 -0
  31. synapse_sdk/utils/file/download.py +124 -0
  32. synapse_sdk/utils/file/encoding.py +40 -0
  33. synapse_sdk/utils/file/io.py +22 -0
  34. synapse_sdk/utils/file/video/__init__.py +29 -0
  35. synapse_sdk/utils/file/video/transcode.py +307 -0
  36. {synapse_sdk-1.0.0b22.dist-info → synapse_sdk-1.0.0b24.dist-info}/METADATA +2 -1
  37. {synapse_sdk-1.0.0b22.dist-info → synapse_sdk-1.0.0b24.dist-info}/RECORD +46 -28
  38. synapse_sdk/devtools/docs/docs/api/utils/file.md +0 -195
  39. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/utils/file.md +0 -195
  40. synapse_sdk/plugins/categories/upload/actions/upload.py +0 -1368
  41. /synapse_sdk/devtools/docs/docs/{api → features}/utils/storage.md +0 -0
  42. /synapse_sdk/devtools/docs/docs/{api → features}/utils/types.md +0 -0
  43. /synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/{api → features}/utils/storage.md +0 -0
  44. /synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/{api → features}/utils/types.md +0 -0
  45. /synapse_sdk/utils/{file.py → file.py.backup} +0 -0
  46. {synapse_sdk-1.0.0b22.dist-info → synapse_sdk-1.0.0b24.dist-info}/WHEEL +0 -0
  47. {synapse_sdk-1.0.0b22.dist-info → synapse_sdk-1.0.0b24.dist-info}/entry_points.txt +0 -0
  48. {synapse_sdk-1.0.0b22.dist-info → synapse_sdk-1.0.0b24.dist-info}/licenses/LICENSE +0 -0
  49. {synapse_sdk-1.0.0b22.dist-info → synapse_sdk-1.0.0b24.dist-info}/top_level.txt +0 -0
synapse_sdk/plugins/categories/upload/actions/upload/utils.py
@@ -0,0 +1,139 @@
+ import json
+ import os
+
+
+ class PathAwareJSONEncoder(json.JSONEncoder):
+     """Custom JSON encoder that handles Path objects and datetime objects.
+
+     Extends the default JSON encoder to properly serialize Path objects
+     and datetime objects that are commonly used in upload operations.
+
+     Supported object types:
+     - Path objects (converts to string using __fspath__ or as_posix)
+     - Datetime objects (converts using isoformat)
+     - All other standard JSON-serializable types
+
+     Example:
+         >>> data = {"path": Path("/tmp/file.txt"), "timestamp": datetime.now()}
+         >>> json.dumps(data, cls=PathAwareJSONEncoder)
+         '{"path": "/tmp/file.txt", "timestamp": "2023-01-01T12:00:00"}'
+     """
+
+     def default(self, obj):
+         if hasattr(obj, '__fspath__'):
+             return obj.__fspath__()
+         elif hasattr(obj, 'as_posix'):
+             return obj.as_posix()
+         elif hasattr(obj, 'isoformat'):
+             return obj.isoformat()
+         return super().default(obj)
+
+
+ class ExcelSecurityConfig:
+     """Configuration class for Excel file security limits.
+
+     Manages security constraints for Excel file processing to prevent
+     resource exhaustion and security vulnerabilities. All limits can
+     be configured via environment variables.
+
+     Attributes:
+         MAX_FILE_SIZE_MB (int): Maximum file size in megabytes
+         MAX_FILE_SIZE_BYTES (int): Maximum file size in bytes
+         MAX_MEMORY_USAGE_MB (int): Maximum memory usage in megabytes
+         MAX_MEMORY_USAGE_BYTES (int): Maximum memory usage in bytes
+         MAX_ROWS (int): Maximum number of rows allowed
+         MAX_COLUMNS (int): Maximum number of columns allowed
+         MAX_FILENAME_LENGTH (int): Maximum filename length
+         MAX_COLUMN_NAME_LENGTH (int): Maximum column name length
+         MAX_METADATA_VALUE_LENGTH (int): Maximum metadata value length
+
+     Environment Variables:
+         EXCEL_MAX_FILE_SIZE_MB: Override default file size limit (default: 10)
+         EXCEL_MAX_MEMORY_MB: Override default memory limit (default: 30)
+         EXCEL_MAX_ROWS: Override default row limit (default: 10000)
+         EXCEL_MAX_COLUMNS: Override default column limit (default: 50)
+         EXCEL_MAX_FILENAME_LENGTH: Override filename length limit (default: 255)
+         EXCEL_MAX_COLUMN_NAME_LENGTH: Override column name length (default: 100)
+         EXCEL_MAX_METADATA_VALUE_LENGTH: Override metadata value length (default: 1000)
+     """
+
+     def __init__(self):
+         self.MAX_FILE_SIZE_MB = int(os.getenv('EXCEL_MAX_FILE_SIZE_MB', '10'))
+         self.MAX_FILE_SIZE_BYTES = self.MAX_FILE_SIZE_MB * 1024 * 1024
+
+         self.MAX_MEMORY_USAGE_MB = int(os.getenv('EXCEL_MAX_MEMORY_MB', '30'))
+         self.MAX_MEMORY_USAGE_BYTES = self.MAX_MEMORY_USAGE_MB * 1024 * 1024
+
+         self.MAX_ROWS = int(os.getenv('EXCEL_MAX_ROWS', '10000'))
+         self.MAX_COLUMNS = int(os.getenv('EXCEL_MAX_COLUMNS', '50'))
+
+         self.MAX_FILENAME_LENGTH = int(os.getenv('EXCEL_MAX_FILENAME_LENGTH', '255'))
+         self.MAX_COLUMN_NAME_LENGTH = int(os.getenv('EXCEL_MAX_COLUMN_NAME_LENGTH', '100'))
+         self.MAX_METADATA_VALUE_LENGTH = int(os.getenv('EXCEL_MAX_METADATA_VALUE_LENGTH', '1000'))
+
+
+ class ExcelMetadataUtils:
+     """Utility class for Excel metadata processing and validation.
+
+     Provides helper methods for validating and processing Excel metadata
+     while respecting security constraints defined in ExcelSecurityConfig.
+
+     Args:
+         config (ExcelSecurityConfig): Security configuration instance
+
+     Example:
+         >>> config = ExcelSecurityConfig()
+         >>> utils = ExcelMetadataUtils(config)
+         >>> safe_value = utils.validate_and_truncate_string("long text", 10)
+         >>> is_valid = utils.is_valid_filename_length("file.xlsx")
+     """
+
+     def __init__(self, config: ExcelSecurityConfig):
+         self.config = config
+
+     def validate_and_truncate_string(self, value: str, max_length: int) -> str:
+         """Validate and truncate string to maximum length.
+
+         Converts non-string values to strings, trims whitespace, and
+         truncates to the specified maximum length if necessary.
+
+         Args:
+             value (str): Value to validate and truncate
+             max_length (int): Maximum allowed length
+
+         Returns:
+             str: Validated and truncated string
+
+         Example:
+             >>> utils.validate_and_truncate_string(" long text ", 5)
+             'long '
+         """
+         if not isinstance(value, str):
+             value = str(value)
+
+         value = value.strip()
+
+         if len(value) > max_length:
+             return value[:max_length]
+
+         return value
+
+     def is_valid_filename_length(self, filename: str) -> bool:
+         """Check if filename length is within security limits.
+
+         Validates that the filename (after trimming whitespace) does not
+         exceed the maximum filename length configured in security settings.
+
+         Args:
+             filename (str): Filename to validate
+
+         Returns:
+             bool: True if filename length is valid, False otherwise
+
+         Example:
+             >>> utils.is_valid_filename_length("file.xlsx")
+             True
+             >>> utils.is_valid_filename_length("x" * 300)
+             False
+         """
+         return len(filename.strip()) <= self.config.MAX_FILENAME_LENGTH
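The new upload utils module above is driven entirely by environment variables and duck-typed serialization. A minimal usage sketch (not from the package docs), assuming the module path shown in the file list, `synapse_sdk.plugins.categories.upload.actions.upload.utils`:

```python
# Sketch only: exercises the classes added in the hunk above.
import json
import os
from datetime import datetime
from pathlib import Path

from synapse_sdk.plugins.categories.upload.actions.upload.utils import (
    ExcelMetadataUtils,
    ExcelSecurityConfig,
    PathAwareJSONEncoder,
)

# Limits are read from environment variables at construction time.
os.environ['EXCEL_MAX_ROWS'] = '5000'  # override the 10000-row default
config = ExcelSecurityConfig()
utils = ExcelMetadataUtils(config)

print(config.MAX_ROWS)  # 5000
print(utils.is_valid_filename_length('report.xlsx'))  # True
print(utils.validate_and_truncate_string('  padded value  ', 6))  # 'padded'

# Path and datetime values serialize cleanly with the custom encoder.
payload = {'path': Path('/tmp/file.txt'), 'uploaded_at': datetime(2023, 1, 1, 12, 0)}
print(json.dumps(payload, cls=PathAwareJSONEncoder))
```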
synapse_sdk/plugins/categories/upload/templates/plugin/upload.py
@@ -10,7 +10,12 @@ class Uploader:
      """
  
      def __init__(
-         self, run, path: Path, file_specification: List = None, organized_files: List = None, extra_params: Dict = None
+         self,
+         run,
+         path: Path,
+         file_specification: List = None,
+         organized_files: List = None,
+         extra_params: Dict = None,
      ):
          """Initialize the plugin upload action class.
  
synapse_sdk/plugins/models.py
@@ -124,9 +124,9 @@ class Run:
      context = None
      client = None
  
-     def __init__(self, job_id, context):
+     def __init__(self, job_id, context=None):
          self.job_id = job_id
-         self.context = context
+         self.context = context or {}
          config = get_backend_config()
          if config:
              self.client = BackendClient(
@@ -134,17 +134,23 @@ class Run:
                  access_token=config['token'],
              )
          else:
+             # Handle missing environment variables for test environments
+             envs = self.context.get('envs', {})
+             host = envs.get('SYNAPSE_PLUGIN_RUN_HOST', os.getenv('SYNAPSE_PLUGIN_RUN_HOST', 'http://localhost:8000'))
+             token = envs.get('SYNAPSE_PLUGIN_RUN_USER_TOKEN', os.getenv('SYNAPSE_PLUGIN_RUN_USER_TOKEN'))
+             tenant = envs.get('SYNAPSE_PLUGIN_RUN_TENANT', os.getenv('SYNAPSE_PLUGIN_RUN_TENANT'))
+
              self.client = BackendClient(
-                 self.context['envs']['SYNAPSE_PLUGIN_RUN_HOST'],
-                 token=self.context['envs'].get('SYNAPSE_PLUGIN_RUN_USER_TOKEN'),
-                 tenant=self.context['envs'].get('SYNAPSE_PLUGIN_RUN_TENANT'),
+                 host,
+                 token=token,
+                 tenant=tenant,
              )
          self.set_logger()
  
      def set_logger(self):
          kwargs = {
-             'progress_categories': self.context['progress_categories'],
-             'metrics_categories': self.context['metrics_categories'],
+             'progress_categories': self.context.get('progress_categories'),
+             'metrics_categories': self.context.get('metrics_categories'),
          }
  
          if self.job_id:
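The `Run` changes make `context` optional and fall back to the `SYNAPSE_PLUGIN_RUN_*` environment variables, defaulting the host to `http://localhost:8000`. A hedged sketch of how that fallback could be exercised in a test environment, assuming `Run` stays importable from `synapse_sdk.plugins.models` and that constructing `BackendClient` does not itself require a reachable backend:

```python
# Hedged sketch: the new env-var fallback in Run.__init__, per the hunk above.
import os

from synapse_sdk.plugins.models import Run

# Without a context, Run now defaults to {} and reads connection details from
# the environment, falling back to http://localhost:8000 for the host.
os.environ.setdefault('SYNAPSE_PLUGIN_RUN_USER_TOKEN', 'test-token')      # illustrative value
os.environ.setdefault('SYNAPSE_PLUGIN_RUN_TENANT', 'test-tenant')         # illustrative value

run = Run(job_id=None)  # context omitted; previously it was a required argument

# Values passed via context['envs'] still take precedence over the environment.
run_with_context = Run(
    job_id=None,
    context={'envs': {'SYNAPSE_PLUGIN_RUN_HOST': 'http://backend.internal:8000'}},
)
```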
synapse_sdk/utils/file/__init__.py
@@ -0,0 +1,39 @@
+ # File utilities module
+ # Maintains backward compatibility by re-exporting all functions
+
+ from .archive import archive, unarchive
+ from .checksum import calculate_checksum, get_checksum_from_file
+ from .chunking import read_file_in_chunks
+ from .download import (
+     adownload_file,
+     afiles_url_to_path,
+     afiles_url_to_path_from_objs,
+     download_file,
+     files_url_to_path,
+     files_url_to_path_from_objs,
+ )
+ from .encoding import convert_file_to_base64
+ from .io import get_dict_from_file, get_temp_path
+
+ __all__ = [
+     # Chunking
+     'read_file_in_chunks',
+     # Download
+     'download_file',
+     'adownload_file',
+     'files_url_to_path',
+     'afiles_url_to_path',
+     'files_url_to_path_from_objs',
+     'afiles_url_to_path_from_objs',
+     # Checksum
+     'calculate_checksum',
+     'get_checksum_from_file',
+     # Archive
+     'archive',
+     'unarchive',
+     # Encoding
+     'convert_file_to_base64',
+     # I/O
+     'get_dict_from_file',
+     'get_temp_path',
+ ]
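Because `synapse_sdk/utils/file.py` was renamed to `file.py.backup` and replaced by this package (files 27-35 and 45 in the list), the `__init__.py` re-exports above are what keep the old flat import path working. A quick sketch of the equivalence:

```python
# The flat import path from before the split and the new submodule path
# resolve to the same objects, so existing plugin code keeps working.
from synapse_sdk.utils.file import calculate_checksum, download_file
from synapse_sdk.utils.file.checksum import calculate_checksum as from_submodule

assert calculate_checksum is from_submodule
print(download_file.__module__)  # synapse_sdk.utils.file.download
```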
synapse_sdk/utils/file/archive.py
@@ -0,0 +1,32 @@
+ import zipfile
+ from pathlib import Path
+
+
+ def archive(input_path, output_path, append=False):
+     input_path = Path(input_path)
+     output_path = Path(output_path)
+
+     mode = 'a' if append and output_path.exists() else 'w'
+     with zipfile.ZipFile(output_path, mode=mode, compression=zipfile.ZIP_DEFLATED) as zipf:
+         if input_path.is_file():
+             zipf.write(input_path, input_path.name)
+         else:
+             for file_path in input_path.rglob('*'):
+                 if file_path.is_file():  # Only add files, skip directories
+                     arcname = file_path.relative_to(input_path.parent)
+                     zipf.write(file_path, arcname)
+
+
+ def unarchive(file_path, output_path):
+     """
+     Unarchives a ZIP file to a given directory.
+
+     Parameters:
+         file_path (str | Path): The path to the ZIP file.
+         output_path (str): The directory where the files will be extracted.
+     """
+     output_path = Path(output_path)
+     output_path.mkdir(parents=True, exist_ok=True)
+
+     with zipfile.ZipFile(str(file_path), 'r') as zip_ref:
+         zip_ref.extractall(output_path)
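`archive` zips either a single file or a whole directory tree (directory entries are stored relative to the parent, so the top-level folder name is kept), and `unarchive` extracts into a directory it creates if needed. A small sketch with illustrative paths:

```python
# Sketch: round-tripping a directory through the new archive helpers.
from pathlib import Path

from synapse_sdk.utils.file import archive, unarchive

src = Path('/tmp/dataset')              # directory containing files to zip
zip_path = Path('/tmp/dataset.zip')

archive(src, zip_path)                  # entries are stored as 'dataset/...'
archive(Path('/tmp/extra.txt'), zip_path, append=True)  # append to the existing zip

unarchive(zip_path, '/tmp/restored')    # extracts into /tmp/restored/
```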
synapse_sdk/utils/file/checksum.py
@@ -0,0 +1,56 @@
+ import hashlib
+ from typing import IO, Any, Callable
+
+
+ def calculate_checksum(file_path, prefix=''):
+     md5_hash = hashlib.md5()
+     with open(file_path, 'rb') as f:
+         for byte_block in iter(lambda: f.read(4096), b''):
+             md5_hash.update(byte_block)
+     checksum = md5_hash.hexdigest()
+     if prefix:
+         return f'dev-{checksum}'
+     return checksum
+
+
+ def get_checksum_from_file(file: IO[Any], digest_mod: Callable[[], Any] = hashlib.sha1) -> str:
+     """
+     Calculate checksum for a file-like object.
+
+     Args:
+         file (IO[Any]): File-like object with read() method that supports reading in chunks
+         digest_mod (Callable[[], Any]): Hash algorithm from hashlib (defaults to hashlib.sha1)
+
+     Returns:
+         str: Hexadecimal digest of the file contents
+
+     Example:
+         ```python
+         import hashlib
+         from io import BytesIO
+         from synapse_sdk.utils.file import get_checksum_from_file
+
+         # With BytesIO
+         data = BytesIO(b'Hello, world!')
+         checksum = get_checksum_from_file(data)
+
+         # With different hash algorithm
+         checksum = get_checksum_from_file(data, digest_mod=hashlib.sha256)
+         ```
+     """
+     digest = digest_mod()
+     chunk_size = 4096
+
+     # Reset file pointer to beginning if possible
+     if hasattr(file, 'seek'):
+         file.seek(0)
+
+     while True:
+         chunk = file.read(chunk_size)
+         if not chunk:
+             break
+         if isinstance(chunk, str):
+             chunk = chunk.encode('utf-8')
+         digest.update(chunk)
+
+     return digest.hexdigest()
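`calculate_checksum` has no docstring, so for completeness: it streams the file through MD5 in 4 KB blocks, and note that any truthy `prefix` currently yields a literal `dev-` prefix rather than the value passed in. A short sketch with an illustrative path:

```python
# Sketch: MD5 checksum of a file on disk using the new helper.
from synapse_sdk.utils.file import calculate_checksum

digest = calculate_checksum('/tmp/dataset.zip')
print(digest)  # e.g. 'd41d8cd98f00b204e9800998ecf8427e' for an empty file

# Note: in this version, passing any truthy prefix returns the digest with the
# literal string 'dev-' prepended; the prefix value itself is not interpolated.
dev_digest = calculate_checksum('/tmp/dataset.zip', prefix='dev')
print(dev_digest)  # 'dev-<md5>'
```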
synapse_sdk/utils/file/chunking.py
@@ -0,0 +1,31 @@
+ def read_file_in_chunks(file_path, chunk_size=1024 * 1024 * 50):
+     """
+     Read a file in chunks for efficient memory usage during file processing.
+
+     This function is particularly useful for large files or when you need to process
+     files in chunks, such as for uploading or hashing.
+
+     Args:
+         file_path (str | Path): Path to the file to read
+         chunk_size (int, optional): Size of each chunk in bytes. Defaults to 50MB (1024 * 1024 * 50)
+
+     Yields:
+         bytes: File content chunks
+
+     Raises:
+         FileNotFoundError: If the file doesn't exist
+         PermissionError: If the file can't be read due to permissions
+         OSError: If there's an OS-level error reading the file
+
+     Example:
+         ```python
+         from synapse_sdk.utils.file import read_file_in_chunks
+
+         # Read a file in 10MB chunks
+         for chunk in read_file_in_chunks('large_file.bin', chunk_size=1024*1024*10):
+             process_chunk(chunk)
+         ```
+     """
+     with open(file_path, 'rb') as file:
+         while chunk := file.read(chunk_size):
+             yield chunk
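The docstring above already shows plain iteration; the hashing use case it mentions looks roughly like this sketch (file path illustrative):

```python
# Sketch: streaming a large file into a SHA-256 digest without loading it
# fully into memory, using the 50 MB default chunk size.
import hashlib

from synapse_sdk.utils.file import read_file_in_chunks

sha256 = hashlib.sha256()
for chunk in read_file_in_chunks('/tmp/large_file.bin'):
    sha256.update(chunk)
print(sha256.hexdigest())
```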
synapse_sdk/utils/file/download.py
@@ -0,0 +1,124 @@
+ import asyncio
+ import operator
+ from functools import reduce
+ from pathlib import Path
+
+ import aiohttp
+ import requests
+
+ from synapse_sdk.utils.network import clean_url
+ from synapse_sdk.utils.string import hash_text
+
+ from .io import get_temp_path
+
+
+ def download_file(url, path_download, name=None, coerce=None, use_cached=True):
+     chunk_size = 1024 * 1024 * 50
+     cleaned_url = clean_url(url)  # remove query params and fragment
+
+     if name:
+         use_cached = False
+     else:
+         name = hash_text(cleaned_url)
+
+     name += Path(cleaned_url).suffix
+
+     path = Path(path_download) / name
+
+     if not use_cached or not path.is_file():
+         response = requests.get(url, allow_redirects=True, stream=True)
+         response.raise_for_status()
+
+         with path.open('wb') as file:
+             for chunk in response.iter_content(chunk_size=chunk_size):
+                 file.write(chunk)
+
+     if coerce:
+         path = coerce(path)
+
+     return path
+
+
+ def files_url_to_path(files, coerce=None, file_field=None):
+     path_download = get_temp_path('media')
+     path_download.mkdir(parents=True, exist_ok=True)
+     if file_field:
+         files[file_field] = download_file(files[file_field], path_download, coerce=coerce)
+     else:
+         for file_name in files:
+             if isinstance(files[file_name], str):
+                 files[file_name] = download_file(files[file_name], path_download, coerce=coerce)
+             else:
+                 files[file_name]['path'] = download_file(files[file_name].pop('url'), path_download, coerce=coerce)
+
+
+ def files_url_to_path_from_objs(objs, files_fields, coerce=None, is_list=False, is_async=False):
+     if is_async:
+         asyncio.run(afiles_url_to_path_from_objs(objs, files_fields, coerce=coerce, is_list=is_list))
+     else:
+         if not is_list:
+             objs = [objs]
+
+         for obj in objs:
+             for files_field in files_fields:
+                 try:
+                     files = reduce(operator.getitem, files_field.split('.'), obj)
+                     if isinstance(files, str):
+                         files_url_to_path(obj, coerce=coerce, file_field=files_field)
+                     else:
+                         files_url_to_path(files, coerce=coerce)
+                 except KeyError:
+                     pass
+
+
+ async def adownload_file(url, path_download, name=None, coerce=None, use_cached=True):
+     chunk_size = 1024 * 1024 * 50
+     cleaned_url = clean_url(url)  # remove query params and fragment
+
+     if name:
+         use_cached = False
+     else:
+         name = hash_text(cleaned_url)
+
+     name += Path(cleaned_url).suffix
+
+     path = Path(path_download) / name
+
+     if not use_cached or not path.is_file():
+         async with aiohttp.ClientSession() as session:
+             async with session.get(url) as response:
+                 with path.open('wb') as file:
+                     while chunk := await response.content.read(chunk_size):
+                         file.write(chunk)
+
+     if coerce:
+         path = coerce(path)
+
+     return path
+
+
+ async def afiles_url_to_path(files, coerce=None):
+     path_download = get_temp_path('media')
+     path_download.mkdir(parents=True, exist_ok=True)
+     for file_name in files:
+         if isinstance(files[file_name], str):
+             files[file_name] = await adownload_file(files[file_name], path_download, coerce=coerce)
+         else:
+             files[file_name]['path'] = await adownload_file(files[file_name].pop('url'), path_download, coerce=coerce)
+
+
+ async def afiles_url_to_path_from_objs(objs, files_fields, coerce=None, is_list=False):
+     if not is_list:
+         objs = [objs]
+
+     tasks = []
+
+     for obj in objs:
+         for files_field in files_fields:
+             try:
+                 files = reduce(operator.getitem, files_field.split('.'), obj)
+                 tasks.append(afiles_url_to_path(files, coerce=coerce))
+             except KeyError:
+                 pass
+
+     await asyncio.gather(*tasks)
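`download_file` caches by hashing the URL (with query string and fragment stripped) unless an explicit `name` is passed, in which case caching is skipped; `adownload_file` mirrors the same logic with aiohttp. A hedged sketch with illustrative URL and paths; note the helpers expect the download directory to already exist:

```python
# Sketch: sync and async downloads with the new helpers.
import asyncio
from pathlib import Path

from synapse_sdk.utils.file import adownload_file, download_file

url = 'https://example.com/data/sample.jpg?token=abc'   # illustrative URL
Path('/tmp/media').mkdir(parents=True, exist_ok=True)   # directory must exist

# Cached: a second call with the same URL reuses the file already on disk,
# keyed by a hash of the cleaned URL plus its suffix ('.jpg' here).
path = download_file(url, '/tmp/media')

# Explicit name: caching is skipped and the suffix is still appended ('sample.jpg').
path = download_file(url, '/tmp/media', name='sample')

# Async variant with the same semantics.
path = asyncio.run(adownload_file(url, '/tmp/media'))
```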
synapse_sdk/utils/file/encoding.py
@@ -0,0 +1,40 @@
+ import base64
+ import mimetypes
+ from pathlib import Path
+
+
+ def convert_file_to_base64(file_path):
+     """
+     Convert a file to base64 using pathlib.
+
+     Args:
+         file_path (str): Path to the file to convert
+
+     Returns:
+         str: Base64 encoded string of the file contents
+     """
+     # FIXME base64 is sent sometimes.
+     if file_path.startswith('data:'):
+         return file_path
+
+     # Convert string path to Path object
+     path = Path(file_path)
+
+     try:
+         # Read binary content of the file
+         binary_content = path.read_bytes()
+
+         # Convert to base64
+         base64_encoded = base64.b64encode(binary_content).decode('utf-8')
+
+         # Get the MIME type of the file
+         mime_type, _ = mimetypes.guess_type(path)
+         assert mime_type is not None, 'MIME type cannot be guessed'
+
+         # Convert bytes to string for readable output
+         return f'data:{mime_type};base64,{base64_encoded}'
+
+     except FileNotFoundError:
+         raise FileNotFoundError(f'File not found: {file_path}')
+     except Exception as e:
+         raise Exception(f'Error converting file to base64: {str(e)}')
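`convert_file_to_base64` returns a `data:` URI and passes through strings that already start with `data:`. A small sketch with an illustrative path; the MIME type must be guessable from the file extension or the assertion above fails:

```python
# Sketch: producing a data URI from a file on disk.
from synapse_sdk.utils.file import convert_file_to_base64

data_uri = convert_file_to_base64('/tmp/photo.png')
print(data_uri[:22])  # 'data:image/png;base64,' for a PNG file

# Strings that are already data URIs are returned unchanged.
assert convert_file_to_base64(data_uri) == data_uri
```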
synapse_sdk/utils/file/io.py
@@ -0,0 +1,22 @@
+ import json
+ from pathlib import Path
+
+ import yaml
+
+
+ def get_dict_from_file(file_path):
+     if isinstance(file_path, str):
+         file_path = Path(file_path)
+
+     with open(file_path) as f:
+         if file_path.suffix == '.yaml':
+             return yaml.safe_load(f)
+         else:
+             return json.load(f)
+
+
+ def get_temp_path(sub_path=None):
+     path = Path('/tmp/datamaker')
+     if sub_path:
+         path = path / sub_path
+     return path
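`get_dict_from_file` dispatches on the `.yaml` suffix only (a `.yml` file would fall through to `json.load` in this version), and `get_temp_path` builds paths under `/tmp/datamaker` without creating them. A brief sketch with illustrative paths:

```python
# Sketch: loading plugin config files and building temp paths with the new I/O helpers.
from synapse_sdk.utils.file import get_dict_from_file, get_temp_path

config = get_dict_from_file('/tmp/plugin/config.yaml')      # parsed with yaml.safe_load
manifest = get_dict_from_file('/tmp/plugin/manifest.json')  # anything not .yaml is json.load-ed

media_dir = get_temp_path('media')  # PosixPath('/tmp/datamaker/media'); not created on disk
media_dir.mkdir(parents=True, exist_ok=True)
```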
synapse_sdk/utils/file/video/__init__.py
@@ -0,0 +1,29 @@
+ # Video processing utilities
+
+ from .transcode import (
+     FFmpegNotFoundError,
+     TranscodeConfig,
+     TranscodingFailedError,
+     UnsupportedFormatError,
+     VideoTranscodeError,
+     atranscode_video,
+     get_video_info,
+     optimize_for_web,
+     transcode_batch,
+     transcode_video,
+     validate_video_format,
+ )
+
+ __all__ = [
+     'TranscodeConfig',
+     'VideoTranscodeError',
+     'UnsupportedFormatError',
+     'FFmpegNotFoundError',
+     'TranscodingFailedError',
+     'transcode_video',
+     'atranscode_video',
+     'get_video_info',
+     'validate_video_format',
+     'optimize_for_web',
+     'transcode_batch',
+ ]