synapse-sdk 1.0.0b22__py3-none-any.whl → 1.0.0b24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synapse-sdk might be problematic.
- synapse_sdk/devtools/docs/docs/api/clients/ray.md +1 -1
- synapse_sdk/devtools/docs/docs/api/index.md +5 -5
- synapse_sdk/devtools/docs/docs/features/utils/file.md +415 -0
- synapse_sdk/devtools/docs/docs/{api → features}/utils/network.md +1 -1
- synapse_sdk/devtools/docs/docs/plugins/export-plugins.md +140 -0
- synapse_sdk/devtools/docs/docs/plugins/upload-plugins.md +680 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/ray.md +1 -1
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/index.md +5 -5
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/features/index.md +5 -5
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/features/utils/file.md +415 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/{api → features}/utils/network.md +1 -1
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/export-plugins.md +138 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/plugins.md +48 -2
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/upload-plugins.md +897 -0
- synapse_sdk/devtools/docs/sidebars.ts +11 -10
- synapse_sdk/plugins/README.md +934 -0
- synapse_sdk/plugins/categories/export/templates/plugin/__init__.py +17 -2
- synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +20 -0
- synapse_sdk/plugins/categories/upload/actions/upload/action.py +623 -0
- synapse_sdk/plugins/categories/upload/actions/upload/enums.py +221 -0
- synapse_sdk/plugins/categories/upload/actions/upload/exceptions.py +36 -0
- synapse_sdk/plugins/categories/upload/actions/upload/models.py +149 -0
- synapse_sdk/plugins/categories/upload/actions/upload/run.py +178 -0
- synapse_sdk/plugins/categories/upload/actions/upload/utils.py +139 -0
- synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +6 -1
- synapse_sdk/plugins/models.py +13 -7
- synapse_sdk/utils/file/__init__.py +39 -0
- synapse_sdk/utils/file/archive.py +32 -0
- synapse_sdk/utils/file/checksum.py +56 -0
- synapse_sdk/utils/file/chunking.py +31 -0
- synapse_sdk/utils/file/download.py +124 -0
- synapse_sdk/utils/file/encoding.py +40 -0
- synapse_sdk/utils/file/io.py +22 -0
- synapse_sdk/utils/file/video/__init__.py +29 -0
- synapse_sdk/utils/file/video/transcode.py +307 -0
- {synapse_sdk-1.0.0b22.dist-info → synapse_sdk-1.0.0b24.dist-info}/METADATA +2 -1
- {synapse_sdk-1.0.0b22.dist-info → synapse_sdk-1.0.0b24.dist-info}/RECORD +46 -28
- synapse_sdk/devtools/docs/docs/api/utils/file.md +0 -195
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/utils/file.md +0 -195
- synapse_sdk/plugins/categories/upload/actions/upload.py +0 -1368
- /synapse_sdk/devtools/docs/docs/{api → features}/utils/storage.md +0 -0
- /synapse_sdk/devtools/docs/docs/{api → features}/utils/types.md +0 -0
- /synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/{api → features}/utils/storage.md +0 -0
- /synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/{api → features}/utils/types.md +0 -0
- /synapse_sdk/utils/{file.py → file.py.backup} +0 -0
- {synapse_sdk-1.0.0b22.dist-info → synapse_sdk-1.0.0b24.dist-info}/WHEEL +0 -0
- {synapse_sdk-1.0.0b22.dist-info → synapse_sdk-1.0.0b24.dist-info}/entry_points.txt +0 -0
- {synapse_sdk-1.0.0b22.dist-info → synapse_sdk-1.0.0b24.dist-info}/licenses/LICENSE +0 -0
- {synapse_sdk-1.0.0b22.dist-info → synapse_sdk-1.0.0b24.dist-info}/top_level.txt +0 -0
synapse_sdk/plugins/categories/upload/actions/upload/utils.py

@@ -0,0 +1,139 @@
+import json
+import os
+
+
+class PathAwareJSONEncoder(json.JSONEncoder):
+    """Custom JSON encoder that handles Path objects and datetime objects.
+
+    Extends the default JSON encoder to properly serialize Path objects
+    and datetime objects that are commonly used in upload operations.
+
+    Supported object types:
+    - Path objects (converts to string using __fspath__ or as_posix)
+    - Datetime objects (converts using isoformat)
+    - All other standard JSON-serializable types
+
+    Example:
+        >>> data = {"path": Path("/tmp/file.txt"), "timestamp": datetime.now()}
+        >>> json.dumps(data, cls=PathAwareJSONEncoder)
+        '{"path": "/tmp/file.txt", "timestamp": "2023-01-01T12:00:00"}'
+    """
+
+    def default(self, obj):
+        if hasattr(obj, '__fspath__'):
+            return obj.__fspath__()
+        elif hasattr(obj, 'as_posix'):
+            return obj.as_posix()
+        elif hasattr(obj, 'isoformat'):
+            return obj.isoformat()
+        return super().default(obj)
+
+
+class ExcelSecurityConfig:
+    """Configuration class for Excel file security limits.
+
+    Manages security constraints for Excel file processing to prevent
+    resource exhaustion and security vulnerabilities. All limits can
+    be configured via environment variables.
+
+    Attributes:
+        MAX_FILE_SIZE_MB (int): Maximum file size in megabytes
+        MAX_FILE_SIZE_BYTES (int): Maximum file size in bytes
+        MAX_MEMORY_USAGE_MB (int): Maximum memory usage in megabytes
+        MAX_MEMORY_USAGE_BYTES (int): Maximum memory usage in bytes
+        MAX_ROWS (int): Maximum number of rows allowed
+        MAX_COLUMNS (int): Maximum number of columns allowed
+        MAX_FILENAME_LENGTH (int): Maximum filename length
+        MAX_COLUMN_NAME_LENGTH (int): Maximum column name length
+        MAX_METADATA_VALUE_LENGTH (int): Maximum metadata value length
+
+    Environment Variables:
+        EXCEL_MAX_FILE_SIZE_MB: Override default file size limit (default: 10)
+        EXCEL_MAX_MEMORY_MB: Override default memory limit (default: 30)
+        EXCEL_MAX_ROWS: Override default row limit (default: 10000)
+        EXCEL_MAX_COLUMNS: Override default column limit (default: 50)
+        EXCEL_MAX_FILENAME_LENGTH: Override filename length limit (default: 255)
+        EXCEL_MAX_COLUMN_NAME_LENGTH: Override column name length (default: 100)
+        EXCEL_MAX_METADATA_VALUE_LENGTH: Override metadata value length (default: 1000)
+    """
+
+    def __init__(self):
+        self.MAX_FILE_SIZE_MB = int(os.getenv('EXCEL_MAX_FILE_SIZE_MB', '10'))
+        self.MAX_FILE_SIZE_BYTES = self.MAX_FILE_SIZE_MB * 1024 * 1024
+
+        self.MAX_MEMORY_USAGE_MB = int(os.getenv('EXCEL_MAX_MEMORY_MB', '30'))
+        self.MAX_MEMORY_USAGE_BYTES = self.MAX_MEMORY_USAGE_MB * 1024 * 1024
+
+        self.MAX_ROWS = int(os.getenv('EXCEL_MAX_ROWS', '10000'))
+        self.MAX_COLUMNS = int(os.getenv('EXCEL_MAX_COLUMNS', '50'))
+
+        self.MAX_FILENAME_LENGTH = int(os.getenv('EXCEL_MAX_FILENAME_LENGTH', '255'))
+        self.MAX_COLUMN_NAME_LENGTH = int(os.getenv('EXCEL_MAX_COLUMN_NAME_LENGTH', '100'))
+        self.MAX_METADATA_VALUE_LENGTH = int(os.getenv('EXCEL_MAX_METADATA_VALUE_LENGTH', '1000'))
+
+
+class ExcelMetadataUtils:
+    """Utility class for Excel metadata processing and validation.
+
+    Provides helper methods for validating and processing Excel metadata
+    while respecting security constraints defined in ExcelSecurityConfig.
+
+    Args:
+        config (ExcelSecurityConfig): Security configuration instance
+
+    Example:
+        >>> config = ExcelSecurityConfig()
+        >>> utils = ExcelMetadataUtils(config)
+        >>> safe_value = utils.validate_and_truncate_string("long text", 10)
+        >>> is_valid = utils.is_valid_filename_length("file.xlsx")
+    """
+
+    def __init__(self, config: ExcelSecurityConfig):
+        self.config = config
+
+    def validate_and_truncate_string(self, value: str, max_length: int) -> str:
+        """Validate and truncate string to maximum length.
+
+        Converts non-string values to strings, trims whitespace, and
+        truncates to the specified maximum length if necessary.
+
+        Args:
+            value (str): Value to validate and truncate
+            max_length (int): Maximum allowed length
+
+        Returns:
+            str: Validated and truncated string
+
+        Example:
+            >>> utils.validate_and_truncate_string(" long text ", 5)
+            'long '
+        """
+        if not isinstance(value, str):
+            value = str(value)
+
+        value = value.strip()
+
+        if len(value) > max_length:
+            return value[:max_length]
+
+        return value
+
+    def is_valid_filename_length(self, filename: str) -> bool:
+        """Check if filename length is within security limits.
+
+        Validates that the filename (after trimming whitespace) does not
+        exceed the maximum filename length configured in security settings.
+
+        Args:
+            filename (str): Filename to validate
+
+        Returns:
+            bool: True if filename length is valid, False otherwise
+
+        Example:
+            >>> utils.is_valid_filename_length("file.xlsx")
+            True
+            >>> utils.is_valid_filename_length("x" * 300)
+            False
+        """
+        return len(filename.strip()) <= self.config.MAX_FILENAME_LENGTH
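For orientation, here is a minimal usage sketch of the helpers added in this file. The import path simply mirrors the new module's location shown above, and the sample values are placeholders rather than anything shipped with the release:

```python
import json
from datetime import datetime
from pathlib import Path

# Import path inferred from the new module location shown above.
from synapse_sdk.plugins.categories.upload.actions.upload.utils import (
    ExcelMetadataUtils,
    ExcelSecurityConfig,
    PathAwareJSONEncoder,
)

# Path and datetime values serialize without extra handling at the call site.
payload = {'path': Path('/tmp/file.txt'), 'created': datetime(2023, 1, 1, 12, 0)}
print(json.dumps(payload, cls=PathAwareJSONEncoder))

# Limits come from EXCEL_* environment variables, falling back to the defaults above.
utils = ExcelMetadataUtils(ExcelSecurityConfig())
print(utils.validate_and_truncate_string('  some metadata value  ', 10))  # trimmed, then cut to 10 chars
print(utils.is_valid_filename_length('report.xlsx'))                      # True with the default 255 limit
```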
synapse_sdk/plugins/categories/upload/templates/plugin/upload.py

@@ -10,7 +10,12 @@ class Uploader:
     """

     def __init__(
-        self,
+        self,
+        run,
+        path: Path,
+        file_specification: List = None,
+        organized_files: List = None,
+        extra_params: Dict = None,
     ):
         """Initialize the plugin upload action class.

synapse_sdk/plugins/models.py
CHANGED

@@ -124,9 +124,9 @@ class Run:
     context = None
     client = None

-    def __init__(self, job_id, context):
+    def __init__(self, job_id, context=None):
         self.job_id = job_id
-        self.context = context
+        self.context = context or {}
         config = get_backend_config()
         if config:
             self.client = BackendClient(

@@ -134,17 +134,23 @@ class Run:
                 access_token=config['token'],
             )
         else:
+            # Handle missing environment variables for test environments
+            envs = self.context.get('envs', {})
+            host = envs.get('SYNAPSE_PLUGIN_RUN_HOST', os.getenv('SYNAPSE_PLUGIN_RUN_HOST', 'http://localhost:8000'))
+            token = envs.get('SYNAPSE_PLUGIN_RUN_USER_TOKEN', os.getenv('SYNAPSE_PLUGIN_RUN_USER_TOKEN'))
+            tenant = envs.get('SYNAPSE_PLUGIN_RUN_TENANT', os.getenv('SYNAPSE_PLUGIN_RUN_TENANT'))
+
             self.client = BackendClient(
-
-                token=
-                tenant=
+                host,
+                token=token,
+                tenant=tenant,
             )
         self.set_logger()

     def set_logger(self):
         kwargs = {
-            'progress_categories': self.context
-            'metrics_categories': self.context
+            'progress_categories': self.context.get('progress_categories'),
+            'metrics_categories': self.context.get('metrics_categories'),
         }

         if self.job_id:
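A small sketch of what the relaxed constructor now allows. This is illustrative only: the environment values are dummies, and behaviour in a real deployment still depends on the backend configuration and the rest of the Run class, which is not shown in this diff:

```python
import os

from synapse_sdk.plugins.models import Run

# Dummy values standing in for a test environment (not real endpoints or tokens).
os.environ.setdefault('SYNAPSE_PLUGIN_RUN_HOST', 'http://localhost:8000')
os.environ.setdefault('SYNAPSE_PLUGIN_RUN_USER_TOKEN', 'dummy-token')
os.environ.setdefault('SYNAPSE_PLUGIN_RUN_TENANT', 'dummy-tenant')

# context is now optional and defaults to an empty dict, so a Run can be
# constructed without a fully populated plugin context.
run = Run(job_id=None, context=None)
```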
synapse_sdk/utils/file/__init__.py

@@ -0,0 +1,39 @@
+# File utilities module
+# Maintains backward compatibility by re-exporting all functions
+
+from .archive import archive, unarchive
+from .checksum import calculate_checksum, get_checksum_from_file
+from .chunking import read_file_in_chunks
+from .download import (
+    adownload_file,
+    afiles_url_to_path,
+    afiles_url_to_path_from_objs,
+    download_file,
+    files_url_to_path,
+    files_url_to_path_from_objs,
+)
+from .encoding import convert_file_to_base64
+from .io import get_dict_from_file, get_temp_path
+
+__all__ = [
+    # Chunking
+    'read_file_in_chunks',
+    # Download
+    'download_file',
+    'adownload_file',
+    'files_url_to_path',
+    'afiles_url_to_path',
+    'files_url_to_path_from_objs',
+    'afiles_url_to_path_from_objs',
+    # Checksum
+    'calculate_checksum',
+    'get_checksum_from_file',
+    # Archive
+    'archive',
+    'unarchive',
+    # Encoding
+    'convert_file_to_base64',
+    # I/O
+    'get_dict_from_file',
+    'get_temp_path',
+]
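Because the new package re-exports everything, both of the following import styles should resolve to the same function (a quick sketch):

```python
from synapse_sdk.utils.file import download_file            # package-level re-export
from synapse_sdk.utils.file.download import download_file   # direct submodule import (same object)
```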
synapse_sdk/utils/file/archive.py

@@ -0,0 +1,32 @@
+import zipfile
+from pathlib import Path
+
+
+def archive(input_path, output_path, append=False):
+    input_path = Path(input_path)
+    output_path = Path(output_path)
+
+    mode = 'a' if append and output_path.exists() else 'w'
+    with zipfile.ZipFile(output_path, mode=mode, compression=zipfile.ZIP_DEFLATED) as zipf:
+        if input_path.is_file():
+            zipf.write(input_path, input_path.name)
+        else:
+            for file_path in input_path.rglob('*'):
+                if file_path.is_file():  # Only add files, skip directories
+                    arcname = file_path.relative_to(input_path.parent)
+                    zipf.write(file_path, arcname)
+
+
+def unarchive(file_path, output_path):
+    """
+    Unarchives a ZIP file to a given directory.
+
+    Parameters:
+        file_path (str | Path): The path to the ZIP file.
+        output_path (str): The directory where the files will be extracted.
+    """
+    output_path = Path(output_path)
+    output_path.mkdir(parents=True, exist_ok=True)
+
+    with zipfile.ZipFile(str(file_path), 'r') as zip_ref:
+        zip_ref.extractall(output_path)
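A short sketch of the archive helpers; all paths below are placeholders:

```python
from synapse_sdk.utils.file import archive, unarchive

archive('dataset', 'dataset.zip')                  # zip a directory (files only, directories skipped)
archive('notes.txt', 'dataset.zip', append=True)   # append=True reuses an existing zip
unarchive('dataset.zip', 'restored')               # extracts into ./restored, creating it if needed
```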
synapse_sdk/utils/file/checksum.py

@@ -0,0 +1,56 @@
+import hashlib
+from typing import IO, Any, Callable
+
+
+def calculate_checksum(file_path, prefix=''):
+    md5_hash = hashlib.md5()
+    with open(file_path, 'rb') as f:
+        for byte_block in iter(lambda: f.read(4096), b''):
+            md5_hash.update(byte_block)
+    checksum = md5_hash.hexdigest()
+    if prefix:
+        return f'dev-{checksum}'
+    return checksum
+
+
+def get_checksum_from_file(file: IO[Any], digest_mod: Callable[[], Any] = hashlib.sha1) -> str:
+    """
+    Calculate checksum for a file-like object.
+
+    Args:
+        file (IO[Any]): File-like object with read() method that supports reading in chunks
+        digest_mod (Callable[[], Any]): Hash algorithm from hashlib (defaults to hashlib.sha1)
+
+    Returns:
+        str: Hexadecimal digest of the file contents
+
+    Example:
+        ```python
+        import hashlib
+        from io import BytesIO
+        from synapse_sdk.utils.file import get_checksum_from_file
+
+        # With BytesIO
+        data = BytesIO(b'Hello, world!')
+        checksum = get_checksum_from_file(data)
+
+        # With different hash algorithm
+        checksum = get_checksum_from_file(data, digest_mod=hashlib.sha256)
+        ```
+    """
+    digest = digest_mod()
+    chunk_size = 4096
+
+    # Reset file pointer to beginning if possible
+    if hasattr(file, 'seek'):
+        file.seek(0)
+
+    while True:
+        chunk = file.read(chunk_size)
+        if not chunk:
+            break
+        if isinstance(chunk, str):
+            chunk = chunk.encode('utf-8')
+        digest.update(chunk)
+
+    return digest.hexdigest()
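Usage sketch for the checksum helpers (the zip path is a placeholder); note that, as written above, any truthy prefix results in a fixed 'dev-' prefix rather than the value passed in:

```python
import hashlib
from io import BytesIO

from synapse_sdk.utils.file import calculate_checksum, get_checksum_from_file

md5_digest = calculate_checksum('dataset.zip')              # plain MD5 hex digest
dev_digest = calculate_checksum('dataset.zip', prefix='x')  # 'dev-<md5>' regardless of the prefix value

sha256_digest = get_checksum_from_file(BytesIO(b'hello'), digest_mod=hashlib.sha256)
```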
synapse_sdk/utils/file/chunking.py

@@ -0,0 +1,31 @@
+def read_file_in_chunks(file_path, chunk_size=1024 * 1024 * 50):
+    """
+    Read a file in chunks for efficient memory usage during file processing.
+
+    This function is particularly useful for large files or when you need to process
+    files in chunks, such as for uploading or hashing.
+
+    Args:
+        file_path (str | Path): Path to the file to read
+        chunk_size (int, optional): Size of each chunk in bytes. Defaults to 50MB (1024 * 1024 * 50)
+
+    Yields:
+        bytes: File content chunks
+
+    Raises:
+        FileNotFoundError: If the file doesn't exist
+        PermissionError: If the file can't be read due to permissions
+        OSError: If there's an OS-level error reading the file
+
+    Example:
+        ```python
+        from synapse_sdk.utils.file import read_file_in_chunks
+
+        # Read a file in 10MB chunks
+        for chunk in read_file_in_chunks('large_file.bin', chunk_size=1024*1024*10):
+            process_chunk(chunk)
+        ```
+    """
+    with open(file_path, 'rb') as file:
+        while chunk := file.read(chunk_size):
+            yield chunk
synapse_sdk/utils/file/download.py

@@ -0,0 +1,124 @@
+import asyncio
+import operator
+from functools import reduce
+from pathlib import Path
+
+import aiohttp
+import requests
+
+from synapse_sdk.utils.network import clean_url
+from synapse_sdk.utils.string import hash_text
+
+from .io import get_temp_path
+
+
+def download_file(url, path_download, name=None, coerce=None, use_cached=True):
+    chunk_size = 1024 * 1024 * 50
+    cleaned_url = clean_url(url)  # remove query params and fragment
+
+    if name:
+        use_cached = False
+    else:
+        name = hash_text(cleaned_url)
+
+    name += Path(cleaned_url).suffix
+
+    path = Path(path_download) / name
+
+    if not use_cached or not path.is_file():
+        response = requests.get(url, allow_redirects=True, stream=True)
+        response.raise_for_status()
+
+        with path.open('wb') as file:
+            for chunk in response.iter_content(chunk_size=chunk_size):
+                file.write(chunk)
+
+    if coerce:
+        path = coerce(path)
+
+    return path
+
+
+def files_url_to_path(files, coerce=None, file_field=None):
+    path_download = get_temp_path('media')
+    path_download.mkdir(parents=True, exist_ok=True)
+    if file_field:
+        files[file_field] = download_file(files[file_field], path_download, coerce=coerce)
+    else:
+        for file_name in files:
+            if isinstance(files[file_name], str):
+                files[file_name] = download_file(files[file_name], path_download, coerce=coerce)
+            else:
+                files[file_name]['path'] = download_file(files[file_name].pop('url'), path_download, coerce=coerce)
+
+
+def files_url_to_path_from_objs(objs, files_fields, coerce=None, is_list=False, is_async=False):
+    if is_async:
+        asyncio.run(afiles_url_to_path_from_objs(objs, files_fields, coerce=coerce, is_list=is_list))
+    else:
+        if not is_list:
+            objs = [objs]
+
+        for obj in objs:
+            for files_field in files_fields:
+                try:
+                    files = reduce(operator.getitem, files_field.split('.'), obj)
+                    if isinstance(files, str):
+                        files_url_to_path(obj, coerce=coerce, file_field=files_field)
+                    else:
+                        files_url_to_path(files, coerce=coerce)
+                except KeyError:
+                    pass
+
+
+async def adownload_file(url, path_download, name=None, coerce=None, use_cached=True):
+    chunk_size = 1024 * 1024 * 50
+    cleaned_url = clean_url(url)  # remove query params and fragment
+
+    if name:
+        use_cached = False
+    else:
+        name = hash_text(cleaned_url)
+
+    name += Path(cleaned_url).suffix
+
+    path = Path(path_download) / name
+
+    if not use_cached or not path.is_file():
+        async with aiohttp.ClientSession() as session:
+            async with session.get(url) as response:
+                with path.open('wb') as file:
+                    while chunk := await response.content.read(chunk_size):
+                        file.write(chunk)
+
+    if coerce:
+        path = coerce(path)
+
+    return path
+
+
+async def afiles_url_to_path(files, coerce=None):
+    path_download = get_temp_path('media')
+    path_download.mkdir(parents=True, exist_ok=True)
+    for file_name in files:
+        if isinstance(files[file_name], str):
+            files[file_name] = await adownload_file(files[file_name], path_download, coerce=coerce)
+        else:
+            files[file_name]['path'] = await adownload_file(files[file_name].pop('url'), path_download, coerce=coerce)
+
+
+async def afiles_url_to_path_from_objs(objs, files_fields, coerce=None, is_list=False):
+    if not is_list:
+        objs = [objs]
+
+    tasks = []
+
+    for obj in objs:
+        for files_field in files_fields:
+            try:
+                files = reduce(operator.getitem, files_field.split('.'), obj)
+                tasks.append(afiles_url_to_path(files, coerce=coerce))
+            except KeyError:
+                pass
+
+    await asyncio.gather(*tasks)
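A synchronous usage sketch of the download helpers. The URL and directories are placeholders, and note that download_file expects the target directory to already exist:

```python
from pathlib import Path

from synapse_sdk.utils.file import download_file, files_url_to_path

Path('/tmp/downloads').mkdir(parents=True, exist_ok=True)
path = download_file('https://example.com/sample.png', '/tmp/downloads')
print(path)  # /tmp/downloads/<url-hash>.png, reused on later calls because use_cached=True

record = {'image': 'https://example.com/sample.png'}
files_url_to_path(record, file_field='image')  # replaces the URL with a local Path in place
print(record['image'])
```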
synapse_sdk/utils/file/encoding.py

@@ -0,0 +1,40 @@
+import base64
+import mimetypes
+from pathlib import Path
+
+
+def convert_file_to_base64(file_path):
+    """
+    Convert a file to base64 using pathlib.
+
+    Args:
+        file_path (str): Path to the file to convert
+
+    Returns:
+        str: Base64 encoded string of the file contents
+    """
+    # FIXME base64 is sent sometimes.
+    if file_path.startswith('data:'):
+        return file_path
+
+    # Convert string path to Path object
+    path = Path(file_path)
+
+    try:
+        # Read binary content of the file
+        binary_content = path.read_bytes()
+
+        # Convert to base64
+        base64_encoded = base64.b64encode(binary_content).decode('utf-8')
+
+        # Get the MIME type of the file
+        mime_type, _ = mimetypes.guess_type(path)
+        assert mime_type is not None, 'MIME type cannot be guessed'
+
+        # Convert bytes to string for readable output
+        return f'data:{mime_type};base64,{base64_encoded}'
+
+    except FileNotFoundError:
+        raise FileNotFoundError(f'File not found: {file_path}')
+    except Exception as e:
+        raise Exception(f'Error converting file to base64: {str(e)}')
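Sketch of convert_file_to_base64; 'logo.png' is a placeholder file that must exist and have a guessable MIME type:

```python
from synapse_sdk.utils.file import convert_file_to_base64

data_url = convert_file_to_base64('logo.png')
print(data_url[:22])  # e.g. 'data:image/png;base64,'

# Inputs that are already data URLs are returned unchanged.
assert convert_file_to_base64(data_url) == data_url
```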
synapse_sdk/utils/file/io.py

@@ -0,0 +1,22 @@
+import json
+from pathlib import Path
+
+import yaml
+
+
+def get_dict_from_file(file_path):
+    if isinstance(file_path, str):
+        file_path = Path(file_path)
+
+    with open(file_path) as f:
+        if file_path.suffix == '.yaml':
+            return yaml.safe_load(f)
+        else:
+            return json.load(f)
+
+
+def get_temp_path(sub_path=None):
+    path = Path('/tmp/datamaker')
+    if sub_path:
+        path = path / sub_path
+    return path
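Sketch of the small I/O helpers; config.yaml is a placeholder. As written above, only the '.yaml' suffix is parsed as YAML (anything else is treated as JSON), and get_temp_path does not create the directory:

```python
from synapse_sdk.utils.file import get_dict_from_file, get_temp_path

config = get_dict_from_file('config.yaml')   # YAML for '.yaml', JSON for any other suffix
cache_dir = get_temp_path('media')           # Path('/tmp/datamaker/media')
cache_dir.mkdir(parents=True, exist_ok=True)
```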
synapse_sdk/utils/file/video/__init__.py

@@ -0,0 +1,29 @@
+# Video processing utilities
+
+from .transcode import (
+    FFmpegNotFoundError,
+    TranscodeConfig,
+    TranscodingFailedError,
+    UnsupportedFormatError,
+    VideoTranscodeError,
+    atranscode_video,
+    get_video_info,
+    optimize_for_web,
+    transcode_batch,
+    transcode_video,
+    validate_video_format,
+)
+
+__all__ = [
+    'TranscodeConfig',
+    'VideoTranscodeError',
+    'UnsupportedFormatError',
+    'FFmpegNotFoundError',
+    'TranscodingFailedError',
+    'transcode_video',
+    'atranscode_video',
+    'get_video_info',
+    'validate_video_format',
+    'optimize_for_web',
+    'transcode_batch',
+]