synapse-sdk 1.0.0b8__py3-none-any.whl → 1.0.0b10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of synapse-sdk might be problematic.
- synapse_sdk/clients/agent/ray.py +247 -37
- synapse_sdk/devtools/docs/sidebars.ts +20 -1
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task.py +3 -0
- synapse_sdk/plugins/categories/upload/actions/upload.py +242 -34
- synapse_sdk/utils/network.py +272 -0
- {synapse_sdk-1.0.0b8.dist-info → synapse_sdk-1.0.0b10.dist-info}/METADATA +2 -1
- {synapse_sdk-1.0.0b8.dist-info → synapse_sdk-1.0.0b10.dist-info}/RECORD +11 -11
- {synapse_sdk-1.0.0b8.dist-info → synapse_sdk-1.0.0b10.dist-info}/WHEEL +0 -0
- {synapse_sdk-1.0.0b8.dist-info → synapse_sdk-1.0.0b10.dist-info}/entry_points.txt +0 -0
- {synapse_sdk-1.0.0b8.dist-info → synapse_sdk-1.0.0b10.dist-info}/licenses/LICENSE +0 -0
- {synapse_sdk-1.0.0b8.dist-info → synapse_sdk-1.0.0b10.dist-info}/top_level.txt +0 -0
synapse_sdk/clients/agent/ray.py
CHANGED
@@ -1,10 +1,87 @@
-import requests
+import weakref
+from concurrent.futures import ThreadPoolExecutor
 
 from synapse_sdk.clients.base import BaseClient
 from synapse_sdk.clients.exceptions import ClientError
+from synapse_sdk.utils.network import (
+    HTTPStreamManager,
+    StreamLimits,
+    WebSocketStreamManager,
+    http_to_websocket_url,
+    sanitize_error_message,
+    validate_resource_id,
+    validate_timeout,
+)
 
 
 class RayClientMixin(BaseClient):
+    """
+    Mixin class providing Ray cluster management and monitoring functionality.
+
+    This mixin extends BaseClient with Ray-specific operations for interacting with
+    Apache Ray distributed computing clusters. It provides comprehensive job management,
+    node monitoring, task tracking, and Ray Serve application control capabilities.
+
+    Key Features:
+        - Job lifecycle management (list, get, monitor)
+        - Real-time log streaming via WebSocket and HTTP protocols
+        - Node and task monitoring
+        - Ray Serve application deployment and management
+        - Robust error handling with input validation
+        - Resource management with automatic cleanup
+
+    Streaming Capabilities:
+        - WebSocket streaming for real-time log tailing
+        - HTTP streaming as fallback protocol
+        - Configurable timeouts and stream limits
+        - Automatic protocol validation and error recovery
+
+    Resource Management:
+        - Thread pool for concurrent operations (5 workers)
+        - WeakSet for tracking active connections
+        - Automatic cleanup on object destruction
+        - Stream limits to prevent resource exhaustion
+
+    Usage Examples:
+        Basic job operations:
+            >>> client = RayClient(base_url="http://ray-head:8265")
+            >>> jobs = client.list_jobs()
+            >>> job = client.get_job('job-12345')
+
+        Real-time log streaming:
+            >>> # WebSocket streaming (preferred)
+            >>> for log_line in client.tail_job_logs('job-12345', protocol='websocket'):
+            ...     print(log_line)
+
+            >>> # HTTP streaming (fallback)
+            >>> for log_line in client.tail_job_logs('job-12345', protocol='stream'):
+            ...     print(log_line)
+
+        Node and task monitoring:
+            >>> nodes = client.list_nodes()
+            >>> tasks = client.list_tasks()
+            >>> node_details = client.get_node('node-id')
+
+        Ray Serve management:
+            >>> apps = client.list_serve_applications()
+            >>> client.delete_serve_application('app-id')
+
+    Note:
+        This class is designed as a mixin and should be combined with other
+        client classes that provide authentication and base functionality.
+        It requires the BaseClient foundation for HTTP operations.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._thread_pool = ThreadPoolExecutor(max_workers=5, thread_name_prefix='ray_client_')
+        self._active_connections = weakref.WeakSet()
+
+        # Initialize stream managers
+        stream_limits = StreamLimits()
+        self._websocket_manager = WebSocketStreamManager(self._thread_pool, stream_limits)
+        self._http_manager = HTTPStreamManager(self.requests_session, stream_limits)
+
     def get_job(self, pk):
         path = f'jobs/{pk}/'
         return self._get(path)
@@ -17,48 +94,181 @@ class RayClientMixin(BaseClient):
         path = f'jobs/{pk}/logs/'
         return self._get(path)
 
-    def
-
-
+    def websocket_tail_job_logs(self, pk, stream_timeout=10):
+        """
+        Stream job logs in real-time using WebSocket protocol.
+
+        Establishes a WebSocket connection to stream job logs as they are generated.
+        This method provides the lowest latency for real-time log monitoring and is
+        the preferred protocol when available.
+
+        Args:
+            pk (str): Job primary key or identifier. Must be alphanumeric with
+                optional hyphens/underscores, max 100 characters.
+            stream_timeout (float, optional): Maximum time in seconds to wait for
+                log data. Defaults to 10. Must be positive
+                and cannot exceed 300 seconds.
+
+        Returns:
+            Generator[str, None, None]: A generator yielding log lines as strings.
+            Each line includes a newline character.
+
+        Raises:
+            ClientError:
+                - 400: If long polling is enabled (incompatible)
+                - 400: If pk is empty, contains invalid characters, or too long
+                - 400: If stream_timeout is not positive or exceeds maximum
+                - 500: If WebSocket library is unavailable
+                - 503: If connection to Ray cluster fails
+                - 408: If connection timeout occurs
+                - 429: If stream limits are exceeded (lines, size, messages)
+
+        Usage:
+            >>> # Basic log streaming
+            >>> for log_line in client.websocket_tail_job_logs('job-12345'):
+            ...     print(log_line.strip())
+
+            >>> # With custom timeout
+            >>> for log_line in client.websocket_tail_job_logs('job-12345', stream_timeout=30):
+            ...     if 'ERROR' in log_line:
+            ...         break
+
+        Technical Notes:
+            - Uses WebSocketStreamManager for connection management
+            - Automatic input validation and sanitization
+            - Resource cleanup handled by WeakSet tracking
+            - Stream limits prevent memory exhaustion
+            - Thread pool manages WebSocket operations
+
+        See Also:
+            stream_tail_job_logs: HTTP-based alternative
+            tail_job_logs: Protocol-agnostic wrapper method
+        """
+        if hasattr(self, 'long_poll_handler') and self.long_poll_handler:
+            raise ClientError(400, '"websocket_tail_job_logs" does not support long polling')
+
+        # Validate inputs using network utilities
+        validated_pk = validate_resource_id(pk, 'job')
+        validated_timeout = validate_timeout(stream_timeout)
 
-
+        # Build WebSocket URL
+        path = f'ray/jobs/{validated_pk}/logs/ws/'
         url = self._get_url(path)
+        ws_url = http_to_websocket_url(url)
+
+        # Get headers and use WebSocket manager
         headers = self._get_headers()
+        context = f'job {validated_pk}'
+
+        return self._websocket_manager.stream_logs(ws_url, headers, validated_timeout, context)
+
+    def stream_tail_job_logs(self, pk, stream_timeout=10):
+        """
+        Stream job logs in real-time using HTTP chunked transfer encoding.
+
+        Establishes an HTTP connection with chunked transfer encoding to stream
+        job logs as they are generated. This method serves as a reliable fallback
+        when WebSocket connections are not available or suitable.
+
+        Args:
+            pk (str): Job primary key or identifier. Must be alphanumeric with
+                optional hyphens/underscores, max 100 characters.
+            stream_timeout (float, optional): Maximum time in seconds to wait for
+                log data. Defaults to 10. Must be positive
+                and cannot exceed 300 seconds.
+
+        Returns:
+            Generator[str, None, None]: A generator yielding log lines as strings.
+            Each line includes a newline character.
+
+        Raises:
+            ClientError:
+                - 400: If long polling is enabled (incompatible)
+                - 400: If pk is empty, contains invalid characters, or too long
+                - 400: If stream_timeout is not positive or exceeds maximum
+                - 503: If connection to Ray cluster fails
+                - 408: If connection or read timeout occurs
+                - 404: If job is not found
+                - 429: If stream limits are exceeded (lines, size, messages)
+                - 500: If unexpected streaming error occurs
+
+        Usage:
+            >>> # Basic HTTP log streaming
+            >>> for log_line in client.stream_tail_job_logs('job-12345'):
+            ...     print(log_line.strip())
+
+            >>> # With error handling and custom timeout
+            >>> try:
+            ...     for log_line in client.stream_tail_job_logs('job-12345', stream_timeout=60):
+            ...         if 'COMPLETED' in log_line:
+            ...             break
+            ... except ClientError as e:
+            ...     print(f"Streaming failed: {e}")
+
+        Technical Notes:
+            - Uses HTTPStreamManager for connection management
+            - Automatic input validation and sanitization
+            - Proper HTTP response cleanup on completion/error
+            - Stream limits prevent memory exhaustion
+            - Filters out oversized lines (>10KB) automatically
+            - Connection reuse through requests session
+
+        See Also:
+            websocket_tail_job_logs: WebSocket-based alternative (preferred)
+            tail_job_logs: Protocol-agnostic wrapper method
+        """
+        if hasattr(self, 'long_poll_handler') and self.long_poll_handler:
+            raise ClientError(400, '"stream_tail_job_logs" does not support long polling')
+
+        # Validate inputs using network utilities
+        validated_pk = validate_resource_id(pk, 'job')
+        validated_timeout = validate_timeout(stream_timeout)
+
+        # Build HTTP URL and prepare request
+        path = f'ray/jobs/{validated_pk}/logs/stream'
+        url = self._get_url(path)
+        headers = self._get_headers()
+        timeout = (self.timeout['connect'], validated_timeout)
+        context = f'job {validated_pk}'
+
+        return self._http_manager.stream_logs(url, headers, timeout, context)
+
+    def tail_job_logs(self, pk, stream_timeout=10, protocol='stream'):
+        """
+        Tail job logs using either WebSocket or HTTP streaming.
+
+        Args:
+            pk: Job primary key
+            stream_timeout: Timeout for streaming operations
+            protocol: 'websocket' or 'stream' (default: 'stream')
+        """
+        # Validate protocol first
+        if protocol not in ('websocket', 'stream'):
+            raise ClientError(400, f'Unsupported protocol: {protocol}. Use "websocket" or "stream"')
+
+        # Pre-validate common inputs using network utilities
+        validate_resource_id(pk, 'job')
+        validate_timeout(stream_timeout)
 
         try:
-
-
-
-
-
-
-            # Set up streaming with timeout handling
-            try:
-                for line in response.iter_lines(decode_unicode=True, chunk_size=1024):
-                    if line:
-                        yield f'{line}\n'
-            except requests.exceptions.ChunkedEncodingError:
-                # Connection was interrupted during streaming
-                raise ClientError(503, f'Log stream for job {pk} was interrupted')
-            except requests.exceptions.ReadTimeout:
-                # Read timeout during streaming
-                raise ClientError(408, f'Log stream for job {pk} timed out after {stream_timeout}s')
-
-        except requests.exceptions.ConnectTimeout:
-            raise ClientError(
-                408, f'Failed to connect to log stream for job {pk} (timeout: {self.timeout["connect"]}s)'
-            )
-        except requests.exceptions.ReadTimeout:
-            raise ClientError(408, f'Log stream for job {pk} read timeout ({stream_timeout}s)')
-        except requests.exceptions.ConnectionError as e:
-            if 'Connection refused' in str(e):
-                raise ClientError(503, f'Agent connection refused for job {pk}')
-            else:
-                raise ClientError(503, f'Agent connection error for job {pk}: {str(e)[:100]}')
-        except requests.exceptions.HTTPError as e:
-            raise ClientError(e.response.status_code, f'HTTP error streaming logs for job {pk}: {e}')
+            if protocol == 'websocket':
+                return self.websocket_tail_job_logs(pk, stream_timeout)
+            else:  # protocol == 'stream'
+                return self.stream_tail_job_logs(pk, stream_timeout)
+        except ClientError:
+            raise
         except Exception as e:
-
+            # Fallback error handling using network utility
+            sanitized_error = sanitize_error_message(str(e), f'job {pk}')
+            raise ClientError(500, f'Protocol {protocol} failed: {sanitized_error}')
+
+    def __del__(self):
+        """Cleanup resources when object is destroyed."""
+        try:
+            if hasattr(self, '_thread_pool'):
+                self._thread_pool.shutdown(wait=False)
+        except Exception:
+            pass  # Ignore cleanup errors during destruction
 
     def get_node(self, pk):
         path = f'nodes/{pk}/'
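The docstrings above describe the intended calling pattern; the sketch below shows how a caller might combine the two protocols. It is a minimal sketch, assuming a concrete RayClient class (the name used in the docstring example) that mixes in RayClientMixin and can reach a Ray head node; it is not taken from the package itself.

from synapse_sdk.clients.exceptions import ClientError

def follow_job_logs(client, job_id, timeout=30):
    """Tail a Ray job's logs, preferring WebSocket and falling back to HTTP streaming."""
    try:
        # protocol='websocket' routes to websocket_tail_job_logs()
        for line in client.tail_job_logs(job_id, stream_timeout=timeout, protocol='websocket'):
            print(line.rstrip())
    except ClientError:
        # e.g. 500 when the websockets library is missing, or 503/408 on connection problems
        for line in client.tail_job_logs(job_id, stream_timeout=timeout, protocol='stream'):
            print(line.rstrip())

# client = RayClient(base_url='http://ray-head:8265')  # constructor as shown in the docstring above
# follow_job_logs(client, 'job-12345')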
synapse_sdk/devtools/docs/sidebars.ts
CHANGED
@@ -31,7 +31,26 @@ const sidebars: SidebarsConfig = {
       label: 'API Reference',
       items: [
         'api/index',
-
+        {
+          type: 'category',
+          label: 'Clients',
+          items: [
+            'api/clients/backend',
+            'api/clients/agent',
+            'api/clients/ray',
+            'api/clients/base',
+          ],
+        },
+        {
+          type: 'category',
+          label: 'Utilities',
+          items: [
+            'api/utils/file',
+            'api/utils/network',
+            'api/utils/storage',
+            'api/utils/types',
+          ],
+        },
       ],
     },
     'configuration',
synapse_sdk/plugins/categories/pre_annotation/actions/to_task.py
CHANGED
@@ -862,6 +862,9 @@ class ToTaskAction(Action):
             }
 
             inference_data = client.run_plugin(pre_processor_code, inference_payload)
+            # Every inference api should return None if failed to inference.
+            if inference_data is None:
+                return {'success': False, 'error': 'Inference data is None'}
             return {'success': True, 'data': inference_data}
         except Exception as e:
             return {'success': False, 'error': f'Failed to run inference: {str(e)}'}
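The added None check encodes the convention stated in the new comment: inference plugins signal failure by returning None rather than raising. A hypothetical pre-processor sketch of that contract (the function name, payload shape, and stand-in prediction logic are illustrative, not from the SDK):

def run_inference(inference_payload):
    """Hypothetical pre-processor entry point: return predictions, or None on failure."""
    try:
        data = inference_payload['data']
        # Stand-in for a real model call; an empty result also maps to None.
        predictions = [{'label': 'ok', 'score': 1.0} for _ in data]
        return predictions or None
    except Exception:
        # Returning None lets ToTaskAction answer
        # {'success': False, 'error': 'Inference data is None'} instead of crashing the run.
        return None

print(run_inference({'data': [1, 2, 3]}))  # list of predictions
print(run_inference({}))                   # missing key -> None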
synapse_sdk/plugins/categories/upload/actions/upload.py
CHANGED
@@ -1,5 +1,6 @@
 import json
 import os
+import shutil
 from datetime import datetime
 from enum import Enum
 from io import BytesIO
@@ -45,6 +46,13 @@ class UploadStatus(str, Enum):
 
 
 class UploadRun(Run):
+    class UploadEventLog(BaseModel):
+        """Upload event log model."""
+
+        info: Optional[str] = None
+        status: Context
+        created: str
+
     class DataFileLog(BaseModel):
         """Data file log model."""
 
@@ -74,6 +82,192 @@ class UploadRun(Run):
         failed: int
         success: int
 
+    LOG_MESSAGES = {
+        # Validation errors - show in both log_message and EventLog
+        'STORAGE_VALIDATION_FAILED': {
+            'message': 'Storage validation failed.',
+            'level': Context.DANGER,
+        },
+        'COLLECTION_VALIDATION_FAILED': {
+            'message': 'Collection validation failed.',
+            'level': Context.DANGER,
+        },
+        'PROJECT_VALIDATION_FAILED': {
+            'message': 'Project validation failed.',
+            'level': Context.DANGER,
+        },
+        'VALIDATION_FAILED': {
+            'message': 'Validation failed.',
+            'level': Context.DANGER,
+        },
+        'NO_FILES_FOUND': {
+            'message': 'Files not found on the path.',
+            'level': Context.WARNING,
+        },
+        'NO_FILES_UPLOADED': {
+            'message': 'No files were uploaded.',
+            'level': Context.WARNING,
+        },
+        'NO_DATA_UNITS_GENERATED': {
+            'message': 'No data units were generated.',
+            'level': Context.WARNING,
+        },
+        'NO_TYPE_DIRECTORIES': {
+            'message': 'No type-based directory structure found.',
+            'level': Context.INFO,
+        },
+        'EXCEL_SECURITY_VIOLATION': {
+            'message': 'Excel security validation failed: {}',
+            'level': Context.DANGER,
+        },
+        'EXCEL_PARSING_ERROR': {
+            'message': 'Excel parsing failed: {}',
+            'level': Context.DANGER,
+        },
+        'EXCEL_METADATA_LOADED': {
+            'message': 'Excel metadata loaded for {} files',
+            'level': None,
+        },
+        'UPLOADING_DATA_FILES': {
+            'message': 'Uploading data files...',
+            'level': None,
+        },
+        'GENERATING_DATA_UNITS': {
+            'message': 'Generating data units...',
+            'level': None,
+        },
+        'IMPORT_COMPLETED': {
+            'message': 'Import completed.',
+            'level': None,
+        },
+        'TYPE_DIRECTORIES_FOUND': {
+            'message': 'Found type directories: {}',
+            'level': None,
+        },
+        'TYPE_STRUCTURE_DETECTED': {
+            'message': 'Detected type-based directory structure',
+            'level': None,
+        },
+        'FILES_DISCOVERED': {
+            'message': 'Discovered {} files',
+            'level': None,
+        },
+        'NO_FILES_FOUND_WARNING': {
+            'message': 'No files found.',
+            'level': Context.WARNING,
+        },
+        'FILE_UPLOAD_FAILED': {
+            'message': 'Failed to upload file: {}',
+            'level': Context.DANGER,
+        },
+        'DATA_UNIT_BATCH_FAILED': {
+            'message': 'Failed to create data units batch: {}',
+            'level': Context.DANGER,
+        },
+        'FILENAME_TOO_LONG': {
+            'message': 'Skipping file with overly long name: {}...',
+            'level': Context.WARNING,
+        },
+        'MISSING_REQUIRED_FILES': {
+            'message': '{} missing required files: {}',
+            'level': Context.WARNING,
+        },
+        'EXCEL_FILE_NOT_FOUND': {
+            'message': 'Excel metadata file not found: {}',
+            'level': Context.WARNING,
+        },
+        # Debug information - only for EventLog
+        'EXCEL_FILE_VALIDATION_STARTED': {
+            'message': 'Excel file validation started: {}',
+            'level': Context.INFO,
+        },
+        'EXCEL_WORKBOOK_LOADED': {
+            'message': 'Excel workbook loaded successfully',
+            'level': Context.INFO,
+        },
+        'FILE_ORGANIZATION_STARTED': {
+            'message': 'File organization started for directory: {}',
+            'level': Context.INFO,
+        },
+        'BATCH_PROCESSING_STARTED': {
+            'message': 'Batch processing started: {} batches of {} items each',
+            'level': Context.INFO,
+        },
+        'EXCEL_SECURITY_VALIDATION_STARTED': {
+            'message': 'Excel security validation started for file size: {} bytes',
+            'level': Context.INFO,
+        },
+        'EXCEL_MEMORY_ESTIMATION': {
+            'message': 'Excel memory estimation: {} bytes (file) * 3 = {} bytes (estimated)',
+            'level': Context.INFO,
+        },
+        'EXCEL_FILE_NOT_FOUND_PATH': {
+            'message': 'Excel metadata file not found: {}',
+            'level': Context.WARNING,
+        },
+        'EXCEL_SECURITY_VALIDATION_FAILED': {
+            'message': 'Excel security validation failed: {}',
+            'level': Context.DANGER,
+        },
+        'EXCEL_PARSING_FAILED': {
+            'message': 'Excel parsing failed: {}',
+            'level': Context.DANGER,
+        },
+        'EXCEL_INVALID_FILE_FORMAT': {
+            'message': 'Invalid Excel file format: {}',
+            'level': Context.DANGER,
+        },
+        'EXCEL_FILE_TOO_LARGE': {
+            'message': 'Excel file too large to process (memory limit exceeded)',
+            'level': Context.DANGER,
+        },
+        'EXCEL_FILE_ACCESS_ERROR': {
+            'message': 'File access error reading excel metadata: {}',
+            'level': Context.DANGER,
+        },
+        'EXCEL_UNEXPECTED_ERROR': {
+            'message': 'Unexpected error reading excel metadata: {}',
+            'level': Context.DANGER,
+        },
+    }
+
+    def log_message_with_code(self, code: str, *args, level: Optional[Context] = None):
+        """Unified logging method that handles both log_message and EventLog based on configuration."""
+        if code not in self.LOG_MESSAGES:
+            self.log_message(f'Unknown log code: {code}')
+            self.log_upload_event('UNKNOWN_LOG_CODE', code)
+            return
+
+        log_config = self.LOG_MESSAGES[code]
+        message = log_config['message'].format(*args) if args else log_config['message']
+        log_level = level or log_config['level'] or Context.INFO
+
+        # Log to message if configured
+        if log_level == Context.INFO.value:
+            self.log_message(message, context=log_level.value)
+        else:
+            self.log_upload_event(code, *args, level)
+
+    def log_upload_event(self, code: str, *args, level: Optional[Context] = None):
+        """Log upload event using predefined code."""
+        if code not in self.LOG_MESSAGES:
+            now = datetime.now().isoformat()
+            self.log(
+                'upload_event',
+                self.UploadEventLog(info=f'Unknown log code: {code}', status=Context.DANGER, created=now).model_dump(),
+            )
+            return
+
+        log_config = self.LOG_MESSAGES[code]
+        message = log_config['message'].format(*args) if args else log_config['message']
+        log_level = level or log_config['level'] or Context.INFO
+
+        now = datetime.now().isoformat()
+        self.log(
+            'upload_event',
+            self.UploadEventLog(info=message, status=log_level, created=now).model_dump(),
+        )
+
     def log_data_file(self, data_file_info: dict, status: UploadStatus):
         """Upload data_file log.
 
@@ -465,9 +659,7 @@ class UploadAction(Action):
 
             file_name = str(row[0]).strip()
             if not self.excel_utils.is_valid_filename_length(file_name):
-                self.run.log_message(
-                    f'Skipping file with overly long name: {file_name[:50]}...', context=Context.WARNING.value
-                )
+                self.run.log_message_with_code('FILENAME_TOO_LONG', file_name[:50])
                 return None
 
             # Create metadata dictionary from remaining columns
@@ -596,7 +788,7 @@ class UploadAction(Action):
         if self.params.get('excel_metadata_path'):
             excel_path = pathlib_cwd / self.params['excel_metadata_path']
             if not excel_path.exists():
-                self.run.
+                self.run.log_message_with_code('EXCEL_FILE_NOT_FOUND_PATH', str(excel_path))
                 return {}
         else:
             # Look for default meta.xlsx or meta.xls
@@ -606,33 +798,36 @@ class UploadAction(Action):
             return {}
 
         try:
+            self.run.log_message_with_code('EXCEL_FILE_VALIDATION_STARTED', str(excel_path))
+
            # Prepare Excel file with security validation
            excel_stream = self._prepare_excel_file(excel_path)
 
            # Load and process workbook
            workbook = load_workbook(excel_stream, read_only=True, data_only=True)
            try:
+                self.run.log_message_with_code('EXCEL_WORKBOOK_LOADED')
                return self._process_excel_worksheet(workbook.active)
            finally:
                workbook.close()
 
        except ExcelSecurityError as e:
-            self.run.
+            self.run.log_message_with_code('EXCEL_SECURITY_VALIDATION_FAILED', str(e))
            raise
        except ExcelParsingError as e:
-            self.run.
+            self.run.log_message_with_code('EXCEL_PARSING_FAILED', str(e))
            raise
        except InvalidFileException as e:
-            self.run.
+            self.run.log_message_with_code('EXCEL_INVALID_FILE_FORMAT', str(e))
            raise ExcelParsingError(f'Invalid Excel file format: {str(e)}')
        except MemoryError:
-            self.run.
+            self.run.log_message_with_code('EXCEL_FILE_TOO_LARGE')
            raise ExcelSecurityError('Excel file exceeds memory limits')
        except (OSError, IOError) as e:
-            self.run.
+            self.run.log_message_with_code('EXCEL_FILE_ACCESS_ERROR', str(e))
            raise ExcelParsingError(f'File access error: {str(e)}')
        except Exception as e:
-            self.run.
+            self.run.log_message_with_code('EXCEL_UNEXPECTED_ERROR', str(e))
            raise ExcelParsingError(f'Unexpected error: {str(e)}')
 
     def start(self) -> Dict[str, Any]:
@@ -653,22 +848,20 @@ class UploadAction(Action):
         try:
             excel_metadata = self._read_excel_metadata(pathlib_cwd)
             if excel_metadata:
-                self.run.
+                self.run.log_message_with_code('EXCEL_METADATA_LOADED', len(excel_metadata))
         except ExcelSecurityError as e:
             # Security violations should stop the process entirely
-            self.run.
-            self.run.log_message('Upload aborted due to Excel security concerns.', context=Context.ERROR.value)
+            self.run.log_message_with_code('EXCEL_SECURITY_VIOLATION', str(e))
             return result
         except ExcelParsingError as e:
             # Parsing errors can be non-critical if user didn't explicitly provide Excel file
             if self.params.get('excel_metadata_path'):
                 # User explicitly provided Excel file, treat as error
-                self.run.
-                self.run.log_message('Upload aborted due to Excel parsing failure.', context=Context.ERROR.value)
+                self.run.log_message_with_code('EXCEL_PARSING_ERROR', str(e))
                 return result
             else:
                 # Default Excel file found but failed to parse, treat as warning and continue
-                self.run.
+                self.run.log_message_with_code('EXCEL_PARSING_ERROR', str(e))
                 excel_metadata = {}
 
         # Analyze Collection file specifications to determine the data structure for upload.
@@ -684,29 +877,32 @@ class UploadAction(Action):
 
         # Validate the organized files
         if not self._validate_organized_files(organized_files, file_specification_template):
-            self.run.
+            self.run.log_message_with_code('VALIDATION_FAILED')
             raise ActionError('Upload is aborted due to validation errors.')
 
         # Upload files to synapse-backend.
         if not organized_files:
-            self.run.
+            self.run.log_message_with_code('NO_FILES_FOUND')
             raise ActionError('Upload is aborted due to missing files.')
         uploaded_files = self._upload_files(organized_files)
         result['uploaded_files_count'] = len(uploaded_files)
 
         # Generate data units for the uploaded data.
         if not uploaded_files:
-            self.run.
+            self.run.log_message_with_code('NO_FILES_UPLOADED')
             raise ActionError('Upload is aborted due to no uploaded files.')
         generated_data_units = self._generate_data_units(uploaded_files)
         result['generated_data_units_count'] = len(generated_data_units)
 
         # Setup task with uploaded synapse-backend data units.
         if not generated_data_units:
-            self.run.
+            self.run.log_message_with_code('NO_DATA_UNITS_GENERATED')
             raise ActionError('Upload is aborted due to no generated data units.')
 
-
+        # Clean up if temp dir exists
+        self._cleanup_temp_directory()
+
+        self.run.log_message_with_code('IMPORT_COMPLETED')
         return result
 
     def _analyze_collection(self) -> Dict[str, Any]:
@@ -736,7 +932,7 @@ class UploadAction(Action):
         # Initialize progress
         organized_files_count = len(organized_files)
         self.run.set_progress(0, organized_files_count, category='upload_data_files')
-        self.run.
+        self.run.log_message_with_code('UPLOADING_DATA_FILES')
 
         client = self.run.client
         collection_id = self.params['data_collection']
@@ -756,7 +952,7 @@ class UploadAction(Action):
                 upload_result.append(uploaded_data_file)
             except Exception as e:
                 self.run.log_data_file(organized_file, UploadStatus.FAILED)
-                self.run.
+                self.run.log_message_with_code('FILE_UPLOAD_FAILED', str(e))
                 failed_count += 1
 
             current_progress += 1
@@ -779,7 +975,7 @@ class UploadAction(Action):
         # Initialize progress
         upload_result_count = len(uploaded_files)
         self.run.set_progress(0, upload_result_count, category='generate_data_units')
-        self.run.
+        self.run.log_message_with_code('GENERATING_DATA_UNITS')
 
         client = self.run.client
         generated_data_units = []
@@ -804,7 +1000,7 @@ class UploadAction(Action):
                 )
             except Exception as e:
                 failed_count += len(batch)
-                self.run.
+                self.run.log_message_with_code('DATA_UNIT_BATCH_FAILED', str(e))
                 for _ in batch:
                     self.run.log_data_unit(None, UploadStatus.FAILED, data_unit_meta=None)
 
@@ -862,14 +1058,15 @@ class UploadAction(Action):
                 type_dirs[spec_name] = spec_dir
 
         if type_dirs:
-            self.run.
+            self.run.log_message_with_code('TYPE_DIRECTORIES_FOUND', list(type_dirs.keys()))
 
         # If type-based directories don't exist, exit early
         if not type_dirs:
-            self.run.
+            self.run.log_message_with_code('NO_TYPE_DIRECTORIES')
             return organized_files
 
-        self.run.
+        self.run.log_message_with_code('TYPE_STRUCTURE_DETECTED')
+        self.run.log_message_with_code('FILE_ORGANIZATION_STARTED', str(directory))
 
         # Collect and process files in a single pass
         dataset_files = {}
@@ -894,10 +1091,10 @@ class UploadAction(Action):
                 dataset_files[file_name][spec_name] = file_path
 
         if not dataset_files:
-            self.run.
+            self.run.log_message_with_code('NO_FILES_FOUND_WARNING')
             return organized_files
 
-        self.run.
+        self.run.log_message_with_code('FILES_DISCOVERED', len(dataset_files))
 
         # Organize datasets - check requirements and create metadata
         for file_name, files_dict in sorted(dataset_files.items()):
@@ -926,13 +1123,24 @@ class UploadAction(Action):
                 organized_files.append({'files': files_dict, 'meta': meta_data})
             else:
                 missing = [req for req in required_specs if req not in files_dict]
-                self.run.log_message(
-                    f'{file_name} missing required files: {", ".join(missing)}',
-                    context=Context.WARNING.value,
-                )
+                self.run.log_message_with_code('MISSING_REQUIRED_FILES', file_name, ', '.join(missing))
 
         return organized_files
 
+    def _cleanup_temp_directory(self, temp_path: Optional[Path] = None) -> None:
+        """Clean up temporary directory.
+
+        Args:
+            temp_path (Optional[Path]): Path to temporary directory.
+                If None, uses default temp directory in current working directory.
+        """
+        if temp_path is None:
+            temp_path = Path(os.getcwd()) / 'temp'
+
+        if temp_path.exists():
+            shutil.rmtree(temp_path, ignore_errors=True)
+            self.run.log_message(f'Cleaned up temporary directory: {temp_path}')
+
     def _update_metrics(self, total_count: int, success_count: int, failed_count: int, category: str):
         """Update metrics for upload progress.
 
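A short sketch of how the new code-based logging appears intended to be used from an action, assuming a run object of the UploadRun type shown above; the specific call sites below are illustrative, not taken from the package.

# Codes whose level resolves to INFO are routed to log_message;
# codes with an explicit WARNING/DANGER level become structured upload events.
run.log_message_with_code('UPLOADING_DATA_FILES')
run.log_message_with_code('FILES_DISCOVERED', 42)

# Placeholders in the message template are filled positionally via str.format().
run.log_message_with_code('MISSING_REQUIRED_FILES', 'sample_001', 'image, label')

# Unknown codes are not dropped silently: they produce an
# 'Unknown log code: ...' message plus a DANGER upload event.
run.log_message_with_code('SOME_TYPO_CODE')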
synapse_sdk/utils/network.py
CHANGED
@@ -1,5 +1,277 @@
+import asyncio
+import queue as queue_module
+import re
+import ssl
+from concurrent.futures import ThreadPoolExecutor
+from dataclasses import dataclass
+from typing import Any, Dict, Generator, Optional
 from urllib.parse import urlparse, urlunparse
 
+import requests
+
+from synapse_sdk.clients.exceptions import ClientError
+
+
+@dataclass
+class StreamLimits:
+    """Configuration for streaming limits."""
+
+    max_messages: int = 10000
+    max_lines: int = 50000
+    max_bytes: int = 50 * 1024 * 1024  # 50MB
+    max_message_size: int = 10240  # 10KB
+    queue_size: int = 1000
+    exception_queue_size: int = 10
+
+
+def validate_resource_id(resource_id: Any, resource_name: str = 'resource') -> str:
+    """Validate resource ID to prevent injection attacks."""
+    if not resource_id:
+        raise ClientError(400, f'{resource_name} ID cannot be empty')
+
+    # Allow numeric IDs and UUID formats
+    id_str = str(resource_id)
+    if not re.match(r'^[a-zA-Z0-9\-_]+$', id_str):
+        raise ClientError(400, f'Invalid {resource_name} ID format')
+
+    if len(id_str) > 100:
+        raise ClientError(400, f'{resource_name} ID too long')
+
+    return id_str
+
+
+def validate_timeout(timeout: Any, max_timeout: int = 300) -> float:
+    """Validate timeout value with bounds checking."""
+    if not isinstance(timeout, (int, float)) or timeout <= 0:
+        raise ClientError(400, 'Timeout must be a positive number')
+
+    if timeout > max_timeout:
+        raise ClientError(400, f'Timeout cannot exceed {max_timeout} seconds')
+
+    return float(timeout)
+
+
+def sanitize_error_message(error_msg: str, context: str = '') -> str:
+    """Sanitize error messages to prevent information disclosure."""
+    sanitized = str(error_msg)[:100]
+    # Remove any potential sensitive information
+    sanitized = re.sub(r'["\']([^"\']*)["\']', '"[REDACTED]"', sanitized)
+
+    if context:
+        return f'{context}: {sanitized}'
+    return sanitized
+
+
+def http_to_websocket_url(url: str) -> str:
+    """Convert HTTP/HTTPS URL to WebSocket URL safely."""
+    try:
+        parsed = urlparse(url)
+        if parsed.scheme == 'http':
+            ws_scheme = 'ws'
+        elif parsed.scheme == 'https':
+            ws_scheme = 'wss'
+        else:
+            raise ClientError(400, f'Invalid URL scheme: {parsed.scheme}')
+
+        ws_url = urlunparse((ws_scheme, parsed.netloc, parsed.path, parsed.params, parsed.query, parsed.fragment))
+        return ws_url
+    except Exception as e:
+        raise ClientError(400, f'Invalid URL format: {str(e)[:50]}')
+
+
+def check_library_available(library_name: str) -> bool:
+    """Check if optional library is available."""
+    try:
+        __import__(library_name)
+        return True
+    except ImportError:
+        return False
+
+
+class WebSocketStreamManager:
+    """Manages secure WebSocket streaming with rate limiting and error handling."""
+
+    def __init__(self, thread_pool: ThreadPoolExecutor, limits: Optional[StreamLimits] = None):
+        self.thread_pool = thread_pool
+        self.limits = limits or StreamLimits()
+
+    def stream_logs(
+        self, ws_url: str, headers: Dict[str, str], timeout: float, context: str
+    ) -> Generator[str, None, None]:
+        """Stream logs from WebSocket with proper error handling and cleanup."""
+        if not check_library_available('websockets'):
+            raise ClientError(500, 'websockets library not available for WebSocket connections')
+
+        try:
+            import websockets
+
+            # Use bounded queues to prevent memory exhaustion
+            message_queue = queue_module.Queue(maxsize=self.limits.queue_size)
+            exception_queue = queue_module.Queue(maxsize=self.limits.exception_queue_size)
+
+            async def websocket_client():
+                try:
+                    # Add SSL verification and proper timeouts
+                    connect_kwargs = {
+                        'extra_headers': headers,
+                        'close_timeout': timeout,
+                        'ping_timeout': timeout,
+                        'ping_interval': timeout // 2,
+                    }
+
+                    # For secure connections, add SSL context
+                    if ws_url.startswith('wss://'):
+                        ssl_context = ssl.create_default_context()
+                        ssl_context.check_hostname = True
+                        ssl_context.verify_mode = ssl.CERT_REQUIRED
+                        connect_kwargs['ssl'] = ssl_context
+
+                    async with websockets.connect(ws_url, **connect_kwargs) as websocket:
+                        message_count = 0
+
+                        async for message in websocket:
+                            message_count += 1
+                            if message_count > self.limits.max_messages:
+                                exception_queue.put_nowait(ClientError(429, f'Message limit exceeded for {context}'))
+                                break
+
+                            # Validate message size
+                            if len(str(message)) > self.limits.max_message_size:
+                                continue
+
+                            try:
+                                message_queue.put_nowait(f'{message}\n')
+                            except queue_module.Full:
+                                exception_queue.put_nowait(ClientError(429, f'Message queue full for {context}'))
+                                break
+
+                        message_queue.put_nowait(None)  # Signal end
+
+                except websockets.exceptions.ConnectionClosed:
+                    exception_queue.put_nowait(ClientError(503, f'WebSocket connection closed for {context}'))
+                except asyncio.TimeoutError:
+                    exception_queue.put_nowait(ClientError(408, f'WebSocket timed out for {context}'))
+                except Exception as e:
+                    sanitized_error = sanitize_error_message(str(e), context)
+                    exception_queue.put_nowait(ClientError(500, sanitized_error))
+
+            # Use thread pool instead of raw threading
+            future = self.thread_pool.submit(lambda: asyncio.run(websocket_client()))
+
+            # Yield messages with proper cleanup
+            try:
+                while True:
+                    # Check for exceptions first
+                    try:
+                        exception = exception_queue.get_nowait()
+                        raise exception
+                    except queue_module.Empty:
+                        pass
+
+                    # Get message with timeout
+                    try:
+                        message = message_queue.get(timeout=1.0)
+                        if message is None:  # End signal
+                            break
+                        yield message
+                    except queue_module.Empty:
+                        # Check if future is done
+                        if future.done():
+                            try:
+                                future.result()  # This will raise any exception
+                                break  # Normal completion
+                            except Exception:
+                                break  # Error already in queue
+                        continue
+
+            finally:
+                # Cleanup: cancel future if still running
+                if not future.done():
+                    future.cancel()
+
+        except ImportError:
+            raise ClientError(500, 'websockets library not available for WebSocket connections')
+        except Exception as e:
+            if isinstance(e, ClientError):
+                raise
+            sanitized_error = sanitize_error_message(str(e), context)
+            raise ClientError(500, sanitized_error)
+
+
+class HTTPStreamManager:
+    """Manages HTTP streaming with rate limiting and proper resource cleanup."""
+
+    def __init__(self, requests_session: requests.Session, limits: Optional[StreamLimits] = None):
+        self.requests_session = requests_session
+        self.limits = limits or StreamLimits()
+
+    def stream_logs(
+        self, url: str, headers: Dict[str, str], timeout: tuple, context: str
+    ) -> Generator[str, None, None]:
+        """Stream logs from HTTP endpoint with proper error handling and cleanup."""
+        response = None
+        try:
+            # Use timeout for streaming to prevent hanging
+            response = self.requests_session.get(url, headers=headers, stream=True, timeout=timeout)
+            response.raise_for_status()
+
+            # Set up streaming with timeout and size limits
+            line_count = 0
+            total_bytes = 0
+
+            try:
+                for line in response.iter_lines(decode_unicode=True, chunk_size=1024):
+                    if line:
+                        line_count += 1
+                        total_bytes += len(line.encode('utf-8'))
+
+                        # Rate limiting checks
+                        if line_count > self.limits.max_lines:
+                            raise ClientError(429, f'Line limit exceeded for {context}')
+
+                        if total_bytes > self.limits.max_bytes:
+                            raise ClientError(429, f'Size limit exceeded for {context}')
+
+                        # Validate line size
+                        if len(line) > self.limits.max_message_size:
+                            continue
+
+                        yield f'{line}\n'
+
+            except requests.exceptions.ChunkedEncodingError:
+                raise ClientError(503, f'Log stream interrupted for {context}')
+            except requests.exceptions.ReadTimeout:
+                raise ClientError(408, f'Log stream timed out for {context}')
+
+        except requests.exceptions.ConnectTimeout:
+            raise ClientError(408, f'Failed to connect to log stream for {context}')
+        except requests.exceptions.ReadTimeout:
+            raise ClientError(408, f'Log stream read timeout for {context}')
+        except requests.exceptions.ConnectionError as e:
+            if 'Connection refused' in str(e):
+                raise ClientError(503, f'Agent connection refused for {context}')
+            else:
+                sanitized_error = sanitize_error_message(str(e), context)
+                raise ClientError(503, f'Agent connection error: {sanitized_error}')
+        except requests.exceptions.HTTPError as e:
+            if hasattr(e.response, 'status_code'):
+                status_code = e.response.status_code
+            else:
+                status_code = 500
+            raise ClientError(status_code, f'HTTP error streaming logs for {context}')
+        except Exception as e:
+            if isinstance(e, ClientError):
+                raise
+            sanitized_error = sanitize_error_message(str(e), context)
+            raise ClientError(500, sanitized_error)
+        finally:
+            # Ensure response is properly closed
+            if response is not None:
+                try:
+                    response.close()
+                except Exception:
+                    pass  # Ignore cleanup errors
+
 
 def clean_url(url, remove_query_params=True, remove_fragment=True):
     parsed = urlparse(url)
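A minimal sketch of using the new helpers directly, outside the Ray client mixin. It assumes only the functions and classes added above plus a plain requests session; the agent URL and empty headers are placeholders, not endpoints documented by the SDK.

import requests

from synapse_sdk.utils.network import (
    HTTPStreamManager,
    StreamLimits,
    http_to_websocket_url,
    validate_resource_id,
    validate_timeout,
)

job_id = validate_resource_id('job-12345', 'job')  # raises ClientError(400) on bad input
read_timeout = validate_timeout(30)                # bounded to 300 seconds by default

# 'https://...' becomes 'wss://...' for the WebSocket endpoint
ws_url = http_to_websocket_url(f'https://agent.example.com/ray/jobs/{job_id}/logs/ws/')

# Tighter limits than the defaults for a short-lived tail
limits = StreamLimits(max_lines=1000, max_bytes=1024 * 1024)
manager = HTTPStreamManager(requests.Session(), limits)

for line in manager.stream_logs(
    f'https://agent.example.com/ray/jobs/{job_id}/logs/stream',
    headers={},
    timeout=(5, read_timeout),  # (connect, read) tuple, as built in stream_tail_job_logs
    context=f'job {job_id}',
):
    print(line, end='')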
{synapse_sdk-1.0.0b8.dist-info → synapse_sdk-1.0.0b10.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: synapse-sdk
-Version: 1.0.0b8
+Version: 1.0.0b10
 Summary: synapse sdk
 Author-email: datamaker <developer@datamaker.io>
 License: MIT
@@ -22,6 +22,7 @@ Requires-Dist: universal-pathlib
 Requires-Dist: fsspec[gcs,s3,sftp]
 Requires-Dist: inquirer
 Requires-Dist: pillow
+Requires-Dist: websockets
 Provides-Extra: all
 Requires-Dist: ray[all]==2.44.1; extra == "all"
 Requires-Dist: python-nmap; extra == "all"
{synapse_sdk-1.0.0b8.dist-info → synapse_sdk-1.0.0b10.dist-info}/RECORD
CHANGED
@@ -29,7 +29,7 @@ synapse_sdk/clients/exceptions.py,sha256=ylv7x10eOp4aA3a48jwonnvqvkiYwzJYXjkVkRT
 synapse_sdk/clients/utils.py,sha256=8pPJTdzHiRPSbZMoQYHAgR2BAMO6u_R_jMV6a2p34iQ,392
 synapse_sdk/clients/agent/__init__.py,sha256=FqYbtzMJdzRfuU2SA-Yxdc0JKmVP1wxH6OlUNmB4lH8,2230
 synapse_sdk/clients/agent/core.py,sha256=aeMSzf8BF7LjVcmHaL8zC7ofBZUff8kIeqkW1xUJ6Sk,745
-synapse_sdk/clients/agent/ray.py,sha256=
+synapse_sdk/clients/agent/ray.py,sha256=LRLX0ypz9qe6e8F4O8ffyhSMlvSfupz5gUzI1yIZYrw,11978
 synapse_sdk/clients/agent/service.py,sha256=s7KuPK_DB1nr2VHrigttV1WyFonaGHNrPvU8loRxHcE,478
 synapse_sdk/clients/backend/__init__.py,sha256=9FzjQn0ljRhtdaoG3n38Mdgte7GFwIh4OtEmoqVg2_E,2098
 synapse_sdk/clients/backend/annotation.py,sha256=4MfX_ubDw6UxU_TFEWLqjqOT1lS8RYD29f5bJs0TfF4,1245
@@ -53,7 +53,7 @@ synapse_sdk/devtools/docs/README.md,sha256=yBzWf0K1ef4oymFXDaHo0nYWEgMQJqsOyrkNh
 synapse_sdk/devtools/docs/docusaurus.config.ts,sha256=zH1dxP4hmKsYWTo1OYMBIeE0Cj5dC8KChVtkORNQB5E,3129
 synapse_sdk/devtools/docs/package-lock.json,sha256=FfI_0BxNDB3tupLYxsH2PY4ogaBL6p22ujFWy8rRu88,653289
 synapse_sdk/devtools/docs/package.json,sha256=9UcQc49bh7CM1e8Cc4XYPWxRvHaZSXkecheWIC8Aohk,1144
-synapse_sdk/devtools/docs/sidebars.ts,sha256
+synapse_sdk/devtools/docs/sidebars.ts,sha256=-DteBl5FSI1bdg9eHC__Q3LjcIUGYUhKjVoz9Xpc0u0,1443
 synapse_sdk/devtools/docs/tsconfig.json,sha256=O9BNlRPjPiaVHW2_boShMbmTnh0Z2k0KQO6Alf9FMVY,215
 synapse_sdk/devtools/docs/blog/2019-05-28-first-blog-post.md,sha256=iP7gl_FPqo-qX13lkSRcRoT6ayJNmCkXoyvlm7GH248,312
 synapse_sdk/devtools/docs/blog/2019-05-29-long-blog-post.md,sha256=cM-dhhTeurEWMcdn0Kx-NpNts2YUUraSI_XFk_gVHEE,3122
@@ -141,7 +141,7 @@ synapse_sdk/plugins/categories/post_annotation/templates/plugin/post_annotation.
 synapse_sdk/plugins/categories/pre_annotation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation.py,sha256=6ib3RmnGrjpsQ0e_G-mRH1lfFunQ3gh2M831vuDn7HU,344
-synapse_sdk/plugins/categories/pre_annotation/actions/to_task.py,sha256=
+synapse_sdk/plugins/categories/pre_annotation/actions/to_task.py,sha256=trdlqnBlUSRrAZisnGw-aSxaiMZECSi05zuRyJxfbLQ,40516
 synapse_sdk/plugins/categories/pre_annotation/templates/config.yaml,sha256=VREoCp9wsvZ8T2E1d_MEKlR8TC_herDJGVQtu3ezAYU,589
 synapse_sdk/plugins/categories/pre_annotation/templates/plugin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 synapse_sdk/plugins/categories/pre_annotation/templates/plugin/pre_annotation.py,sha256=HBHxHuv2gMBzDB2alFfrzI_SZ1Ztk6mo7eFbR5GqHKw,106
@@ -154,7 +154,7 @@ synapse_sdk/plugins/categories/smart_tool/templates/plugin/__init__.py,sha256=47
 synapse_sdk/plugins/categories/smart_tool/templates/plugin/auto_label.py,sha256=eevNg0nOcYFR4z_L_R-sCvVOYoLWSAH1jwDkAf3YCjY,320
 synapse_sdk/plugins/categories/upload/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 synapse_sdk/plugins/categories/upload/actions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-synapse_sdk/plugins/categories/upload/actions/upload.py,sha256=
+synapse_sdk/plugins/categories/upload/actions/upload.py,sha256=XIKbXqzarSy6Rt41g03pkq0loAkKWBTX4yv47E25_rs,45203
 synapse_sdk/plugins/categories/upload/templates/config.yaml,sha256=6_dRa0_J2aS8NSUfO4MKbPxZcdPS2FpJzzp51edYAZc,281
 synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 synapse_sdk/plugins/categories/upload/templates/plugin/upload.py,sha256=IZU4sdSMSLKPCtlNqF7DP2howTdYR6hr74HCUZsGdPk,1559
@@ -184,7 +184,7 @@ synapse_sdk/utils/encryption.py,sha256=KMARrAk5aIHfBLC8CvdXiSIuaGvxljluubjF9PVLf
 synapse_sdk/utils/file.py,sha256=wWBQAx0cB5a-fjfRMeJV-KjBil1ZyKRz-vXno3xBSoo,6834
 synapse_sdk/utils/http.py,sha256=yRxYfru8tMnBVeBK-7S0Ga13yOf8oRHquG5e8K_FWcI,4759
 synapse_sdk/utils/module_loading.py,sha256=chHpU-BZjtYaTBD_q0T7LcKWtqKvYBS4L0lPlKkoMQ8,1020
-synapse_sdk/utils/network.py,sha256=
+synapse_sdk/utils/network.py,sha256=4zWUbuEPcMXb_NVVeuUJ8zSl10ZAlWVHfzqbR25aez8,11978
 synapse_sdk/utils/string.py,sha256=rEwuZ9SAaZLcQ8TYiwNKr1h2u4CfnrQx7SUL8NWmChg,216
 synapse_sdk/utils/converters/__init__.py,sha256=xQi_n7xS9BNyDiolsxH2jw1CtD6avxMPj2cHnwvidi8,11311
 synapse_sdk/utils/converters/coco/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -211,9 +211,9 @@ synapse_sdk/utils/storage/providers/gcp.py,sha256=i2BQCu1Kej1If9SuNr2_lEyTcr5M_n
 synapse_sdk/utils/storage/providers/http.py,sha256=2DhIulND47JOnS5ZY7MZUex7Su3peAPksGo1Wwg07L4,5828
 synapse_sdk/utils/storage/providers/s3.py,sha256=ZmqekAvIgcQBdRU-QVJYv1Rlp6VHfXwtbtjTSphua94,2573
 synapse_sdk/utils/storage/providers/sftp.py,sha256=_8s9hf0JXIO21gvm-JVS00FbLsbtvly4c-ETLRax68A,1426
-synapse_sdk-1.0.
-synapse_sdk-1.0.
-synapse_sdk-1.0.
-synapse_sdk-1.0.
-synapse_sdk-1.0.
-synapse_sdk-1.0.
+synapse_sdk-1.0.0b10.dist-info/licenses/LICENSE,sha256=bKzmC5YAg4V1Fhl8OO_tqY8j62hgdncAkN7VrdjmrGk,1101
+synapse_sdk-1.0.0b10.dist-info/METADATA,sha256=SbK-qGKAO8WGswV_JGf0qJVFBPfC_f6dx0I6o46MDlQ,3745
+synapse_sdk-1.0.0b10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+synapse_sdk-1.0.0b10.dist-info/entry_points.txt,sha256=VNptJoGoNJI8yLXfBmhgUefMsmGI0m3-0YoMvrOgbxo,48
+synapse_sdk-1.0.0b10.dist-info/top_level.txt,sha256=ytgJMRK1slVOKUpgcw3LEyHHP7S34J6n_gJzdkcSsw8,12
+synapse_sdk-1.0.0b10.dist-info/RECORD,,

{synapse_sdk-1.0.0b8.dist-info → synapse_sdk-1.0.0b10.dist-info}/WHEEL
File without changes
{synapse_sdk-1.0.0b8.dist-info → synapse_sdk-1.0.0b10.dist-info}/entry_points.txt
File without changes
{synapse_sdk-1.0.0b8.dist-info → synapse_sdk-1.0.0b10.dist-info}/licenses/LICENSE
File without changes
{synapse_sdk-1.0.0b8.dist-info → synapse_sdk-1.0.0b10.dist-info}/top_level.txt
File without changes