synapse-sdk 1.0.0a56__py3-none-any.whl → 1.0.0a58__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

This release of synapse-sdk has been flagged as potentially problematic.

synapse_sdk/plugins/categories/upload/actions/upload.py
@@ -1,6 +1,7 @@
 import json
 from datetime import datetime
 from enum import Enum
+from pathlib import Path
 from typing import Annotated, Dict, List
 
 from pydantic import AfterValidator, BaseModel, field_validator
@@ -8,6 +9,7 @@ from pydantic_core import PydanticCustomError
 
 from synapse_sdk.clients.exceptions import ClientError
 from synapse_sdk.clients.utils import get_batched_list
+from synapse_sdk.clients.validators.collections import FileSpecificationValidator
 from synapse_sdk.i18n import gettext as _
 from synapse_sdk.plugins.categories.base import Action
 from synapse_sdk.plugins.categories.decorators import register_action
@@ -45,6 +47,13 @@ class UploadRun(Run):
     status: UploadStatus
     created: str
 
+    class MetricsRecord(BaseModel):
+        """Metrics record model."""
+
+        stand_by: int
+        failed: int
+        success: int
+
     def log_data_file(self, data_file_info: dict, status: UploadStatus):
         """Upload data_file log.
 
@@ -82,6 +91,15 @@ class UploadRun(Run):
         now = datetime.now().isoformat()
         self.log('upload_task', self.TaskLog(task_id=task_id, status=status.value, created=now).model_dump())
 
+    def log_metrics(self, record: MetricsRecord, category: str):
+        """Log upload metrics.
+        Args:
+            record (MetricsRecord): The metrics record to log.
+            category (str): The category of the metrics.
+        """
+        record = self.MetricsRecord.model_validate(record)
+        self.set_metrics(value=record.model_dump(), category=category)
+
 
 class UploadParams(BaseModel):
     """Upload action parameters.
@@ -94,8 +112,6 @@ class UploadParams(BaseModel):
         storage (int): The storage of the action.
         collection (int): The collection of the action.
         project (int | None): The project of the action.
-        is_generate_tasks (bool): The flag to generate tasks.
-        is_generate_ground_truths (bool): The flag to generate ground truths
     """
 
     name: Annotated[str, AfterValidator(non_blank)]
@@ -104,6 +120,8 @@ class UploadParams(BaseModel):
     storage: int
     collection: int
     project: int | None
+    is_generate_tasks: bool = False
+    is_generate_ground_truths: bool = False
 
     @field_validator('storage', mode='before')
     @classmethod
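
The two flags above were previously documented but not declared; they are now real model fields with False defaults, so existing payloads that omit them keep validating. A quick sketch (only the fields visible in this diff are shown; the real model may define more):

    # Hypothetical values; fields outside this diff are unknown.
    params = UploadParams(name='my-upload', path='/data/batch-01', storage=1, collection=3, project=None)
    assert params.is_generate_tasks is False
    assert params.is_generate_ground_truths is False
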
@@ -162,6 +180,12 @@ class UploadAction(Action):
         analyze_collection: The progress category for the analyze collection process.
         data_file_upload: The progress category for the upload process.
         generate_data_units: The progress category for the generate data units process.
+        generate_tasks: The progress category for the generate tasks process.
+        generate_ground_truths: The progress category for the generate ground truths process.
+
+    Metrics Categories:
+        data_file: The metrics category for the data file.
+        data_unit: The metrics category for the data unit.
     """
 
     name = 'upload'
@@ -170,15 +194,16 @@ class UploadAction(Action):
     run_class = UploadRun
     progress_categories = {
         'analyze_collection': {
-            'proportion': 10,
+            'proportion': 0,
         },
         'upload_data_files': {
-            'proportion': 50,
+            'proportion': 0,
         },
         'generate_data_units': {
-            'proportion': 40,
+            'proportion': 0,
         },
     }
+    metrics_categories = {'data_file', 'data_unit'}
 
     def get_uploader(self, path, file_specification, organized_files):
         """Get uploader from entrypoint."""
@@ -232,6 +257,12 @@ class UploadAction(Action):
         generated_data_units = self._generate_data_units(uploaded_files, upload_result_count)
         result['generated_data_units_count'] = len(generated_data_units)
 
+        # Setup task with uploaded synapse-backend data units.
+        if not len(generated_data_units):
+            self.run.log_message('No data units were generated.', context=Context.WARNING.value)
+            self.run.end_log()
+            return result
+
         self.run.end_log()
         return result
 
@@ -246,7 +277,7 @@ class UploadAction(Action):
         self.run.set_progress(0, 1, category='analyze_collection')
 
         client = self.run.client
-        collection_id = self.params['collection']
+        collection_id = self.params['data_collection']
         collection = client.get_data_collection(collection_id)
 
         # Finish progress
@@ -263,15 +294,19 @@ class UploadAction(Action):
         # Initialize progress
         self.run.set_progress(0, organized_files_count, category='upload_data_files')
         self.run.log_message('Uploading data files...')
+        data_file_metrics_record = self.run.MetricsRecord(stand_by=organized_files_count, success=0, failed=0)
 
         client = self.run.client
-        collection_id = self.params['collection']
+        collection_id = self.params['data_collection']
         upload_result = []
         organized_files_count = len(organized_files)
         current_progress = 0
         for organized_file in organized_files:
             uploaded_data_file = client.upload_data_file(organized_file, collection_id)
             self.run.log_data_file(organized_file, UploadStatus.SUCCESS)
+            data_file_metrics_record.stand_by -= 1
+            data_file_metrics_record.success += 1
+            self.run.log_metrics(record=data_file_metrics_record, category='data_file')
             upload_result.append(uploaded_data_file)
             self.run.set_progress(current_progress, organized_files_count, category='upload_data_files')
             current_progress += 1
@@ -292,6 +327,7 @@ class UploadAction(Action):
         """
         # Initialize progress
         self.run.set_progress(0, upload_result_count, category='generate_data_units')
+        data_unit_metrics_record = self.run.MetricsRecord(stand_by=upload_result_count, success=0, failed=0)
 
         client = self.run.client
 
@@ -301,6 +337,9 @@ class UploadAction(Action):
         batches_count = len(batches)
         for batch in batches:
             created_data_units = client.create_data_units(batch)
+            data_unit_metrics_record.stand_by -= len(created_data_units)
+            data_unit_metrics_record.success += len(created_data_units)
+            self.run.log_metrics(record=data_unit_metrics_record, category='data_unit')
             generated_data_units.append(created_data_units)
             self.run.set_progress(current_progress, batches_count, category='generate_data_units')
             current_progress += 1
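
To make the counter arithmetic concrete: with 25 uploaded files batched into groups of 10 (the batch size itself is not visible in this diff), the data_unit records logged after each batch would be stand_by=25/success=0 initially, then 15/10, 5/20, and finally 0/25. As the diff stands, nothing increments failed in either loop, so an upload error would surface as an exception rather than a failed count.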
@@ -311,3 +350,120 @@ class UploadAction(Action):
         self.run.set_progress(upload_result_count, upload_result_count, category='generate_data_units')
 
         return sum(generated_data_units, [])
+
+    def _validate_organized_files(self, organized_files: List, file_specification_template: Dict) -> bool:
+        """Validate organized files from Uploader."""
+        validator = FileSpecificationValidator(file_specification_template, organized_files)
+        return validator.validate()
+
+    def _organize_files(self, directory: Path, file_specification: List) -> List:
+        """Organize files according to the file specification.
+        This method handles type-based directory structure where files are organized in
+        directories named after file types (e.g., 'image_1/' directory contains image files
+        like '1.jpg', '2.jpg'). For each dataset ID found in the primary directory, it attempts
+        to find corresponding files in all type directories.
+
+        TODO : Add Logic to handle file specific name patterns and extensions.
+        (e.g. pcd:S_DCH_230725_0156_LR_037.pcd, image_1:S_DCH_230725_0156_FC_037, image_2:S_DCH_230725_0156_LF_037.jpg)
+        Args:
+            directory (Path): Root directory containing files to organize.
+            file_specification (List): File specification list.
+        Returns:
+            List: List of dictionaries containing organized files.
+        """
+        organized_files = []
+        self.run.log_message(f'Looking for files in {directory}...')
+
+        # Check for type-based directory structure (e.g., image_1/, pcd_1/)
+        type_dirs = {}
+        type_extensions = {}  # Store common extensions for each type directory
+
+        for spec in file_specification:
+            spec_name = spec['name']
+
+            spec_dir = directory / spec_name
+            if spec_dir.exists() and spec_dir.is_dir():
+                type_dirs[spec_name] = spec_dir
+
+                # Analyze file extensions in this directory
+                extensions = {}
+                for file_path in spec_dir.glob('*'):
+                    if file_path.is_file():
+                        ext = file_path.suffix.lower()
+                        extensions[ext] = extensions.get(ext, 0) + 1
+
+                # Find the most common extension
+                if extensions:
+                    common_ext = max(extensions.items(), key=lambda x: x[1])[0]
+                    type_extensions[spec_name] = common_ext
+                    self.run.log_message(f'Found type directory: {spec_name} (common extension: {common_ext})')
+
+        # If type-based directories don't exist, exit early
+        if not type_dirs:
+            self.run.log_message('No type-based directory structure found.', context=Context.INFO.value)
+            return organized_files
+
+        self.run.log_message('Detected type-based directory structure')
+
+        # Build a comprehensive map of all dataset IDs across all type directories
+        dataset_files = {}  # Dictionary: file_name -> {spec_name -> file_path}
+
+        # First pass: collect all dataset IDs from all type directories
+        for spec_name, dir_path in type_dirs.items():
+            for file_path in dir_path.glob('*'):
+                if file_path.is_file():
+                    file_name = file_path.stem
+
+                    # Initialize dataset entry if it doesn't exist
+                    if file_name not in dataset_files:
+                        dataset_files[file_name] = {}
+
+                    # Map this file to its specification
+                    if spec_name not in dataset_files[file_name]:
+                        dataset_files[file_name][spec_name] = file_path
+                    else:
+                        # If multiple files with same file_name for same spec, use most recent
+                        existing_file = dataset_files[file_name][spec_name]
+                        if file_path.stat().st_mtime > existing_file.stat().st_mtime:
+                            dataset_files[file_name][spec_name] = file_path
+                            self.run.log_message(
+                                f"Found newer file for name of {file_name}, spec '{spec_name}': "
+                                f'{file_path.name} (replacing {existing_file.name})'
+                            )
+
+        if not dataset_files:
+            self.run.log_message('No dataset files found.', context=Context.WARNING.value)
+            return organized_files
+
+        self.run.log_message(f'Found {len(dataset_files)} potential datasets by ID')
+
+        # Second pass: organize valid datasets
+        for file_name, files_dict in sorted(dataset_files.items()):
+            self.run.log_message(f'Processing file name: {file_name}')
+
+            # Add file spec details for logging
+            for spec_name, file_path in files_dict.items():
+                self.run.log_message(f"Mapped '{spec_name}' to: {file_path.name}")
+
+            # Check if all required files are present
+            required_specs = [spec['name'] for spec in file_specification if spec.get('is_required', False)]
+            if all(req in files_dict for req in required_specs):
+                # Create metadata for this dataset
+                meta_data = {
+                    'origin_file_stem': file_name,
+                    'created_at': datetime.now().isoformat(),
+                }
+
+                # Add the organized dataset
+                organized_files.append({'files': files_dict, 'meta': meta_data})
+                self.run.log_message(f'Successfully organized dataset for ID {file_name}')
+            else:
+                # Missing required files warning
+                missing = [req for req in required_specs if req not in files_dict]
+                self.run.log_message(
+                    f'Dataset ID {file_name} is missing required files: {", ".join(missing)}',
+                    context=Context.WARNING.value,
+                )
+
+        self.run.log_message(f'Total datasets organized: {len(organized_files)}')
+        return organized_files
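
The new _organize_files pairs files across type directories by filename stem. A sketch of the expected layout and call, with illustrative names (the spec names and stems are examples, not fixtures from the package):

    # Layout (hypothetical): dataset/image_1/0001.jpg pairs with dataset/pcd/0001.pcd
    # because both share the stem '0001'.
    from pathlib import Path

    spec = [{'name': 'image_1', 'is_required': True}, {'name': 'pcd', 'is_required': False}]
    organized = action._organize_files(Path('dataset'), spec)  # `action`: a configured UploadAction
    # -> [{'files': {'image_1': Path('dataset/image_1/0001.jpg'), 'pcd': Path('dataset/pcd/0001.pcd')},
    #      'meta': {'origin_file_stem': '0001', 'created_at': '<iso timestamp>'}}, ...]
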

synapse_sdk/plugins/categories/upload/templates/config.yaml
@@ -1,10 +1,13 @@
 actions:
   upload:
     entrypoint: plugin.upload.Uploader
+    options:
+      allow_generate_tasks: false # Allow the plugin to generate tasks for the uploaded data
+      allow_generate_ground_truths: false # Allow the plugin to generate ground truths for the uploaded data
     supported_data_type: image # A primary data type of synapse backend collection. (e.g. 'image', 'text', 'video', 'pcd', 'audio')
     ui_schema: |
       Dumped FormKit Schema for upload plugin custom options
   task_pre_annotation:
     entrypoint: plugin.upload.TaskPreAnnotation
     ui_schema: |
-      Dumped FormKit Schema for upload plugin custom options
\ No newline at end of file
+      Dumped FormKit Schema for upload plugin custom options

synapse_sdk-1.0.0a58.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: synapse-sdk
-Version: 1.0.0a56
+Version: 1.0.0a58
 Summary: synapse sdk
 Author-email: datamaker <developer@datamaker.io>
 License: MIT

synapse_sdk-1.0.0a58.dist-info/RECORD
@@ -101,8 +101,8 @@ synapse_sdk/plugins/categories/smart_tool/templates/plugin/auto_label.py,sha256=
 synapse_sdk/plugins/categories/upload/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 synapse_sdk/plugins/categories/upload/actions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 synapse_sdk/plugins/categories/upload/actions/task_pre_annotation.py,sha256=YkQZ7QECu6-PnSEv2lAbbL3smxeIHxUiu9ruBdA0_0k,3066
-synapse_sdk/plugins/categories/upload/actions/upload.py,sha256=WSzTulI3GAejXKy6DsmxP2zE7fRAX_bYTUaL0Za7Ci8,11287
-synapse_sdk/plugins/categories/upload/templates/config.yaml,sha256=1O0kMfkFMGYwnpBcttrlC9bu4xzU9docw2MBOq_Elmo,417
+synapse_sdk/plugins/categories/upload/actions/upload.py,sha256=zZx14F3789IJUH7BmZdXI43HXy0RdLknqT3aZk0WIHQ,18965
+synapse_sdk/plugins/categories/upload/templates/config.yaml,sha256=1ru7RFSXIUXeaIE1v7GD5K5YkGebwjzDQfzpTDxgsEg,631
 synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 synapse_sdk/plugins/categories/upload/templates/plugin/task_pre_annotation.py,sha256=9XkUZu7USjVjDPufM0NlYmkdKfV7Hf_9v5GN1RgZzS0,350
 synapse_sdk/plugins/categories/upload/templates/plugin/upload.py,sha256=IZU4sdSMSLKPCtlNqF7DP2howTdYR6hr74HCUZsGdPk,1559
@@ -136,9 +136,9 @@ synapse_sdk/utils/storage/providers/__init__.py,sha256=x7RGwZryT2FpVxS7fGWryRVpq
 synapse_sdk/utils/storage/providers/gcp.py,sha256=i2BQCu1Kej1If9SuNr2_lEyTcr5M_ncGITZrL0u5wEA,363
 synapse_sdk/utils/storage/providers/s3.py,sha256=W94rQvhGRXti3R4mYP7gmU5pcyCQpGFIBLvxxqLVdRM,2231
 synapse_sdk/utils/storage/providers/sftp.py,sha256=_8s9hf0JXIO21gvm-JVS00FbLsbtvly4c-ETLRax68A,1426
-synapse_sdk-1.0.0a56.dist-info/licenses/LICENSE,sha256=bKzmC5YAg4V1Fhl8OO_tqY8j62hgdncAkN7VrdjmrGk,1101
-synapse_sdk-1.0.0a56.dist-info/METADATA,sha256=Ms_yriRQzC_lTwXwNUzA_rhSafE6YG69OurqybsaQPs,1303
-synapse_sdk-1.0.0a56.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-synapse_sdk-1.0.0a56.dist-info/entry_points.txt,sha256=VNptJoGoNJI8yLXfBmhgUefMsmGI0m3-0YoMvrOgbxo,48
-synapse_sdk-1.0.0a56.dist-info/top_level.txt,sha256=ytgJMRK1slVOKUpgcw3LEyHHP7S34J6n_gJzdkcSsw8,12
-synapse_sdk-1.0.0a56.dist-info/RECORD,,
+synapse_sdk-1.0.0a58.dist-info/licenses/LICENSE,sha256=bKzmC5YAg4V1Fhl8OO_tqY8j62hgdncAkN7VrdjmrGk,1101
+synapse_sdk-1.0.0a58.dist-info/METADATA,sha256=jGVaRNWWU0MKtiyUl3XuU8uOufFcE4B3M11jrLtQYzI,1303
+synapse_sdk-1.0.0a58.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+synapse_sdk-1.0.0a58.dist-info/entry_points.txt,sha256=VNptJoGoNJI8yLXfBmhgUefMsmGI0m3-0YoMvrOgbxo,48
+synapse_sdk-1.0.0a58.dist-info/top_level.txt,sha256=ytgJMRK1slVOKUpgcw3LEyHHP7S34J6n_gJzdkcSsw8,12
+synapse_sdk-1.0.0a58.dist-info/RECORD,,