synapse-sdk 1.0.0a53__py3-none-any.whl → 1.0.0a55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of synapse-sdk might be problematic (see the registry listing for details).

@@ -1,6 +1,6 @@
  from synapse_sdk.clients.backend.annotation import AnnotationClientMixin
  from synapse_sdk.clients.backend.core import CoreClientMixin
- from synapse_sdk.clients.backend.data_collection import DataCollectionClientMixin
+ from synapse_sdk.clients.backend.dataset import DatasetClientMixin
  from synapse_sdk.clients.backend.hitl import HITLClientMixin
  from synapse_sdk.clients.backend.integration import IntegrationClientMixin
  from synapse_sdk.clients.backend.ml import MLClientMixin
@@ -9,7 +9,7 @@ from synapse_sdk.clients.backend.ml import MLClientMixin
  class BackendClient(
      AnnotationClientMixin,
      CoreClientMixin,
-     DataCollectionClientMixin,
+     DatasetClientMixin,
      IntegrationClientMixin,
      MLClientMixin,
      HITLClientMixin,
@@ -8,13 +8,13 @@ from synapse_sdk.clients.base import BaseClient
  from synapse_sdk.clients.utils import get_batched_list
 
 
- class DataCollectionClientMixin(BaseClient):
-     def list_data_collection(self):
+ class DatasetClientMixin(BaseClient):
+     def list_dataset(self):
          path = 'data_collections/'
          return self._list(path)
 
-     def get_data_collection(self, data_collection_id):
-         """Get data_collection from synapse-backend.
+     def get_dataset(self, data_collection_id):
+         """Get dataset from synapse-backend.
 
          Args:
              data_collection_id: The data_collection id to get.
@@ -40,19 +40,19 @@ class DataCollectionClientMixin(BaseClient):
          path = 'data_units/'
          return self._post(path, data=data)
 
-     def upload_data_collection(
+     def upload_dataset(
          self,
-         data_collection_id: int,
-         data_collection: Dict,
+         dataset_id: int,
+         dataset: Dict,
          project_id: Optional[int] = None,
          batch_size: int = 1000,
          process_pool: int = 10,
      ):
-         """Upload data_collection to synapse-backend.
+         """Upload dataset to synapse-backend.
 
          Args:
-             data_collection_id: The data_collection id to upload the data to.
-             data_collection: The data_collection to upload.
+             dataset_id: The dataset id to upload the data to.
+             dataset: The dataset to upload.
                  * structure:
                      - files: The files to upload. (key: file name, value: file pathlib object)
                      - meta: The meta data to upload.
@@ -60,14 +60,14 @@ class DataCollectionClientMixin(BaseClient):
              batch_size: The batch size to upload the data.
              process_pool: The process pool to upload the data.
          """
-         # TODO validate data_collection with schema
+         # TODO validate dataset with schema
 
-         params = [(data, data_collection_id) for data in data_collection]
+         params = [(data, dataset_id) for data in dataset]
 
          with Pool(processes=process_pool) as pool:
-             data_collection = pool.starmap(self.upload_data_file, tqdm(params))
+             dataset = pool.starmap(self.upload_data_file, tqdm(params))
 
-         batches = get_batched_list(data_collection, batch_size)
+         batches = get_batched_list(dataset, batch_size)
 
          for batch in tqdm(batches):
              data_units = self.create_data_units(batch)
@@ -90,7 +90,7 @@ class DataCollectionClientMixin(BaseClient):
                  * structure:
                      - files: The files to upload. (key: file name, value: file pathlib object)
                      - meta: The meta data to upload.
-             data_collection_id: The data_collection id to upload the data to.
+             data_collection_id: The dataset id to upload the data to.
 
          Returns:
              Dict: The result of the upload.
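
For downstream users of BackendClient, the rename above is purely at the Python method level (the backend path 'data_collections/' is unchanged). A minimal migration sketch; the client construction arguments, the id 42, and the items payload are placeholders, not part of this diff:

    from synapse_sdk.clients.backend import BackendClient

    client = BackendClient(...)  # construct as before; the constructor is untouched by this diff
    items = [{'files': {...}, 'meta': {...}}]  # shape per the upload_dataset docstring above

    # 1.0.0a53
    # collection = client.get_data_collection(42)
    # client.upload_data_collection(data_collection_id=42, data_collection=items)

    # 1.0.0a55
    collection = client.get_dataset(42)  # renamed from get_data_collection
    client.upload_dataset(dataset_id=42, dataset=items, batch_size=1000, process_pool=10)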
@@ -8,6 +8,7 @@ class ServeClientMixin(BaseClient):
          response = self._get(path, params=params)
          for key, item in response['applications'].items():
              response['applications'][key]['deployments'] = list(item['deployments'].values())
+             response['applications'][key]['route_prefix'] = item['route_prefix']
          return list(response['applications'].values())
 
      def get_serve_application(self, pk, params=None):
@@ -15,6 +16,7 @@ class ServeClientMixin(BaseClient):
          response = self._get(path, params=params)
          try:
              response['applications'][pk]['deployments'] = list(response['applications'][pk]['deployments'].values())
+             response['applications'][pk]['route_prefix'] = response['applications'][pk]['route_prefix']
              return response['applications'][pk]
          except KeyError:
              raise ClientError(404, 'Serve Application Not Found')
@@ -27,13 +27,6 @@ class ExportRun(Run):
      error: str | None = None
      created: str
 
-     class MetricsRecord(BaseModel):
-         """Metrics record model."""
- 
-         stand_by: int
-         failed: int
-         success: int
- 
      def log_file(
          self, log_type: str, target_id: int, data_file_info: dict, status: ExportStatus, error: str | None = None
      ):
@@ -58,16 +51,6 @@ class ExportRun(Run):
              ).model_dump(),
          )
 
-     def log_metrics(self, record: MetricsRecord, category: str):
-         """Log export metrics.
- 
-         Args:
-             record (MetricsRecord): The metrics record to log.
-             category (str): The category of the metrics.
-         """
-         record = self.MetricsRecord.model_validate(record)
-         self.set_metrics(value=record.dict(), category=category)
- 
      def export_log_json_file(
          self,
          target_id: int,
@@ -280,7 +263,6 @@ class ExportAction(Action):
              'proportion': 100,
          }
      }
-     metrics_categories = {'data_file', 'original_file'}
 
      def get_filtered_results(self, filters, handler):
          """Get filtered target results."""
@@ -42,8 +42,6 @@ def export(run, export_items, path_root, **params):
      origin_files_output_path.mkdir(parents=True, exist_ok=True)
 
      total = params['count']
-     original_file_metrics_record = run.MetricsRecord(stand_by=total, success=0, failed=0)
-     data_file_metrics_record = run.MetricsRecord(stand_by=total, success=0, failed=0)
      # progress init
      run.set_progress(0, total, category='dataset_conversion')
      for no, export_item in enumerate(export_items, start=1):
@@ -58,30 +56,12 @@ def export(run, export_items, path_root, **params):
          if save_original_file_flag:
              if no == 1:
                  run.log_message('Saving original file.')
-             original_status = save_original_file(run, final_data, origin_files_output_path, errors_original_file_list)
- 
-             original_file_metrics_record.stand_by -= 1
-             if original_status == ExportStatus.FAILED:
-                 original_file_metrics_record.failed += 1
-                 continue
-             else:
-                 original_file_metrics_record.success += 1
- 
-             run.log_metrics(record=original_file_metrics_record, category='original_file')
+             save_original_file(run, final_data, origin_files_output_path, errors_original_file_list)
 
          # Extract data as JSON files
          if no == 1:
              run.log_message('Saving json file.')
-         data_status = save_as_json(run, final_data, json_output_path, errors_json_file_list)
- 
-         data_file_metrics_record.stand_by -= 1
-         if data_status == ExportStatus.FAILED:
-             data_file_metrics_record.failed += 1
-             continue
-         else:
-             data_file_metrics_record.success += 1
- 
-         run.log_metrics(record=data_file_metrics_record, category='data_file')
+         save_as_json(run, final_data, json_output_path, errors_json_file_list)
 
      run.end_log()
 
@@ -146,7 +126,6 @@ def save_original_file(run, result, base_path, error_file_list):
          status = ExportStatus.FAILED
 
      run.export_log_original_file(result['id'], file_info, status, error_msg)
-     return status
 
 
  def save_as_json(run, result, base_path, error_file_list):
@@ -173,4 +152,3 @@ def save_as_json(run, result, base_path, error_file_list):
          status = ExportStatus.FAILED
 
      run.export_log_json_file(result['id'], file_info, status, error_msg)
-     return status
@@ -1,6 +1,8 @@
  import json
+ import re
  from datetime import datetime
  from enum import Enum
+ from pathlib import Path
  from typing import Annotated, Dict, List
 
  from pydantic import AfterValidator, BaseModel, field_validator
@@ -105,6 +107,8 @@ class UploadParams(BaseModel):
      storage: int
      collection: int
      project: int | None
+     is_generate_tasks: bool = False
+     is_generate_ground_truths: bool = False
 
      @field_validator('storage', mode='before')
      @classmethod
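
A sketch of how the new flags might appear in the upload action's params; only fields visible in this hunk are shown, and the concrete values are placeholders:

    # Hypothetical params payload for the upload action in 1.0.0a55
    params = {
        'storage': 1,                        # storage id, validated against the backend
        'collection': 42,                    # collection id, now checked via client.get_dataset()
        'project': None,
        'is_generate_tasks': True,           # new field, defaults to False
        'is_generate_ground_truths': False,  # new field, defaults to False
    }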
@@ -129,7 +133,7 @@ class UploadParams(BaseModel):
          action = info.context['action']
          client = action.client
          try:
-             client.get_data_collection(value)
+             client.get_dataset(value)
          except ClientError:
              raise PydanticCustomError('client_error', _('Error occurred while checking collection exists.'))
          return value
@@ -163,6 +167,8 @@ class UploadAction(Action):
          analyze_collection: The progress category for the analyze collection process.
          data_file_upload: The progress category for the upload process.
          generate_data_units: The progress category for the generate data units process.
+         generate_tasks: The progress category for the generate tasks process.
+         generate_ground_truths: The progress category for the generate ground truths process.
      """
 
      name = 'upload'
@@ -171,19 +177,51 @@ class UploadAction(Action):
      run_class = UploadRun
      progress_categories = {
          'analyze_collection': {
-             'proportion': 10,
+             'proportion': 0,
          },
          'upload_data_files': {
-             'proportion': 50,
+             'proportion': 0,
          },
          'generate_data_units': {
-             'proportion': 40,
+             'proportion': 0,
+         },
+         'generate_tasks': {
+             'proportion': 0,
+         },
+         'generate_ground_truths': {
+             'proportion': 0,
          },
      }
 
-     def get_uploader(self, path):
+     def __init__(self, *args, **kwargs):
+         """Initialize UploadAction."""
+         super().__init__(*args, **kwargs)
+ 
+         # Setup progress categories ratio by options.
+         progress_ratios = {
+             'upload_only': (5, 60, 35, 0, 0),
+             'generate_tasks': (5, 45, 25, 25, 0),
+             'generate_ground_truths': (5, 35, 30, 15, 15),
+         }
+         options = self.config.get('options', {})
+         progress_categories = self.progress_categories
+         if options['allow_generate_tasks'] and not kwargs['params']['allow_generate_ground_truths']:
+             ratio_name = 'generate_tasks'
+         elif options['allow_generate_ground_truths'] and kwargs['params']['allow_generate_tasks']:
+             ratio_name = 'generate_ground_truths'
+         else:
+             ratio_name = 'upload_only'
+ 
+         assert len(progress_categories) == len(progress_ratios[ratio_name]), (
+             'Progress categories and ratios length mismatch.'
+         )
+         for i, category in enumerate(progress_categories):
+             progress_categories[category]['proportion'] = progress_ratios[ratio_name][i]
+         self.progress_categories = progress_categories
+ 
+     def get_uploader(self, path, file_specification, organized_files):
          """Get uploader from entrypoint."""
-         return self.entrypoint(self.run, path)
+         return self.entrypoint(self.run, path, file_specification, organized_files)
 
      def start(self) -> Dict:
          """Start upload process.
@@ -195,19 +233,23 @@ class UploadAction(Action):
          storage = self.client.get_storage(self.params['storage'])
          pathlib_cwd = get_pathlib(storage, self.params['path'])
 
-         # Initialize uploader.
-         uploader = self.get_uploader(pathlib_cwd)
- 
          # Analyze Collection file specifications to determine the data structure for upload.
          file_specification_template = self._analyze_collection()
+         organized_files = self._organize_files(pathlib_cwd, file_specification_template)
+ 
+         # Initialize uploader.
+         uploader = self.get_uploader(pathlib_cwd, file_specification_template, organized_files)
 
          # Setup result dict.
          result = {}
 
-         # Organize data according to Collection file specification structure.
+         # Get organized files from the uploader (plugin developer's custom implementation)
+         # or use the default organization method if uploader doesn't provide valid files
          organized_files = uploader.handle_upload_files()
-         if not self._validate_organized_files(file_specification_template, organized_files):
-             self.run.log_message('Validate organized files failed.')
+ 
+         # Validate the organized files
+         if not self._validate_organized_files(organized_files, file_specification_template):
+             self.run.log_message('Validation failed.', context=Context.ERROR.value)
              self.run.end_log()
              return result
 
@@ -229,6 +271,26 @@ class UploadAction(Action):
          generated_data_units = self._generate_data_units(uploaded_files, upload_result_count)
          result['generated_data_units_count'] = len(generated_data_units)
 
+         # Setup task with uploaded synapse-backend data units.
+         if not len(generated_data_units):
+             self.run.log_message('No data units were generated.', context=Context.WARNING.value)
+             self.run.end_log()
+             return result
+ 
+         if self.config['options']['allow_generate_tasks'] and self.params['is_generate_tasks']:
+             generated_tasks = self._generate_tasks(generated_data_units)
+             result['generated_tasks_count'] = len(generated_tasks)
+         else:
+             self.run.log_message('Generating tasks process has passed.')
+ 
+         # Generate ground truths for the uploaded data.
+         # TODO: Need to add ground truths generation logic later.
+         if self.config['options']['allow_generate_ground_truths'] and self.params['is_generate_ground_truths']:
+             generated_ground_truths = self._generate_ground_truths()
+             result['generated_ground_truths_count'] = len(generated_ground_truths)
+         else:
+             self.run.log_message('Generating ground truths process has passed.')
+ 
          self.run.end_log()
          return result
 
@@ -244,18 +306,13 @@ class UploadAction(Action):
 
          client = self.run.client
          collection_id = self.params['collection']
-         collection = client.get_data_collection(collection_id)
+         collection = client.get_dataset(collection_id)
 
          # Finish progress
          self.run.set_progress(1, 1, category='analyze_collection')
 
          return collection['file_specifications']
 
-     def _validate_organized_files(self, file_specification_template: Dict, organized_files: List) -> bool:
-         """Validate organized files from Uploader."""
-         validator = FileSpecificationValidator(file_specification_template, organized_files)
-         return validator.validate()
- 
      def _upload_files(self, organized_files, organized_files_count: int) -> List:
          """Upload files to synapse-backend.
 
@@ -313,3 +370,224 @@ class UploadAction(Action):
          self.run.set_progress(upload_result_count, upload_result_count, category='generate_data_units')
 
          return sum(generated_data_units, [])
+ 
+     def _generate_tasks(self, generated_data_units: List) -> List:
+         """Setup task with uploaded synapse-backend data units.
+ 
+         TODO: make batch size configurable.
+         """
+         # Initialize progress
+         self.run.set_progress(0, 1, category='generate_tasks')
+         self.run.log_message('Generating tasks with data files...')
+ 
+         # Prepare batches for processing
+         client = self.run.client
+         project_id = self.params['project']
+         current_progress = 0
+ 
+         # Generate tasks
+         generated_tasks = []
+         generated_data_units_count = len(generated_data_units)
+         for data_unit in generated_data_units:
+             tasks_data = []
+             task_data = {'project': project_id, 'data_unit': data_unit['id']}
+             tasks_data.append(task_data)
+             if tasks_data:
+                 created_tasks = client.create_tasks(tasks_data)
+                 created_task_ids = [created_task['id'] for created_task in created_tasks]
+                 generated_tasks.append(created_task_ids)
+                 for created_task_id in created_task_ids:
+                     self.run.log_task(created_task_id, UploadStatus.SUCCESS)
+ 
+             self.run.set_progress(current_progress, generated_data_units_count, category='generate_tasks')
+             current_progress += 1
+ 
+         # Finish progress
+         self.run.log_message('Generating tasks completed')
+         self.run.set_progress(1, 1, category='generate_tasks')
+ 
+         return sum(generated_tasks, [])
+ 
+     def _generate_ground_truths(self):
+         """Generate ground truths for the uploaded data.
+ 
+         TODO: Need to add ground truths generation logic later.
+         """
+         # Initialize progress
+         self.run.set_progress(0, 1, category='generate_ground_truths')
+         self.run.log_message('Generating ground truths...')
+ 
+         # Finish progress
+         self.run.log_message('Generating ground truths completed')
+         self.run.set_progress(1, 1, category='generate_ground_truths')
+ 
+     def _validate_organized_files(self, organized_files: List, file_specification_template: Dict) -> bool:
+         """Validate organized files from Uploader."""
+         validator = FileSpecificationValidator(file_specification_template, organized_files)
+         return validator.validate()
+ 
+     def _organize_files(self, directory: Path, file_specification: List) -> List:
+         """Organize files according to the file specification.
+         This method handles type-based directory structure where files are organized in
+         directories named after file types (e.g., 'image_1/' directory contains image files
+         like '1.jpg', '2.jpg'). For each dataset ID found in the primary directory, it attempts
+         to find corresponding files in all type directories.
+ 
+         TODO : Add Logic to handle file specific name patterns and extensions.
+             (e.g. pcd:S_DCH_230725_0156_LR_037.pcd, image_1:S_DCH_230725_0156_FC_037, image_2:S_DCH_230725_0156_LF_037.jpg)
+         Args:
+             directory (Path): Root directory containing files to organize.
+             file_specification (List): File specification list.
+         Returns:
+             List: List of dictionaries containing organized files.
+         """
+         organized_files = []
+         self.run.log_message(f'Looking for files in {directory}...')
+ 
+         # Check for type-based directory structure (e.g., image_1/, pcd_1/)
+         type_dirs = {}
+         type_extensions = {}  # Store common extensions for each type directory
+ 
+         for spec in file_specification:
+             spec_name = spec['name']
+ 
+             spec_dir = directory / spec_name
+             if spec_dir.exists() and spec_dir.is_dir():
+                 type_dirs[spec_name] = spec_dir
+ 
+                 # Analyze file extensions in this directory
+                 extensions = {}
+                 for file_path in spec_dir.glob('*'):
+                     if file_path.is_file():
+                         ext = file_path.suffix.lower()
+                         extensions[ext] = extensions.get(ext, 0) + 1
+ 
+                 # Find the most common extension
+                 if extensions:
+                     common_ext = max(extensions.items(), key=lambda x: x[1])[0]
+                     type_extensions[spec_name] = common_ext
+                     self.run.log_message(f'Found type directory: {spec_name} (common extension: {common_ext})')
+ 
+         # If type-based directories don't exist, exit early
+         if not type_dirs:
+             self.run.log_message('No type-based directory structure found.', context=Context.INFO.value)
+             return organized_files
+ 
+         self.run.log_message('Detected type-based directory structure')
+ 
+         # Build a comprehensive map of all dataset IDs across all type directories
+         dataset_files = {}  # Dictionary: file_name -> {spec_name -> file_path}
+ 
+         # First pass: collect all dataset IDs from all type directories
+         for spec_name, dir_path in type_dirs.items():
+             for file_path in dir_path.glob('*'):
+                 if file_path.is_file():
+                     file_name = file_path.stem
+ 
+                     # Initialize dataset entry if it doesn't exist
+                     if file_name not in dataset_files:
+                         dataset_files[file_name] = {}
+ 
+                     # Map this file to its specification
+                     if spec_name not in dataset_files[file_name]:
+                         dataset_files[file_name][spec_name] = file_path
+                     else:
+                         # If multiple files with same file_name for same spec, use most recent
+                         existing_file = dataset_files[file_name][spec_name]
+                         if file_path.stat().st_mtime > existing_file.stat().st_mtime:
+                             dataset_files[file_name][spec_name] = file_path
+                             self.run.log_message(
+                                 f"Found newer file for name of {file_name}, spec '{spec_name}': "
+                                 f'{file_path.name} (replacing {existing_file.name})'
+                             )
+ 
+         if not dataset_files:
+             self.run.log_message('No dataset files found.', context=Context.WARNING.value)
+             return organized_files
+ 
+         self.run.log_message(f'Found {len(dataset_files)} potential datasets by ID')
+ 
+         # Second pass: organize valid datasets
+         for file_name, files_dict in sorted(dataset_files.items()):
+             self.run.log_message(f'Processing file name: {file_name}')
+ 
+             # Add file spec details for logging
+             for spec_name, file_path in files_dict.items():
+                 self.run.log_message(f"Mapped '{spec_name}' to: {file_path.name}")
+ 
+             # Check if all required files are present
+             required_specs = [spec['name'] for spec in file_specification if spec.get('is_required', False)]
+             if all(req in files_dict for req in required_specs):
+                 # Create metadata for this dataset
+                 meta_data = {
+                     'origin_file_stem': file_name,
+                     'created_at': datetime.now().isoformat(),
+                 }
+ 
+                 # Add the organized dataset
+                 organized_files.append({'files': files_dict, 'meta': meta_data})
+                 self.run.log_message(f'Successfully organized dataset for ID {file_name}')
+             else:
+                 # Missing required files warning
+                 missing = [req for req in required_specs if req not in files_dict]
+                 self.run.log_message(
+                     f'Dataset ID {file_name} is missing required files: {", ".join(missing)}',
+                     context=Context.WARNING.value,
+                 )
+ 
+         self.run.log_message(f'Total datasets organized: {len(organized_files)}')
+         return organized_files
+ 
+     def _map_files_to_specification(self, directory: Path, file_specification: List) -> Dict[str, Path]:
+         """Map files in a directory to the file specification.
+ 
+         Args:
+             directory (Path): Directory containing files to map.
+             file_specification (List): File specification list.
+ 
+         Returns:
+             Dict[str, Path]: Dictionary mapping file specification names to file paths.
+         """
+         files_dict = {}
+ 
+         # Get all files in the directory once
+         all_files = [f for f in directory.iterdir() if f.is_file()]
+ 
+         # Process each file specification
+         for file_spec in file_specification:
+             file_name = file_spec['name']
+             is_required = file_spec.get('is_required', False)
+ 
+             # Generate name pattern based on the specification
+             name_parts = re.split(r'_(\d+)$', file_name)
+ 
+             # Find files matching the pattern
+             matching_files = []
+             if len(name_parts) > 1:
+                 base_name = name_parts[0]
+                 index = name_parts[1]
+                 # Match patterns like "pcd_1.ext", "point_cloud_1.ext", etc.
+                 for file in all_files:
+                     if base_name in file.stem and f'_{index}' in file.stem:
+                         matching_files.append(file)
+             else:
+                 # Simple match - just find files containing the pattern
+                 for file in all_files:
+                     if file_name in file.stem:
+                         matching_files.append(file)
+ 
+             # Process matching files
+             if matching_files:
+                 # Sort by modification time (newest first) if multiple files match
+                 if len(matching_files) > 1:
+                     matching_files.sort(key=lambda f: f.stat().st_mtime, reverse=True)
+                     self.run.log_message(
+                         f"Multiple files match '{file_name}'. Using most recent: {matching_files[0].name}"
+                     )
+ 
+                 files_dict[file_name] = matching_files[0]
+                 self.run.log_message(f"Mapped '{file_name}' to: {matching_files[0].name}")
+             elif is_required:
+                 self.run.log_message(f"Required file '{file_name}' not found.", context=Context.WARNING.value)
+ 
+         return files_dict
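
To illustrate the type-based layout that the new _organize_files scans, here is a hypothetical input tree and the structure it would return; the spec names ('image_1', 'pcd') and file stems are examples only:

    from pathlib import Path

    # Hypothetical upload directory:
    #   image_1/0001.jpg  image_1/0002.jpg
    #   pcd/0001.pcd      pcd/0002.pcd
    #
    # Files are paired across type directories by matching stems, producing:
    organized_files = [
        {
            'files': {'image_1': Path('image_1/0001.jpg'), 'pcd': Path('pcd/0001.pcd')},
            'meta': {'origin_file_stem': '0001', 'created_at': '2025-01-01T00:00:00'},
        },
        {
            'files': {'image_1': Path('image_1/0002.jpg'), 'pcd': Path('pcd/0002.pcd')},
            'meta': {'origin_file_stem': '0002', 'created_at': '2025-01-01T00:00:00'},
        },
    ]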
@@ -1,10 +1,9 @@
  actions:
    upload:
      entrypoint: plugin.upload.Uploader
+     options:
+       allow_generate_tasks: false # Allow the plugin to generate tasks for the uploaded data
+       allow_generate_ground_truths: false # Allow the plugin to generate ground truths for the uploaded data
      supported_data_type: image # A primary data type of synapse backend collection. (e.g. 'image', 'text', 'video', 'pcd', 'audio')
      ui_schema: |
        Dumped FormKit Schema for upload plugin custom options
-   task_pre_annotation:
-     entrypoint: plugin.upload.TaskPreAnnotation
-     ui_schema: |
-       Dumped FormKit Schema for upload plugin custom options
@@ -3,42 +3,38 @@ from typing import List
 
 
  class Uploader:
-     """Plugin upload action class.
+     """Plugin upload action interface for organizing files.
 
-     * Organize, upload, setup task, generate ground truths for the uploaded data.
+     This class provides a minimal interface for plugin developers to implement
+     their own file organization logic.
      """
 
-     def __init__(self, run, path: Path, *args, **kwargs):
+     def __init__(self, run, path: Path, file_specification: List = None, organized_files: List = None):
          """Initialize the plugin upload action class.
 
          Args:
-             run: Plugin run object.
-             path: pathlib object by upload target destination path.
+             run: Plugin run object with logging capabilities.
+             path: Path object pointing to the upload target directory.
+             file_specification: List of specifications that define the structure of files to be uploaded.
+                 Each specification contains details like file name, type, and requirements.
          """
          self.run = run
          self.path = path
+         self.file_specification = file_specification
+         self.organized_files = organized_files
 
      def handle_upload_files(self) -> List:
-         """Handle upload files.
+         """Customize the organization of files for upload.
 
-         * Organize data according to collection file specification structure.
-         * Structure files according to the file specification of the target collection.
+         This method provides a hook for plugin developers to modify the default file organization.
+         You can override this method to filter files, transform data, or add custom metadata
+         based on your specific requirements.
+ 
+         Args:
+             organized_files (List): The default organized files structure.
+                 Each item is a dictionary with 'files' and 'meta' keys.
 
          Returns:
-             List: List of dictionaries containing 'files' and 'meta'.
- 
-         Examples:
-             [
-                 {
-                     "files": {
-                         'image_1': image_1_pathlib_object,
-                         'image_2': image_2_pathlib_object,
-                         'meta_1': meta_1_pathlib_object,
-                     },
-                     "meta": {
-                         "key": "value"
-                     }
-                 }
-             ]
+             List: The modified list of organized files to be uploaded.
          """
-         return []
+         return self.organized_files
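
As an illustration of the hook described in the new docstring, a minimal custom uploader that filters the pre-organized files; it assumes it lives alongside the template's Uploader class, and the 'image_1' spec name is an example:

    from typing import List


    class FilteringUploader(Uploader):
        """Example subclass that keeps only datasets containing an 'image_1' file."""

        def handle_upload_files(self) -> List:
            filtered = []
            for item in self.organized_files or []:
                if 'image_1' in item['files']:
                    item['meta']['source'] = 'filtering_uploader'  # hypothetical extra metadata
                    filtered.append(item)
            return filtered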
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: synapse-sdk
- Version: 1.0.0a53
+ Version: 1.0.0a55
  Summary: synapse sdk
  Author-email: datamaker <developer@datamaker.io>
  License: MIT
@@ -28,17 +28,17 @@ synapse_sdk/clients/agent/__init__.py,sha256=Pz8_iTbIbnb7ywGJ3feqoZVmO2I3mEbwpWs
  synapse_sdk/clients/agent/core.py,sha256=x2jgORTjT7pJY67SLuc-5lMG6CD5OWpy8UgGeTf7IhA,270
  synapse_sdk/clients/agent/ray.py,sha256=JrwLyVOUDG2yYsbPrxyUtWbM-FWp9B6Bl_GdDby0rt8,1559
  synapse_sdk/clients/agent/service.py,sha256=s7KuPK_DB1nr2VHrigttV1WyFonaGHNrPvU8loRxHcE,478
- synapse_sdk/clients/backend/__init__.py,sha256=MC3pndBk-SPyW9L6WnrTozoub9-EK7auXFvPHCaxeFU,1209
+ synapse_sdk/clients/backend/__init__.py,sha256=Fiehino2n3voaHTdpJHXSY7K_CDnMkQeokapbgeoTBk,1187
  synapse_sdk/clients/backend/annotation.py,sha256=f4jS4qlXH7M7mQ3EuCq-NrjJ_hJNDz8pEFAYqf-e008,996
  synapse_sdk/clients/backend/core.py,sha256=5XAOdo6JZ0drfk-FMPJ96SeTd9oja-VnTwzGXdvK7Bg,1027
- synapse_sdk/clients/backend/data_collection.py,sha256=kj9TurBAljK_mFF75oaazlqnL0bd6PHbgRfR3KyTUmI,3623
+ synapse_sdk/clients/backend/dataset.py,sha256=eQ0O43Ck91z5Om7mb-vW_P5NIaX1OZKirjGs-WQHdM4,3480
  synapse_sdk/clients/backend/hitl.py,sha256=na2mSXFud92p4zUEuagcDWk2klxO7xn-e86cm0VZEvs,709
  synapse_sdk/clients/backend/integration.py,sha256=9LjkYcBpi7aog-MODSDS4RlmYahypu65qxBj-AcY7xc,2683
  synapse_sdk/clients/backend/ml.py,sha256=JoPH9Ly2E3HJ7S5mdGLtcGq7ruQVVrYfWArogwZLlms,1193
  synapse_sdk/clients/backend/models.py,sha256=s5d9sGGQ0Elj0HOGC1TuwE-eBkY1aTfJPl6ls11bNCk,1961
  synapse_sdk/clients/ray/__init__.py,sha256=9ZSPXVVxlJ8Wp8ku7l021ENtPjVrGgQDgqifkkVAXgM,187
  synapse_sdk/clients/ray/core.py,sha256=a4wyCocAma2HAm-BHlbZnoVbpfdR-Aad2FM0z6vPFvw,731
- synapse_sdk/clients/ray/serve.py,sha256=rbCpXZYWf0oP8XJ9faa9QFNPYU7h8dltIG8xn9ZconY,907
+ synapse_sdk/clients/ray/serve.py,sha256=eFhCYIv_irc_2RyuV3bzeWIVyz_1NlqwoNVh5KSWilY,1092
  synapse_sdk/clients/validators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  synapse_sdk/clients/validators/collections.py,sha256=LtnwvutsScubOUcZ2reGHLCzseXxtNIdnH2nv098aUU,1195
  synapse_sdk/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -61,10 +61,10 @@ synapse_sdk/plugins/categories/data_validation/templates/plugin/validation.py,sh
  synapse_sdk/plugins/categories/export/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  synapse_sdk/plugins/categories/export/enums.py,sha256=gtyngvQ1DKkos9iKGcbecwTVQQ6sDwbrBPSGPNb5Am0,127
  synapse_sdk/plugins/categories/export/actions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- synapse_sdk/plugins/categories/export/actions/export.py,sha256=2lIjur8EiwTB9sc16FV8ZaPXFxUtGRPx9hreG_DKLQA,10483
+ synapse_sdk/plugins/categories/export/actions/export.py,sha256=xqPB_MufeMP3riaKCbGVFGukV8RdXcg6-zUrkw4t1-A,9922
  synapse_sdk/plugins/categories/export/templates/config.yaml,sha256=N7YmnFROb3s3M35SA9nmabyzoSb5O2t2TRPicwFNN2o,56
  synapse_sdk/plugins/categories/export/templates/plugin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- synapse_sdk/plugins/categories/export/templates/plugin/export.py,sha256=zG8mSn7ZGIj8cttWmb7GEPcGgQRbZ97brJCzkuK7RP8,6106
+ synapse_sdk/plugins/categories/export/templates/plugin/export.py,sha256=JA2Y_A30QyJekSqDq8PeRuFR9k0yjQjOG-Xy6C8zPew,5196
  synapse_sdk/plugins/categories/neural_net/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  synapse_sdk/plugins/categories/neural_net/actions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  synapse_sdk/plugins/categories/neural_net/actions/deployment.py,sha256=y2LrS-pwazqRI5O0q1NUy45NQYsBj6ykbrXnDMs_fqE,1987
@@ -100,12 +100,10 @@ synapse_sdk/plugins/categories/smart_tool/templates/plugin/__init__.py,sha256=47
  synapse_sdk/plugins/categories/smart_tool/templates/plugin/auto_label.py,sha256=eevNg0nOcYFR4z_L_R-sCvVOYoLWSAH1jwDkAf3YCjY,320
  synapse_sdk/plugins/categories/upload/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  synapse_sdk/plugins/categories/upload/actions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- synapse_sdk/plugins/categories/upload/actions/task_pre_annotation.py,sha256=YkQZ7QECu6-PnSEv2lAbbL3smxeIHxUiu9ruBdA0_0k,3066
- synapse_sdk/plugins/categories/upload/actions/upload.py,sha256=xTO40dB00uE9xSQxnQYIyuZEvTzx99cSxfCOXLKZAsQ,11294
- synapse_sdk/plugins/categories/upload/templates/config.yaml,sha256=1O0kMfkFMGYwnpBcttrlC9bu4xzU9docw2MBOq_Elmo,417
+ synapse_sdk/plugins/categories/upload/actions/upload.py,sha256=L9OqHWNyzO5qXi9-afkgRI1hfL7ysJjY0z0a5kujJrQ,24202
+ synapse_sdk/plugins/categories/upload/templates/config.yaml,sha256=kwHNWHFYbzDi1mEh40KozatPZbZGH44dlP0t0J7ejJw,483
  synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- synapse_sdk/plugins/categories/upload/templates/plugin/task_pre_annotation.py,sha256=9XkUZu7USjVjDPufM0NlYmkdKfV7Hf_9v5GN1RgZzS0,350
- synapse_sdk/plugins/categories/upload/templates/plugin/upload.py,sha256=dnK8gy33GjG5ettayawDJv1gM3xCm1K6lM-PfeeTjQw,1163
+ synapse_sdk/plugins/categories/upload/templates/plugin/upload.py,sha256=IZU4sdSMSLKPCtlNqF7DP2howTdYR6hr74HCUZsGdPk,1559
  synapse_sdk/plugins/templates/cookiecutter.json,sha256=NxOWk9A_v1pO0Ny4IYT9Cj5iiJ16--cIQrGC67QdR0I,396
  synapse_sdk/plugins/templates/hooks/post_gen_project.py,sha256=jqlYkY1O2TxIR-Vh3gnwILYy8k-D39Xx66d2KNQVMCs,147
  synapse_sdk/plugins/templates/hooks/pre_prompt.py,sha256=aOAMM623s0sKFGjTZaotAOYFvsNMxeii4tPyhOAFKVE,539
@@ -136,9 +134,9 @@ synapse_sdk/utils/storage/providers/__init__.py,sha256=x7RGwZryT2FpVxS7fGWryRVpq
  synapse_sdk/utils/storage/providers/gcp.py,sha256=i2BQCu1Kej1If9SuNr2_lEyTcr5M_ncGITZrL0u5wEA,363
  synapse_sdk/utils/storage/providers/s3.py,sha256=W94rQvhGRXti3R4mYP7gmU5pcyCQpGFIBLvxxqLVdRM,2231
  synapse_sdk/utils/storage/providers/sftp.py,sha256=_8s9hf0JXIO21gvm-JVS00FbLsbtvly4c-ETLRax68A,1426
- synapse_sdk-1.0.0a53.dist-info/licenses/LICENSE,sha256=bKzmC5YAg4V1Fhl8OO_tqY8j62hgdncAkN7VrdjmrGk,1101
- synapse_sdk-1.0.0a53.dist-info/METADATA,sha256=QZFYv1IY06vc9HM-VCqi8aL3G4D54ruvDPUgX4RlRDg,1303
- synapse_sdk-1.0.0a53.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- synapse_sdk-1.0.0a53.dist-info/entry_points.txt,sha256=VNptJoGoNJI8yLXfBmhgUefMsmGI0m3-0YoMvrOgbxo,48
- synapse_sdk-1.0.0a53.dist-info/top_level.txt,sha256=ytgJMRK1slVOKUpgcw3LEyHHP7S34J6n_gJzdkcSsw8,12
- synapse_sdk-1.0.0a53.dist-info/RECORD,,
+ synapse_sdk-1.0.0a55.dist-info/licenses/LICENSE,sha256=bKzmC5YAg4V1Fhl8OO_tqY8j62hgdncAkN7VrdjmrGk,1101
+ synapse_sdk-1.0.0a55.dist-info/METADATA,sha256=d2fJLQn1LuXa0NeLokHzsjeE8zgIyOJrOH3bRBUoIw0,1303
+ synapse_sdk-1.0.0a55.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ synapse_sdk-1.0.0a55.dist-info/entry_points.txt,sha256=VNptJoGoNJI8yLXfBmhgUefMsmGI0m3-0YoMvrOgbxo,48
+ synapse_sdk-1.0.0a55.dist-info/top_level.txt,sha256=ytgJMRK1slVOKUpgcw3LEyHHP7S34J6n_gJzdkcSsw8,12
+ synapse_sdk-1.0.0a55.dist-info/RECORD,,
@@ -1,101 +0,0 @@
- from enum import Enum
- from typing import Annotated
- 
- from pydantic import AfterValidator, BaseModel, field_validator
- from pydantic_core import PydanticCustomError
- 
- from synapse_sdk.clients.exceptions import ClientError
- from synapse_sdk.plugins.categories.base import Action
- from synapse_sdk.plugins.categories.decorators import register_action
- from synapse_sdk.plugins.enums import PluginCategory, RunMethod
- from synapse_sdk.plugins.models import Run
- from synapse_sdk.utils.pydantic.validators import non_blank
- 
- 
- class TaskDataAnnotationType(str, Enum):
-     FILE = 'file'
-     INFERENCE = 'inference'
- 
- 
- class TaskPreAnnotationRun(Run):
-     pass
- 
- 
- class TaskPreAnnotationParams(BaseModel):
-     """TaskPreAnnotation action parameters.
- 
-     Args:
-         name (str): The name of the action.
-         description (str | None): The description of the action.
-         project (int): The project ID.
-         data_collection (int): The data collection ID.
-         task_data_annotation_type (TaskDataAnnotationType): The type of task data annotation.
-     """
- 
-     name: Annotated[str, AfterValidator(non_blank)]
-     description: str | None
-     project: int
-     data_collection: int
-     task_data_annotation_type: TaskDataAnnotationType
- 
-     @field_validator('data_collection', mode='before')
-     @classmethod
-     def check_data_collection_exists(cls, value: str, info) -> str:
-         """Validate synapse-backend collection exists."""
-         action = info.context['action']
-         client = action.client
-         try:
-             client.get_data_collection(value)
-         except ClientError:
-             raise PydanticCustomError('client_error', 'Error occurred while checking data collection exists.')
-         return value
- 
-     @field_validator('project', mode='before')
-     @classmethod
-     def check_project_exists(cls, value: str, info) -> str:
-         """Validate synapse-backend project exists."""
-         if not value:
-             return value
- 
-         action = info.context['action']
-         client = action.client
-         try:
-             client.get_project(value)
-         except ClientError:
-             raise PydanticCustomError('client_error', 'Error occurred while checking project exists.')
-         return value
- 
- 
- @register_action
- class TaskPreAnnotationAction(Action):
-     """TaskPreAnnotation action class.
- 
-     * Annotate data to tasks.
-     """
- 
-     name = 'task_pre_annotation'
-     category = PluginCategory.UPLOAD
-     method = RunMethod.JOB
-     run_class = TaskPreAnnotationRun
-     progress_categories = {
-         'generate_tasks': {
-             'proportion': 10,
-         },
-         'annotate_task_data': {
-             'proportion': 90,
-         },
-     }
- 
-     def start(self):
-         """Start task_pre_annotation action.
- 
-         * Generate tasks.
-         * Annotate data to tasks.
-         """
-         task_pre_annotation = self.get_task_pre_annotation()
-         task_pre_annotation.handle_annotate_data_from_files()
-         return {}
- 
-     def get_task_pre_annotation(self):
-         """Get task pre annotation entrypoint."""
-         return self.entrypoint()
@@ -1,14 +0,0 @@
- class TaskPreAnnotation:
-     def __init__(self, run, *args, **kwargs):
-         """Initialize the plugin task pre annotation action class.
- 
-         Args:
-             run: Plugin run object.
-         """
-         self.run = run
- 
-     def handle_annotate_data_from_files(self):
-         pass
- 
-     def handle_annotate_data_with_inference(self):
-         pass