synapse-sdk 1.0.0a55__py3-none-any.whl → 1.0.0a56__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of synapse-sdk might be problematic.

synapse_sdk/clients/backend/__init__.py
@@ -1,6 +1,6 @@
 from synapse_sdk.clients.backend.annotation import AnnotationClientMixin
 from synapse_sdk.clients.backend.core import CoreClientMixin
-from synapse_sdk.clients.backend.dataset import DatasetClientMixin
+from synapse_sdk.clients.backend.data_collection import DataCollectionClientMixin
 from synapse_sdk.clients.backend.hitl import HITLClientMixin
 from synapse_sdk.clients.backend.integration import IntegrationClientMixin
 from synapse_sdk.clients.backend.ml import MLClientMixin
@@ -9,7 +9,7 @@ from synapse_sdk.clients.backend.ml import MLClientMixin
 class BackendClient(
     AnnotationClientMixin,
     CoreClientMixin,
-    DatasetClientMixin,
+    DataCollectionClientMixin,
     IntegrationClientMixin,
     MLClientMixin,
     HITLClientMixin,

synapse_sdk/clients/backend/dataset.py → synapse_sdk/clients/backend/data_collection.py
@@ -8,13 +8,13 @@ from synapse_sdk.clients.base import BaseClient
 from synapse_sdk.clients.utils import get_batched_list


-class DatasetClientMixin(BaseClient):
-    def list_dataset(self):
+class DataCollectionClientMixin(BaseClient):
+    def list_data_collection(self):
         path = 'data_collections/'
         return self._list(path)

-    def get_dataset(self, data_collection_id):
-        """Get dataset from synapse-backend.
+    def get_data_collection(self, data_collection_id):
+        """Get data_collection from synapse-backend.

         Args:
             data_collection_id: The data_collection id to get.
@@ -40,19 +40,19 @@ class DatasetClientMixin(BaseClient):
         path = 'data_units/'
         return self._post(path, data=data)

-    def upload_dataset(
+    def upload_data_collection(
         self,
-        dataset_id: int,
-        dataset: Dict,
+        data_collection_id: int,
+        data_collection: Dict,
         project_id: Optional[int] = None,
         batch_size: int = 1000,
         process_pool: int = 10,
     ):
-        """Upload dataset to synapse-backend.
+        """Upload data_collection to synapse-backend.

         Args:
-            dataset_id: The dataset id to upload the data to.
-            dataset: The dataset to upload.
+            data_collection_id: The data_collection id to upload the data to.
+            data_collection: The data_collection to upload.
                 * structure:
                     - files: The files to upload. (key: file name, value: file pathlib object)
                     - meta: The meta data to upload.
@@ -60,14 +60,14 @@ class DatasetClientMixin(BaseClient):
             batch_size: The batch size to upload the data.
             process_pool: The process pool to upload the data.
         """
-        # TODO validate dataset with schema
+        # TODO validate data_collection with schema

-        params = [(data, dataset_id) for data in dataset]
+        params = [(data, data_collection_id) for data in data_collection]

         with Pool(processes=process_pool) as pool:
-            dataset = pool.starmap(self.upload_data_file, tqdm(params))
+            data_collection = pool.starmap(self.upload_data_file, tqdm(params))

-        batches = get_batched_list(dataset, batch_size)
+        batches = get_batched_list(data_collection, batch_size)

         for batch in tqdm(batches):
             data_units = self.create_data_units(batch)
@@ -90,7 +90,7 @@ class DatasetClientMixin(BaseClient):
                 * structure:
                     - files: The files to upload. (key: file name, value: file pathlib object)
                     - meta: The meta data to upload.
-            data_collection_id: The dataset id to upload the data to.
+            data_collection_id: The data_collection id to upload the data to.

         Returns:
             Dict: The result of the upload.
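
Note: every dataset-centric name on the backend client is renamed to its data_collection equivalent. A minimal migration sketch in Python, assuming an already constructed BackendClient instance `client` and an iterable `entries` of {'files': ..., 'meta': ...} dicts (the instance, the iterable and the ID 42 are illustrative, not part of this diff):

collections = client.list_data_collection()      # was: client.list_dataset()
collection = client.get_data_collection(42)      # was: client.get_dataset(42)
client.upload_data_collection(                   # was: client.upload_dataset(dataset_id=..., dataset=...)
    data_collection_id=42,
    data_collection=entries,
    batch_size=1000,
    process_pool=10,
)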

synapse_sdk/plugins/categories/export/actions/export.py
@@ -27,6 +27,13 @@ class ExportRun(Run):
         error: str | None = None
         created: str

+    class MetricsRecord(BaseModel):
+        """Metrics record model."""
+
+        stand_by: int
+        failed: int
+        success: int
+
     def log_file(
         self, log_type: str, target_id: int, data_file_info: dict, status: ExportStatus, error: str | None = None
     ):
@@ -51,6 +58,16 @@ class ExportRun(Run):
             ).model_dump(),
         )

+    def log_metrics(self, record: MetricsRecord, category: str):
+        """Log export metrics.
+
+        Args:
+            record (MetricsRecord): The metrics record to log.
+            category (str): The category of the metrics.
+        """
+        record = self.MetricsRecord.model_validate(record)
+        self.set_metrics(value=record.dict(), category=category)
+
     def export_log_json_file(
         self,
         target_id: int,
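
ExportRun now carries a nested MetricsRecord model plus a log_metrics helper that validates the record and stores it via set_metrics under a category. A minimal usage sketch, assuming an ExportRun instance `run` and an illustrative count of ten pending items ('data_file' and 'original_file' are the categories ExportAction declares in the next hunk):

record = run.MetricsRecord(stand_by=10, failed=0, success=0)  # all items still pending
record.stand_by -= 1
record.success += 1                                           # one item exported successfully
run.log_metrics(record=record, category='data_file')
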
@@ -263,6 +280,7 @@ class ExportAction(Action):
             'proportion': 100,
         }
     }
+    metrics_categories = {'data_file', 'original_file'}

     def get_filtered_results(self, filters, handler):
         """Get filtered target results."""

synapse_sdk/plugins/categories/export/templates/plugin/export.py
@@ -42,6 +42,8 @@ def export(run, export_items, path_root, **params):
     origin_files_output_path.mkdir(parents=True, exist_ok=True)

     total = params['count']
+    original_file_metrics_record = run.MetricsRecord(stand_by=total, success=0, failed=0)
+    data_file_metrics_record = run.MetricsRecord(stand_by=total, success=0, failed=0)
     # progress init
     run.set_progress(0, total, category='dataset_conversion')
     for no, export_item in enumerate(export_items, start=1):
@@ -56,12 +58,30 @@ def export(run, export_items, path_root, **params):
         if save_original_file_flag:
             if no == 1:
                 run.log_message('Saving original file.')
-            save_original_file(run, final_data, origin_files_output_path, errors_original_file_list)
+            original_status = save_original_file(run, final_data, origin_files_output_path, errors_original_file_list)
+
+            original_file_metrics_record.stand_by -= 1
+            if original_status == ExportStatus.FAILED:
+                original_file_metrics_record.failed += 1
+                continue
+            else:
+                original_file_metrics_record.success += 1
+
+            run.log_metrics(record=original_file_metrics_record, category='original_file')

         # Extract data as JSON files
         if no == 1:
             run.log_message('Saving json file.')
-        save_as_json(run, final_data, json_output_path, errors_json_file_list)
+        data_status = save_as_json(run, final_data, json_output_path, errors_json_file_list)
+
+        data_file_metrics_record.stand_by -= 1
+        if data_status == ExportStatus.FAILED:
+            data_file_metrics_record.failed += 1
+            continue
+        else:
+            data_file_metrics_record.success += 1
+
+        run.log_metrics(record=data_file_metrics_record, category='data_file')

     run.end_log()

@@ -126,6 +146,7 @@ def save_original_file(run, result, base_path, error_file_list):
         status = ExportStatus.FAILED

     run.export_log_original_file(result['id'], file_info, status, error_msg)
+    return status


 def save_as_json(run, result, base_path, error_file_list):
@@ -152,3 +173,4 @@ def save_as_json(run, result, base_path, error_file_list):
         status = ExportStatus.FAILED

     run.export_log_json_file(result['id'], file_info, status, error_msg)
+    return status
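
Taken together, save_original_file and save_as_json now return the ExportStatus they logged, and the export loop folds each status into a per-category MetricsRecord before reporting it with run.log_metrics. A condensed sketch of that per-item pattern, assuming `run`, `total`, `export_items`, the output path and the error list are set up as in the template above, and that `final_data` stands in for per-item processing not shown in this diff:

data_file_metrics_record = run.MetricsRecord(stand_by=total, success=0, failed=0)
for export_item in export_items:
    final_data = export_item  # placeholder; the real template derives final_data from export_item
    status = save_as_json(run, final_data, json_output_path, errors_json_file_list)
    data_file_metrics_record.stand_by -= 1
    if status == ExportStatus.FAILED:
        data_file_metrics_record.failed += 1
        continue
    data_file_metrics_record.success += 1
    run.log_metrics(record=data_file_metrics_record, category='data_file')
# The original-file counters follow the same pattern with save_original_file and category='original_file'.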

synapse_sdk/plugins/categories/upload/actions/task_pre_annotation.py (new file)
@@ -0,0 +1,101 @@
+from enum import Enum
+from typing import Annotated
+
+from pydantic import AfterValidator, BaseModel, field_validator
+from pydantic_core import PydanticCustomError
+
+from synapse_sdk.clients.exceptions import ClientError
+from synapse_sdk.plugins.categories.base import Action
+from synapse_sdk.plugins.categories.decorators import register_action
+from synapse_sdk.plugins.enums import PluginCategory, RunMethod
+from synapse_sdk.plugins.models import Run
+from synapse_sdk.utils.pydantic.validators import non_blank
+
+
+class TaskDataAnnotationType(str, Enum):
+    FILE = 'file'
+    INFERENCE = 'inference'
+
+
+class TaskPreAnnotationRun(Run):
+    pass
+
+
+class TaskPreAnnotationParams(BaseModel):
+    """TaskPreAnnotation action parameters.
+
+    Args:
+        name (str): The name of the action.
+        description (str | None): The description of the action.
+        project (int): The project ID.
+        data_collection (int): The data collection ID.
+        task_data_annotation_type (TaskDataAnnotationType): The type of task data annotation.
+    """
+
+    name: Annotated[str, AfterValidator(non_blank)]
+    description: str | None
+    project: int
+    data_collection: int
+    task_data_annotation_type: TaskDataAnnotationType
+
+    @field_validator('data_collection', mode='before')
+    @classmethod
+    def check_data_collection_exists(cls, value: str, info) -> str:
+        """Validate synapse-backend collection exists."""
+        action = info.context['action']
+        client = action.client
+        try:
+            client.get_data_collection(value)
+        except ClientError:
+            raise PydanticCustomError('client_error', 'Error occurred while checking data collection exists.')
+        return value
+
+    @field_validator('project', mode='before')
+    @classmethod
+    def check_project_exists(cls, value: str, info) -> str:
+        """Validate synapse-backend project exists."""
+        if not value:
+            return value
+
+        action = info.context['action']
+        client = action.client
+        try:
+            client.get_project(value)
+        except ClientError:
+            raise PydanticCustomError('client_error', 'Error occurred while checking project exists.')
+        return value
+
+
+@register_action
+class TaskPreAnnotationAction(Action):
+    """TaskPreAnnotation action class.
+
+    * Annotate data to tasks.
+    """
+
+    name = 'task_pre_annotation'
+    category = PluginCategory.UPLOAD
+    method = RunMethod.JOB
+    run_class = TaskPreAnnotationRun
+    progress_categories = {
+        'generate_tasks': {
+            'proportion': 10,
+        },
+        'annotate_task_data': {
+            'proportion': 90,
+        },
+    }
+
+    def start(self):
+        """Start task_pre_annotation action.
+
+        * Generate tasks.
+        * Annotate data to tasks.
+        """
+        task_pre_annotation = self.get_task_pre_annotation()
+        task_pre_annotation.handle_annotate_data_from_files()
+        return {}
+
+    def get_task_pre_annotation(self):
+        """Get task pre annotation entrypoint."""
+        return self.entrypoint()
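
The two field validators on TaskPreAnnotationParams pull the owning action out of the pydantic validation context and use its client to check that the referenced data collection and project exist. A hedged sketch of how such params might be validated (the `action` object, its `client`, and the ID values are illustrative assumptions; the actual invocation path is not part of this diff):

params = TaskPreAnnotationParams.model_validate(
    {
        'name': 'pre-annotate batch',
        'description': None,
        'project': 12,
        'data_collection': 34,
        'task_data_annotation_type': TaskDataAnnotationType.FILE,
    },
    context={'action': action},  # the validators read info.context['action'].client
)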

synapse_sdk/plugins/categories/upload/actions/upload.py
@@ -1,8 +1,6 @@
 import json
-import re
 from datetime import datetime
 from enum import Enum
-from pathlib import Path
 from typing import Annotated, Dict, List

 from pydantic import AfterValidator, BaseModel, field_validator
@@ -10,7 +8,6 @@ from pydantic_core import PydanticCustomError

 from synapse_sdk.clients.exceptions import ClientError
 from synapse_sdk.clients.utils import get_batched_list
-from synapse_sdk.clients.validators.collections import FileSpecificationValidator
 from synapse_sdk.i18n import gettext as _
 from synapse_sdk.plugins.categories.base import Action
 from synapse_sdk.plugins.categories.decorators import register_action
@@ -107,8 +104,6 @@ class UploadParams(BaseModel):
     storage: int
     collection: int
     project: int | None
-    is_generate_tasks: bool = False
-    is_generate_ground_truths: bool = False

     @field_validator('storage', mode='before')
     @classmethod
@@ -133,7 +128,7 @@
         action = info.context['action']
         client = action.client
         try:
-            client.get_dataset(value)
+            client.get_data_collection(value)
         except ClientError:
             raise PydanticCustomError('client_error', _('Error occurred while checking collection exists.'))
         return value
@@ -167,8 +162,6 @@ class UploadAction(Action):
         analyze_collection: The progress category for the analyze collection process.
         data_file_upload: The progress category for the upload process.
         generate_data_units: The progress category for the generate data units process.
-        generate_tasks: The progress category for the generate tasks process.
-        generate_ground_truths: The progress category for the generate ground truths process.
     """

     name = 'upload'
@@ -177,48 +170,16 @@
     run_class = UploadRun
     progress_categories = {
         'analyze_collection': {
-            'proportion': 0,
+            'proportion': 10,
         },
         'upload_data_files': {
-            'proportion': 0,
+            'proportion': 50,
         },
         'generate_data_units': {
-            'proportion': 0,
-        },
-        'generate_tasks': {
-            'proportion': 0,
-        },
-        'generate_ground_truths': {
-            'proportion': 0,
+            'proportion': 40,
         },
     }

-    def __init__(self, *args, **kwargs):
-        """Initialize UploadAction."""
-        super().__init__(*args, **kwargs)
-
-        # Setup progress categories ratio by options.
-        progress_ratios = {
-            'upload_only': (5, 60, 35, 0, 0),
-            'generate_tasks': (5, 45, 25, 25, 0),
-            'generate_ground_truths': (5, 35, 30, 15, 15),
-        }
-        options = self.config.get('options', {})
-        progress_categories = self.progress_categories
-        if options['allow_generate_tasks'] and not kwargs['params']['allow_generate_ground_truths']:
-            ratio_name = 'generate_tasks'
-        elif options['allow_generate_ground_truths'] and kwargs['params']['allow_generate_tasks']:
-            ratio_name = 'generate_ground_truths'
-        else:
-            ratio_name = 'upload_only'
-
-        assert len(progress_categories) == len(progress_ratios[ratio_name]), (
-            'Progress categories and ratios length mismatch.'
-        )
-        for i, category in enumerate(progress_categories):
-            progress_categories[category]['proportion'] = progress_ratios[ratio_name][i]
-        self.progress_categories = progress_categories
-
     def get_uploader(self, path, file_specification, organized_files):
         """Get uploader from entrypoint."""
         return self.entrypoint(self.run, path, file_specification, organized_files)
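
With the option-driven ratio table removed, UploadAction now reports against three fixed progress categories: analyze_collection (10), upload_data_files (50) and generate_data_units (40), which presumably weight each category's share of the overall progress. A minimal sketch of reporting into those buckets, assuming a run object with the set_progress signature used elsewhere in this diff and illustrative counters:

run.set_progress(1, 1, category='analyze_collection')                      # the 10% slice
run.set_progress(uploaded, total_files, category='upload_data_files')      # the 50% slice
run.set_progress(generated, total_files, category='generate_data_units')   # the 40% slice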
@@ -271,26 +232,6 @@ class UploadAction(Action):
         generated_data_units = self._generate_data_units(uploaded_files, upload_result_count)
         result['generated_data_units_count'] = len(generated_data_units)

-        # Setup task with uploaded synapse-backend data units.
-        if not len(generated_data_units):
-            self.run.log_message('No data units were generated.', context=Context.WARNING.value)
-            self.run.end_log()
-            return result
-
-        if self.config['options']['allow_generate_tasks'] and self.params['is_generate_tasks']:
-            generated_tasks = self._generate_tasks(generated_data_units)
-            result['generated_tasks_count'] = len(generated_tasks)
-        else:
-            self.run.log_message('Generating tasks process has passed.')
-
-        # Generate ground truths for the uploaded data.
-        # TODO: Need to add ground truths generation logic later.
-        if self.config['options']['allow_generate_ground_truths'] and self.params['is_generate_ground_truths']:
-            generated_ground_truths = self._generate_ground_truths()
-            result['generated_ground_truths_count'] = len(generated_ground_truths)
-        else:
-            self.run.log_message('Generating ground truths process has passed.')
-
         self.run.end_log()
         return result

@@ -306,7 +247,7 @@

         client = self.run.client
         collection_id = self.params['collection']
-        collection = client.get_dataset(collection_id)
+        collection = client.get_data_collection(collection_id)

         # Finish progress
         self.run.set_progress(1, 1, category='analyze_collection')
@@ -370,224 +311,3 @@
         self.run.set_progress(upload_result_count, upload_result_count, category='generate_data_units')

         return sum(generated_data_units, [])
-
-    def _generate_tasks(self, generated_data_units: List) -> List:
-        """Setup task with uploaded synapse-backend data units.
-
-        TODO: make batch size configurable.
-        """
-        # Initialize progress
-        self.run.set_progress(0, 1, category='generate_tasks')
-        self.run.log_message('Generating tasks with data files...')
-
-        # Prepare batches for processing
-        client = self.run.client
-        project_id = self.params['project']
-        current_progress = 0
-
-        # Generate tasks
-        generated_tasks = []
-        generated_data_units_count = len(generated_data_units)
-        for data_unit in generated_data_units:
-            tasks_data = []
-            task_data = {'project': project_id, 'data_unit': data_unit['id']}
-            tasks_data.append(task_data)
-            if tasks_data:
-                created_tasks = client.create_tasks(tasks_data)
-                created_task_ids = [created_task['id'] for created_task in created_tasks]
-                generated_tasks.append(created_task_ids)
-                for created_task_id in created_task_ids:
-                    self.run.log_task(created_task_id, UploadStatus.SUCCESS)
-
-            self.run.set_progress(current_progress, generated_data_units_count, category='generate_tasks')
-            current_progress += 1
-
-        # Finish progress
-        self.run.log_message('Generating tasks completed')
-        self.run.set_progress(1, 1, category='generate_tasks')
-
-        return sum(generated_tasks, [])
-
-    def _generate_ground_truths(self):
-        """Generate ground truths for the uploaded data.
-
-        TODO: Need to add ground truths generation logic later.
-        """
-        # Initialize progress
-        self.run.set_progress(0, 1, category='generate_ground_truths')
-        self.run.log_message('Generating ground truths...')
-
-        # Finish progress
-        self.run.log_message('Generating ground truths completed')
-        self.run.set_progress(1, 1, category='generate_ground_truths')
-
-    def _validate_organized_files(self, organized_files: List, file_specification_template: Dict) -> bool:
-        """Validate organized files from Uploader."""
-        validator = FileSpecificationValidator(file_specification_template, organized_files)
-        return validator.validate()
-
-    def _organize_files(self, directory: Path, file_specification: List) -> List:
-        """Organize files according to the file specification.
-        This method handles type-based directory structure where files are organized in
-        directories named after file types (e.g., 'image_1/' directory contains image files
-        like '1.jpg', '2.jpg'). For each dataset ID found in the primary directory, it attempts
-        to find corresponding files in all type directories.
-
-        TODO : Add Logic to handle file specific name patterns and extensions.
-        (e.g. pcd:S_DCH_230725_0156_LR_037.pcd, image_1:S_DCH_230725_0156_FC_037, image_2:S_DCH_230725_0156_LF_037.jpg)
-        Args:
-            directory (Path): Root directory containing files to organize.
-            file_specification (List): File specification list.
-        Returns:
-            List: List of dictionaries containing organized files.
-        """
-        organized_files = []
-        self.run.log_message(f'Looking for files in {directory}...')
-
-        # Check for type-based directory structure (e.g., image_1/, pcd_1/)
-        type_dirs = {}
-        type_extensions = {} # Store common extensions for each type directory
-
-        for spec in file_specification:
-            spec_name = spec['name']
-
-            spec_dir = directory / spec_name
-            if spec_dir.exists() and spec_dir.is_dir():
-                type_dirs[spec_name] = spec_dir
-
-                # Analyze file extensions in this directory
-                extensions = {}
-                for file_path in spec_dir.glob('*'):
-                    if file_path.is_file():
-                        ext = file_path.suffix.lower()
-                        extensions[ext] = extensions.get(ext, 0) + 1
-
-                # Find the most common extension
-                if extensions:
-                    common_ext = max(extensions.items(), key=lambda x: x[1])[0]
-                    type_extensions[spec_name] = common_ext
-                    self.run.log_message(f'Found type directory: {spec_name} (common extension: {common_ext})')
-
-        # If type-based directories don't exist, exit early
-        if not type_dirs:
-            self.run.log_message('No type-based directory structure found.', context=Context.INFO.value)
-            return organized_files
-
-        self.run.log_message('Detected type-based directory structure')
-
-        # Build a comprehensive map of all dataset IDs across all type directories
-        dataset_files = {} # Dictionary: file_name -> {spec_name -> file_path}
-
-        # First pass: collect all dataset IDs from all type directories
-        for spec_name, dir_path in type_dirs.items():
-            for file_path in dir_path.glob('*'):
-                if file_path.is_file():
-                    file_name = file_path.stem
-
-                    # Initialize dataset entry if it doesn't exist
-                    if file_name not in dataset_files:
-                        dataset_files[file_name] = {}
-
-                    # Map this file to its specification
-                    if spec_name not in dataset_files[file_name]:
-                        dataset_files[file_name][spec_name] = file_path
-                    else:
-                        # If multiple files with same file_name for same spec, use most recent
-                        existing_file = dataset_files[file_name][spec_name]
-                        if file_path.stat().st_mtime > existing_file.stat().st_mtime:
-                            dataset_files[file_name][spec_name] = file_path
-                            self.run.log_message(
-                                f"Found newer file for name of {file_name}, spec '{spec_name}': "
-                                f'{file_path.name} (replacing {existing_file.name})'
-                            )
-
-        if not dataset_files:
-            self.run.log_message('No dataset files found.', context=Context.WARNING.value)
-            return organized_files
-
-        self.run.log_message(f'Found {len(dataset_files)} potential datasets by ID')
-
-        # Second pass: organize valid datasets
-        for file_name, files_dict in sorted(dataset_files.items()):
-            self.run.log_message(f'Processing file name: {file_name}')
-
-            # Add file spec details for logging
-            for spec_name, file_path in files_dict.items():
-                self.run.log_message(f"Mapped '{spec_name}' to: {file_path.name}")
-
-            # Check if all required files are present
-            required_specs = [spec['name'] for spec in file_specification if spec.get('is_required', False)]
-            if all(req in files_dict for req in required_specs):
-                # Create metadata for this dataset
-                meta_data = {
-                    'origin_file_stem': file_name,
-                    'created_at': datetime.now().isoformat(),
-                }
-
-                # Add the organized dataset
-                organized_files.append({'files': files_dict, 'meta': meta_data})
-                self.run.log_message(f'Successfully organized dataset for ID {file_name}')
-            else:
-                # Missing required files warning
-                missing = [req for req in required_specs if req not in files_dict]
-                self.run.log_message(
-                    f'Dataset ID {file_name} is missing required files: {", ".join(missing)}',
-                    context=Context.WARNING.value,
-                )
-
-        self.run.log_message(f'Total datasets organized: {len(organized_files)}')
-        return organized_files
-
-    def _map_files_to_specification(self, directory: Path, file_specification: List) -> Dict[str, Path]:
-        """Map files in a directory to the file specification.
-
-        Args:
-            directory (Path): Directory containing files to map.
-            file_specification (List): File specification list.
-
-        Returns:
-            Dict[str, Path]: Dictionary mapping file specification names to file paths.
-        """
-        files_dict = {}
-
-        # Get all files in the directory once
-        all_files = [f for f in directory.iterdir() if f.is_file()]
-
-        # Process each file specification
-        for file_spec in file_specification:
-            file_name = file_spec['name']
-            is_required = file_spec.get('is_required', False)
-
-            # Generate name pattern based on the specification
-            name_parts = re.split(r'_(\d+)$', file_name)
-
-            # Find files matching the pattern
-            matching_files = []
-            if len(name_parts) > 1:
-                base_name = name_parts[0]
-                index = name_parts[1]
-                # Match patterns like "pcd_1.ext", "point_cloud_1.ext", etc.
-                for file in all_files:
-                    if base_name in file.stem and f'_{index}' in file.stem:
-                        matching_files.append(file)
-            else:
-                # Simple match - just find files containing the pattern
-                for file in all_files:
-                    if file_name in file.stem:
-                        matching_files.append(file)
-
-            # Process matching files
-            if matching_files:
-                # Sort by modification time (newest first) if multiple files match
-                if len(matching_files) > 1:
-                    matching_files.sort(key=lambda f: f.stat().st_mtime, reverse=True)
-                    self.run.log_message(
-                        f"Multiple files match '{file_name}'. Using most recent: {matching_files[0].name}"
-                    )
-
-                files_dict[file_name] = matching_files[0]
-                self.run.log_message(f"Mapped '{file_name}' to: {matching_files[0].name}")
-            elif is_required:
-                self.run.log_message(f"Required file '{file_name}' not found.", context=Context.WARNING.value)
-
-        return files_dict

synapse_sdk/plugins/categories/upload/templates/config.yaml
@@ -1,9 +1,10 @@
 actions:
   upload:
     entrypoint: plugin.upload.Uploader
-    options:
-      allow_generate_tasks: false # Allow the plugin to generate tasks for the uploaded data
-      allow_generate_ground_truths: false # Allow the plugin to generate ground truths for the uploaded data
     supported_data_type: image # A primary data type of synapse backend collection. (e.g. 'image', 'text', 'video', 'pcd', 'audio')
     ui_schema: |
       Dumped FormKit Schema for upload plugin custom options
+  task_pre_annotation:
+    entrypoint: plugin.upload.TaskPreAnnotation
+    ui_schema: |
+      Dumped FormKit Schema for upload plugin custom options

synapse_sdk/plugins/categories/upload/templates/plugin/task_pre_annotation.py (new file)
@@ -0,0 +1,14 @@
+class TaskPreAnnotation:
+    def __init__(self, run, *args, **kwargs):
+        """Initialize the plugin task pre annotation action class.
+
+        Args:
+            run: Plugin run object.
+        """
+        self.run = run
+
+    def handle_annotate_data_from_files(self):
+        pass
+
+    def handle_annotate_data_with_inference(self):
+        pass
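
This template class is the entrypoint referenced by the new task_pre_annotation entry in config.yaml; the action above resolves it and currently drives only the file-based path. A hedged sketch of that wiring (the argument plumbing through Action.entrypoint is not shown in this diff, so the constructor call is an assumption):

pre_annotation = TaskPreAnnotation(run)               # run: the TaskPreAnnotationRun the SDK passes in
pre_annotation.handle_annotate_data_from_files()      # file-based pre-annotation, currently a stub
# handle_annotate_data_with_inference() is the inference-based variant, also a stub for now.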

synapse_sdk-1.0.0a55.dist-info/METADATA → synapse_sdk-1.0.0a56.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: synapse-sdk
-Version: 1.0.0a55
+Version: 1.0.0a56
 Summary: synapse sdk
 Author-email: datamaker <developer@datamaker.io>
 License: MIT

synapse_sdk-1.0.0a55.dist-info/RECORD → synapse_sdk-1.0.0a56.dist-info/RECORD
@@ -28,10 +28,10 @@ synapse_sdk/clients/agent/__init__.py,sha256=Pz8_iTbIbnb7ywGJ3feqoZVmO2I3mEbwpWs
 synapse_sdk/clients/agent/core.py,sha256=x2jgORTjT7pJY67SLuc-5lMG6CD5OWpy8UgGeTf7IhA,270
 synapse_sdk/clients/agent/ray.py,sha256=JrwLyVOUDG2yYsbPrxyUtWbM-FWp9B6Bl_GdDby0rt8,1559
 synapse_sdk/clients/agent/service.py,sha256=s7KuPK_DB1nr2VHrigttV1WyFonaGHNrPvU8loRxHcE,478
-synapse_sdk/clients/backend/__init__.py,sha256=Fiehino2n3voaHTdpJHXSY7K_CDnMkQeokapbgeoTBk,1187
+synapse_sdk/clients/backend/__init__.py,sha256=MC3pndBk-SPyW9L6WnrTozoub9-EK7auXFvPHCaxeFU,1209
 synapse_sdk/clients/backend/annotation.py,sha256=f4jS4qlXH7M7mQ3EuCq-NrjJ_hJNDz8pEFAYqf-e008,996
 synapse_sdk/clients/backend/core.py,sha256=5XAOdo6JZ0drfk-FMPJ96SeTd9oja-VnTwzGXdvK7Bg,1027
-synapse_sdk/clients/backend/dataset.py,sha256=eQ0O43Ck91z5Om7mb-vW_P5NIaX1OZKirjGs-WQHdM4,3480
+synapse_sdk/clients/backend/data_collection.py,sha256=kj9TurBAljK_mFF75oaazlqnL0bd6PHbgRfR3KyTUmI,3623
 synapse_sdk/clients/backend/hitl.py,sha256=na2mSXFud92p4zUEuagcDWk2klxO7xn-e86cm0VZEvs,709
 synapse_sdk/clients/backend/integration.py,sha256=9LjkYcBpi7aog-MODSDS4RlmYahypu65qxBj-AcY7xc,2683
 synapse_sdk/clients/backend/ml.py,sha256=JoPH9Ly2E3HJ7S5mdGLtcGq7ruQVVrYfWArogwZLlms,1193
@@ -61,10 +61,10 @@ synapse_sdk/plugins/categories/data_validation/templates/plugin/validation.py,sh
 synapse_sdk/plugins/categories/export/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 synapse_sdk/plugins/categories/export/enums.py,sha256=gtyngvQ1DKkos9iKGcbecwTVQQ6sDwbrBPSGPNb5Am0,127
 synapse_sdk/plugins/categories/export/actions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-synapse_sdk/plugins/categories/export/actions/export.py,sha256=xqPB_MufeMP3riaKCbGVFGukV8RdXcg6-zUrkw4t1-A,9922
+synapse_sdk/plugins/categories/export/actions/export.py,sha256=2lIjur8EiwTB9sc16FV8ZaPXFxUtGRPx9hreG_DKLQA,10483
 synapse_sdk/plugins/categories/export/templates/config.yaml,sha256=N7YmnFROb3s3M35SA9nmabyzoSb5O2t2TRPicwFNN2o,56
 synapse_sdk/plugins/categories/export/templates/plugin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-synapse_sdk/plugins/categories/export/templates/plugin/export.py,sha256=JA2Y_A30QyJekSqDq8PeRuFR9k0yjQjOG-Xy6C8zPew,5196
+synapse_sdk/plugins/categories/export/templates/plugin/export.py,sha256=zG8mSn7ZGIj8cttWmb7GEPcGgQRbZ97brJCzkuK7RP8,6106
 synapse_sdk/plugins/categories/neural_net/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 synapse_sdk/plugins/categories/neural_net/actions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 synapse_sdk/plugins/categories/neural_net/actions/deployment.py,sha256=y2LrS-pwazqRI5O0q1NUy45NQYsBj6ykbrXnDMs_fqE,1987
@@ -100,9 +100,11 @@ synapse_sdk/plugins/categories/smart_tool/templates/plugin/__init__.py,sha256=47
 synapse_sdk/plugins/categories/smart_tool/templates/plugin/auto_label.py,sha256=eevNg0nOcYFR4z_L_R-sCvVOYoLWSAH1jwDkAf3YCjY,320
 synapse_sdk/plugins/categories/upload/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 synapse_sdk/plugins/categories/upload/actions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-synapse_sdk/plugins/categories/upload/actions/upload.py,sha256=L9OqHWNyzO5qXi9-afkgRI1hfL7ysJjY0z0a5kujJrQ,24202
-synapse_sdk/plugins/categories/upload/templates/config.yaml,sha256=kwHNWHFYbzDi1mEh40KozatPZbZGH44dlP0t0J7ejJw,483
+synapse_sdk/plugins/categories/upload/actions/task_pre_annotation.py,sha256=YkQZ7QECu6-PnSEv2lAbbL3smxeIHxUiu9ruBdA0_0k,3066
+synapse_sdk/plugins/categories/upload/actions/upload.py,sha256=WSzTulI3GAejXKy6DsmxP2zE7fRAX_bYTUaL0Za7Ci8,11287
+synapse_sdk/plugins/categories/upload/templates/config.yaml,sha256=1O0kMfkFMGYwnpBcttrlC9bu4xzU9docw2MBOq_Elmo,417
 synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+synapse_sdk/plugins/categories/upload/templates/plugin/task_pre_annotation.py,sha256=9XkUZu7USjVjDPufM0NlYmkdKfV7Hf_9v5GN1RgZzS0,350
 synapse_sdk/plugins/categories/upload/templates/plugin/upload.py,sha256=IZU4sdSMSLKPCtlNqF7DP2howTdYR6hr74HCUZsGdPk,1559
 synapse_sdk/plugins/templates/cookiecutter.json,sha256=NxOWk9A_v1pO0Ny4IYT9Cj5iiJ16--cIQrGC67QdR0I,396
 synapse_sdk/plugins/templates/hooks/post_gen_project.py,sha256=jqlYkY1O2TxIR-Vh3gnwILYy8k-D39Xx66d2KNQVMCs,147
@@ -134,9 +136,9 @@ synapse_sdk/utils/storage/providers/__init__.py,sha256=x7RGwZryT2FpVxS7fGWryRVpq
 synapse_sdk/utils/storage/providers/gcp.py,sha256=i2BQCu1Kej1If9SuNr2_lEyTcr5M_ncGITZrL0u5wEA,363
 synapse_sdk/utils/storage/providers/s3.py,sha256=W94rQvhGRXti3R4mYP7gmU5pcyCQpGFIBLvxxqLVdRM,2231
 synapse_sdk/utils/storage/providers/sftp.py,sha256=_8s9hf0JXIO21gvm-JVS00FbLsbtvly4c-ETLRax68A,1426
-synapse_sdk-1.0.0a55.dist-info/licenses/LICENSE,sha256=bKzmC5YAg4V1Fhl8OO_tqY8j62hgdncAkN7VrdjmrGk,1101
-synapse_sdk-1.0.0a55.dist-info/METADATA,sha256=d2fJLQn1LuXa0NeLokHzsjeE8zgIyOJrOH3bRBUoIw0,1303
-synapse_sdk-1.0.0a55.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-synapse_sdk-1.0.0a55.dist-info/entry_points.txt,sha256=VNptJoGoNJI8yLXfBmhgUefMsmGI0m3-0YoMvrOgbxo,48
-synapse_sdk-1.0.0a55.dist-info/top_level.txt,sha256=ytgJMRK1slVOKUpgcw3LEyHHP7S34J6n_gJzdkcSsw8,12
-synapse_sdk-1.0.0a55.dist-info/RECORD,,
+synapse_sdk-1.0.0a56.dist-info/licenses/LICENSE,sha256=bKzmC5YAg4V1Fhl8OO_tqY8j62hgdncAkN7VrdjmrGk,1101
+synapse_sdk-1.0.0a56.dist-info/METADATA,sha256=Ms_yriRQzC_lTwXwNUzA_rhSafE6YG69OurqybsaQPs,1303
+synapse_sdk-1.0.0a56.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+synapse_sdk-1.0.0a56.dist-info/entry_points.txt,sha256=VNptJoGoNJI8yLXfBmhgUefMsmGI0m3-0YoMvrOgbxo,48
+synapse_sdk-1.0.0a56.dist-info/top_level.txt,sha256=ytgJMRK1slVOKUpgcw3LEyHHP7S34J6n_gJzdkcSsw8,12
+synapse_sdk-1.0.0a56.dist-info/RECORD,,