synapse-sdk 1.0.0a11__py3-none-any.whl → 2026.1.1b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synapse-sdk might be problematic. Click here for more details.
- synapse_sdk/__init__.py +24 -0
- synapse_sdk/cli/__init__.py +9 -8
- synapse_sdk/cli/agent/__init__.py +25 -0
- synapse_sdk/cli/agent/config.py +104 -0
- synapse_sdk/cli/agent/select.py +197 -0
- synapse_sdk/cli/auth.py +104 -0
- synapse_sdk/cli/main.py +1025 -0
- synapse_sdk/cli/plugin/__init__.py +58 -0
- synapse_sdk/cli/plugin/create.py +566 -0
- synapse_sdk/cli/plugin/job.py +196 -0
- synapse_sdk/cli/plugin/publish.py +322 -0
- synapse_sdk/cli/plugin/run.py +131 -0
- synapse_sdk/cli/plugin/test.py +200 -0
- synapse_sdk/clients/README.md +239 -0
- synapse_sdk/clients/__init__.py +5 -0
- synapse_sdk/clients/_template.py +266 -0
- synapse_sdk/clients/agent/__init__.py +84 -29
- synapse_sdk/clients/agent/async_ray.py +289 -0
- synapse_sdk/clients/agent/container.py +83 -0
- synapse_sdk/clients/agent/plugin.py +101 -0
- synapse_sdk/clients/agent/ray.py +296 -39
- synapse_sdk/clients/backend/__init__.py +152 -12
- synapse_sdk/clients/backend/annotation.py +164 -22
- synapse_sdk/clients/backend/core.py +101 -0
- synapse_sdk/clients/backend/data_collection.py +292 -0
- synapse_sdk/clients/backend/hitl.py +87 -0
- synapse_sdk/clients/backend/integration.py +374 -46
- synapse_sdk/clients/backend/ml.py +134 -22
- synapse_sdk/clients/backend/models.py +247 -0
- synapse_sdk/clients/base.py +538 -59
- synapse_sdk/clients/exceptions.py +35 -7
- synapse_sdk/clients/pipeline/__init__.py +5 -0
- synapse_sdk/clients/pipeline/client.py +636 -0
- synapse_sdk/clients/protocols.py +178 -0
- synapse_sdk/clients/utils.py +86 -8
- synapse_sdk/clients/validation.py +58 -0
- synapse_sdk/enums.py +76 -0
- synapse_sdk/exceptions.py +168 -0
- synapse_sdk/integrations/__init__.py +74 -0
- synapse_sdk/integrations/_base.py +119 -0
- synapse_sdk/integrations/_context.py +53 -0
- synapse_sdk/integrations/ultralytics/__init__.py +78 -0
- synapse_sdk/integrations/ultralytics/_callbacks.py +126 -0
- synapse_sdk/integrations/ultralytics/_patches.py +124 -0
- synapse_sdk/loggers.py +476 -95
- synapse_sdk/mcp/MCP.md +69 -0
- synapse_sdk/mcp/__init__.py +48 -0
- synapse_sdk/mcp/__main__.py +6 -0
- synapse_sdk/mcp/config.py +349 -0
- synapse_sdk/mcp/prompts/__init__.py +4 -0
- synapse_sdk/mcp/resources/__init__.py +4 -0
- synapse_sdk/mcp/server.py +1352 -0
- synapse_sdk/mcp/tools/__init__.py +6 -0
- synapse_sdk/plugins/__init__.py +133 -9
- synapse_sdk/plugins/action.py +229 -0
- synapse_sdk/plugins/actions/__init__.py +82 -0
- synapse_sdk/plugins/actions/dataset/__init__.py +37 -0
- synapse_sdk/plugins/actions/dataset/action.py +471 -0
- synapse_sdk/plugins/actions/export/__init__.py +55 -0
- synapse_sdk/plugins/actions/export/action.py +183 -0
- synapse_sdk/plugins/actions/export/context.py +59 -0
- synapse_sdk/plugins/actions/inference/__init__.py +84 -0
- synapse_sdk/plugins/actions/inference/action.py +285 -0
- synapse_sdk/plugins/actions/inference/context.py +81 -0
- synapse_sdk/plugins/actions/inference/deployment.py +322 -0
- synapse_sdk/plugins/actions/inference/serve.py +252 -0
- synapse_sdk/plugins/actions/train/__init__.py +54 -0
- synapse_sdk/plugins/actions/train/action.py +326 -0
- synapse_sdk/plugins/actions/train/context.py +57 -0
- synapse_sdk/plugins/actions/upload/__init__.py +49 -0
- synapse_sdk/plugins/actions/upload/action.py +165 -0
- synapse_sdk/plugins/actions/upload/context.py +61 -0
- synapse_sdk/plugins/config.py +98 -0
- synapse_sdk/plugins/context/__init__.py +109 -0
- synapse_sdk/plugins/context/env.py +113 -0
- synapse_sdk/plugins/datasets/__init__.py +113 -0
- synapse_sdk/plugins/datasets/converters/__init__.py +76 -0
- synapse_sdk/plugins/datasets/converters/base.py +347 -0
- synapse_sdk/plugins/datasets/converters/yolo/__init__.py +9 -0
- synapse_sdk/plugins/datasets/converters/yolo/from_dm.py +468 -0
- synapse_sdk/plugins/datasets/converters/yolo/to_dm.py +381 -0
- synapse_sdk/plugins/datasets/formats/__init__.py +82 -0
- synapse_sdk/plugins/datasets/formats/dm.py +351 -0
- synapse_sdk/plugins/datasets/formats/yolo.py +240 -0
- synapse_sdk/plugins/decorators.py +83 -0
- synapse_sdk/plugins/discovery.py +790 -0
- synapse_sdk/plugins/docs/ACTION_DEV_GUIDE.md +933 -0
- synapse_sdk/plugins/docs/ARCHITECTURE.md +1225 -0
- synapse_sdk/plugins/docs/LOGGING_SYSTEM.md +683 -0
- synapse_sdk/plugins/docs/OVERVIEW.md +531 -0
- synapse_sdk/plugins/docs/PIPELINE_GUIDE.md +145 -0
- synapse_sdk/plugins/docs/README.md +513 -0
- synapse_sdk/plugins/docs/STEP.md +656 -0
- synapse_sdk/plugins/enums.py +70 -10
- synapse_sdk/plugins/errors.py +92 -0
- synapse_sdk/plugins/executors/__init__.py +43 -0
- synapse_sdk/plugins/executors/local.py +99 -0
- synapse_sdk/plugins/executors/ray/__init__.py +18 -0
- synapse_sdk/plugins/executors/ray/base.py +282 -0
- synapse_sdk/plugins/executors/ray/job.py +298 -0
- synapse_sdk/plugins/executors/ray/jobs_api.py +511 -0
- synapse_sdk/plugins/executors/ray/packaging.py +137 -0
- synapse_sdk/plugins/executors/ray/pipeline.py +792 -0
- synapse_sdk/plugins/executors/ray/task.py +257 -0
- synapse_sdk/plugins/models/__init__.py +26 -0
- synapse_sdk/plugins/models/logger.py +173 -0
- synapse_sdk/plugins/models/pipeline.py +25 -0
- synapse_sdk/plugins/pipelines/__init__.py +81 -0
- synapse_sdk/plugins/pipelines/action_pipeline.py +417 -0
- synapse_sdk/plugins/pipelines/context.py +107 -0
- synapse_sdk/plugins/pipelines/display.py +311 -0
- synapse_sdk/plugins/runner.py +114 -0
- synapse_sdk/plugins/schemas/__init__.py +19 -0
- synapse_sdk/plugins/schemas/results.py +152 -0
- synapse_sdk/plugins/steps/__init__.py +63 -0
- synapse_sdk/plugins/steps/base.py +128 -0
- synapse_sdk/plugins/steps/context.py +90 -0
- synapse_sdk/plugins/steps/orchestrator.py +128 -0
- synapse_sdk/plugins/steps/registry.py +103 -0
- synapse_sdk/plugins/steps/utils/__init__.py +20 -0
- synapse_sdk/plugins/steps/utils/logging.py +85 -0
- synapse_sdk/plugins/steps/utils/timing.py +71 -0
- synapse_sdk/plugins/steps/utils/validation.py +68 -0
- synapse_sdk/plugins/templates/__init__.py +50 -0
- synapse_sdk/plugins/templates/base/.gitignore.j2 +26 -0
- synapse_sdk/plugins/templates/base/.synapseignore.j2 +11 -0
- synapse_sdk/plugins/templates/base/README.md.j2 +26 -0
- synapse_sdk/plugins/templates/base/plugin/__init__.py.j2 +1 -0
- synapse_sdk/plugins/templates/base/pyproject.toml.j2 +14 -0
- synapse_sdk/plugins/templates/base/requirements.txt.j2 +1 -0
- synapse_sdk/plugins/templates/custom/plugin/main.py.j2 +18 -0
- synapse_sdk/plugins/templates/data_validation/plugin/validate.py.j2 +32 -0
- synapse_sdk/plugins/templates/export/plugin/export.py.j2 +36 -0
- synapse_sdk/plugins/templates/neural_net/plugin/inference.py.j2 +36 -0
- synapse_sdk/plugins/templates/neural_net/plugin/train.py.j2 +33 -0
- synapse_sdk/plugins/templates/post_annotation/plugin/post_annotate.py.j2 +32 -0
- synapse_sdk/plugins/templates/pre_annotation/plugin/pre_annotate.py.j2 +32 -0
- synapse_sdk/plugins/templates/smart_tool/plugin/auto_label.py.j2 +44 -0
- synapse_sdk/plugins/templates/upload/plugin/upload.py.j2 +35 -0
- synapse_sdk/plugins/testing/__init__.py +25 -0
- synapse_sdk/plugins/testing/sample_actions.py +98 -0
- synapse_sdk/plugins/types.py +206 -0
- synapse_sdk/plugins/upload.py +595 -64
- synapse_sdk/plugins/utils.py +325 -37
- synapse_sdk/shared/__init__.py +25 -0
- synapse_sdk/utils/__init__.py +1 -0
- synapse_sdk/utils/auth.py +74 -0
- synapse_sdk/utils/file/__init__.py +58 -0
- synapse_sdk/utils/file/archive.py +449 -0
- synapse_sdk/utils/file/checksum.py +167 -0
- synapse_sdk/utils/file/download.py +286 -0
- synapse_sdk/utils/file/io.py +129 -0
- synapse_sdk/utils/file/requirements.py +36 -0
- synapse_sdk/utils/network.py +168 -0
- synapse_sdk/utils/storage/__init__.py +238 -0
- synapse_sdk/utils/storage/config.py +188 -0
- synapse_sdk/utils/storage/errors.py +52 -0
- synapse_sdk/utils/storage/providers/__init__.py +13 -0
- synapse_sdk/utils/storage/providers/base.py +76 -0
- synapse_sdk/utils/storage/providers/gcs.py +168 -0
- synapse_sdk/utils/storage/providers/http.py +250 -0
- synapse_sdk/utils/storage/providers/local.py +126 -0
- synapse_sdk/utils/storage/providers/s3.py +177 -0
- synapse_sdk/utils/storage/providers/sftp.py +208 -0
- synapse_sdk/utils/storage/registry.py +125 -0
- synapse_sdk/utils/websocket.py +99 -0
- synapse_sdk-2026.1.1b2.dist-info/METADATA +715 -0
- synapse_sdk-2026.1.1b2.dist-info/RECORD +172 -0
- {synapse_sdk-1.0.0a11.dist-info → synapse_sdk-2026.1.1b2.dist-info}/WHEEL +1 -1
- synapse_sdk-2026.1.1b2.dist-info/licenses/LICENSE +201 -0
- locale/en/LC_MESSAGES/messages.mo +0 -0
- locale/en/LC_MESSAGES/messages.po +0 -39
- locale/ko/LC_MESSAGES/messages.mo +0 -0
- locale/ko/LC_MESSAGES/messages.po +0 -34
- synapse_sdk/cli/create_plugin.py +0 -10
- synapse_sdk/clients/agent/core.py +0 -7
- synapse_sdk/clients/agent/service.py +0 -15
- synapse_sdk/clients/backend/dataset.py +0 -51
- synapse_sdk/clients/ray/__init__.py +0 -6
- synapse_sdk/clients/ray/core.py +0 -22
- synapse_sdk/clients/ray/serve.py +0 -20
- synapse_sdk/i18n.py +0 -35
- synapse_sdk/plugins/categories/__init__.py +0 -0
- synapse_sdk/plugins/categories/base.py +0 -235
- synapse_sdk/plugins/categories/data_validation/__init__.py +0 -0
- synapse_sdk/plugins/categories/data_validation/actions/__init__.py +0 -0
- synapse_sdk/plugins/categories/data_validation/actions/validation.py +0 -10
- synapse_sdk/plugins/categories/data_validation/templates/config.yaml +0 -3
- synapse_sdk/plugins/categories/data_validation/templates/plugin/__init__.py +0 -0
- synapse_sdk/plugins/categories/data_validation/templates/plugin/validation.py +0 -5
- synapse_sdk/plugins/categories/decorators.py +0 -13
- synapse_sdk/plugins/categories/export/__init__.py +0 -0
- synapse_sdk/plugins/categories/export/actions/__init__.py +0 -0
- synapse_sdk/plugins/categories/export/actions/export.py +0 -10
- synapse_sdk/plugins/categories/import/__init__.py +0 -0
- synapse_sdk/plugins/categories/import/actions/__init__.py +0 -0
- synapse_sdk/plugins/categories/import/actions/import.py +0 -10
- synapse_sdk/plugins/categories/neural_net/__init__.py +0 -0
- synapse_sdk/plugins/categories/neural_net/actions/__init__.py +0 -0
- synapse_sdk/plugins/categories/neural_net/actions/deployment.py +0 -45
- synapse_sdk/plugins/categories/neural_net/actions/inference.py +0 -18
- synapse_sdk/plugins/categories/neural_net/actions/test.py +0 -10
- synapse_sdk/plugins/categories/neural_net/actions/train.py +0 -143
- synapse_sdk/plugins/categories/neural_net/templates/config.yaml +0 -12
- synapse_sdk/plugins/categories/neural_net/templates/plugin/__init__.py +0 -0
- synapse_sdk/plugins/categories/neural_net/templates/plugin/inference.py +0 -4
- synapse_sdk/plugins/categories/neural_net/templates/plugin/test.py +0 -2
- synapse_sdk/plugins/categories/neural_net/templates/plugin/train.py +0 -14
- synapse_sdk/plugins/categories/post_annotation/__init__.py +0 -0
- synapse_sdk/plugins/categories/post_annotation/actions/__init__.py +0 -0
- synapse_sdk/plugins/categories/post_annotation/actions/post_annotation.py +0 -10
- synapse_sdk/plugins/categories/post_annotation/templates/config.yaml +0 -3
- synapse_sdk/plugins/categories/post_annotation/templates/plugin/__init__.py +0 -0
- synapse_sdk/plugins/categories/post_annotation/templates/plugin/post_annotation.py +0 -3
- synapse_sdk/plugins/categories/pre_annotation/__init__.py +0 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +0 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation.py +0 -10
- synapse_sdk/plugins/categories/pre_annotation/templates/config.yaml +0 -3
- synapse_sdk/plugins/categories/pre_annotation/templates/plugin/__init__.py +0 -0
- synapse_sdk/plugins/categories/pre_annotation/templates/plugin/pre_annotation.py +0 -3
- synapse_sdk/plugins/categories/registry.py +0 -16
- synapse_sdk/plugins/categories/smart_tool/__init__.py +0 -0
- synapse_sdk/plugins/categories/smart_tool/actions/__init__.py +0 -0
- synapse_sdk/plugins/categories/smart_tool/actions/auto_label.py +0 -37
- synapse_sdk/plugins/categories/smart_tool/templates/config.yaml +0 -7
- synapse_sdk/plugins/categories/smart_tool/templates/plugin/__init__.py +0 -0
- synapse_sdk/plugins/categories/smart_tool/templates/plugin/auto_label.py +0 -11
- synapse_sdk/plugins/categories/templates.py +0 -32
- synapse_sdk/plugins/cli/__init__.py +0 -21
- synapse_sdk/plugins/cli/publish.py +0 -37
- synapse_sdk/plugins/cli/run.py +0 -67
- synapse_sdk/plugins/exceptions.py +0 -22
- synapse_sdk/plugins/models.py +0 -121
- synapse_sdk/plugins/templates/cookiecutter.json +0 -11
- synapse_sdk/plugins/templates/hooks/post_gen_project.py +0 -3
- synapse_sdk/plugins/templates/hooks/pre_prompt.py +0 -21
- synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/.env +0 -24
- synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/.env.dist +0 -24
- synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/.gitignore +0 -27
- synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/.pre-commit-config.yaml +0 -7
- synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/README.md +0 -5
- synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/config.yaml +0 -6
- synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/main.py +0 -4
- synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/plugin/__init__.py +0 -0
- synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/pyproject.toml +0 -13
- synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/requirements.txt +0 -1
- synapse_sdk/shared/enums.py +0 -8
- synapse_sdk/utils/debug.py +0 -5
- synapse_sdk/utils/file.py +0 -87
- synapse_sdk/utils/module_loading.py +0 -29
- synapse_sdk/utils/pydantic/__init__.py +0 -0
- synapse_sdk/utils/pydantic/config.py +0 -4
- synapse_sdk/utils/pydantic/errors.py +0 -33
- synapse_sdk/utils/pydantic/validators.py +0 -7
- synapse_sdk/utils/storage.py +0 -91
- synapse_sdk/utils/string.py +0 -11
- synapse_sdk-1.0.0a11.dist-info/LICENSE +0 -21
- synapse_sdk-1.0.0a11.dist-info/METADATA +0 -43
- synapse_sdk-1.0.0a11.dist-info/RECORD +0 -111
- {synapse_sdk-1.0.0a11.dist-info → synapse_sdk-2026.1.1b2.dist-info}/entry_points.txt +0 -0
- {synapse_sdk-1.0.0a11.dist-info → synapse_sdk-2026.1.1b2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,471 @@
|
|
|
1
|
+
"""Dataset action with download and convert operations.
|
|
2
|
+
|
|
3
|
+
A single action class that handles both dataset download and format conversion,
|
|
4
|
+
selected via the operation parameter. Designed for pipeline composition.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import shutil
|
|
11
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
12
|
+
from enum import StrEnum
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import TYPE_CHECKING, Any
|
|
15
|
+
|
|
16
|
+
from pydantic import BaseModel, Field
|
|
17
|
+
|
|
18
|
+
from synapse_sdk.plugins.action import BaseAction
|
|
19
|
+
from synapse_sdk.plugins.datasets.converters import DatasetFormat, get_converter
|
|
20
|
+
from synapse_sdk.plugins.datasets.formats.dm import DMVersion
|
|
21
|
+
from synapse_sdk.plugins.enums import PluginCategory
|
|
22
|
+
from synapse_sdk.plugins.types import YOLODataset
|
|
23
|
+
|
|
24
|
+
if TYPE_CHECKING:
|
|
25
|
+
from synapse_sdk.clients.backend import BackendClient
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class DatasetOperation(StrEnum):
    """Dataset operation types.

    Selects which code path :class:`DatasetAction` runs:
    DOWNLOAD fetches data units from the backend, CONVERT transforms a
    local dataset between formats. As a StrEnum, members compare equal
    to their string values ('download', 'convert').
    """

    DOWNLOAD = 'download'
    CONVERT = 'convert'
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class DatasetParams(BaseModel):
    """Parameters for DatasetAction.

    The operation field determines which operation to perform:
    - download: Downloads dataset from backend
    - convert: Converts dataset from one format to another

    All fields are optional except that download requires ``dataset_id``
    and convert requires ``path``; the relevant method validates its own
    requirements at runtime.

    Attributes:
        operation: Which operation to perform (defaults to download).
        dataset_id: Data collection ID (for download).
        splits: Split definitions for categorized download.
        path: Source dataset path (for convert, or set by download).
        source_format: Source format (for convert).
        target_format: Target format (for convert).
        dm_version: Datamaker version (for convert from DM).
        output_dir: Output directory (optional for both).
        is_categorized: Whether dataset has train/valid/test splits.
    """

    operation: DatasetOperation = DatasetOperation.DOWNLOAD

    # Download params
    dataset_id: int | None = Field(default=None, description='Data collection ID')
    # Each split name maps to backend list filters for that split.
    splits: dict[str, dict[str, Any]] | None = Field(
        default=None,
        description='Split definitions: {"train": {...filters}, "valid": {...}}',
    )

    # Convert params
    path: Path | str | None = Field(default=None, description='Dataset path')
    source_format: str = Field(default='dm_v2', description='Source format')
    target_format: str = Field(default='yolo', description='Target format')
    dm_version: str = Field(default='v2', description='Datamaker version')

    # Shared params
    output_dir: Path | str | None = Field(default=None, description='Output directory')
    is_categorized: bool = Field(default=False, description='Has splits')
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class DatasetResult(BaseModel):
    """Result from DatasetAction.

    Contains paths and metadata about the processed dataset.

    Attributes:
        path: Path to dataset directory.
        format: Dataset format (e.g., 'dm_v2', 'yolo').
        is_categorized: Whether dataset has splits.
        config_path: Path to config file (e.g., dataset.yaml for YOLO).
        count: Number of items processed.
        source_path: Original source path (for convert).
        data_path: Computed property returning config_path if set, otherwise path.
    """

    path: Path
    format: str
    is_categorized: bool = False
    config_path: Path | None = None
    count: int | None = None
    source_path: Path | None = None

    @property
    def data_path(self) -> Path:
        """Returns config_path if set, otherwise path.

        Use this for downstream actions that need a single path
        to the dataset (e.g., training with YOLO format).
        """
        return self.config_path if self.config_path is not None else self.path

    # NOTE(review): pydantic v1-style inner Config; if the SDK is on
    # pydantic v2 this should be `model_config = ConfigDict(...)` — confirm
    # the pinned pydantic version before changing.
    class Config:
        arbitrary_types_allowed = True
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class DatasetAction(BaseAction[DatasetParams]):
    """Dataset action with download and convert operations.

    A unified action for dataset operations that can be used in pipelines.
    The operation is determined by the params.operation field.

    Type declarations:
        - input_type: None (accepts initial params)
        - output_type: Dynamic based on operation and target_format
            - download: 'dm_dataset'
            - convert to yolo: 'yolo_dataset'
            - convert to coco: 'coco_dataset'

    For download:
        - Requires: dataset_id
        - Optional: splits, output_dir
        - Returns: path, format='dm_v2', is_categorized, count

    For convert:
        - Requires: path, target_format
        - Optional: source_format, dm_version, output_dir
        - Returns: path, format, config_path, source_path

    Example:
        >>> # Standalone usage
        >>> action = DatasetAction(
        ...     DatasetParams(operation='download', dataset_id=123),
        ...     ctx,
        ... )
        >>> result = action.execute()
        >>>
        >>> # Pipeline usage
        >>> pipeline = ActionPipeline([DatasetAction, DatasetAction, TrainAction])
        >>> result = pipeline.execute({
        ...     'operation': 'download',
        ...     'dataset_id': 123,
        ...     'target_format': 'yolo',  # Used by second DatasetAction
        ... }, ctx)
    """

    category = PluginCategory.NEURAL_NET

    # Input type is flexible (accepts various initial params)
    input_type = None
    # Output type: use YOLODataset for convert (most common), DMv2Dataset for download
    # For precise typing, use separate DownloadAction/ConvertAction classes
    output_type = YOLODataset  # Default assumes convert to YOLO

    result_model = DatasetResult

    @property
    def client(self) -> BackendClient:
        """Backend client from context.

        Raises:
            RuntimeError: If the context has no backend client attached.
        """
        if self.ctx.client is None:
            raise RuntimeError('No backend client in context')
        return self.ctx.client

    def execute(self) -> DatasetResult:
        """Execute the dataset operation based on params.operation."""
        if self.params.operation == DatasetOperation.DOWNLOAD:
            return self.download()
        elif self.params.operation == DatasetOperation.CONVERT:
            return self.convert()
        else:
            raise ValueError(f'Unknown operation: {self.params.operation}')

    def download(self) -> DatasetResult:
        """Download dataset from backend.

        Downloads data units from a data collection and saves them
        locally in Datamaker format (json/ + original_files/).

        Returns:
            DatasetResult with path, format, count.

        Raises:
            ValueError: If dataset_id not provided.
        """
        from synapse_sdk.utils.file import get_temp_path

        if self.params.dataset_id is None:
            raise ValueError('dataset_id is required for download operation')

        dataset_id = self.params.dataset_id
        splits = self.params.splits
        # Truthiness covers both None and an empty dict of splits.
        is_categorized = bool(splits)

        # Determine output directory.
        # Fix: build the Path once instead of the previous redundant
        # Path(...) -> Path(Path(...)) double conversion.
        if self.params.output_dir:
            output_dir = Path(self.params.output_dir)
        else:
            output_dir = Path(get_temp_path(f'datasets/{dataset_id}'))

        # Report initial progress
        self.set_progress(0, 100, 'init')

        # Get collection info
        collection = self.client.get_data_collection(dataset_id)
        self.log(
            'download_start',
            {
                'dataset_id': dataset_id,
                'collection_name': collection.get('name'),
                'is_categorized': is_categorized,
            },
        )

        # Report collection fetched
        self.set_progress(1, 100, 'init')

        total_downloaded = 0

        if is_categorized and splits:
            # Download each split separately into its own subdirectory.
            for split_name, filters in splits.items():
                split_dir = output_dir / split_name
                count = self._download_split(
                    dataset_id=dataset_id,
                    output_dir=split_dir,
                    filters=filters or {},
                )
                total_downloaded += count
                self.log(
                    'split_downloaded',
                    {
                        'split': split_name,
                        'count': count,
                    },
                )
        else:
            # Download all data units
            total_downloaded = self._download_split(
                dataset_id=dataset_id,
                output_dir=output_dir,
                filters={},
            )

        self.log(
            'download_complete',
            {
                'path': str(output_dir),
                'total_units': total_downloaded,
            },
        )

        return DatasetResult(
            path=output_dir,
            format='dm_v2',
            is_categorized=is_categorized,
            count=total_downloaded,
        )

    def _download_split(
        self,
        dataset_id: int,
        output_dir: Path,
        filters: dict[str, Any],
        max_workers: int = 10,
    ) -> int:
        """Download a single split of the dataset.

        Args:
            dataset_id: Data collection ID to download from.
            output_dir: Directory to write json/ and original_files/ into.
            filters: Extra backend list filters (e.g. split criteria).
            max_workers: Thread pool size for concurrent unit downloads.

        Returns:
            Number of data units successfully downloaded.
        """
        # Create output directories
        json_dir = output_dir / 'json'
        files_dir = output_dir / 'original_files'
        json_dir.mkdir(parents=True, exist_ok=True)
        files_dir.mkdir(parents=True, exist_ok=True)

        # Report fetching data units
        self.set_progress(2, 100, 'fetch')

        # List data units
        params = {'data_collection': dataset_id, **filters}
        data_units_gen, total_count = self.client.list_data_units(
            params=params,
            list_all=True,
        )

        # Report data units fetched
        self.set_progress(5, 100, 'fetch')
        self.log('data_units_listed', {'total_count': total_count})

        downloaded = 0

        def download_unit(unit: dict) -> bool:
            """Download a single data unit; returns True on success."""
            # Fix: resolve the ID *before* the try block so the error log
            # in the handler never hits an unbound `unit_id` (which would
            # raise NameError and mask the original failure).
            unit_id = unit.get('id') or unit.get('data_unit_id')
            try:
                files = unit.get('files', {})

                # Build DM v2 JSON structure
                dm_json = self._build_dm_json(unit)

                # Determine base name from first file or unit ID
                base_name = None
                for file_info in files.values():
                    if isinstance(file_info, dict):
                        file_path = file_info.get('path') or file_info.get('url', '')
                    else:
                        file_path = str(file_info)
                    if file_path:
                        base_name = Path(file_path).stem
                        break

                if not base_name:
                    base_name = str(unit_id)

                # Save JSON
                json_path = json_dir / f'{base_name}.json'
                json_path.write_text(json.dumps(dm_json, indent=2, ensure_ascii=False))

                # Copy files that exist locally.
                # Fix: iterate .values() — the dict key was unused.
                for file_info in files.values():
                    if isinstance(file_info, dict):
                        file_path = file_info.get('path')
                    else:
                        file_path = str(file_info)

                    if file_path and Path(file_path).exists():
                        dest = files_dir / Path(file_path).name
                        if not dest.exists():
                            shutil.copy(file_path, dest)

                return True
            except Exception as e:
                self.log('download_unit_error', {'unit_id': unit_id, 'error': str(e)})
                return False

        # Process units with thread pool
        # Note: data_units_gen is a lazy generator that fetches pages from API
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = []
            units_fetched = 0

            # Submit downloads as we iterate (reports fetch progress, 5-50% range)
            for unit in data_units_gen:
                futures.append(executor.submit(download_unit, unit))
                units_fetched += 1
                # Fix: guard against total_count == 0 (or None) — the API's
                # reported count can disagree with what the generator yields,
                # and dividing by it crashed with ZeroDivisionError.
                if total_count:
                    fetch_progress = 5 + (units_fetched / total_count) * 45
                    if units_fetched % 10 == 0 or units_fetched == total_count:
                        self.set_progress(min(int(fetch_progress), 50), 100, 'fetch')

            # Process completed downloads (50-100% range).
            # Fix: base progress on the number of futures actually submitted,
            # not the API's total_count, so it can neither divide by zero nor
            # overshoot 100 when the counts disagree.
            total_futures = len(futures)
            for i, future in enumerate(as_completed(futures)):
                if future.result():
                    downloaded += 1
                download_progress = 50 + ((i + 1) / total_futures) * 50
                self.set_progress(int(download_progress), 100, 'download')

        return downloaded

    def _build_dm_json(self, unit: dict) -> dict[str, Any]:
        """Build Datamaker v2 JSON from a data unit.

        Copies the recognized annotation types from the unit into a DM
        image record and derives the classification map from any
        'classification' keys found on individual annotations.
        """
        annotations = unit.get('annotations', {})

        dm_image: dict[str, list] = {
            'bounding_box': [],
            'polygon': [],
            'polyline': [],
            'keypoint': [],
            'relation': [],
            'group': [],
        }

        if isinstance(annotations, dict):
            for key in dm_image.keys():
                if key in annotations:
                    dm_image[key] = annotations[key]

        # Build classification map from annotations
        classifications: dict[str, set[str]] = {}
        for ann_type, anns in dm_image.items():
            if anns:
                classifications[ann_type] = set()
                for ann in anns:
                    if 'classification' in ann:
                        classifications[ann_type].add(ann['classification'])

        return {
            'classification': {k: sorted(v) for k, v in classifications.items() if v},
            'images': [dm_image],
        }

    def convert(self) -> DatasetResult:
        """Convert dataset from one format to another.

        Converts the dataset at params.path to params.target_format.

        Returns:
            DatasetResult with converted path, format, config_path.

        Raises:
            ValueError: If path not provided.
            FileNotFoundError: If the source path does not exist.
        """
        if self.params.path is None:
            raise ValueError('path is required for convert operation')

        source_path = Path(self.params.path)
        if not source_path.exists():
            raise FileNotFoundError(f'Dataset path does not exist: {source_path}')

        # Parse formats
        target_format = DatasetFormat(self.params.target_format)
        dm_version = DMVersion.V1 if self.params.dm_version == 'v1' else DMVersion.V2

        # Determine source format; the generic 'dm' aliases resolve through
        # dm_version rather than the format string itself.
        source_format_str = self.params.source_format
        if source_format_str in ('dm_v1', 'dm_v2', 'dm'):
            src_format = DatasetFormat.DM_V1 if dm_version == DMVersion.V1 else DatasetFormat.DM_V2
        else:
            src_format = DatasetFormat(source_format_str)

        # Determine output directory
        if self.params.output_dir:
            output_dir = Path(self.params.output_dir)
        else:
            output_dir = source_path.parent / f'{source_path.name}_{target_format.value}'

        self.log(
            'convert_start',
            {
                'source_path': str(source_path),
                'source_format': src_format.value,
                'target_format': target_format.value,
                'is_categorized': self.params.is_categorized,
            },
        )

        # Get converter and run conversion
        converter = get_converter(
            source=src_format,
            target=target_format,
            root_dir=source_path,
            is_categorized=self.params.is_categorized,
            dm_version=dm_version,
        )

        converter.convert()
        converter.save_to_folder(output_dir)

        # Determine config path (YOLO conversions emit dataset.yaml)
        config_path = None
        if target_format == DatasetFormat.YOLO:
            config_path = output_dir / 'dataset.yaml'
            if not config_path.exists():
                config_path = None

        self.log(
            'convert_complete',
            {
                'output_path': str(output_dir),
                'config_path': str(config_path) if config_path else None,
            },
        )

        return DatasetResult(
            path=output_dir,
            format=target_format.value,
            is_categorized=self.params.is_categorized,
            config_path=config_path,
            source_path=source_path,
        )
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
# Public API of the dataset action module.
__all__ = ['DatasetAction', 'DatasetOperation', 'DatasetParams', 'DatasetResult']
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""Export action module with optional workflow step support.
|
|
2
|
+
|
|
3
|
+
Provides the export action base class:
|
|
4
|
+
- BaseExportAction: Base class for export workflows
|
|
5
|
+
- ExportContext: Export-specific context extending BaseStepContext
|
|
6
|
+
- ExportProgressCategories: Standard progress category names
|
|
7
|
+
|
|
8
|
+
For step infrastructure (BaseStep, StepRegistry, Orchestrator),
|
|
9
|
+
use the steps module:
|
|
10
|
+
from synapse_sdk.plugins.steps import BaseStep, StepRegistry
|
|
11
|
+
|
|
12
|
+
Example (simple execute):
|
|
13
|
+
>>> class MyExportAction(BaseExportAction[MyParams]):
|
|
14
|
+
... def get_filtered_results(self, filters: dict) -> tuple[Any, int]:
|
|
15
|
+
... return self.client.get_assignments(filters)
|
|
16
|
+
...
|
|
17
|
+
... def execute(self) -> dict[str, Any]:
|
|
18
|
+
... results, count = self.get_filtered_results(self.params.filter)
|
|
19
|
+
... # ... export items ...
|
|
20
|
+
... return {'exported': count}
|
|
21
|
+
|
|
22
|
+
Example (step-based):
|
|
23
|
+
>>> from synapse_sdk.plugins.steps import BaseStep, StepResult
|
|
24
|
+
>>>
|
|
25
|
+
>>> class FetchResultsStep(BaseStep[ExportContext]):
|
|
26
|
+
... @property
|
|
27
|
+
... def name(self) -> str:
|
|
28
|
+
... return 'fetch_results'
|
|
29
|
+
...
|
|
30
|
+
... @property
|
|
31
|
+
... def progress_weight(self) -> float:
|
|
32
|
+
... return 0.2
|
|
33
|
+
...
|
|
34
|
+
... def execute(self, context: ExportContext) -> StepResult:
|
|
35
|
+
... context.results, context.total_count = fetch_data(context.params)
|
|
36
|
+
... return StepResult(success=True)
|
|
37
|
+
>>>
|
|
38
|
+
>>> class MyExportAction(BaseExportAction[MyParams]):
|
|
39
|
+
... def setup_steps(self, registry) -> None:
|
|
40
|
+
... registry.register(FetchResultsStep())
|
|
41
|
+
... registry.register(ProcessStep())
|
|
42
|
+
... registry.register(FinalizeStep())
|
|
43
|
+
"""
|
|
44
|
+
|
|
45
|
+
from synapse_sdk.plugins.actions.export.action import (
|
|
46
|
+
BaseExportAction,
|
|
47
|
+
ExportProgressCategories,
|
|
48
|
+
)
|
|
49
|
+
from synapse_sdk.plugins.actions.export.context import ExportContext
|
|
50
|
+
|
|
51
|
+
# Public API of the export actions package; re-exported from the
# action and context submodules above.
__all__ = [
    'BaseExportAction',
    'ExportContext',
    'ExportProgressCategories',
]
|