synapse-sdk 1.0.0a35__py3-none-any.whl → 2025.11.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synapse-sdk might be problematic. Click here for more details.
- synapse_sdk/__init__.py +24 -0
- synapse_sdk/cli/__init__.py +308 -5
- synapse_sdk/cli/alias/utils.py +1 -1
- synapse_sdk/cli/code_server.py +687 -0
- synapse_sdk/cli/config.py +440 -0
- synapse_sdk/cli/devtools.py +90 -0
- synapse_sdk/cli/plugin/publish.py +23 -15
- synapse_sdk/clients/agent/__init__.py +9 -3
- synapse_sdk/clients/agent/container.py +133 -0
- synapse_sdk/clients/agent/core.py +19 -0
- synapse_sdk/clients/agent/ray.py +298 -9
- synapse_sdk/clients/backend/__init__.py +28 -12
- synapse_sdk/clients/backend/annotation.py +9 -1
- synapse_sdk/clients/backend/core.py +31 -4
- synapse_sdk/clients/backend/data_collection.py +186 -0
- synapse_sdk/clients/backend/hitl.py +1 -1
- synapse_sdk/clients/backend/integration.py +4 -3
- synapse_sdk/clients/backend/ml.py +1 -1
- synapse_sdk/clients/backend/models.py +35 -1
- synapse_sdk/clients/base.py +309 -36
- synapse_sdk/clients/ray/serve.py +2 -0
- synapse_sdk/devtools/__init__.py +0 -0
- synapse_sdk/devtools/config.py +94 -0
- synapse_sdk/devtools/docs/.gitignore +20 -0
- synapse_sdk/devtools/docs/README.md +41 -0
- synapse_sdk/devtools/docs/blog/2019-05-28-first-blog-post.md +12 -0
- synapse_sdk/devtools/docs/blog/2019-05-29-long-blog-post.md +44 -0
- synapse_sdk/devtools/docs/blog/2021-08-01-mdx-blog-post.mdx +24 -0
- synapse_sdk/devtools/docs/blog/2021-08-26-welcome/docusaurus-plushie-banner.jpeg +0 -0
- synapse_sdk/devtools/docs/blog/2021-08-26-welcome/index.md +29 -0
- synapse_sdk/devtools/docs/blog/authors.yml +25 -0
- synapse_sdk/devtools/docs/blog/tags.yml +19 -0
- synapse_sdk/devtools/docs/docs/api/clients/agent.md +43 -0
- synapse_sdk/devtools/docs/docs/api/clients/annotation-mixin.md +378 -0
- synapse_sdk/devtools/docs/docs/api/clients/backend.md +420 -0
- synapse_sdk/devtools/docs/docs/api/clients/base.md +257 -0
- synapse_sdk/devtools/docs/docs/api/clients/core-mixin.md +477 -0
- synapse_sdk/devtools/docs/docs/api/clients/data-collection-mixin.md +422 -0
- synapse_sdk/devtools/docs/docs/api/clients/hitl-mixin.md +554 -0
- synapse_sdk/devtools/docs/docs/api/clients/index.md +391 -0
- synapse_sdk/devtools/docs/docs/api/clients/integration-mixin.md +571 -0
- synapse_sdk/devtools/docs/docs/api/clients/ml-mixin.md +578 -0
- synapse_sdk/devtools/docs/docs/api/clients/ray.md +342 -0
- synapse_sdk/devtools/docs/docs/api/index.md +52 -0
- synapse_sdk/devtools/docs/docs/api/plugins/categories.md +43 -0
- synapse_sdk/devtools/docs/docs/api/plugins/models.md +114 -0
- synapse_sdk/devtools/docs/docs/api/plugins/utils.md +328 -0
- synapse_sdk/devtools/docs/docs/categories.md +0 -0
- synapse_sdk/devtools/docs/docs/cli-usage.md +280 -0
- synapse_sdk/devtools/docs/docs/concepts/index.md +38 -0
- synapse_sdk/devtools/docs/docs/configuration.md +83 -0
- synapse_sdk/devtools/docs/docs/contributing.md +306 -0
- synapse_sdk/devtools/docs/docs/examples/index.md +29 -0
- synapse_sdk/devtools/docs/docs/faq.md +179 -0
- synapse_sdk/devtools/docs/docs/features/converters/index.md +455 -0
- synapse_sdk/devtools/docs/docs/features/index.md +24 -0
- synapse_sdk/devtools/docs/docs/features/utils/file.md +415 -0
- synapse_sdk/devtools/docs/docs/features/utils/network.md +378 -0
- synapse_sdk/devtools/docs/docs/features/utils/storage.md +57 -0
- synapse_sdk/devtools/docs/docs/features/utils/types.md +51 -0
- synapse_sdk/devtools/docs/docs/installation.md +94 -0
- synapse_sdk/devtools/docs/docs/introduction.md +47 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/neural-net-plugins/train-action-overview.md +814 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/pre-annotation-plugin-overview.md +198 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/to-task-action-development.md +1645 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/to-task-overview.md +717 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/to-task-template-development.md +1380 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-action.md +948 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-overview.md +544 -0
- synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-template.md +766 -0
- synapse_sdk/devtools/docs/docs/plugins/export-plugins.md +1092 -0
- synapse_sdk/devtools/docs/docs/plugins/plugins.md +852 -0
- synapse_sdk/devtools/docs/docs/quickstart.md +78 -0
- synapse_sdk/devtools/docs/docs/troubleshooting.md +519 -0
- synapse_sdk/devtools/docs/docs/tutorial-basics/_category_.json +8 -0
- synapse_sdk/devtools/docs/docs/tutorial-basics/congratulations.md +23 -0
- synapse_sdk/devtools/docs/docs/tutorial-basics/create-a-blog-post.md +34 -0
- synapse_sdk/devtools/docs/docs/tutorial-basics/create-a-document.md +57 -0
- synapse_sdk/devtools/docs/docs/tutorial-basics/create-a-page.md +43 -0
- synapse_sdk/devtools/docs/docs/tutorial-basics/deploy-your-site.md +31 -0
- synapse_sdk/devtools/docs/docs/tutorial-basics/markdown-features.mdx +152 -0
- synapse_sdk/devtools/docs/docs/tutorial-extras/_category_.json +7 -0
- synapse_sdk/devtools/docs/docs/tutorial-extras/img/docsVersionDropdown.png +0 -0
- synapse_sdk/devtools/docs/docs/tutorial-extras/img/localeDropdown.png +0 -0
- synapse_sdk/devtools/docs/docs/tutorial-extras/manage-docs-versions.md +55 -0
- synapse_sdk/devtools/docs/docs/tutorial-extras/translate-your-site.md +88 -0
- synapse_sdk/devtools/docs/docusaurus.config.ts +148 -0
- synapse_sdk/devtools/docs/i18n/ko/code.json +325 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/agent.md +43 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/annotation-mixin.md +289 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/backend.md +420 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/base.md +257 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/core-mixin.md +417 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/data-collection-mixin.md +356 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/hitl-mixin.md +192 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/index.md +391 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/integration-mixin.md +479 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/ml-mixin.md +284 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/ray.md +342 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/index.md +52 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/plugins/models.md +114 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/categories.md +0 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/cli-usage.md +280 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/concepts/index.md +38 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/configuration.md +83 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/contributing.md +306 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/examples/index.md +29 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/faq.md +179 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/features/converters/index.md +30 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/features/index.md +24 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/features/utils/file.md +415 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/features/utils/network.md +378 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/features/utils/storage.md +60 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/features/utils/types.md +51 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/installation.md +94 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/introduction.md +47 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/neural-net-plugins/train-action-overview.md +815 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/pre-annotation-plugin-overview.md +198 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/to-task-action-development.md +1645 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/to-task-overview.md +717 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/to-task-template-development.md +1380 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-action.md +948 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-overview.md +544 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-template.md +766 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/export-plugins.md +1092 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/plugins.md +117 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/quickstart.md +78 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/troubleshooting.md +519 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current.json +34 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-theme-classic/footer.json +42 -0
- synapse_sdk/devtools/docs/i18n/ko/docusaurus-theme-classic/navbar.json +18 -0
- synapse_sdk/devtools/docs/package-lock.json +18784 -0
- synapse_sdk/devtools/docs/package.json +48 -0
- synapse_sdk/devtools/docs/sidebars.ts +122 -0
- synapse_sdk/devtools/docs/src/components/HomepageFeatures/index.tsx +71 -0
- synapse_sdk/devtools/docs/src/components/HomepageFeatures/styles.module.css +11 -0
- synapse_sdk/devtools/docs/src/css/custom.css +30 -0
- synapse_sdk/devtools/docs/src/pages/index.module.css +23 -0
- synapse_sdk/devtools/docs/src/pages/index.tsx +21 -0
- synapse_sdk/devtools/docs/src/pages/markdown-page.md +7 -0
- synapse_sdk/devtools/docs/static/.nojekyll +0 -0
- synapse_sdk/devtools/docs/static/img/docusaurus-social-card.jpg +0 -0
- synapse_sdk/devtools/docs/static/img/docusaurus.png +0 -0
- synapse_sdk/devtools/docs/static/img/favicon.ico +0 -0
- synapse_sdk/devtools/docs/static/img/logo.png +0 -0
- synapse_sdk/devtools/docs/static/img/undraw_docusaurus_mountain.svg +171 -0
- synapse_sdk/devtools/docs/static/img/undraw_docusaurus_react.svg +170 -0
- synapse_sdk/devtools/docs/static/img/undraw_docusaurus_tree.svg +40 -0
- synapse_sdk/devtools/docs/tsconfig.json +8 -0
- synapse_sdk/devtools/server.py +41 -0
- synapse_sdk/devtools/streamlit_app/__init__.py +5 -0
- synapse_sdk/devtools/streamlit_app/app.py +128 -0
- synapse_sdk/devtools/streamlit_app/services/__init__.py +11 -0
- synapse_sdk/devtools/streamlit_app/services/job_service.py +233 -0
- synapse_sdk/devtools/streamlit_app/services/plugin_service.py +236 -0
- synapse_sdk/devtools/streamlit_app/services/serve_service.py +95 -0
- synapse_sdk/devtools/streamlit_app/ui/__init__.py +15 -0
- synapse_sdk/devtools/streamlit_app/ui/config_tab.py +76 -0
- synapse_sdk/devtools/streamlit_app/ui/deployment_tab.py +66 -0
- synapse_sdk/devtools/streamlit_app/ui/http_tab.py +125 -0
- synapse_sdk/devtools/streamlit_app/ui/jobs_tab.py +573 -0
- synapse_sdk/devtools/streamlit_app/ui/serve_tab.py +346 -0
- synapse_sdk/devtools/streamlit_app/ui/status_bar.py +118 -0
- synapse_sdk/devtools/streamlit_app/utils/__init__.py +40 -0
- synapse_sdk/devtools/streamlit_app/utils/json_viewer.py +197 -0
- synapse_sdk/devtools/streamlit_app/utils/log_formatter.py +38 -0
- synapse_sdk/devtools/streamlit_app/utils/styles.py +241 -0
- synapse_sdk/devtools/streamlit_app/utils/ui_components.py +289 -0
- synapse_sdk/devtools/streamlit_app.py +10 -0
- synapse_sdk/loggers.py +65 -7
- synapse_sdk/plugins/README.md +1340 -0
- synapse_sdk/plugins/categories/base.py +73 -11
- synapse_sdk/plugins/categories/data_validation/actions/validation.py +72 -0
- synapse_sdk/plugins/categories/data_validation/templates/plugin/validation.py +33 -5
- synapse_sdk/plugins/categories/export/actions/__init__.py +3 -0
- synapse_sdk/plugins/categories/export/actions/export/__init__.py +28 -0
- synapse_sdk/plugins/categories/export/actions/export/action.py +165 -0
- synapse_sdk/plugins/categories/export/actions/export/enums.py +113 -0
- synapse_sdk/plugins/categories/export/actions/export/exceptions.py +53 -0
- synapse_sdk/plugins/categories/export/actions/export/models.py +74 -0
- synapse_sdk/plugins/categories/export/actions/export/run.py +195 -0
- synapse_sdk/plugins/categories/export/actions/{export.py → export/utils.py} +47 -82
- synapse_sdk/plugins/categories/export/templates/config.yaml +19 -1
- synapse_sdk/plugins/categories/export/templates/plugin/__init__.py +390 -0
- synapse_sdk/plugins/categories/export/templates/plugin/export.py +153 -129
- synapse_sdk/plugins/categories/neural_net/actions/deployment.py +9 -62
- synapse_sdk/plugins/categories/neural_net/actions/train.py +1062 -32
- synapse_sdk/plugins/categories/neural_net/actions/tune.py +534 -0
- synapse_sdk/plugins/categories/neural_net/templates/config.yaml +27 -5
- synapse_sdk/plugins/categories/neural_net/templates/plugin/inference.py +26 -10
- synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +4 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/__init__.py +3 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/action.py +10 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/__init__.py +28 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/action.py +145 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/enums.py +269 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/exceptions.py +14 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/factory.py +76 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/models.py +97 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/orchestrator.py +250 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/run.py +64 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/__init__.py +17 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/annotation.py +287 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/base.py +170 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/extraction.py +83 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/metrics.py +87 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py +127 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py +143 -0
- synapse_sdk/plugins/categories/pre_annotation/actions/to_task.py +966 -0
- synapse_sdk/plugins/categories/pre_annotation/templates/config.yaml +19 -0
- synapse_sdk/plugins/categories/pre_annotation/templates/plugin/to_task.py +40 -0
- synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +19 -0
- synapse_sdk/plugins/categories/upload/actions/upload/action.py +232 -0
- synapse_sdk/plugins/categories/upload/actions/upload/context.py +185 -0
- synapse_sdk/plugins/categories/upload/actions/upload/enums.py +471 -0
- synapse_sdk/plugins/categories/upload/actions/upload/exceptions.py +36 -0
- synapse_sdk/plugins/categories/upload/actions/upload/factory.py +138 -0
- synapse_sdk/plugins/categories/upload/actions/upload/models.py +203 -0
- synapse_sdk/plugins/categories/upload/actions/upload/orchestrator.py +183 -0
- synapse_sdk/plugins/categories/upload/actions/upload/registry.py +113 -0
- synapse_sdk/plugins/categories/upload/actions/upload/run.py +179 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/base.py +107 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +62 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/collection.py +63 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py +84 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/initialize.py +82 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +235 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +203 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py +97 -0
- synapse_sdk/plugins/categories/upload/actions/upload/steps/validate.py +71 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/base.py +82 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/batch.py +39 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/single.py +29 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/flat.py +258 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py +281 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/excel.py +174 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/none.py +16 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/sync.py +84 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/__init__.py +1 -0
- synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py +60 -0
- synapse_sdk/plugins/categories/upload/actions/upload/utils.py +250 -0
- synapse_sdk/plugins/categories/upload/templates/README.md +470 -0
- synapse_sdk/plugins/categories/upload/templates/config.yaml +29 -2
- synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py +294 -0
- synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +88 -30
- synapse_sdk/plugins/models.py +122 -16
- synapse_sdk/plugins/templates/plugin-config-schema.json +406 -0
- synapse_sdk/plugins/templates/schema.json +491 -0
- synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/requirements.txt +1 -1
- synapse_sdk/plugins/utils/__init__.py +46 -0
- synapse_sdk/plugins/utils/actions.py +119 -0
- synapse_sdk/plugins/utils/config.py +203 -0
- synapse_sdk/plugins/{utils.py → utils/legacy.py} +26 -46
- synapse_sdk/plugins/utils/ray_gcs.py +66 -0
- synapse_sdk/plugins/utils/registry.py +58 -0
- synapse_sdk/shared/__init__.py +25 -0
- synapse_sdk/shared/enums.py +93 -0
- synapse_sdk/utils/converters/__init__.py +240 -0
- synapse_sdk/utils/converters/coco/__init__.py +0 -0
- synapse_sdk/utils/converters/coco/from_dm.py +322 -0
- synapse_sdk/utils/converters/coco/to_dm.py +215 -0
- synapse_sdk/utils/converters/dm/__init__.py +56 -0
- synapse_sdk/utils/converters/dm/from_v1.py +627 -0
- synapse_sdk/utils/converters/dm/to_v1.py +367 -0
- synapse_sdk/utils/converters/pascal/__init__.py +0 -0
- synapse_sdk/utils/converters/pascal/from_dm.py +244 -0
- synapse_sdk/utils/converters/pascal/to_dm.py +214 -0
- synapse_sdk/utils/converters/yolo/__init__.py +0 -0
- synapse_sdk/utils/converters/yolo/from_dm.py +384 -0
- synapse_sdk/utils/converters/yolo/to_dm.py +267 -0
- synapse_sdk/utils/dataset.py +46 -0
- synapse_sdk/utils/encryption.py +158 -0
- synapse_sdk/utils/file/__init__.py +39 -0
- synapse_sdk/utils/file/archive.py +32 -0
- synapse_sdk/utils/file/checksum.py +56 -0
- synapse_sdk/utils/file/chunking.py +31 -0
- synapse_sdk/utils/file/download.py +385 -0
- synapse_sdk/utils/file/encoding.py +40 -0
- synapse_sdk/utils/file/io.py +22 -0
- synapse_sdk/utils/file/video/__init__.py +29 -0
- synapse_sdk/utils/file/video/transcode.py +307 -0
- synapse_sdk/utils/{file.py → file.py.backup} +84 -2
- synapse_sdk/utils/http.py +138 -0
- synapse_sdk/utils/network.py +293 -0
- synapse_sdk/utils/storage/__init__.py +36 -2
- synapse_sdk/utils/storage/providers/__init__.py +141 -0
- synapse_sdk/utils/storage/providers/file_system.py +134 -0
- synapse_sdk/utils/storage/providers/http.py +190 -0
- synapse_sdk/utils/storage/providers/s3.py +54 -6
- synapse_sdk/utils/storage/providers/sftp.py +31 -0
- synapse_sdk/utils/storage/registry.py +6 -0
- synapse_sdk-2025.11.7.dist-info/METADATA +122 -0
- synapse_sdk-2025.11.7.dist-info/RECORD +386 -0
- {synapse_sdk-1.0.0a35.dist-info → synapse_sdk-2025.11.7.dist-info}/WHEEL +1 -1
- synapse_sdk/clients/backend/dataset.py +0 -102
- synapse_sdk/plugins/categories/upload/actions/upload.py +0 -293
- synapse_sdk-1.0.0a35.dist-info/METADATA +0 -47
- synapse_sdk-1.0.0a35.dist-info/RECORD +0 -137
- {synapse_sdk-1.0.0a35.dist-info → synapse_sdk-2025.11.7.dist-info}/entry_points.txt +0 -0
- {synapse_sdk-1.0.0a35.dist-info → synapse_sdk-2025.11.7.dist-info}/licenses/LICENSE +0 -0
- {synapse_sdk-1.0.0a35.dist-info → synapse_sdk-2025.11.7.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Dict, List
|
|
4
|
+
|
|
5
|
+
from ..base import FileDiscoveryStrategy
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class FlatFileDiscoveryStrategy(FileDiscoveryStrategy):
|
|
9
|
+
"""Non-recursive file discovery strategy."""
|
|
10
|
+
|
|
11
|
+
def discover(self, path: Path, recursive: bool) -> List[Path]:
    """Discover the regular files that are direct children of ``path``.

    Args:
        path: Directory to scan.
        recursive: Not used by this strategy; only the immediate children
            of ``path`` are ever inspected.

    Returns:
        The direct child files of ``path`` in sorted order, excluding the
        OS-generated files ``.DS_Store``, ``Thumbs.db`` and ``desktop.ini``.
    """
    # OS housekeeping files that must never be treated as data files.
    excluded_files = {'.DS_Store', 'Thumbs.db', 'desktop.ini'}

    # glob() yields entries in arbitrary filesystem order; sorting makes the
    # discovery result reproducible across runs and platforms.
    return sorted(
        file_path
        for file_path in path.glob('*')
        if file_path.is_file() and file_path.name not in excluded_files
    )
|
|
18
|
+
|
|
19
|
+
def organize(self, files: List[Path], specs: Dict, metadata: Dict, type_dirs: Dict = None) -> List[Dict]:
    """Group files into data units keyed by their shared file stem.

    Files are attributed to a spec when they are direct children of that
    spec's directory. Files with the same stem across spec directories form
    one group, and a group is emitted only when every required spec is
    present in it.

    Args:
        files: Candidate file paths.
        specs: Iterable of spec mappings; each is read for ``'name'`` and the
            optional ``'is_required'`` flag (treated as False when absent).
        metadata: Raw metadata mapping, handed to ``_build_metadata_index``.
        type_dirs: Mapping of spec name to its directory (``Path`` or path
            string). When ``None``, directories are inferred by looking for
            a path component equal to the spec name in ``files``.

    Returns:
        A list of ``{'files': {spec_name: Path}, 'meta': {...}}`` dicts,
        ordered by file stem. Empty when no spec directory is known.
    """
    organized_files = []

    if type_dirs is None:
        # Fallback: infer each spec's directory from the first file whose
        # path has a component named after the spec.
        type_dirs = {}
        for spec in specs:
            spec_name = spec['name']
            for file_path in files:
                path_parts = file_path.parts
                if spec_name not in path_parts:
                    continue
                spec_index = path_parts.index(spec_name)
                spec_dir = Path(*path_parts[: spec_index + 1])
                # is_dir() is already False for a non-existent path.
                if spec_dir.is_dir():
                    type_dirs[spec_name] = spec_dir
                    break

    if not type_dirs:
        return organized_files

    # Build the lookup index once so per-group metadata matching stays cheap.
    metadata_index = self._build_metadata_index(metadata)

    # Normalise directories once: a plain string would never compare equal to
    # ``file_path.parent`` and would silently drop every file of that spec.
    spec_dirs = [(spec_name, Path(dir_path)) for spec_name, dir_path in type_dirs.items()]
    required_specs = [spec['name'] for spec in specs if spec.get('is_required', False)]

    # stem -> {spec_name: chosen file}
    dataset_files = {}
    for file_path in files:
        for spec_name, dir_path in spec_dirs:
            if file_path.parent != dir_path:  # only direct children count
                continue

            group = dataset_files.setdefault(file_path.stem, {})
            existing_file = group.get(spec_name)
            if existing_file is None:
                group[spec_name] = file_path
                continue

            # Same stem seen twice for one spec: keep the most recently
            # modified file. stat() is only paid for on such collisions.
            try:
                if file_path.stat().st_mtime > existing_file.stat().st_mtime:
                    group[spec_name] = file_path
            except OSError:
                # Best effort: if either stat fails, keep the existing file.
                pass

    for file_name, files_dict in sorted(dataset_files.items()):
        # Skip groups that are missing any required spec.
        if not all(required in files_dict for required in required_specs):
            continue

        # Tally stems and lower-cased extensions across the group's files.
        file_stems = {}
        file_extensions = {}
        for file_path in files_dict.values():
            stem = file_path.stem
            ext = file_path.suffix.lower()
            if stem:
                file_stems[stem] = file_stems.get(stem, 0) + 1
            if ext:
                file_extensions[ext] = file_extensions.get(ext, 0) + 1

        # Most common value wins; on a tie max() keeps the first one seen.
        original_stem = max(file_stems, key=file_stems.get) if file_stems else file_name
        origin_file_extension = max(file_extensions, key=file_extensions.get) if file_extensions else ''

        meta_data = {
            'origin_file_stem': original_stem,
            'origin_file_extension': origin_file_extension,
            # Naive local timestamp, as produced by datetime.now().
            'created_at': datetime.now().isoformat(),
        }

        if metadata_index:
            matched_metadata = self._find_matching_metadata_optimized(file_name, files_dict, metadata_index)
            if matched_metadata:
                meta_data.update(matched_metadata)

        organized_files.append({'files': files_dict, 'meta': meta_data})

    return organized_files
|
|
119
|
+
|
|
120
|
+
def _build_metadata_index(self, metadata: Dict) -> Dict:
|
|
121
|
+
"""Build metadata index for faster lookups."""
|
|
122
|
+
if not metadata:
|
|
123
|
+
return {}
|
|
124
|
+
|
|
125
|
+
metadata_index = {'exact_stem': {}, 'exact_name': {}, 'stem_lookup': {}, 'partial_paths': {}, 'full_paths': {}}
|
|
126
|
+
|
|
127
|
+
for meta_key, meta_value in metadata.items():
|
|
128
|
+
meta_path = Path(meta_key)
|
|
129
|
+
|
|
130
|
+
# Index by stem
|
|
131
|
+
stem = meta_path.stem
|
|
132
|
+
if stem:
|
|
133
|
+
metadata_index['exact_stem'][stem] = meta_value
|
|
134
|
+
metadata_index['stem_lookup'][stem] = meta_value
|
|
135
|
+
|
|
136
|
+
# Index by full name
|
|
137
|
+
name = meta_path.name
|
|
138
|
+
if name:
|
|
139
|
+
metadata_index['exact_name'][name] = meta_value
|
|
140
|
+
|
|
141
|
+
# Index for partial path matching
|
|
142
|
+
metadata_index['partial_paths'][meta_key] = meta_value
|
|
143
|
+
|
|
144
|
+
# Index for full path matching
|
|
145
|
+
metadata_index['full_paths'][meta_key] = meta_value
|
|
146
|
+
|
|
147
|
+
return metadata_index
|
|
148
|
+
|
|
149
|
+
def _find_matching_metadata_optimized(self, file_name: str, files_dict: Dict, metadata_index: Dict) -> Dict:
|
|
150
|
+
"""Find matching metadata using optimized index lookups."""
|
|
151
|
+
if not metadata_index:
|
|
152
|
+
return {}
|
|
153
|
+
|
|
154
|
+
# Strategy 1: Exact stem match (O(1) lookup)
|
|
155
|
+
if file_name in metadata_index['exact_stem']:
|
|
156
|
+
return metadata_index['exact_stem'][file_name]
|
|
157
|
+
|
|
158
|
+
# Strategy 2: Exact filename match with extension (O(1) lookup)
|
|
159
|
+
sample_file = list(files_dict.values())[0] if files_dict else None
|
|
160
|
+
if sample_file:
|
|
161
|
+
full_filename = f'{file_name}{sample_file.suffix}'
|
|
162
|
+
if full_filename in metadata_index['exact_name']:
|
|
163
|
+
return metadata_index['exact_name'][full_filename]
|
|
164
|
+
|
|
165
|
+
# Try sample file name
|
|
166
|
+
sample_filename = sample_file.name
|
|
167
|
+
if sample_filename in metadata_index['exact_name']:
|
|
168
|
+
return metadata_index['exact_name'][sample_filename]
|
|
169
|
+
|
|
170
|
+
# Strategy 3: Stem lookup (already optimized above)
|
|
171
|
+
# This is covered by exact_stem lookup
|
|
172
|
+
|
|
173
|
+
# Strategy 4 & 5: Partial and full path matching (fallback to original logic for complex cases)
|
|
174
|
+
if sample_file:
|
|
175
|
+
file_path_str = str(sample_file)
|
|
176
|
+
file_path_posix = sample_file.as_posix()
|
|
177
|
+
|
|
178
|
+
# Check partial paths
|
|
179
|
+
for meta_key in metadata_index['partial_paths']:
|
|
180
|
+
if (
|
|
181
|
+
meta_key in file_path_str
|
|
182
|
+
or meta_key in file_path_posix
|
|
183
|
+
or file_path_str in meta_key
|
|
184
|
+
or file_path_posix in meta_key
|
|
185
|
+
):
|
|
186
|
+
return metadata_index['partial_paths'][meta_key]
|
|
187
|
+
|
|
188
|
+
return {}
|
|
189
|
+
|
|
190
|
+
def _find_matching_metadata(self, file_name: str, files_dict: Dict, metadata: Dict) -> Dict:
    """Find matching metadata by scanning the raw metadata mapping.

    Matching priority:
        1. ``file_name`` is itself a metadata key.
        2. ``file_name`` plus the sample file's suffix, or the sample file's
           name, is a metadata key.
        3. The stem of a metadata key equals ``file_name``.
        4. A component of the sample file's path overlaps a metadata key and
           the key's stem/name also agrees with the file or that component.
        5. A metadata key and the sample file's full path contain one another.

    Args:
        file_name: Stem used to identify the group of files.
        files_dict: Mapping whose values are the group's file paths; the first
            value serves as the sample file for suffix/path information.
        metadata: Mapping of metadata key (file name or path text) to value.

    Returns:
        The first matching metadata value, or an empty dict when none matches.
    """
    if not metadata:
        return {}

    sample_file = next(iter(files_dict.values())) if files_dict else None

    # Strategy 1: the stem is a key as-is.
    if file_name in metadata:
        return metadata[file_name]

    # Strategy 2: key is a file name including the extension.
    if sample_file:
        for candidate in (f'{file_name}{sample_file.suffix}', sample_file.name):
            if candidate in metadata:
                return metadata[candidate]

    # Strategy 3: key is path-like and its stem equals the file stem.
    for meta_key, meta_value in metadata.items():
        if Path(meta_key).stem == file_name:
            return meta_value

    # The path-based strategies below need a sample file.
    if not sample_file:
        return {}

    # Strategy 4: a path component overlaps the key, validated by stem/name.
    sample_name = sample_file.name
    sample_parts = sample_file.parts
    for meta_key, meta_value in metadata.items():
        meta_path = Path(meta_key)
        meta_text = str(meta_path)
        for part in sample_parts:
            overlaps = part in meta_text or meta_text in part
            if overlaps and (
                meta_path.stem == file_name or meta_path.name == sample_name or part == meta_path.stem
            ):
                return meta_value

    # Strategy 5: key and full path contain one another (covers equality too).
    path_variants = (str(sample_file), sample_file.as_posix())
    for meta_key, meta_value in metadata.items():
        if not meta_key:
            # An empty key is a substring of every path and would match any file.
            continue
        if any(meta_key in variant or variant in meta_key for variant in path_variants):
            return meta_value

    # No match found
    return {}
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Dict, List
|
|
4
|
+
|
|
5
|
+
from ..base import FileDiscoveryStrategy
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class RecursiveFileDiscoveryStrategy(FileDiscoveryStrategy):
|
|
9
|
+
"""Recursive file discovery strategy."""
|
|
10
|
+
|
|
11
|
+
def discover(self, path: Path, recursive: bool) -> List[Path]:
    """Discover files recursively in the given path.

    System entries (NAS thumbnail/recycle folders, ``.DS_Store``,
    ``Thumbs.db``) are skipped: a file is dropped when any path component
    *below* ``path`` carries one of the excluded names.

    Args:
        path: Directory to search.
        recursive: Not consulted by this strategy; the search always descends
            into sub-directories.

    Returns:
        Every regular file found under ``path`` that is not excluded.
    """
    excluded_names = {'@eaDir', '.@__thumb', '@Recycle', '#recycle', '.DS_Store', 'Thumbs.db', '.synology'}

    def is_excluded(file_path: Path) -> bool:
        """Check whether a component below the search root is excluded."""
        # Only look at the part of the path produced by the traversal, so that
        # the location of the search root itself cannot hide all of its files.
        return not excluded_names.isdisjoint(file_path.relative_to(path).parts)

    return [file_path for file_path in path.rglob('*') if file_path.is_file() and not is_excluded(file_path)]
|
|
21
|
+
|
|
22
|
+
def organize(self, files: List[Path], specs: Dict, metadata: Dict, type_dirs: Dict = None) -> List[Dict]:
    """Organize files according to specifications with metadata.

    Files are assigned to the first spec directory that contains them, grouped
    by their location relative to that directory plus their stem, and one
    entry is produced for every group that holds a file for each required spec.

    Args:
        files: Candidate file paths.
        specs: Spec definitions, iterated as dicts with a ``'name'`` entry and
            an optional ``'is_required'`` flag.
        metadata: Raw metadata mapping handed to ``_build_metadata_index``.
        type_dirs: Mapping of spec name to its directory. When ``None`` it is
            inferred by looking for a path component equal to the spec name
            that is an existing directory.

    Returns:
        A list of ``{'files': ..., 'meta': ...}`` dicts ordered by dataset key;
        empty when no spec directory is known.
    """
    organized_files = []

    # Use provided type_dirs or create fallback mapping from the file paths.
    if type_dirs is None:
        type_dirs = {}
        for spec in specs:
            spec_name = spec['name']
            for file_path in files:
                path_parts = file_path.parts
                if spec_name not in path_parts:
                    continue
                # Rebuild the path up to and including the spec-named component.
                spec_dir = Path(*path_parts[: path_parts.index(spec_name) + 1])
                if spec_dir.is_dir():
                    type_dirs[spec_name] = spec_dir
                    break

    if not type_dirs:
        return organized_files

    # Build the metadata index once so per-group lookups stay cheap.
    metadata_index = self._build_metadata_index(metadata)

    dataset_files = {}
    required_specs = [spec['name'] for spec in specs if spec.get('is_required', False)]

    for file_path in files:
        for spec_name, dir_path in type_dirs.items():
            try:
                relative_path = file_path.relative_to(dir_path)
            except ValueError:
                # File is not under this directory
                continue

            # Key = sub-directory (if any) + stem, so files of different specs
            # that sit at the same relative location are grouped together.
            # NOTE(review): 'a/b_c' and 'a_b/c' produce the same key - confirm
            # whether such layouts need to be told apart.
            if relative_path.parent != Path('.'):
                dataset_key = f'{relative_path.parent}_{file_path.stem}'
            else:
                dataset_key = file_path.stem

            group = dataset_files.setdefault(dataset_key, {})
            if spec_name not in group:
                group[spec_name] = file_path
            else:
                # Keep the most recent file - only stat when needed
                existing_file = group[spec_name]
                try:
                    if file_path.stat().st_mtime > existing_file.stat().st_mtime:
                        group[spec_name] = file_path
                except OSError:
                    # If stat fails, keep existing file
                    pass

            # Found matching directory, move to next file
            break

    # Emit only the groups that contain every required spec; optional files
    # that landed in the same group are carried along.
    for dataset_key, files_dict in sorted(dataset_files.items()):
        if not all(req in files_dict for req in required_specs):
            continue

        # Tally stems and lower-cased extensions of the files in the group.
        file_stems = {}
        file_extensions = {}
        for file_path in files_dict.values():
            stem = file_path.stem
            ext = file_path.suffix.lower()
            if stem:
                file_stems[stem] = file_stems.get(stem, 0) + 1
            if ext:
                file_extensions[ext] = file_extensions.get(ext, 0) + 1

        # Use the most common stem / extension of the group.
        original_stem = max(file_stems, key=file_stems.get) if file_stems else dataset_key
        origin_file_extension = max(file_extensions, key=file_extensions.get) if file_extensions else ''

        meta_data = {
            'origin_file_stem': original_stem,
            'origin_file_extension': origin_file_extension,
            'created_at': datetime.now().isoformat(),
            'dataset_key': dataset_key,  # Add dataset key for debugging
        }

        # Add metadata if available - using optimized index lookup
        if metadata_index:
            matched_metadata = self._find_matching_metadata_optimized(original_stem, files_dict, metadata_index)
            if matched_metadata:
                meta_data.update(matched_metadata)

        organized_files.append({'files': files_dict, 'meta': meta_data})

    return organized_files
|
|
142
|
+
|
|
143
|
+
def _build_metadata_index(self, metadata: Dict) -> Dict:
    """Build metadata index for faster lookups.

    Each metadata key is interpreted as a path and registered under its stem
    (``exact_stem`` / ``stem_lookup``), its file name (``exact_name``) and its
    original text (``partial_paths`` / ``full_paths``).

    A key that literally equals the stem (or name) takes precedence over a
    path-like key that merely shares that stem (or name), so that exact
    matches keep the highest priority. Empty keys are not indexed because an
    empty string is a substring of every path.

    Args:
        metadata: Mapping of metadata key (file name or path text) to value.

    Returns:
        The index dict with its five tables, or an empty dict when
        ``metadata`` is empty.
    """
    if not metadata:
        return {}

    metadata_index = {'exact_stem': {}, 'exact_name': {}, 'stem_lookup': {}, 'partial_paths': {}, 'full_paths': {}}
    literal_stems = set()
    literal_names = set()

    for meta_key, meta_value in metadata.items():
        if not meta_key:
            continue

        meta_path = Path(meta_key)

        # Index by stem; a literal stem key is never replaced by a derived one.
        stem = meta_path.stem
        stem_is_literal = meta_key == stem
        if stem and (stem_is_literal or stem not in literal_stems):
            metadata_index['exact_stem'][stem] = meta_value
            metadata_index['stem_lookup'][stem] = meta_value
            if stem_is_literal:
                literal_stems.add(stem)

        # Index by full name, with the same precedence rule.
        name = meta_path.name
        name_is_literal = meta_key == name
        if name and (name_is_literal or name not in literal_names):
            metadata_index['exact_name'][name] = meta_value
            if name_is_literal:
                literal_names.add(name)

        # Index for partial and full path matching (keyed by the raw text).
        metadata_index['partial_paths'][meta_key] = meta_value
        metadata_index['full_paths'][meta_key] = meta_value

    return metadata_index
|
|
171
|
+
|
|
172
|
+
def _find_matching_metadata_optimized(self, file_name: str, files_dict: Dict, metadata_index: Dict) -> Dict:
    """Find the metadata entry for a file group using the pre-built index.

    Lookup order:
        1. ``file_name`` in the ``exact_stem`` table.
        2. ``file_name`` plus the sample file's suffix, then the sample file's
           own name, in the ``exact_name`` table.
        3. Substring comparison between every ``partial_paths`` key and the
           sample file's path (native and POSIX spelling, in both directions).

    Args:
        file_name: Stem used to identify the group of files.
        files_dict: Mapping whose values are the group's file paths; the first
            value serves as the sample file for suffix/path information.
        metadata_index: Index holding the ``exact_stem``, ``exact_name`` and
            ``partial_paths`` tables.

    Returns:
        The matching metadata value, or an empty dict when nothing matches.
    """
    if not metadata_index:
        return {}

    # Strategy 1: direct stem lookup.
    stem_table = metadata_index['exact_stem']
    if file_name in stem_table:
        return stem_table[file_name]

    # Every remaining strategy needs a sample file from the group.
    sample_file = next(iter(files_dict.values())) if files_dict else None
    if not sample_file:
        return {}

    # Strategy 2: file name (with extension) lookup.
    name_table = metadata_index['exact_name']
    for candidate in (f'{file_name}{sample_file.suffix}', sample_file.name):
        if candidate in name_table:
            return name_table[candidate]

    # Strategy 4 & 5: substring comparison against the sample file's path.
    path_variants = (str(sample_file), sample_file.as_posix())
    for meta_key, meta_value in metadata_index['partial_paths'].items():
        if not meta_key:
            # An empty key is a substring of every path and would match any file.
            continue
        if any(meta_key in variant or variant in meta_key for variant in path_variants):
            return meta_value

    return {}
|
|
212
|
+
|
|
213
|
+
def _find_matching_metadata(self, file_name: str, files_dict: Dict, metadata: Dict) -> Dict:
    """Find matching metadata by scanning the raw metadata mapping.

    Matching priority:
        1. ``file_name`` is itself a metadata key.
        2. ``file_name`` plus the sample file's suffix, or the sample file's
           name, is a metadata key.
        3. The stem of a metadata key equals ``file_name``.
        4. A component of the sample file's path overlaps a metadata key and
           the key's stem/name also agrees with the file or that component.
        5. A metadata key and the sample file's full path contain one another.

    Args:
        file_name: Stem used to identify the group of files.
        files_dict: Mapping whose values are the group's file paths; the first
            value serves as the sample file for suffix/path information.
        metadata: Mapping of metadata key (file name or path text) to value.

    Returns:
        The first matching metadata value, or an empty dict when none matches.
    """
    if not metadata:
        return {}

    sample_file = next(iter(files_dict.values())) if files_dict else None

    # Strategy 1: the stem is a key as-is.
    if file_name in metadata:
        return metadata[file_name]

    # Strategy 2: key is a file name including the extension.
    if sample_file:
        for candidate in (f'{file_name}{sample_file.suffix}', sample_file.name):
            if candidate in metadata:
                return metadata[candidate]

    # Strategy 3: key is path-like and its stem equals the file stem.
    for meta_key, meta_value in metadata.items():
        if Path(meta_key).stem == file_name:
            return meta_value

    # The path-based strategies below need a sample file.
    if not sample_file:
        return {}

    # Strategy 4: a path component overlaps the key, validated by stem/name.
    sample_name = sample_file.name
    sample_parts = sample_file.parts
    for meta_key, meta_value in metadata.items():
        meta_path = Path(meta_key)
        meta_text = str(meta_path)
        for part in sample_parts:
            overlaps = part in meta_text or meta_text in part
            if overlaps and (
                meta_path.stem == file_name or meta_path.name == sample_name or part == meta_path.stem
            ):
                return meta_value

    # Strategy 5: key and full path contain one another (covers equality too).
    path_variants = (str(sample_file), sample_file.as_posix())
    for meta_key, meta_value in metadata.items():
        if not meta_key:
            # An empty key is a substring of every path and would match any file.
            continue
        if any(meta_key in variant or variant in meta_key for variant in path_variants):
            return meta_value

    # No match found
    return {}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Metadata strategy implementations
|