synapse-sdk 1.0.0a13__py3-none-any.whl → 2025.11.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synapse-sdk might be problematic. Click here for more details.

Files changed (339):
  1. synapse_sdk/__init__.py +24 -0
  2. synapse_sdk/cli/__init__.py +310 -5
  3. synapse_sdk/cli/alias/__init__.py +22 -0
  4. synapse_sdk/cli/alias/create.py +36 -0
  5. synapse_sdk/cli/alias/dataclass.py +31 -0
  6. synapse_sdk/cli/alias/default.py +16 -0
  7. synapse_sdk/cli/alias/delete.py +15 -0
  8. synapse_sdk/cli/alias/list.py +19 -0
  9. synapse_sdk/cli/alias/read.py +15 -0
  10. synapse_sdk/cli/alias/update.py +17 -0
  11. synapse_sdk/cli/alias/utils.py +61 -0
  12. synapse_sdk/cli/code_server.py +687 -0
  13. synapse_sdk/cli/config.py +440 -0
  14. synapse_sdk/cli/devtools.py +90 -0
  15. synapse_sdk/cli/plugin/__init__.py +33 -0
  16. synapse_sdk/cli/{create_plugin.py → plugin/create.py} +2 -2
  17. synapse_sdk/cli/plugin/publish.py +45 -0
  18. synapse_sdk/{plugins/cli → cli/plugin}/run.py +12 -5
  19. synapse_sdk/clients/agent/__init__.py +9 -3
  20. synapse_sdk/clients/agent/container.py +133 -0
  21. synapse_sdk/clients/agent/core.py +19 -0
  22. synapse_sdk/clients/agent/ray.py +298 -9
  23. synapse_sdk/clients/backend/__init__.py +41 -12
  24. synapse_sdk/clients/backend/annotation.py +13 -5
  25. synapse_sdk/clients/backend/core.py +59 -0
  26. synapse_sdk/clients/backend/data_collection.py +186 -0
  27. synapse_sdk/clients/backend/hitl.py +17 -0
  28. synapse_sdk/clients/backend/integration.py +19 -4
  29. synapse_sdk/clients/backend/ml.py +10 -7
  30. synapse_sdk/clients/backend/models.py +78 -0
  31. synapse_sdk/clients/base.py +381 -34
  32. synapse_sdk/clients/ray/serve.py +2 -0
  33. synapse_sdk/clients/validators/collections.py +31 -0
  34. synapse_sdk/devtools/config.py +94 -0
  35. synapse_sdk/devtools/docs/.gitignore +20 -0
  36. synapse_sdk/devtools/docs/README.md +41 -0
  37. synapse_sdk/devtools/docs/blog/2019-05-28-first-blog-post.md +12 -0
  38. synapse_sdk/devtools/docs/blog/2019-05-29-long-blog-post.md +44 -0
  39. synapse_sdk/devtools/docs/blog/2021-08-01-mdx-blog-post.mdx +24 -0
  40. synapse_sdk/devtools/docs/blog/2021-08-26-welcome/docusaurus-plushie-banner.jpeg +0 -0
  41. synapse_sdk/devtools/docs/blog/2021-08-26-welcome/index.md +29 -0
  42. synapse_sdk/devtools/docs/blog/authors.yml +25 -0
  43. synapse_sdk/devtools/docs/blog/tags.yml +19 -0
  44. synapse_sdk/devtools/docs/docs/api/clients/agent.md +43 -0
  45. synapse_sdk/devtools/docs/docs/api/clients/annotation-mixin.md +378 -0
  46. synapse_sdk/devtools/docs/docs/api/clients/backend.md +420 -0
  47. synapse_sdk/devtools/docs/docs/api/clients/base.md +257 -0
  48. synapse_sdk/devtools/docs/docs/api/clients/core-mixin.md +477 -0
  49. synapse_sdk/devtools/docs/docs/api/clients/data-collection-mixin.md +422 -0
  50. synapse_sdk/devtools/docs/docs/api/clients/hitl-mixin.md +554 -0
  51. synapse_sdk/devtools/docs/docs/api/clients/index.md +391 -0
  52. synapse_sdk/devtools/docs/docs/api/clients/integration-mixin.md +571 -0
  53. synapse_sdk/devtools/docs/docs/api/clients/ml-mixin.md +578 -0
  54. synapse_sdk/devtools/docs/docs/api/clients/ray.md +342 -0
  55. synapse_sdk/devtools/docs/docs/api/index.md +52 -0
  56. synapse_sdk/devtools/docs/docs/api/plugins/categories.md +43 -0
  57. synapse_sdk/devtools/docs/docs/api/plugins/models.md +114 -0
  58. synapse_sdk/devtools/docs/docs/api/plugins/utils.md +328 -0
  59. synapse_sdk/devtools/docs/docs/categories.md +0 -0
  60. synapse_sdk/devtools/docs/docs/cli-usage.md +280 -0
  61. synapse_sdk/devtools/docs/docs/concepts/index.md +38 -0
  62. synapse_sdk/devtools/docs/docs/configuration.md +83 -0
  63. synapse_sdk/devtools/docs/docs/contributing.md +306 -0
  64. synapse_sdk/devtools/docs/docs/examples/index.md +29 -0
  65. synapse_sdk/devtools/docs/docs/faq.md +179 -0
  66. synapse_sdk/devtools/docs/docs/features/converters/index.md +455 -0
  67. synapse_sdk/devtools/docs/docs/features/index.md +24 -0
  68. synapse_sdk/devtools/docs/docs/features/utils/file.md +415 -0
  69. synapse_sdk/devtools/docs/docs/features/utils/network.md +378 -0
  70. synapse_sdk/devtools/docs/docs/features/utils/storage.md +57 -0
  71. synapse_sdk/devtools/docs/docs/features/utils/types.md +51 -0
  72. synapse_sdk/devtools/docs/docs/installation.md +94 -0
  73. synapse_sdk/devtools/docs/docs/introduction.md +47 -0
  74. synapse_sdk/devtools/docs/docs/plugins/categories/neural-net-plugins/train-action-overview.md +814 -0
  75. synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/pre-annotation-plugin-overview.md +198 -0
  76. synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/to-task-action-development.md +1645 -0
  77. synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/to-task-overview.md +717 -0
  78. synapse_sdk/devtools/docs/docs/plugins/categories/pre-annotation-plugins/to-task-template-development.md +1380 -0
  79. synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-action.md +948 -0
  80. synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-overview.md +544 -0
  81. synapse_sdk/devtools/docs/docs/plugins/categories/upload-plugins/upload-plugin-template.md +766 -0
  82. synapse_sdk/devtools/docs/docs/plugins/export-plugins.md +1092 -0
  83. synapse_sdk/devtools/docs/docs/plugins/plugins.md +852 -0
  84. synapse_sdk/devtools/docs/docs/quickstart.md +78 -0
  85. synapse_sdk/devtools/docs/docs/troubleshooting.md +519 -0
  86. synapse_sdk/devtools/docs/docs/tutorial-basics/_category_.json +8 -0
  87. synapse_sdk/devtools/docs/docs/tutorial-basics/congratulations.md +23 -0
  88. synapse_sdk/devtools/docs/docs/tutorial-basics/create-a-blog-post.md +34 -0
  89. synapse_sdk/devtools/docs/docs/tutorial-basics/create-a-document.md +57 -0
  90. synapse_sdk/devtools/docs/docs/tutorial-basics/create-a-page.md +43 -0
  91. synapse_sdk/devtools/docs/docs/tutorial-basics/deploy-your-site.md +31 -0
  92. synapse_sdk/devtools/docs/docs/tutorial-basics/markdown-features.mdx +152 -0
  93. synapse_sdk/devtools/docs/docs/tutorial-extras/_category_.json +7 -0
  94. synapse_sdk/devtools/docs/docs/tutorial-extras/img/docsVersionDropdown.png +0 -0
  95. synapse_sdk/devtools/docs/docs/tutorial-extras/img/localeDropdown.png +0 -0
  96. synapse_sdk/devtools/docs/docs/tutorial-extras/manage-docs-versions.md +55 -0
  97. synapse_sdk/devtools/docs/docs/tutorial-extras/translate-your-site.md +88 -0
  98. synapse_sdk/devtools/docs/docusaurus.config.ts +148 -0
  99. synapse_sdk/devtools/docs/i18n/ko/code.json +325 -0
  100. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/agent.md +43 -0
  101. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/annotation-mixin.md +289 -0
  102. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/backend.md +420 -0
  103. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/base.md +257 -0
  104. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/core-mixin.md +417 -0
  105. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/data-collection-mixin.md +356 -0
  106. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/hitl-mixin.md +192 -0
  107. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/index.md +391 -0
  108. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/integration-mixin.md +479 -0
  109. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/ml-mixin.md +284 -0
  110. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/clients/ray.md +342 -0
  111. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/index.md +52 -0
  112. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/api/plugins/models.md +114 -0
  113. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/categories.md +0 -0
  114. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/cli-usage.md +280 -0
  115. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/concepts/index.md +38 -0
  116. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/configuration.md +83 -0
  117. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/contributing.md +306 -0
  118. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/examples/index.md +29 -0
  119. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/faq.md +179 -0
  120. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/features/converters/index.md +30 -0
  121. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/features/index.md +24 -0
  122. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/features/utils/file.md +415 -0
  123. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/features/utils/network.md +378 -0
  124. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/features/utils/storage.md +60 -0
  125. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/features/utils/types.md +51 -0
  126. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/installation.md +94 -0
  127. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/introduction.md +47 -0
  128. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/neural-net-plugins/train-action-overview.md +815 -0
  129. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/pre-annotation-plugin-overview.md +198 -0
  130. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/to-task-action-development.md +1645 -0
  131. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/to-task-overview.md +717 -0
  132. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/pre-annotation-plugins/to-task-template-development.md +1380 -0
  133. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-action.md +948 -0
  134. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-overview.md +544 -0
  135. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/categories/upload-plugins/upload-plugin-template.md +766 -0
  136. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/export-plugins.md +1092 -0
  137. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/plugins/plugins.md +117 -0
  138. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/quickstart.md +78 -0
  139. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current/troubleshooting.md +519 -0
  140. synapse_sdk/devtools/docs/i18n/ko/docusaurus-plugin-content-docs/current.json +34 -0
  141. synapse_sdk/devtools/docs/i18n/ko/docusaurus-theme-classic/footer.json +42 -0
  142. synapse_sdk/devtools/docs/i18n/ko/docusaurus-theme-classic/navbar.json +18 -0
  143. synapse_sdk/devtools/docs/package-lock.json +18784 -0
  144. synapse_sdk/devtools/docs/package.json +48 -0
  145. synapse_sdk/devtools/docs/sidebars.ts +122 -0
  146. synapse_sdk/devtools/docs/src/components/HomepageFeatures/index.tsx +71 -0
  147. synapse_sdk/devtools/docs/src/components/HomepageFeatures/styles.module.css +11 -0
  148. synapse_sdk/devtools/docs/src/css/custom.css +30 -0
  149. synapse_sdk/devtools/docs/src/pages/index.module.css +23 -0
  150. synapse_sdk/devtools/docs/src/pages/index.tsx +21 -0
  151. synapse_sdk/devtools/docs/src/pages/markdown-page.md +7 -0
  152. synapse_sdk/devtools/docs/static/.nojekyll +0 -0
  153. synapse_sdk/devtools/docs/static/img/docusaurus-social-card.jpg +0 -0
  154. synapse_sdk/devtools/docs/static/img/docusaurus.png +0 -0
  155. synapse_sdk/devtools/docs/static/img/favicon.ico +0 -0
  156. synapse_sdk/devtools/docs/static/img/logo.png +0 -0
  157. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_mountain.svg +171 -0
  158. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_react.svg +170 -0
  159. synapse_sdk/devtools/docs/static/img/undraw_docusaurus_tree.svg +40 -0
  160. synapse_sdk/devtools/docs/tsconfig.json +8 -0
  161. synapse_sdk/devtools/server.py +41 -0
  162. synapse_sdk/devtools/streamlit_app/__init__.py +5 -0
  163. synapse_sdk/devtools/streamlit_app/app.py +128 -0
  164. synapse_sdk/devtools/streamlit_app/services/__init__.py +11 -0
  165. synapse_sdk/devtools/streamlit_app/services/job_service.py +233 -0
  166. synapse_sdk/devtools/streamlit_app/services/plugin_service.py +236 -0
  167. synapse_sdk/devtools/streamlit_app/services/serve_service.py +95 -0
  168. synapse_sdk/devtools/streamlit_app/ui/__init__.py +15 -0
  169. synapse_sdk/devtools/streamlit_app/ui/config_tab.py +76 -0
  170. synapse_sdk/devtools/streamlit_app/ui/deployment_tab.py +66 -0
  171. synapse_sdk/devtools/streamlit_app/ui/http_tab.py +125 -0
  172. synapse_sdk/devtools/streamlit_app/ui/jobs_tab.py +573 -0
  173. synapse_sdk/devtools/streamlit_app/ui/serve_tab.py +346 -0
  174. synapse_sdk/devtools/streamlit_app/ui/status_bar.py +118 -0
  175. synapse_sdk/devtools/streamlit_app/utils/__init__.py +40 -0
  176. synapse_sdk/devtools/streamlit_app/utils/json_viewer.py +197 -0
  177. synapse_sdk/devtools/streamlit_app/utils/log_formatter.py +38 -0
  178. synapse_sdk/devtools/streamlit_app/utils/styles.py +241 -0
  179. synapse_sdk/devtools/streamlit_app/utils/ui_components.py +289 -0
  180. synapse_sdk/devtools/streamlit_app.py +10 -0
  181. synapse_sdk/loggers.py +74 -9
  182. synapse_sdk/plugins/README.md +1340 -0
  183. synapse_sdk/plugins/__init__.py +0 -13
  184. synapse_sdk/plugins/categories/base.py +145 -30
  185. synapse_sdk/plugins/categories/data_validation/actions/validation.py +72 -0
  186. synapse_sdk/plugins/categories/data_validation/templates/plugin/validation.py +33 -5
  187. synapse_sdk/plugins/categories/export/actions/__init__.py +3 -0
  188. synapse_sdk/plugins/categories/export/actions/export/__init__.py +28 -0
  189. synapse_sdk/plugins/categories/export/actions/export/action.py +165 -0
  190. synapse_sdk/plugins/categories/export/actions/export/enums.py +113 -0
  191. synapse_sdk/plugins/categories/export/actions/export/exceptions.py +53 -0
  192. synapse_sdk/plugins/categories/export/actions/export/models.py +74 -0
  193. synapse_sdk/plugins/categories/export/actions/export/run.py +195 -0
  194. synapse_sdk/plugins/categories/export/actions/export/utils.py +187 -0
  195. synapse_sdk/plugins/categories/export/templates/config.yaml +21 -0
  196. synapse_sdk/plugins/categories/export/templates/plugin/__init__.py +390 -0
  197. synapse_sdk/plugins/categories/export/templates/plugin/export.py +160 -0
  198. synapse_sdk/plugins/categories/neural_net/actions/deployment.py +29 -14
  199. synapse_sdk/plugins/categories/neural_net/actions/inference.py +13 -1
  200. synapse_sdk/plugins/categories/neural_net/actions/train.py +1084 -38
  201. synapse_sdk/plugins/categories/neural_net/actions/tune.py +534 -0
  202. synapse_sdk/plugins/categories/neural_net/base/__init__.py +0 -0
  203. synapse_sdk/plugins/categories/neural_net/base/inference.py +37 -0
  204. synapse_sdk/plugins/categories/neural_net/templates/config.yaml +30 -5
  205. synapse_sdk/plugins/categories/neural_net/templates/plugin/inference.py +26 -10
  206. synapse_sdk/plugins/categories/pre_annotation/actions/__init__.py +4 -0
  207. synapse_sdk/plugins/categories/pre_annotation/actions/pre_annotation/__init__.py +3 -0
  208. synapse_sdk/plugins/categories/{export/actions/export.py → pre_annotation/actions/pre_annotation/action.py} +4 -4
  209. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/__init__.py +28 -0
  210. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/action.py +145 -0
  211. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/enums.py +269 -0
  212. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/exceptions.py +14 -0
  213. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/factory.py +76 -0
  214. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/models.py +97 -0
  215. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/orchestrator.py +250 -0
  216. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/run.py +64 -0
  217. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/__init__.py +17 -0
  218. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/annotation.py +287 -0
  219. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/base.py +170 -0
  220. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/extraction.py +83 -0
  221. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/metrics.py +87 -0
  222. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/preprocessor.py +127 -0
  223. synapse_sdk/plugins/categories/pre_annotation/actions/to_task/strategies/validation.py +143 -0
  224. synapse_sdk/plugins/categories/pre_annotation/actions/to_task.py +966 -0
  225. synapse_sdk/plugins/categories/pre_annotation/templates/config.yaml +19 -0
  226. synapse_sdk/plugins/categories/pre_annotation/templates/plugin/to_task.py +40 -0
  227. synapse_sdk/plugins/categories/smart_tool/templates/config.yaml +5 -2
  228. synapse_sdk/plugins/categories/upload/__init__.py +0 -0
  229. synapse_sdk/plugins/categories/upload/actions/__init__.py +0 -0
  230. synapse_sdk/plugins/categories/upload/actions/upload/__init__.py +19 -0
  231. synapse_sdk/plugins/categories/upload/actions/upload/action.py +232 -0
  232. synapse_sdk/plugins/categories/upload/actions/upload/context.py +185 -0
  233. synapse_sdk/plugins/categories/upload/actions/upload/enums.py +471 -0
  234. synapse_sdk/plugins/categories/upload/actions/upload/exceptions.py +36 -0
  235. synapse_sdk/plugins/categories/upload/actions/upload/factory.py +138 -0
  236. synapse_sdk/plugins/categories/upload/actions/upload/models.py +203 -0
  237. synapse_sdk/plugins/categories/upload/actions/upload/orchestrator.py +183 -0
  238. synapse_sdk/plugins/categories/upload/actions/upload/registry.py +113 -0
  239. synapse_sdk/plugins/categories/upload/actions/upload/run.py +179 -0
  240. synapse_sdk/plugins/categories/upload/actions/upload/steps/__init__.py +1 -0
  241. synapse_sdk/plugins/categories/upload/actions/upload/steps/base.py +107 -0
  242. synapse_sdk/plugins/categories/upload/actions/upload/steps/cleanup.py +62 -0
  243. synapse_sdk/plugins/categories/upload/actions/upload/steps/collection.py +63 -0
  244. synapse_sdk/plugins/categories/upload/actions/upload/steps/generate.py +84 -0
  245. synapse_sdk/plugins/categories/upload/actions/upload/steps/initialize.py +82 -0
  246. synapse_sdk/plugins/categories/upload/actions/upload/steps/metadata.py +235 -0
  247. synapse_sdk/plugins/categories/upload/actions/upload/steps/organize.py +203 -0
  248. synapse_sdk/plugins/categories/upload/actions/upload/steps/upload.py +97 -0
  249. synapse_sdk/plugins/categories/upload/actions/upload/steps/validate.py +71 -0
  250. synapse_sdk/plugins/categories/upload/actions/upload/strategies/__init__.py +1 -0
  251. synapse_sdk/plugins/categories/upload/actions/upload/strategies/base.py +82 -0
  252. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/__init__.py +1 -0
  253. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/batch.py +39 -0
  254. synapse_sdk/plugins/categories/upload/actions/upload/strategies/data_unit/single.py +29 -0
  255. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/__init__.py +1 -0
  256. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/flat.py +258 -0
  257. synapse_sdk/plugins/categories/upload/actions/upload/strategies/file_discovery/recursive.py +281 -0
  258. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/__init__.py +1 -0
  259. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/excel.py +174 -0
  260. synapse_sdk/plugins/categories/upload/actions/upload/strategies/metadata/none.py +16 -0
  261. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/__init__.py +1 -0
  262. synapse_sdk/plugins/categories/upload/actions/upload/strategies/upload/sync.py +84 -0
  263. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/__init__.py +1 -0
  264. synapse_sdk/plugins/categories/upload/actions/upload/strategies/validation/default.py +60 -0
  265. synapse_sdk/plugins/categories/upload/actions/upload/utils.py +250 -0
  266. synapse_sdk/plugins/categories/upload/templates/README.md +470 -0
  267. synapse_sdk/plugins/categories/upload/templates/config.yaml +33 -0
  268. synapse_sdk/plugins/categories/upload/templates/plugin/__init__.py +294 -0
  269. synapse_sdk/plugins/categories/upload/templates/plugin/upload.py +102 -0
  270. synapse_sdk/plugins/enums.py +3 -1
  271. synapse_sdk/plugins/models.py +140 -16
  272. synapse_sdk/plugins/templates/plugin-config-schema.json +406 -0
  273. synapse_sdk/plugins/templates/schema.json +491 -0
  274. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/config.yaml +1 -0
  275. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/requirements.txt +1 -1
  276. synapse_sdk/plugins/utils/__init__.py +46 -0
  277. synapse_sdk/plugins/utils/actions.py +119 -0
  278. synapse_sdk/plugins/utils/config.py +203 -0
  279. synapse_sdk/plugins/utils/legacy.py +95 -0
  280. synapse_sdk/plugins/utils/ray_gcs.py +66 -0
  281. synapse_sdk/plugins/utils/registry.py +58 -0
  282. synapse_sdk/shared/__init__.py +25 -0
  283. synapse_sdk/shared/enums.py +93 -0
  284. synapse_sdk/types.py +19 -0
  285. synapse_sdk/utils/converters/__init__.py +240 -0
  286. synapse_sdk/utils/converters/coco/__init__.py +0 -0
  287. synapse_sdk/utils/converters/coco/from_dm.py +322 -0
  288. synapse_sdk/utils/converters/coco/to_dm.py +215 -0
  289. synapse_sdk/utils/converters/dm/__init__.py +56 -0
  290. synapse_sdk/utils/converters/dm/from_v1.py +627 -0
  291. synapse_sdk/utils/converters/dm/to_v1.py +367 -0
  292. synapse_sdk/utils/converters/pascal/__init__.py +0 -0
  293. synapse_sdk/utils/converters/pascal/from_dm.py +244 -0
  294. synapse_sdk/utils/converters/pascal/to_dm.py +214 -0
  295. synapse_sdk/utils/converters/yolo/__init__.py +0 -0
  296. synapse_sdk/utils/converters/yolo/from_dm.py +384 -0
  297. synapse_sdk/utils/converters/yolo/to_dm.py +267 -0
  298. synapse_sdk/utils/dataset.py +46 -0
  299. synapse_sdk/utils/encryption.py +158 -0
  300. synapse_sdk/utils/file/__init__.py +39 -0
  301. synapse_sdk/utils/file/archive.py +32 -0
  302. synapse_sdk/utils/file/checksum.py +56 -0
  303. synapse_sdk/utils/file/chunking.py +31 -0
  304. synapse_sdk/utils/file/download.py +385 -0
  305. synapse_sdk/utils/file/encoding.py +40 -0
  306. synapse_sdk/utils/file/io.py +22 -0
  307. synapse_sdk/utils/file/video/__init__.py +29 -0
  308. synapse_sdk/utils/file/video/transcode.py +307 -0
  309. synapse_sdk/utils/file.py.backup +301 -0
  310. synapse_sdk/utils/http.py +138 -0
  311. synapse_sdk/utils/network.py +309 -0
  312. synapse_sdk/utils/storage/__init__.py +72 -0
  313. synapse_sdk/utils/storage/providers/__init__.py +183 -0
  314. synapse_sdk/utils/storage/providers/file_system.py +134 -0
  315. synapse_sdk/utils/storage/providers/gcp.py +13 -0
  316. synapse_sdk/utils/storage/providers/http.py +190 -0
  317. synapse_sdk/utils/storage/providers/s3.py +91 -0
  318. synapse_sdk/utils/storage/providers/sftp.py +47 -0
  319. synapse_sdk/utils/storage/registry.py +17 -0
  320. synapse_sdk-2025.11.7.dist-info/METADATA +122 -0
  321. synapse_sdk-2025.11.7.dist-info/RECORD +386 -0
  322. {synapse_sdk-1.0.0a13.dist-info → synapse_sdk-2025.11.7.dist-info}/WHEEL +1 -1
  323. synapse_sdk/clients/backend/dataset.py +0 -51
  324. synapse_sdk/plugins/categories/import/actions/import.py +0 -10
  325. synapse_sdk/plugins/cli/__init__.py +0 -21
  326. synapse_sdk/plugins/cli/publish.py +0 -37
  327. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/.env +0 -24
  328. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/.env.dist +0 -24
  329. synapse_sdk/plugins/templates/synapse-{{cookiecutter.plugin_code}}-plugin/main.py +0 -4
  330. synapse_sdk/plugins/utils.py +0 -50
  331. synapse_sdk/utils/file.py +0 -87
  332. synapse_sdk/utils/storage.py +0 -91
  333. synapse_sdk-1.0.0a13.dist-info/METADATA +0 -43
  334. synapse_sdk-1.0.0a13.dist-info/RECORD +0 -111
  335. /synapse_sdk/{plugins/categories/import → clients/validators}/__init__.py +0 -0
  336. /synapse_sdk/{plugins/categories/import/actions → devtools}/__init__.py +0 -0
  337. {synapse_sdk-1.0.0a13.dist-info → synapse_sdk-2025.11.7.dist-info}/entry_points.txt +0 -0
  338. {synapse_sdk-1.0.0a13.dist-info → synapse_sdk-2025.11.7.dist-info/licenses}/LICENSE +0 -0
  339. {synapse_sdk-1.0.0a13.dist-info → synapse_sdk-2025.11.7.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,258 @@
1
+ from datetime import datetime
2
+ from pathlib import Path
3
+ from typing import Dict, List
4
+
5
+ from ..base import FileDiscoveryStrategy
6
+
7
+
8
+ class FlatFileDiscoveryStrategy(FileDiscoveryStrategy):
9
+ """Non-recursive file discovery strategy."""
10
+
11
def discover(self, path: Path, recursive: bool) -> List[Path]:
    """Return the regular files that sit directly inside ``path``.

    Only the immediate children of ``path`` are inspected. Well-known
    operating-system housekeeping files are left out of the result.

    Args:
        path: Directory whose direct children are listed.
        recursive: Accepted for signature compatibility; this
            implementation does not read it.

    Returns:
        The matching file paths, in the order the glob yields them.
    """
    # OS-generated clutter that must never be treated as user data.
    system_files = ('.DS_Store', 'Thumbs.db', 'desktop.ini')

    found: List[Path] = []
    for entry in path.glob('*'):
        if not entry.is_file():
            continue
        if entry.name in system_files:
            continue
        found.append(entry)
    return found
18
+
19
def organize(self, files: List[Path], specs: Dict, metadata: Dict, type_dirs: Dict = None) -> List[Dict]:
    """Group files into data units by file stem and attach metadata.

    Files are matched to a spec when their parent directory equals that
    spec's directory (direct children only). Files sharing a stem form one
    group; a group becomes a data unit only if it contains a file for every
    spec marked ``is_required``. Files for other specs are included when
    present.

    Args:
        files: Candidate file paths.
        specs: Iterable of spec mappings; each is read for ``'name'`` and
            the optional ``'is_required'`` flag (treated as False if absent).
        metadata: Mapping of metadata keys to metadata dicts; may be empty.
        type_dirs: Mapping of spec name to its directory. When ``None``, the
            directories are derived from ``files`` by looking for a path
            component equal to the spec name.

    Returns:
        A list of ``{'files': {spec_name: path}, 'meta': {...}}`` dicts,
        ordered by file stem. Empty when no spec directory is known.
    """
    if type_dirs is None:
        # Fallback: locate each spec's directory from the file paths themselves.
        type_dirs = {}
        for spec in specs:
            spec_name = spec['name']
            for file_path in files:
                path_parts = file_path.parts
                if spec_name not in path_parts:
                    continue
                # Rebuild the path up to and including the spec-named component.
                spec_dir = Path(*path_parts[: path_parts.index(spec_name) + 1])
                if spec_dir.exists() and spec_dir.is_dir():
                    type_dirs[spec_name] = spec_dir
                    break

    if not type_dirs:
        return []

    # Build the lookup index once; skip the work when there is no metadata.
    metadata_index = self._build_metadata_index(metadata) if metadata else {}

    required_specs = [spec['name'] for spec in specs if spec.get('is_required', False)]

    # stem -> {spec_name: file_path}
    dataset_files: Dict[str, Dict[str, Path]] = {}
    for file_path in files:
        for spec_name, dir_path in type_dirs.items():
            if file_path.parent != dir_path:  # only direct children count
                continue

            group = dataset_files.setdefault(file_path.stem, {})
            existing_file = group.get(spec_name)
            if existing_file is None:
                group[spec_name] = file_path
                continue

            # Two files with the same stem for one spec: keep the newer one.
            try:
                if file_path.stat().st_mtime > existing_file.stat().st_mtime:
                    group[spec_name] = file_path
            except OSError:
                # Best effort: if either stat fails, keep the existing file.
                pass

    organized_files = []
    for file_name, files_dict in sorted(dataset_files.items()):
        if not all(req in files_dict for req in required_specs):
            continue

        # Pick the most frequent (lower-cased) extension in the group; on a
        # tie the extension seen first wins.
        extension_counts: Dict[str, int] = {}
        for grouped_path in files_dict.values():
            ext = grouped_path.suffix.lower()
            if ext:
                extension_counts[ext] = extension_counts.get(ext, 0) + 1
        origin_file_extension = max(extension_counts, key=extension_counts.get) if extension_counts else ''

        meta_data = {
            # Every file in the group was grouped by this stem, so it is
            # already the original stem.
            'origin_file_stem': file_name,
            'origin_file_extension': origin_file_extension,
            'created_at': datetime.now().isoformat(),
        }

        if metadata_index:
            matched_metadata = self._find_matching_metadata_optimized(file_name, files_dict, metadata_index)
            if matched_metadata:
                meta_data.update(matched_metadata)

        organized_files.append({'files': files_dict, 'meta': meta_data})

    return organized_files
119
+
120
+ def _build_metadata_index(self, metadata: Dict) -> Dict:
121
+ """Build metadata index for faster lookups."""
122
+ if not metadata:
123
+ return {}
124
+
125
+ metadata_index = {'exact_stem': {}, 'exact_name': {}, 'stem_lookup': {}, 'partial_paths': {}, 'full_paths': {}}
126
+
127
+ for meta_key, meta_value in metadata.items():
128
+ meta_path = Path(meta_key)
129
+
130
+ # Index by stem
131
+ stem = meta_path.stem
132
+ if stem:
133
+ metadata_index['exact_stem'][stem] = meta_value
134
+ metadata_index['stem_lookup'][stem] = meta_value
135
+
136
+ # Index by full name
137
+ name = meta_path.name
138
+ if name:
139
+ metadata_index['exact_name'][name] = meta_value
140
+
141
+ # Index for partial path matching
142
+ metadata_index['partial_paths'][meta_key] = meta_value
143
+
144
+ # Index for full path matching
145
+ metadata_index['full_paths'][meta_key] = meta_value
146
+
147
+ return metadata_index
148
+
149
+ def _find_matching_metadata_optimized(self, file_name: str, files_dict: Dict, metadata_index: Dict) -> Dict:
150
+ """Find matching metadata using optimized index lookups."""
151
+ if not metadata_index:
152
+ return {}
153
+
154
+ # Strategy 1: Exact stem match (O(1) lookup)
155
+ if file_name in metadata_index['exact_stem']:
156
+ return metadata_index['exact_stem'][file_name]
157
+
158
+ # Strategy 2: Exact filename match with extension (O(1) lookup)
159
+ sample_file = list(files_dict.values())[0] if files_dict else None
160
+ if sample_file:
161
+ full_filename = f'{file_name}{sample_file.suffix}'
162
+ if full_filename in metadata_index['exact_name']:
163
+ return metadata_index['exact_name'][full_filename]
164
+
165
+ # Try sample file name
166
+ sample_filename = sample_file.name
167
+ if sample_filename in metadata_index['exact_name']:
168
+ return metadata_index['exact_name'][sample_filename]
169
+
170
+ # Strategy 3: Stem lookup (already optimized above)
171
+ # This is covered by exact_stem lookup
172
+
173
+ # Strategy 4 & 5: Partial and full path matching (fallback to original logic for complex cases)
174
+ if sample_file:
175
+ file_path_str = str(sample_file)
176
+ file_path_posix = sample_file.as_posix()
177
+
178
+ # Check partial paths
179
+ for meta_key in metadata_index['partial_paths']:
180
+ if (
181
+ meta_key in file_path_str
182
+ or meta_key in file_path_posix
183
+ or file_path_str in meta_key
184
+ or file_path_posix in meta_key
185
+ ):
186
+ return metadata_index['partial_paths'][meta_key]
187
+
188
+ return {}
189
+
190
+ def _find_matching_metadata(self, file_name: str, files_dict: Dict, metadata: Dict) -> Dict:
191
+ """Find matching metadata using comprehensive pattern matching.
192
+
193
+ Matching priority:
194
+ 1. Exact stem match (highest priority)
195
+ 2. Exact filename match (with extension)
196
+ 3. Metadata key stem matches file stem
197
+ 4. Partial path matching
198
+ 5. Full path matching
199
+ """
200
+ if not metadata:
201
+ return {}
202
+
203
+ # Get sample file for extension and path information
204
+ sample_file = list(files_dict.values())[0] if files_dict else None
205
+
206
+ # Strategy 1: Exact stem match (highest priority)
207
+ if file_name in metadata:
208
+ return metadata[file_name]
209
+
210
+ # Strategy 2: Exact filename match (with extension)
211
+ if sample_file:
212
+ full_filename = f'{file_name}{sample_file.suffix}'
213
+ if full_filename in metadata:
214
+ return metadata[full_filename]
215
+
216
+ # Also try with sample file name
217
+ sample_filename = sample_file.name
218
+ if sample_filename in metadata:
219
+ return metadata[sample_filename]
220
+
221
+ # Strategy 3: Metadata key stem matches file stem
222
+ for meta_key in metadata.keys():
223
+ meta_stem = Path(meta_key).stem
224
+ if meta_stem == file_name:
225
+ return metadata[meta_key]
226
+
227
+ # Strategy 4: Partial path matching
228
+ if sample_file:
229
+ file_path_parts = sample_file.parts
230
+ for meta_key in metadata.keys():
231
+ meta_path = Path(meta_key)
232
+ # Check if any part of the metadata key matches our file path parts
233
+ for part in file_path_parts:
234
+ if part in str(meta_path) or str(meta_path) in part:
235
+ # Additional validation: ensure it's a reasonable match
236
+ if meta_path.stem == file_name or meta_path.name == sample_file.name or part == meta_path.stem:
237
+ return metadata[meta_key]
238
+
239
+ # Strategy 5: Full path matching
240
+ if sample_file:
241
+ full_path_str = str(sample_file)
242
+ full_path_posix = sample_file.as_posix()
243
+
244
+ for meta_key in metadata.keys():
245
+ # Direct path match
246
+ if meta_key == full_path_str or meta_key == full_path_posix:
247
+ return metadata[meta_key]
248
+
249
+ # Relative path match (check if meta_key is contained in our path)
250
+ if meta_key in full_path_str or meta_key in full_path_posix:
251
+ return metadata[meta_key]
252
+
253
+ # Reverse match (check if our path is contained in meta_key)
254
+ if full_path_str in meta_key or full_path_posix in meta_key:
255
+ return metadata[meta_key]
256
+
257
+ # No match found
258
+ return {}
@@ -0,0 +1,281 @@
1
+ from datetime import datetime
2
+ from pathlib import Path
3
+ from typing import Dict, List
4
+
5
+ from ..base import FileDiscoveryStrategy
6
+
7
+
8
class RecursiveFileDiscoveryStrategy(FileDiscoveryStrategy):
    """Recursive file discovery strategy.

    ``discover`` walks a directory tree and ``organize`` groups the found
    files into data units: one file per spec, matched across the spec
    directories by relative parent directory and file stem.
    """

    # Path components (directories or files) that mark system / NAS artefacts to skip.
    _EXCLUDED_NAMES = frozenset(
        {'@eaDir', '.@__thumb', '@Recycle', '#recycle', '.DS_Store', 'Thumbs.db', '.synology'}
    )

    def discover(self, path: Path, recursive: bool) -> List[Path]:
        """Discover files recursively in the given path.

        Args:
            path: Directory to walk.
            recursive: Accepted for interface compatibility; not consulted,
                the walk is always recursive.

        Returns:
            Every regular file under ``path`` whose path has no component
            listed in ``_EXCLUDED_NAMES``.
        """
        return [
            file_path
            for file_path in path.rglob('*')
            if file_path.is_file() and self._EXCLUDED_NAMES.isdisjoint(file_path.parts)
        ]

    def organize(self, files: List[Path], specs: Dict, metadata: Dict, type_dirs: Dict = None) -> List[Dict]:
        """Organize files according to specifications with metadata.

        Files are grouped by (directory relative to their spec directory,
        file stem). A group becomes a data unit only when it has a file for
        every spec flagged ``is_required``; files of other specs are
        included when present.

        Args:
            files: Candidate file paths.
            specs: Iterated as a sequence of spec dicts; each needs a
                ``'name'`` and may carry ``'is_required'``.
            metadata: Mapping of metadata key to metadata value.
            type_dirs: Mapping of spec name to that spec's directory. When
                ``None`` it is inferred from the file paths.

        Returns:
            A list of ``{'files': {spec_name: path}, 'meta': {...}}`` dicts,
            ordered by dataset key.
        """
        organized_files = []

        # Use provided type_dirs or create fallback mapping
        if type_dirs is None:
            type_dirs = self._infer_type_dirs(files, specs)

        if not type_dirs:
            return organized_files

        # Build the metadata index once so per-group lookups are dict hits.
        metadata_index = self._build_metadata_index(metadata)
        required_specs = [spec['name'] for spec in specs if spec.get('is_required', False)]
        dataset_files = self._group_files(files, type_dirs)

        # Group ids start with the dataset key, so a plain sort orders by it.
        for group_id, files_dict in sorted(dataset_files.items(), key=lambda item: item[0]):
            # Only groups with ALL required files become data units.
            if not all(req in files_dict for req in required_specs):
                continue

            meta_data = self._describe_group(group_id[0], files_dict)

            # Add metadata if available - using the index lookup
            if metadata_index:
                matched_metadata = self._find_matching_metadata_optimized(
                    meta_data['origin_file_stem'], files_dict, metadata_index
                )
                if matched_metadata:
                    meta_data.update(matched_metadata)

            organized_files.append({'files': files_dict, 'meta': meta_data})

        return organized_files

    def _infer_type_dirs(self, files: List[Path], specs: Dict) -> Dict:
        """Derive each spec's directory from the first file path that contains it."""
        type_dirs = {}
        for spec in specs:
            spec_name = spec['name']
            for file_path in files:
                path_parts = file_path.parts
                if spec_name not in path_parts:
                    continue
                # Rebuild the path up to and including the component named after the spec.
                spec_dir = Path(*path_parts[: path_parts.index(spec_name) + 1])
                if spec_dir.is_dir():
                    type_dirs[spec_name] = spec_dir
                    break
        return type_dirs

    def _group_files(self, files: List[Path], type_dirs: Dict) -> Dict:
        """Group files into ``{(dataset_key, parent, stem): {spec_name: path}}``.

        The group id keeps the parent directory and the stem as separate
        fields. Joining them into one ``'<parent>_<stem>'`` string alone is
        ambiguous (``a/b_c`` and ``a_b/c`` both give ``a_b_c``) and would
        merge unrelated groups, silently dropping a file.
        """
        groups = {}
        for file_path in files:
            for spec_name, dir_path in type_dirs.items():
                try:
                    relative_path = file_path.relative_to(dir_path)
                except ValueError:
                    # File is not under this directory
                    continue

                parent = str(relative_path.parent)
                stem = file_path.stem
                dataset_key = stem if parent == '.' else f'{parent}_{stem}'
                group = groups.setdefault((dataset_key, parent, stem), {})

                if spec_name not in group:
                    group[spec_name] = file_path
                else:
                    # Same spec, same group: keep the most recently modified file.
                    try:
                        if file_path.stat().st_mtime > group[spec_name].stat().st_mtime:
                            group[spec_name] = file_path
                    except OSError:
                        # Deliberate best effort: if stat fails, keep the existing file.
                        pass

                # The first spec directory containing the file owns it.
                break
        return groups

    def _describe_group(self, dataset_key: str, files_dict: Dict) -> Dict:
        """Build the base ``meta`` dict for one group of files."""
        file_stems = {}
        file_extensions = {}
        for file_path in files_dict.values():
            stem = file_path.stem
            ext = file_path.suffix.lower()
            if stem:
                file_stems[stem] = file_stems.get(stem, 0) + 1
            if ext:
                file_extensions[ext] = file_extensions.get(ext, 0) + 1

        # Most common value wins; ties go to the one encountered first.
        original_stem = max(file_stems, key=file_stems.get) if file_stems else dataset_key
        origin_file_extension = max(file_extensions, key=file_extensions.get) if file_extensions else ''

        return {
            'origin_file_stem': original_stem,
            'origin_file_extension': origin_file_extension,
            'created_at': datetime.now().isoformat(),
            'dataset_key': dataset_key,  # Add dataset key for debugging
        }

    def _build_metadata_index(self, metadata: Dict) -> Dict:
        """Build metadata index for faster lookups.

        Returns ``{}`` for empty input, otherwise a dict with the tables
        ``'exact_stem'`` / ``'stem_lookup'`` (by key stem), ``'exact_name'``
        (by key file name) and ``'partial_paths'`` / ``'full_paths'`` (by
        raw key). On stem or name collisions the last entry wins.
        """
        if not metadata:
            return {}

        by_stem = {}
        by_name = {}
        for meta_key, meta_value in metadata.items():
            meta_path = Path(meta_key)
            if meta_path.stem:
                by_stem[meta_path.stem] = meta_value
            if meta_path.name:
                by_name[meta_path.name] = meta_value

        return {
            'exact_stem': by_stem,
            'exact_name': by_name,
            'stem_lookup': dict(by_stem),
            'partial_paths': dict(metadata),
            'full_paths': dict(metadata),
        }

    def _find_matching_metadata_optimized(self, file_name: str, files_dict: Dict, metadata_index: Dict) -> Dict:
        """Find matching metadata using the prebuilt index.

        Lookup order (first hit wins): exact stem; exact file name
        (``file_name`` + sample suffix, then the sample file's name);
        substring containment in either direction between a raw metadata
        key and the sample file's path. The sample file is the first value
        of ``files_dict``. Returns ``{}`` when nothing matches.
        """
        if not metadata_index:
            return {}

        # Strategy 1: exact stem match.
        stem_table = metadata_index['exact_stem']
        if file_name in stem_table:
            return stem_table[file_name]

        sample_file = next(iter(files_dict.values()), None) if files_dict else None
        if not sample_file:
            return {}

        # Strategy 2: exact file-name match.
        name_table = metadata_index['exact_name']
        for candidate in (f'{file_name}{sample_file.suffix}', sample_file.name):
            if candidate in name_table:
                return name_table[candidate]

        # Strategy 3: path containment fallback (plain substring matching).
        native_path = str(sample_file)
        posix_path = sample_file.as_posix()
        for meta_key, meta_value in metadata_index['partial_paths'].items():
            if not meta_key:
                # '' is a substring of every path; skip it so a blank key
                # is not attached to every unmatched file.
                continue
            if (
                meta_key in native_path
                or meta_key in posix_path
                or native_path in meta_key
                or posix_path in meta_key
            ):
                return meta_value

        return {}

    def _find_matching_metadata(self, file_name: str, files_dict: Dict, metadata: Dict) -> Dict:
        """Find matching metadata by scanning the raw metadata mapping.

        Matching priority:
        1. Exact stem match (highest priority)
        2. Exact filename match (with extension)
        3. Metadata key stem matches file stem
        4. Partial path matching
        5. Full path matching

        Returns ``{}`` when nothing matches.
        """
        if not metadata:
            return {}

        # Sample file supplies the extension and path information.
        sample_file = next(iter(files_dict.values()), None) if files_dict else None

        # Strategy 1: exact stem match.
        if file_name in metadata:
            return metadata[file_name]

        # Strategy 2: exact filename match (with extension).
        if sample_file:
            for candidate in (f'{file_name}{sample_file.suffix}', sample_file.name):
                if candidate in metadata:
                    return metadata[candidate]

        # Strategy 3: metadata key stem matches file stem.
        for meta_key, meta_value in metadata.items():
            if Path(meta_key).stem == file_name:
                return meta_value

        if not sample_file:
            return {}

        # Strategy 4: partial path matching. Stem equality with file_name is
        # not re-checked here: strategy 3 already returned on it.
        sample_parts = sample_file.parts
        for meta_key, meta_value in metadata.items():
            meta_path = Path(meta_key)
            meta_path_str = str(meta_path)
            for part in sample_parts:
                if part in meta_path_str or meta_path_str in part:
                    if meta_path.name == sample_file.name or part == meta_path.stem:
                        return meta_value

        # Strategy 5: full path matching (containment in either direction).
        native_path = str(sample_file)
        posix_path = sample_file.as_posix()
        for meta_key, meta_value in metadata.items():
            if not meta_key:
                # '' is a substring of every path; skip it so a blank key
                # is not attached to every unmatched file.
                continue
            if (
                meta_key in native_path
                or meta_key in posix_path
                or native_path in meta_key
                or posix_path in meta_key
            ):
                return meta_value

        # No match found
        return {}
@@ -0,0 +1 @@
1
+ # Metadata strategy implementations