snowflake-data-migration-orchestrator 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake_data_migration_orchestrator-0.2.0/.coveragerc +23 -0
- snowflake_data_migration_orchestrator-0.2.0/.dockerignore +18 -0
- snowflake_data_migration_orchestrator-0.2.0/.env.example +13 -0
- snowflake_data_migration_orchestrator-0.2.0/.gitignore +210 -0
- snowflake_data_migration_orchestrator-0.2.0/CONTRIBUTING.md +388 -0
- snowflake_data_migration_orchestrator-0.2.0/Dockerfile +30 -0
- snowflake_data_migration_orchestrator-0.2.0/Jenkinsfile-push-image +231 -0
- snowflake_data_migration_orchestrator-0.2.0/PKG-INFO +567 -0
- snowflake_data_migration_orchestrator-0.2.0/README.md +537 -0
- snowflake_data_migration_orchestrator-0.2.0/docs/checksum-computation.md +220 -0
- snowflake_data_migration_orchestrator-0.2.0/docs/diagrams/module-dependencies.mmd +16 -0
- snowflake_data_migration_orchestrator-0.2.0/docs/diagrams/module-dependencies.png +0 -0
- snowflake_data_migration_orchestrator-0.2.0/docs/iceberg-migration-support.md +910 -0
- snowflake_data_migration_orchestrator-0.2.0/docs/incremental-sync.md +211 -0
- snowflake_data_migration_orchestrator-0.2.0/docs/public/example-workflows/example-workflow-config-incremental-sync.json +21 -0
- snowflake_data_migration_orchestrator-0.2.0/docs/public/example-workflows/example-workflow-config-redshift-iceberg.json +58 -0
- snowflake_data_migration_orchestrator-0.2.0/docs/public/example-workflows/example-workflow-config-redshift-unload.json +40 -0
- snowflake_data_migration_orchestrator-0.2.0/docs/public/example-workflows/example-workflow-config.json +45 -0
- snowflake_data_migration_orchestrator-0.2.0/docs/public/high-level-diagram.png +0 -0
- snowflake_data_migration_orchestrator-0.2.0/docs/schema-migrations.md +694 -0
- snowflake_data_migration_orchestrator-0.2.0/docs/scopes.md +30 -0
- snowflake_data_migration_orchestrator-0.2.0/docs/task-model.md +117 -0
- snowflake_data_migration_orchestrator-0.2.0/pyproject.toml +168 -0
- snowflake_data_migration_orchestrator-0.2.0/ruff.toml +103 -0
- snowflake_data_migration_orchestrator-0.2.0/scripts/build-wheel.sh +100 -0
- snowflake_data_migration_orchestrator-0.2.0/scripts/create-workflow.sh +54 -0
- snowflake_data_migration_orchestrator-0.2.0/scripts/upload-orchestrator.sh +45 -0
- snowflake_data_migration_orchestrator-0.2.0/scripts/validate_imports.py +342 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/__init__.py +6 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/__main__.py +56 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/__version__.py +16 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/data-validation/chunk_hashes.sql +12 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/data-validation/data_validation_schema.sql +4 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/data-validation/file_formats/csv_file_format.sql +5 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/data-validation/metrics_validation_results.sql +14 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/data-validation/partition_metadata.sql +12 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/data-validation/partition_metadata_id_sequence.sql +3 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/data-validation/procedures/get_partition_metadata.sql +30 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/data-validation/procedures/get_table_configuration.sql +16 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/data-validation/procedures/insert_partition_metadata.sql +22 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/data-validation/procedures/insert_table_metadata.sql +35 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/data-validation/procedures/update_partition_validation_status.sql +16 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/data-validation/procedures/update_table_validation_status.sql +16 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/data-validation/row_validation_results.sql +14 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/data-validation/row_validation_summary.sql +15 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/data-validation/schema_validation_results.sql +14 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/data-validation/stages/task_results_stage.sql +3 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/data-validation/table_metadata.sql +12 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/data-validation/table_metadata_id_sequence.sql +3 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/file-formats/csv_file_format.sql +5 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/file-formats/parquet_file_format.sql +4 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/table-metadata/partition_metadata.sql +14 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/table-metadata/partition_metadata_id_sequence.sql +3 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/table-metadata/procedures/copy_partition_metadata.sql +44 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/table-metadata/procedures/copy_table_metadata.sql +36 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/table-metadata/procedures/find_existing_table_metadata.sql +21 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/table-metadata/procedures/get_partition_metadata.sql +36 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/table-metadata/procedures/get_table_configuration.sql +16 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/table-metadata/procedures/insert_partition_metadata.sql +20 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/table-metadata/procedures/insert_table_metadata.sql +18 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/table-metadata/table_metadata.sql +10 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/table-metadata/table_metadata_id_sequence.sql +3 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/table-metadata/views/data_migration_error.sql +31 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/table-metadata/views/table_progress.sql +15 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/table-metadata/views/table_progress_with_example_error.sql +24 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/task-queue/data_migration_schema.sql +4 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/task-queue/procedures/check_tasks.sql +20 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/task-queue/procedures/complete_task.sql +24 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/task-queue/procedures/copy_parquet_into_table.sql +43 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/task-queue/procedures/create_table.sql +70 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/task-queue/procedures/expire_leases.sql +30 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/task-queue/procedures/fail_task.sql +35 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/task-queue/procedures/has_workflow_started.sql +14 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/task-queue/procedures/pull_many_tasks.sql +59 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/task-queue/procedures/pull_single_task.sql +58 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/task-queue/procedures/pull_tasks.sql +28 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/task-queue/procedures/push_task_text.sql +35 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/task-queue/procedures/push_task_variant.sql +54 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/task-queue/procedures/refresh_leases.sql +18 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/task-queue/procedures/unblock_tasks.sql +46 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/task-queue/schema_migration_table.sql +7 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/task-queue/snowconvert_ai_database.sql +4 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/task-queue/task_id_sequence.sql +2 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/task-queue/task_queue.sql +34 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/task-queue/task_results.sql +3 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/workflows/example-workflow.json +17 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/workflows/procedures/complete_workflows.sql +42 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/workflows/procedures/delete_workflow_tasks.sql +24 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/workflows/procedures/expire_stale_initializations.sql +35 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/workflows/procedures/get_pending_workflows.sql +65 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/workflows/procedures/retry_workflow.sql +32 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/assets/scripts/workflows/workflow.sql +20 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/__init__.py +6 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/connection_manager.py +161 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/constants/__init__.py +12 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/constants/file_formats.py +6 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/constants/priorities.py +13 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/exceptions/__init__.py +6 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/exceptions/queueing_error.py +27 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/exceptions/unsupported_payload_kind_error.py +32 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/observability/__init__.py +1 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/observability/logging_config.py +184 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/observability/telemetry_lifecycle.py +113 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/orchestrator_affinity.py +19 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/snowflake_account.py +4 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/stored_procedure_calls.py +17 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/tasks/__init__.py +20 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/tasks/constants/__init__.py +1 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/tasks/constants/task_columns.py +7 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/tasks/executor_type.py +13 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/tasks/queues/__init__.py +12 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/tasks/queues/base_task_queue.py +428 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/tasks/queues/table_task_queue.py +708 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/tasks/snowflake_query_task.py +31 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/tasks/task.py +94 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/tasks/task_builder.py +58 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/tasks/task_status.py +16 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/warehouse.py +128 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/workflows/__init__.py +12 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/workflows/base_workflow_initializer.py +48 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/workflows/stored_procedures_calls.py +12 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/workflows/workflow.py +66 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/workflows/workflow_columns.py +19 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_core/workflows/workflow_status.py +14 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_orchestrator/__init__.py +6 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_orchestrator/exceptions/__init__.py +6 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_orchestrator/exceptions/unsupported_workflow_type_error.py +25 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_orchestrator/orchestrator.py +432 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_orchestrator/schema/__init__.py +7 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_orchestrator/schema/migrations/__init__.py +14 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_orchestrator/schema/migrations/base_migration.py +68 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_orchestrator/schema/migrations/migration_0001_initial.py +189 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_orchestrator/schema/migrations/migration_0002_data_validation.py +126 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_orchestrator/schema/run_schema_migrations.py +30 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_orchestrator/schema/schema_manager.py +260 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_orchestrator/worker_context.py +33 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_orchestrator/workflows/__init__.py +7 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_orchestrator/workflows/workflow_initializer_factory.py +110 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/cloud_orchestrator/workflows/workflow_type.py +26 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/commands/__init__.py +4 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/commands/create_data_migration_workflow.py +172 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/commands/start.py +164 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/__init__.py +6 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/config/__init__.py +20 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/config/configuration_parser.py +730 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/config/configuration_properties_keys.py +53 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/config/extraction_strategy.py +34 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/config/sync_strategy.py +53 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/config/table_configuration.py +397 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/config/workflow_configuration_schema.py +340 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/constants/__init__.py +1 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/constants/metadata.py +20 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/constants/priorities.py +22 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/constants/stages.py +5 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/dashboard/__init__.py +8 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/dashboard/app.py +453 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/dashboard/environment.yml +6 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/dashboard/streamlit_deployer.py +182 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/data_migration_workflow_initializer.py +1037 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/exceptions/__init__.py +10 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/exceptions/table_not_found_error.py +63 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/exceptions/unsupported_extraction_strategy_error.py +29 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/platforms/__init__.py +81 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/platforms/base_platform.py +243 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/platforms/base_query_builder.py +540 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/platforms/platform_registry.py +148 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/platforms/postgresql/__init__.py +25 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/platforms/postgresql/identifiers.py +66 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/platforms/postgresql/platform.py +85 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/platforms/postgresql/query_builder.py +240 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/platforms/redshift/__init__.py +1 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/platforms/redshift/checksum.py +208 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/platforms/redshift/identifiers.py +66 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/platforms/redshift/platform.py +85 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/platforms/redshift/query_builder.py +479 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/platforms/snowflake/__init__.py +14 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/platforms/snowflake/identifiers.py +68 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/platforms/snowflake/query_builder.py +193 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/platforms/sqlserver/__init__.py +25 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/platforms/sqlserver/checksum.py +230 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/platforms/sqlserver/identifiers.py +189 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/platforms/sqlserver/platform.py +108 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/platforms/sqlserver/query_builder.py +341 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/platforms/sqlserver/type_mappings.py +53 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/scope.py +227 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/services/__init__.py +1 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/services/batch_size_calculator.py +70 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/services/schema_validator.py +345 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/strategies/WIP_unload_infrastructure_setup.py +206 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/__init__.py +10 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/checksum_task_creator.py +142 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/handlers/__init__.py +1 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/handlers/base_task_handler.py +75 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/handlers/checksum_models.py +58 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/handlers/clear_partition_data_handler.py +198 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/handlers/compare_partition_checksum_handler.py +283 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/handlers/complete_partition_migration_handler.py +299 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/handlers/create_partition_tasks_handler.py +447 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/handlers/deduplicate_partition_data_handler.py +365 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/handlers/partition_boundary_generator.py +274 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/handlers/partition_calculator.py +151 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/handlers/partition_reuse_service.py +243 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/handlers/partition_strategy_handler.py +455 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/handlers/process_checksum_result_handler.py +362 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/handlers/smart_partition_config_resolver.py +182 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/handlers/staged_data_handler.py +1168 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/handlers/synchronization_config_service.py +140 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/handlers/table_configuration_service.py +427 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/handlers/workflow_config_service.py +166 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/handlers/workflow_progress_service.py +258 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/models/__init__.py +5 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/models/partition_migration_status.py +28 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/models/synchronization_data.py +207 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/models/task_payload.py +432 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/models/task_payload_kind.py +75 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/task_chain_creator.py +389 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/task_handler_factory.py +167 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/tasks/task_payload_mapper.py +43 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/utils/__init__.py +1 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/utils/stage_utils.py +617 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_cloud/utils/type_mapping_loader.py +1178 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_core/__init__.py +1 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_core/queries/__init__.py +22 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_core/queries/constants.py +16 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_core/queries/target_schema_queries.py +150 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_core/type_mappings/__init__.py +50 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_core/type_mappings/default_mappings.py +62 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_core/type_mappings/models.py +130 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_core/type_mappings/postgresql_default_mappings.py +148 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_core/type_mappings/redshift_default_mappings.py +181 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_core/type_mappings/redshift_iceberg_default_mappings.py +47 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_migration_core/type_mappings/sqlserver_default_mappings.py +58 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_validation_cloud/__init__.py +1 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_validation_cloud/config/__init__.py +18 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_validation_cloud/config/configuration_properties_keys.py +45 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_validation_cloud/config/validation_configuration_parser.py +235 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_validation_cloud/config/validation_table_configuration.py +109 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_validation_cloud/config/workflow_configuration_schema.py +109 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_validation_cloud/constants/__init__.py +1 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_validation_cloud/constants/priorities.py +11 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_validation_cloud/constants/stages.py +3 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_validation_core/__init__.py +1 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/data_validation_core/constants.py +67 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/utils/__init__.py +7 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/utils/base_registry.py +110 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/utils/datetime_utils.py +31 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/utils/lru_cache.py +154 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/utils/snowflake_schema.py +81 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/utils/snowflake_sql_utils.py +38 -0
- snowflake_data_migration_orchestrator-0.2.0/src/data_migration_orchestrator/utils/table_name_parts.py +20 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/__init__.py +0 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/platforms/__init__.py +1 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/platforms/postgresql/__init__.py +0 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/platforms/postgresql/test_metadata_queries.py +133 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/platforms/redshift/__init__.py +1 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/platforms/redshift/test_metadata_queries.py +134 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/platforms/redshift/test_platform.py +227 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/platforms/redshift/test_query_builder.py +632 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/platforms/redshift/test_type_mappings.py +295 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/platforms/snowflake/__init__.py +0 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/platforms/snowflake/test_identifiers.py +72 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/platforms/sqlserver/__init__.py +0 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/platforms/sqlserver/test_identifiers.py +267 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/platforms/sqlserver/test_query_builder.py +408 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/platforms/test_base_platform.py +276 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/platforms/test_platform_registry.py +173 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_base_task_queue.py +411 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_checksum_query_builder.py +106 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_complete_partition_migration_handler.py +197 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_configuration_parser.py +1378 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_create_data_migration_workflow.py +272 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_create_partition_tasks_handler.py +52 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_datetime_utils.py +178 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_deduplicate_partition_data_handler.py +372 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_iceberg_ddl_generation.py +110 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_iceberg_migration_support.py +659 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_incremental_sync_watermark.py +276 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_orchestrator.py +601 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_partition_boundary_generator.py +411 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_partition_calculator.py +117 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_partition_strategy_handler.py +317 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_resilient_initialization.py +244 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_schema_validator.py +146 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_scope.py +516 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_smart_partition_config_resolver.py +137 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_snowflake_sql_utils.py +68 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_stage_utils.py +128 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_staged_data_handler_column_name_mappings.py +96 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_staged_data_handler_partition_status.py +633 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_table_configuration.py +144 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_table_not_found_error.py +137 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_table_task_queue.py +749 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_task_chain_creator.py +222 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_task_columns.py +49 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_task_payloads.py +302 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_type_mappings.py +1964 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_workflow_configuration_schema.py +649 -0
- snowflake_data_migration_orchestrator-0.2.0/tests/test_workflow_progress_service.py +303 -0
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
[run]
|
|
2
|
+
source = src
|
|
3
|
+
omit =
|
|
4
|
+
*/__init__.py
|
|
5
|
+
*/__version__.py
|
|
6
|
+
|
|
7
|
+
[report]
|
|
8
|
+
exclude_lines =
|
|
9
|
+
pragma: no cover
|
|
10
|
+
def __repr__
|
|
11
|
+
raise NotImplementedError
|
|
12
|
+
if __name__ == .__main__.:
|
|
13
|
+
pass
|
|
14
|
+
raise ImportError
|
|
15
|
+
|
|
16
|
+
[html]
|
|
17
|
+
directory = tests/outcome/unit-coverage-html
|
|
18
|
+
|
|
19
|
+
[xml]
|
|
20
|
+
output = tests/outcome/unit-coverage.xml
|
|
21
|
+
|
|
22
|
+
[json]
|
|
23
|
+
output = tests/outcome/unit-coverage.json
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Snowflake Connection Configuration
|
|
2
|
+
# Copy this file to .env and fill in your actual values
|
|
3
|
+
|
|
4
|
+
# Required: Basic Snowflake credentials
|
|
5
|
+
SNOWFLAKE_ACCOUNT=your_account
|
|
6
|
+
SNOWFLAKE_USER=your_user
|
|
7
|
+
SNOWFLAKE_PASSWORD=your_password
|
|
8
|
+
|
|
9
|
+
# Optional: Connection settings
|
|
10
|
+
SNOWFLAKE_WAREHOUSE=your_warehouse
|
|
11
|
+
SNOWFLAKE_DATABASE=your_database
|
|
12
|
+
SNOWFLAKE_SCHEMA=your_schema
|
|
13
|
+
SNOWFLAKE_ROLE=your_role
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
# DS_Store
|
|
2
|
+
.DS_Store
|
|
3
|
+
**/.DS_Store
|
|
4
|
+
|
|
5
|
+
# version
|
|
6
|
+
!__version__.py
|
|
7
|
+
!**/__version__.py
|
|
8
|
+
|
|
9
|
+
# Byte-compiled / optimized / DLL files
|
|
10
|
+
__pycache__/
|
|
11
|
+
*.py[cod]
|
|
12
|
+
*$py.class
|
|
13
|
+
|
|
14
|
+
# C extensions
|
|
15
|
+
*.so
|
|
16
|
+
|
|
17
|
+
# Sphinx documentation
|
|
18
|
+
docs/build/
|
|
19
|
+
docs/source/**/_autosummary
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# Distribution / packaging
|
|
23
|
+
.Python
|
|
24
|
+
build/
|
|
25
|
+
develop-eggs/
|
|
26
|
+
dist/
|
|
27
|
+
downloads/
|
|
28
|
+
eggs/
|
|
29
|
+
.eggs/
|
|
30
|
+
lib/
|
|
31
|
+
lib64/
|
|
32
|
+
parts/
|
|
33
|
+
sdist/
|
|
34
|
+
var/
|
|
35
|
+
wheels/
|
|
36
|
+
share/python-wheels/
|
|
37
|
+
*.egg-info/
|
|
38
|
+
.installed.cfg
|
|
39
|
+
*.egg
|
|
40
|
+
MANIFEST
|
|
41
|
+
|
|
42
|
+
# PyInstaller
|
|
43
|
+
# Usually these files are written by a python script from a template
|
|
44
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
45
|
+
*.manifest
|
|
46
|
+
*.spec
|
|
47
|
+
|
|
48
|
+
# Installer logs
|
|
49
|
+
pip-log.txt
|
|
50
|
+
pip-delete-this-directory.txt
|
|
51
|
+
|
|
52
|
+
# Unit test / coverage reports
|
|
53
|
+
htmlcov/
|
|
54
|
+
.tox/
|
|
55
|
+
.nox/
|
|
56
|
+
.coverage
|
|
57
|
+
.coverage.*
|
|
58
|
+
.cache
|
|
59
|
+
nosetests.xml
|
|
60
|
+
coverage.xml
|
|
61
|
+
*.cover
|
|
62
|
+
*.py,cover
|
|
63
|
+
.hypothesis/
|
|
64
|
+
.pytest_cache/
|
|
65
|
+
cover/
|
|
66
|
+
|
|
67
|
+
# pytest outcome
|
|
68
|
+
*/tests/outcome/
|
|
69
|
+
tests/outcome/**
|
|
70
|
+
test-results.xml
|
|
71
|
+
|
|
72
|
+
# Translations
|
|
73
|
+
*.mo
|
|
74
|
+
*.pot
|
|
75
|
+
|
|
76
|
+
# Django stuff:
|
|
77
|
+
*.log
|
|
78
|
+
logs/
|
|
79
|
+
local_settings.py
|
|
80
|
+
db.sqlite3
|
|
81
|
+
db.sqlite3-journal
|
|
82
|
+
|
|
83
|
+
# Flask stuff:
|
|
84
|
+
instance/
|
|
85
|
+
.webassets-cache
|
|
86
|
+
|
|
87
|
+
# Scrapy stuff:
|
|
88
|
+
.scrapy
|
|
89
|
+
|
|
90
|
+
# Sphinx documentation
|
|
91
|
+
docs/_build/
|
|
92
|
+
|
|
93
|
+
# PyBuilder
|
|
94
|
+
.pybuilder/
|
|
95
|
+
target/
|
|
96
|
+
|
|
97
|
+
# Jupyter Notebook
|
|
98
|
+
.ipynb_checkpoints
|
|
99
|
+
|
|
100
|
+
# IPython
|
|
101
|
+
profile_default/
|
|
102
|
+
ipython_config.py
|
|
103
|
+
|
|
104
|
+
# pyenv
|
|
105
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
106
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
107
|
+
.python-version
|
|
108
|
+
|
|
109
|
+
# pipenv
|
|
110
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
111
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
112
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
113
|
+
# install all needed dependencies.
|
|
114
|
+
#Pipfile.lock
|
|
115
|
+
|
|
116
|
+
# UV
|
|
117
|
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
|
118
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
119
|
+
# commonly ignored for libraries.
|
|
120
|
+
#uv.lock
|
|
121
|
+
|
|
122
|
+
# poetry
|
|
123
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
124
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
125
|
+
# commonly ignored for libraries.
|
|
126
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
127
|
+
#poetry.lock
|
|
128
|
+
|
|
129
|
+
# pdm
|
|
130
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
131
|
+
#pdm.lock
|
|
132
|
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
|
133
|
+
# in version control.
|
|
134
|
+
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
|
135
|
+
.pdm.toml
|
|
136
|
+
.pdm-python
|
|
137
|
+
.pdm-build/
|
|
138
|
+
|
|
139
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
140
|
+
__pypackages__/
|
|
141
|
+
|
|
142
|
+
# Celery stuff
|
|
143
|
+
celerybeat-schedule
|
|
144
|
+
celerybeat.pid
|
|
145
|
+
|
|
146
|
+
# SageMath parsed files
|
|
147
|
+
*.sage.py
|
|
148
|
+
|
|
149
|
+
# Environments
|
|
150
|
+
.env
|
|
151
|
+
.venv
|
|
152
|
+
env/
|
|
153
|
+
venv/
|
|
154
|
+
ENV/
|
|
155
|
+
env.bak/
|
|
156
|
+
venv.bak/
|
|
157
|
+
|
|
158
|
+
# Spyder project settings
|
|
159
|
+
.spyderproject
|
|
160
|
+
.spyproject
|
|
161
|
+
|
|
162
|
+
# Rope project settings
|
|
163
|
+
.ropeproject
|
|
164
|
+
|
|
165
|
+
# mkdocs documentation
|
|
166
|
+
/site
|
|
167
|
+
|
|
168
|
+
# mypy
|
|
169
|
+
.mypy_cache/
|
|
170
|
+
.dmypy.json
|
|
171
|
+
dmypy.json
|
|
172
|
+
|
|
173
|
+
# Pyre type checker
|
|
174
|
+
.pyre/
|
|
175
|
+
|
|
176
|
+
# pytype static type analyzer
|
|
177
|
+
.pytype/
|
|
178
|
+
|
|
179
|
+
# Cython debug symbols
|
|
180
|
+
cython_debug/
|
|
181
|
+
|
|
182
|
+
# PyCharm
|
|
183
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
184
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
185
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
186
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
187
|
+
.idea/
|
|
188
|
+
|
|
189
|
+
# PyPI configuration file
|
|
190
|
+
.pypirc
|
|
191
|
+
|
|
192
|
+
# ruff cache
|
|
193
|
+
.ruff_cache
|
|
194
|
+
|
|
195
|
+
# Requirements file generated by pip-compile
|
|
196
|
+
requirements.txt
|
|
197
|
+
|
|
198
|
+
# Data Exchange Agent
|
|
199
|
+
data-exchange-agent/src/mocked_api/*
|
|
200
|
+
.vscode
|
|
201
|
+
|
|
202
|
+
# Rye lock files (not committed for library projects)
|
|
203
|
+
requirements.lock
|
|
204
|
+
requirements-dev.lock
|
|
205
|
+
|
|
206
|
+
# E2E test session config (contains credentials)
|
|
207
|
+
e2e-tests/config.yaml
|
|
208
|
+
|
|
209
|
+
# Temporary files
|
|
210
|
+
temp/
|
|
@@ -0,0 +1,388 @@
|
|
|
1
|
+
# CONTRIBUTING
|
|
2
|
+
|
|
3
|
+
## Code Organization
|
|
4
|
+
Data Migration Orchestrator is organized in these main modules:
|
|
5
|
+
- `utils`: Utils that can be imported from anywhere.
|
|
6
|
+
- `cloud_core`: Core for Cloud operations, which includes definitions for Workflows and Tasks.
|
|
7
|
+
- `data_migration_core`: Core for Data Migration, which includes platform-agnostic logic.
|
|
8
|
+
- `data_migration_cloud`: Workflows Initializers, Task, and Task Handlers for Data Migration Workflows.
|
|
9
|
+
- `cloud_orchestrator`: The Orchestrator itself, which consumes the other modules to support the execution of Workflows of any kind.
|
|
10
|
+
|
|
11
|
+
These modules follow a layered architecture with strict import rules. An arrow from module A to module B means that A can import from B. Colors represent the architecture layers:
|
|
12
|
+
- 🔵 **Blue**: Orchestration layer (`cloud_orchestrator`)
|
|
13
|
+
- 🟢 **Green**: Migration logic layer (`data_migration_cloud`)
|
|
14
|
+
- 🟠 **Orange**: Core components layer (`cloud_core`, `data_migration_core`)
|
|
15
|
+
- 🟣 **Purple**: Foundation layer (`utils`)
|
|
16
|
+
|
|
17
|
+

|
|
18
|
+
|
|
19
|
+
## Commands
|
|
20
|
+
|
|
21
|
+
The orchestrator CLI supports multiple subcommands. When no subcommand is given, `start` is assumed for backward compatibility.
|
|
22
|
+
|
|
23
|
+
### Starting the Orchestrator
|
|
24
|
+
```bash
|
|
25
|
+
hatch run start
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### Creating a Workflow
|
|
29
|
+
Validate a workflow configuration file and insert it into the `WORKFLOW` table:
|
|
30
|
+
```bash
|
|
31
|
+
hatch run create-workflow <config-file> [--connection-name <name>] [--name <workflow-name>] [--source-platform <platform>]
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
| Option | Default | Description |
|
|
35
|
+
|--------|---------|-------------|
|
|
36
|
+
| `config_file` | *(required)* | Path to the JSON workflow configuration file |
|
|
37
|
+
| `--connection-name` | env vars | Snowflake connection name (see [Connection Management](#connection-management)) |
|
|
38
|
+
| `--name` | `MY_WORKFLOW` | Name for the workflow record |
|
|
39
|
+
| `--source-platform` | `sqlserver` | Source platform (`sqlserver`, `redshift`) |
|
|
40
|
+
|
|
41
|
+
The command reads `affinity` from the configuration file itself (the top-level `affinity` field).
|
|
42
|
+
|
|
43
|
+
Example:
|
|
44
|
+
```bash
|
|
45
|
+
hatch run create-workflow workflow-config.json --connection-name MY_CONN --name prod_migration
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### Linting
|
|
49
|
+
Run linting with [ruff](https://docs.astral.sh/ruff/):
|
|
50
|
+
```bash
|
|
51
|
+
# Run lint checks (with auto-fix)
|
|
52
|
+
hatch run linter:check
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### Type Checking
|
|
56
|
+
Run static type checking with [ty](https://docs.astral.sh/ty/) (extremely fast Python type checker from Astral):
|
|
57
|
+
```bash
|
|
58
|
+
# Check all source code
|
|
59
|
+
hatch run types:check-ty
|
|
60
|
+
|
|
61
|
+
# Check specific path
|
|
62
|
+
hatch run types:check-ty tests
|
|
63
|
+
|
|
64
|
+
# Watch mode - automatically re-checks on file changes
|
|
65
|
+
hatch run types:watch-ty
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
You can also run type checking with mypy:
|
|
69
|
+
```bash
|
|
70
|
+
hatch run types:check
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### Import Validation
|
|
74
|
+
Check that you're not violating the rules of the dependencies between the main modules described in [Code Organization](#code-organization):
|
|
75
|
+
```bash
|
|
76
|
+
./scripts/validate_imports.py
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### ty Diagnostics Baseline
|
|
80
|
+
|
|
81
|
+
The project uses a **diagnostics baseline** to prevent type error regressions. The CI will fail if the number of `ty` diagnostics increases beyond the baseline.
|
|
82
|
+
|
|
83
|
+
| Project | Baseline |
|
|
84
|
+
|---------|----------|
|
|
85
|
+
| data-migration-orchestrator | 35 |
|
|
86
|
+
|
|
87
|
+
To check diagnostics locally:
|
|
88
|
+
```bash
|
|
89
|
+
python .github/scripts/ty_check_diagnostics.py data-migration-orchestrator
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
If you **fix** type errors and reduce the count, please update the baseline in:
|
|
93
|
+
- `.github/workflows/data-migration-orchestrator-all-ci.yml`
|
|
94
|
+
- `.github/scripts/ty_check_diagnostics.py` (DEFAULT_BASELINES)
|
|
95
|
+
|
|
96
|
+
### Testing
|
|
97
|
+
Run the test suite:
|
|
98
|
+
```bash
|
|
99
|
+
# Run all tests with coverage
|
|
100
|
+
hatch run test_all.py3.11:check
|
|
101
|
+
|
|
102
|
+
# Run tests only
|
|
103
|
+
hatch run test_all.py3.11:unit
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
### CI Workflows
|
|
107
|
+
|
|
108
|
+
The following checks run automatically on PRs and pushes to `main`/`develop`:
|
|
109
|
+
|
|
110
|
+
1. **Linting** - Static analysis with ruff
|
|
111
|
+
2. **Type Check** - ty diagnostics baseline check
|
|
112
|
+
3. **Unit Tests** - pytest on Python 3.11 and 3.12 with coverage
|
|
113
|
+
|
|
114
|
+
## Docker
|
|
115
|
+
|
|
116
|
+
### Commands
|
|
117
|
+
Build image:
|
|
118
|
+
```bash
|
|
119
|
+
hatch run docker-build
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Alternatively, build directly with Docker from the **parent directory** (required due to shared module dependencies):
|
|
123
|
+
```bash
|
|
124
|
+
# From the migrations-data-validation directory (parent)
|
|
125
|
+
docker build -f data-migration-orchestrator/Dockerfile -t data-migration-orchestrator:latest .
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
Run container:
|
|
129
|
+
```bash
|
|
130
|
+
hatch run docker-run
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
Interactive mode (debugging):
|
|
134
|
+
```bash
|
|
135
|
+
hatch run docker-run-interactive
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### Setup
|
|
139
|
+
1. Create environment file from template:
|
|
140
|
+
```bash
|
|
141
|
+
cp .env.example .env
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
2. Edit `.env` with your Snowflake credentials:
|
|
145
|
+
```env
|
|
146
|
+
SNOWFLAKE_ACCOUNT=your_account
|
|
147
|
+
SNOWFLAKE_USER=your_user
|
|
148
|
+
SNOWFLAKE_PASSWORD=your_password
|
|
149
|
+
SNOWFLAKE_WAREHOUSE=your_warehouse
|
|
150
|
+
SNOWFLAKE_DATABASE=your_database
|
|
151
|
+
SNOWFLAKE_SCHEMA=your_schema
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
3. Build and run:
|
|
155
|
+
```bash
|
|
156
|
+
hatch run docker-build
|
|
157
|
+
hatch run docker-run
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### Connection Management
|
|
161
|
+
The application automatically detects the environment:
|
|
162
|
+
|
|
163
|
+
- **Local**: Uses credentials from `.env` file (via `SNOWFLAKE_ACCOUNT`, `SNOWFLAKE_USER`, `SNOWFLAKE_PASSWORD`, etc.)
|
|
164
|
+
- **SPCS**: Automatically uses Snowflake-injected credentials (OAuth)
|
|
165
|
+
|
|
166
|
+
No code changes needed when deploying to Snowflake Container Services (SPCS).
|
|
167
|
+
|
|
168
|
+
## End-to-End Flow
|
|
169
|
+
For testing the end-to-end flow, with one **Data Migration Orchestrator** running and one (or more) **Data Exchange Agents**, we have the following options:
|
|
170
|
+
1. Running all components locally.
|
|
171
|
+
2. Running all components in SPCS.
|
|
172
|
+
3. Mixed approach: running some components locally and others in SPCS.
|
|
173
|
+
|
|
174
|
+
### Running all components locally
|
|
175
|
+
#### Starting the Data Migration Orchestrator
|
|
176
|
+
These steps will start the **Data Migration Orchestrator** locally. When it starts, the corresponding resources in the Snowflake account will also be set up, which will allow you to start **Data Exchange Agents** in the same account.
|
|
177
|
+
*<snowflake-connection-name>* must be the name of the connection you want to use, as it appears on your `config.toml`/`connections.toml`.
|
|
178
|
+
1. Go to the `data-migration-orchestrator` directory (`cd data-migration-orchestrator`).
|
|
179
|
+
2. `export SNOWFLAKE_CONNECTION_NAME=<snowflake-connection-name>`.
|
|
180
|
+
3. `export DATA_MIGRATION_ORCHESTRATOR_AFFINITY=<affinity>` (this is optional, but is recommended for local development, to avoid confusion with multiple orchestrator instances from different devs).
|
|
181
|
+
4. `hatch run start` to start the program (it can also be started from the **VS Code** debugger just by clicking **Run and Debug** while being on the `main.py` file for this project).
|
|
182
|
+
|
|
183
|
+
#### Starting the Data Exchange Agent
|
|
184
|
+
These steps will start the **Data Exchange Agent** locally. They must be performed after the corresponding resources have been set up in the Snowflake account (which is done when the **Data Migration Orchestrator** starts).
|
|
185
|
+
1. Go to the `data-exchange-agent` directory (`cd data-exchange-agent`)
|
|
186
|
+
2. Create a `configuration.toml` file in the `data-exchange-agent` directory (it will be ignored by **Git**). There is a `configuration_example.toml` file in that same directory, which can be used as an example. A minimal configuration is also shown below.
|
|
187
|
+
3. `hatch run data-exchange-agent` to start the program (it can also be started from the **VS Code** debugger just by clicking **Run and Debug** while being on the `main.py` file for this project).
|
|
188
|
+
|
|
189
|
+
Example minimal config:
|
|
190
|
+
```toml
|
|
191
|
+
selected_task_source = "snowflake_stored_procedure"
|
|
192
|
+
|
|
193
|
+
[task_source.snowflake_stored_procedure]
|
|
194
|
+
connection_name = <snowflake-connection-name>
|
|
195
|
+
|
|
196
|
+
[application]
|
|
197
|
+
workers = 1
|
|
198
|
+
task_fetch_interval = 5s
|
|
199
|
+
debug_mode = false
|
|
200
|
+
affinity = <affinity>
|
|
201
|
+
|
|
202
|
+
[connections.target.snowflake_connection_name]
|
|
203
|
+
connection_name = <snowflake-connection-name>
|
|
204
|
+
|
|
205
|
+
[connections.source.sqlserver]
|
|
206
|
+
username = <sql-server-username>
|
|
207
|
+
password = <sql-server-password>
|
|
208
|
+
database = <sql-server-database-name>
|
|
209
|
+
host = <sql-server-host>
|
|
210
|
+
port = 1433
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
#### Creating a Workflow
|
|
214
|
+
Prepare a `workflow-config.json` (see the [Workflow Configuration Reference](README.md#workflow-configuration-reference) in the README for the configuration format). The `affinity` field inside the config must match the affinity you are using for the **Data Exchange Agent**.
|
|
215
|
+
|
|
216
|
+
```bash
|
|
217
|
+
hatch run create-workflow workflow-config.json --connection-name <snowflake-connection-name>
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
This validates the configuration and creates a new record in the `SNOWCONVERT_AI.DATA_MIGRATION.WORKFLOW` table. If `--connection-name` is omitted, the connection from `SNOWFLAKE_CONNECTION_NAME` (or other env vars) is used.
|
|
221
|
+
|
|
222
|
+
### Running all components in SPCS
|
|
223
|
+
#### Starting the Data Migration Orchestrator
|
|
224
|
+
You must run the `./data-migration-orchestrator/scripts/upload-orchestrator.sh` script (on the root of the repository). This will update the image for the **Data Migration Orchestrator** and push it to the image registry on the Snowflake account. This script takes an optional `--connection` parameter that receives the name of the Snowflake connection. If not provided, the default will be used.
|
|
225
|
+
|
|
226
|
+
Then, execute this in the Snowflake account:
|
|
227
|
+
```sql
|
|
228
|
+
CREATE SERVICE IF NOT EXISTS SNOWCONVERT_AI.DATA_MIGRATION.DATA_MIGRATION_SERVICE
|
|
229
|
+
IN COMPUTE POOL <compute-pool-name>
|
|
230
|
+
FROM SPECIFICATION
|
|
231
|
+
$$
|
|
232
|
+
spec:
|
|
233
|
+
containers:
|
|
234
|
+
- name: orchestrator
|
|
235
|
+
image: /SNOWCONVERT_AI/PUBLIC/IMAGES/data-migration-orchestrator:latest
|
|
236
|
+
env:
|
|
237
|
+
SNOWFLAKE_WAREHOUSE: <warehouse>
|
|
238
|
+
resources:
|
|
239
|
+
requests:
|
|
240
|
+
memory: 2Gi
|
|
241
|
+
cpu: 0.5
|
|
242
|
+
$$
|
|
243
|
+
QUERY_WAREHOUSE = <warehouse>;
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
#### Starting the Data Exchange Agent
|
|
247
|
+
You must run the `./data-exchange-agent/scripts/upload-agent.sh` script (on the root of the repository). This will update the image for the **Data Exchange Agent** and push it to the image registry on the Snowflake account. This script takes an optional `--connection` parameter that receives the name of the Snowflake connection. If not provided, the default will be used.
|
|
248
|
+
|
|
249
|
+
Then, execute this in the Snowflake account:
|
|
250
|
+
```sql
|
|
251
|
+
-- 1. Create secret and grant usage over it (Source System)
|
|
252
|
+
CREATE OR REPLACE SECRET MY_DB.SECRETS.SQL_SERVER_PASSWORD
|
|
253
|
+
TYPE = PASSWORD
|
|
254
|
+
USERNAME = <sql-server-username>
|
|
255
|
+
PASSWORD = <sql-server-password>;
|
|
256
|
+
|
|
257
|
+
GRANT USAGE ON SECRET MY_DB.SECRETS.SQL_SERVER_PASSWORD TO ROLE <data-migration-role-name>;
|
|
258
|
+
|
|
259
|
+
-- 2. Create network rule
|
|
260
|
+
CREATE OR REPLACE NETWORK RULE DM_DEMO_AZURE_SQL_EGRESS_RULE
|
|
261
|
+
MODE = EGRESS
|
|
262
|
+
TYPE = HOST_PORT
|
|
263
|
+
VALUE_LIST = ('<sql-server-host>');
|
|
264
|
+
|
|
265
|
+
-- 3. Create External Access Integration
|
|
266
|
+
CREATE OR REPLACE EXTERNAL ACCESS INTEGRATION SQL_SERVER_INTEGRATION
|
|
267
|
+
ALLOWED_NETWORK_RULES = (DM_DEMO_AZURE_SQL_EGRESS_RULE)
|
|
268
|
+
ENABLED = TRUE;
|
|
269
|
+
|
|
270
|
+
GRANT USAGE ON INTEGRATION SQL_SERVER_INTEGRATION TO ROLE <data-migration-role-name>;
|
|
271
|
+
|
|
272
|
+
-- 4. Create Service
|
|
273
|
+
CREATE SERVICE SNOWCONVERT_AI.DATA_MIGRATION.DATA_EXCHANGE_SERVICE
|
|
274
|
+
IN COMPUTE POOL <compute-pool-name>
|
|
275
|
+
FROM SPECIFICATION $$
|
|
276
|
+
spec:
|
|
277
|
+
containers:
|
|
278
|
+
- name: agent
|
|
279
|
+
image: /snowconvert_ai/public/images/data-exchange-agent:latest
|
|
280
|
+
env:
|
|
281
|
+
# Source System Config
|
|
282
|
+
DATA_SOURCE_HOST: <sql-server-host>
|
|
283
|
+
DATA_SOURCE_PORT: <1433>
|
|
284
|
+
DATA_SOURCE_DATABASE: <sql-server-database>
|
|
285
|
+
|
|
286
|
+
# Snowflake Config
|
|
287
|
+
SNOWFLAKE_WAREHOUSE: <snowflake-warehouse>
|
|
288
|
+
|
|
289
|
+
# Application
|
|
290
|
+
WORKER_COUNT: 1
|
|
291
|
+
AGENT_AFFINITY: <affinity>
|
|
292
|
+
|
|
293
|
+
secrets:
|
|
294
|
+
# Secrets (Source System)
|
|
295
|
+
- snowflakeSecret: MY_DB.SECRETS.SQL_SERVER_PASSWORD
|
|
296
|
+
secretKeyRef: USERNAME
|
|
297
|
+
envVarName: DATA_SOURCE_USERNAME
|
|
298
|
+
- snowflakeSecret: MY_DB.SECRETS.SQL_SERVER_PASSWORD
|
|
299
|
+
secretKeyRef: PASSWORD
|
|
300
|
+
envVarName: DATA_SOURCE_PASSWORD
|
|
301
|
+
$$
|
|
302
|
+
-- This line links the network hole to the service
|
|
303
|
+
EXTERNAL_ACCESS_INTEGRATIONS = (SQL_SERVER_INTEGRATION)
|
|
304
|
+
MIN_INSTANCES = 6
|
|
305
|
+
MAX_INSTANCES = 6;
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
#### Creating a Workflow
|
|
309
|
+
Creating a workflow works the same way regardless of where the components are deployed (see [Creating a Workflow](#creating-a-workflow-1) above).
|
|
310
|
+
|
|
311
|
+
### Mixed approach
|
|
312
|
+
You can combine the two approaches and run only the **Data Exchange Agent** locally and the **Data Migration Orchestrator** in SPCS (or the other way around). There is no significant difference when doing this.
|
|
313
|
+
|
|
314
|
+
## Keeping Documentation up to Date
|
|
315
|
+
When adding/modifying features, update the corresponding documentation:
|
|
316
|
+
- **README.md** is the user-facing source of truth (architecture, usage, configuration reference).
|
|
317
|
+
- **CONTRIBUTING.md** (this file) has developer content (commands, Docker, E2E, CI).
|
|
318
|
+
- The [Data Exchange Agent README](../data-exchange-agent/README.md) is the source of truth for Worker Configuration.
|
|
319
|
+
- Cursor rules and commands should be kept synchronized with the docs.
|
|
320
|
+
|
|
321
|
+
When changing configuration models or behavior, update the README's configuration reference and the example workflow files in `docs/public/example-workflows/`.
|
|
322
|
+
|
|
323
|
+
## Development Patterns
|
|
324
|
+
### Task Model
|
|
325
|
+
In the **Data Migration Orchestrator** we distribute work by converting workflows into sequences of many tasks:
|
|
326
|
+
- Distributing the work in tasks helps us achieve fault-tolerance, distribute the workload across several workers, and have "resumability".
|
|
327
|
+
- These tasks can have successors/predecessors and be executed by different components of the architecture.
|
|
328
|
+
- We strive for tasks to be idempotent, retryable, and moderately "quick" to execute. This might not always be achievable.
|
|
329
|
+
- No specific executor is assigned to a task, although a specific executor type will be assigned depending on the kind of the task.
|
|
330
|
+
- Each task has a payload that depends on the kind of the task. There are no guarantees on the time at which a task will be executed. Because of that, we can't assume that a successor task will be executed immediately after its predecessor (hours/days could have passed since).
|
|
331
|
+
|
|
332
|
+
When adding features to the Orchestrator, consider this task model. For more information, you can visit the `task-model.md` document.
|
|
333
|
+
|
|
334
|
+
### Creating/Queuing Tasks
|
|
335
|
+
Create tasks using the `TaskBuilder` class and then queue them using the `TaskQueue`. When creating sequences of dependent tasks, you can use the `push_task_chain` method of the `TaskQueue`.
|
|
336
|
+
|
|
337
|
+
### Manipulating Stage Paths
|
|
338
|
+
We are constantly putting data/metadata into files in different paths of stages and then reading that data/metadata (or copying it into tables). For example, the default internal stage is `SNOWCONVERT_AI.DATA_MIGRATION.TASK_RESULTS` and we normally upload metadata/data of the tables we're moving.
|
|
339
|
+
|
|
340
|
+
We try to avoid passing stage paths through task payloads. Instead, we try to have a standardized pattern for stage paths. For example, if you want to access the path at which the schema information for a table was uploaded, then it might look something like this: `<stage-id>/workflows/<workflow-id>/table/<table-metadata-id>/schema/`.
|
|
341
|
+
|
|
342
|
+
### Scopes
|
|
343
|
+
Scopes are described in detail in the `scopes.md` doc. They are useful for understanding the purpose of each task and the "domain" it applies to.
|
|
344
|
+
|
|
345
|
+
When creating scopes, we should use the `ScopeBuilder`. There are other helper functions that can be used when creating scopes (especially if the scope is very commonly created).
|
|
346
|
+
|
|
347
|
+
### Schema Migrations
|
|
348
|
+
When changing the schema (tables in the `SNOWCONVERT_AI.DATA_MIGRATION` schema, for example), we should do it through schema migrations. Check the `schema-migrations.md` doc for more information about this topic.
|
|
349
|
+
|
|
350
|
+
## Anti-patterns
|
|
351
|
+
1. We want to avoid having duplicated dataclasses (or dataclasses that are almost equivalent) without a good reason.
|
|
352
|
+
2. We do not want to rely too much on magic strings. In cases where we are dealing with categories/kinds, we should prefer to create a `StrEnum`.
|
|
353
|
+
3. We want to avoid "carrying" too much information across tasks, particularly if you are sending a piece of information that is not meant for a task, but rather for a future task that might be created by the immediate task you are creating. We should strive to read configuration values from the TABLE_METADATA configuration column, for example. This also applies to the PARTITION_METADATA synchronization data, for example.
|
|
354
|
+
4. We want to avoid using `Any` if possible. In general, we try to keep the code as type-safe as possible.
|
|
355
|
+
5. We want to avoid complex types without a meaningful name. For example, types like `dict[str, dict[str, str]]` might be complex to understand when reading the code. A type alias or a dedicated class can help here.
|
|
356
|
+
6. Let's not write non-deterministic tests, or unit tests that take a long time to run. Running all tests should be a matter of less than 10 seconds.
|
|
357
|
+
|
|
358
|
+
## Considerations
|
|
359
|
+
1. If the **Workflow Config** schema changes, update the `example-workflow-config.json` file. Also, update the mechanism for schema validation in **SnowConvert Desktop**.
|
|
360
|
+
2. If the **Data Exchange Agent Config** schema changes, update the `data-exchange-agent/src/data_exchange_agent/configuration_example.toml` (and possibly the `data-exchange-agent/docker-artifacts/configuration.template.toml`).
|
|
361
|
+
3. For development, it is useful to set orchestrator affinity (by setting the `DATA_MIGRATION_ORCHESTRATOR_AFFINITY` to your affinity value). This will prevent other orchestrator instances (from other devs) from picking up the workflows you have created (and the other way around).
|
|
362
|
+
|
|
363
|
+
## How to add a new platform?
|
|
364
|
+
### Basic Integration
|
|
365
|
+
For integrating a new platform, it suffices to:
|
|
366
|
+
1. Add the new value to the `SourceType` enum (in `shared/enums/source_type.py`).
|
|
367
|
+
2. Add the corresponding *default mappings* in `default_mappings.py`. Some examples: `sqlserver_default_mappings.py` and `redshift_default_mappings.py`.
|
|
368
|
+
3. Create the directory for this new platform in the `src/data_migration_orchestrator/data_migration_cloud/platforms` directory. Follow the examples of existing platforms.
|
|
369
|
+
4. Add the corresponding subclass of `BaseQueryBuilder` for the platform. Some examples: `RedshiftQueryBuilder` and `SqlServerQueryBuilder`. Make sure to implement all methods and override base methods if needed.
|
|
370
|
+
5. Add the corresponding subclass of `BasePlatform` for the platform. Some examples: `RedshiftPlatform` and `SqlServerPlatform`. Make sure to implement all methods and override base methods if needed.
|
|
371
|
+
6. In `src/data_migration_orchestrator/data_migration_cloud/platforms/__init__.py`, you must register the platform and corresponding aliases (if any).
|
|
372
|
+
|
|
373
|
+
Some changes will likely be needed on the `data-exchange-agent` project.
|
|
374
|
+
Finally, make sure to do appropriate manual testing and include new unit tests.
|
|
375
|
+
|
|
376
|
+
### Optimizations
|
|
377
|
+
Some platforms might have an "optimized" way of extracting the data and pushing it to a Snowflake stage (internal or external).
|
|
378
|
+
|
|
379
|
+
#### Examples
|
|
380
|
+
##### SQL Server
|
|
381
|
+
The **BCP** tool is highly performant and can be used to extract the data to CSV files. This approach requires changes only on the **Data Exchange Agent** (and **BCP** is only leveraged for "bulk" data movement operations, not for small metadata queries).
|
|
382
|
+
|
|
383
|
+
##### Redshift
|
|
384
|
+
It is common to use the **UNLOAD** strategy, in which an **UNLOAD** statement is issued to the **Redshift** instance (with the appropriate SQL query) and that creates a Parquet file in an **S3** bucket. On the side of the **Data Migration Orchestrator**, the workflow must receive configuration to use the **UNLOAD** strategy (this must also include the name of the external stage that is pointing to the **S3** bucket). This approach requires changes on both the **Data Exchange Agent** (to understand how to issue the **UNLOAD** statement) and in the **Data Migration Orchestrator** (to read the configuration and understand where to read the data from).
|
|
385
|
+
|
|
386
|
+
#### Considerations
|
|
387
|
+
- Take into account if a new extraction strategy must be added to the **Data Migration Orchestrator**. This would require changes in the workflow configuration models.
|
|
388
|
+
- Take into account if the optimization only requires changes to the **Data Exchange Agent** (which is preferred). An example of this is the **BCP** approach for **SQL Server**.
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Use Python 3.11 slim image
|
|
2
|
+
FROM python:3.11-slim
|
|
3
|
+
|
|
4
|
+
# Set working directory
|
|
5
|
+
WORKDIR /app
|
|
6
|
+
|
|
7
|
+
# Copy shared directory from parent context (required by pyproject.toml force-include)
|
|
8
|
+
COPY shared /shared
|
|
9
|
+
|
|
10
|
+
# Copy project files
|
|
11
|
+
COPY data-migration-orchestrator/pyproject.toml data-migration-orchestrator/README.md ./
|
|
12
|
+
COPY data-migration-orchestrator/src/ ./src/
|
|
13
|
+
|
|
14
|
+
# Install the package and dependencies
|
|
15
|
+
RUN pip install --no-cache-dir -e .
|
|
16
|
+
# TODO(SNOW-2860314): move dependency installation before the part in which we copy the source code to leverage Docker layer caching
|
|
17
|
+
|
|
18
|
+
# Set the Python path
|
|
19
|
+
ENV PYTHONPATH=/app/src
|
|
20
|
+
|
|
21
|
+
# Disable Python output buffering to see logs in real-time
|
|
22
|
+
ENV PYTHONUNBUFFERED=1
|
|
23
|
+
|
|
24
|
+
# Create non-root user and pre-create writable dirs
|
|
25
|
+
RUN groupadd --system appgroup && useradd --system --gid appgroup appuser \
|
|
26
|
+
&& mkdir -p /app/logs && chown appuser:appgroup /app/logs
|
|
27
|
+
USER appuser
|
|
28
|
+
|
|
29
|
+
# Run the main module
|
|
30
|
+
CMD ["python", "-u", "-m", "data_migration_orchestrator"]
|