snowflake-cli-labs 2.8.0rc1__py3-none-any.whl → 2.8.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- README.md +21 -0
- {snowflake_cli_labs-2.8.0rc1.dist-info → snowflake_cli_labs-2.8.2.dist-info}/METADATA +7 -95
- snowflake_cli_labs-2.8.2.dist-info/RECORD +5 -0
- snowflake/cli/__about__.py +0 -17
- snowflake/cli/__init__.py +0 -13
- snowflake/cli/api/__init__.py +0 -48
- snowflake/cli/api/cli_global_context.py +0 -390
- snowflake/cli/api/commands/__init__.py +0 -13
- snowflake/cli/api/commands/alias.py +0 -23
- snowflake/cli/api/commands/decorators.py +0 -354
- snowflake/cli/api/commands/execution_metadata.py +0 -40
- snowflake/cli/api/commands/experimental_behaviour.py +0 -19
- snowflake/cli/api/commands/flags.py +0 -640
- snowflake/cli/api/commands/project_initialisation.py +0 -65
- snowflake/cli/api/commands/snow_typer.py +0 -237
- snowflake/cli/api/commands/typer_pre_execute.py +0 -26
- snowflake/cli/api/config.py +0 -348
- snowflake/cli/api/console/__init__.py +0 -17
- snowflake/cli/api/console/abc.py +0 -89
- snowflake/cli/api/console/console.py +0 -134
- snowflake/cli/api/console/enum.py +0 -17
- snowflake/cli/api/constants.py +0 -79
- snowflake/cli/api/errno.py +0 -27
- snowflake/cli/api/exceptions.py +0 -164
- snowflake/cli/api/feature_flags.py +0 -55
- snowflake/cli/api/identifiers.py +0 -154
- snowflake/cli/api/output/__init__.py +0 -13
- snowflake/cli/api/output/formats.py +0 -20
- snowflake/cli/api/output/types.py +0 -118
- snowflake/cli/api/plugins/__init__.py +0 -13
- snowflake/cli/api/plugins/command/__init__.py +0 -72
- snowflake/cli/api/plugins/command/plugin_hook_specs.py +0 -21
- snowflake/cli/api/plugins/plugin_config.py +0 -32
- snowflake/cli/api/project/__init__.py +0 -13
- snowflake/cli/api/project/definition.py +0 -84
- snowflake/cli/api/project/definition_manager.py +0 -134
- snowflake/cli/api/project/errors.py +0 -56
- snowflake/cli/api/project/project_verification.py +0 -23
- snowflake/cli/api/project/schemas/__init__.py +0 -13
- snowflake/cli/api/project/schemas/entities/application_entity.py +0 -44
- snowflake/cli/api/project/schemas/entities/application_package_entity.py +0 -66
- snowflake/cli/api/project/schemas/entities/common.py +0 -78
- snowflake/cli/api/project/schemas/entities/entities.py +0 -30
- snowflake/cli/api/project/schemas/identifier_model.py +0 -49
- snowflake/cli/api/project/schemas/native_app/__init__.py +0 -13
- snowflake/cli/api/project/schemas/native_app/application.py +0 -62
- snowflake/cli/api/project/schemas/native_app/native_app.py +0 -93
- snowflake/cli/api/project/schemas/native_app/package.py +0 -78
- snowflake/cli/api/project/schemas/native_app/path_mapping.py +0 -65
- snowflake/cli/api/project/schemas/project_definition.py +0 -199
- snowflake/cli/api/project/schemas/snowpark/__init__.py +0 -13
- snowflake/cli/api/project/schemas/snowpark/argument.py +0 -28
- snowflake/cli/api/project/schemas/snowpark/callable.py +0 -69
- snowflake/cli/api/project/schemas/snowpark/snowpark.py +0 -36
- snowflake/cli/api/project/schemas/streamlit/__init__.py +0 -13
- snowflake/cli/api/project/schemas/streamlit/streamlit.py +0 -46
- snowflake/cli/api/project/schemas/template.py +0 -77
- snowflake/cli/api/project/schemas/updatable_model.py +0 -194
- snowflake/cli/api/project/util.py +0 -261
- snowflake/cli/api/rendering/__init__.py +0 -13
- snowflake/cli/api/rendering/jinja.py +0 -112
- snowflake/cli/api/rendering/project_definition_templates.py +0 -39
- snowflake/cli/api/rendering/project_templates.py +0 -98
- snowflake/cli/api/rendering/sql_templates.py +0 -60
- snowflake/cli/api/rest_api.py +0 -172
- snowflake/cli/api/sanitizers.py +0 -43
- snowflake/cli/api/secure_path.py +0 -362
- snowflake/cli/api/secure_utils.py +0 -29
- snowflake/cli/api/sql_execution.py +0 -260
- snowflake/cli/api/utils/__init__.py +0 -13
- snowflake/cli/api/utils/cursor.py +0 -34
- snowflake/cli/api/utils/definition_rendering.py +0 -383
- snowflake/cli/api/utils/dict_utils.py +0 -73
- snowflake/cli/api/utils/error_handling.py +0 -23
- snowflake/cli/api/utils/graph.py +0 -97
- snowflake/cli/api/utils/models.py +0 -63
- snowflake/cli/api/utils/naming_utils.py +0 -13
- snowflake/cli/api/utils/path_utils.py +0 -36
- snowflake/cli/api/utils/templating_functions.py +0 -144
- snowflake/cli/api/utils/types.py +0 -35
- snowflake/cli/app/__init__.py +0 -22
- snowflake/cli/app/__main__.py +0 -31
- snowflake/cli/app/api_impl/__init__.py +0 -13
- snowflake/cli/app/api_impl/plugin/__init__.py +0 -13
- snowflake/cli/app/api_impl/plugin/plugin_config_provider_impl.py +0 -66
- snowflake/cli/app/build_and_push.sh +0 -8
- snowflake/cli/app/cli_app.py +0 -243
- snowflake/cli/app/commands_registration/__init__.py +0 -33
- snowflake/cli/app/commands_registration/builtin_plugins.py +0 -54
- snowflake/cli/app/commands_registration/command_plugins_loader.py +0 -169
- snowflake/cli/app/commands_registration/commands_registration_with_callbacks.py +0 -105
- snowflake/cli/app/commands_registration/exception_logging.py +0 -26
- snowflake/cli/app/commands_registration/threadsafe.py +0 -48
- snowflake/cli/app/commands_registration/typer_registration.py +0 -153
- snowflake/cli/app/constants.py +0 -19
- snowflake/cli/app/dev/__init__.py +0 -13
- snowflake/cli/app/dev/commands_structure.py +0 -48
- snowflake/cli/app/dev/docs/__init__.py +0 -13
- snowflake/cli/app/dev/docs/commands_docs_generator.py +0 -100
- snowflake/cli/app/dev/docs/generator.py +0 -35
- snowflake/cli/app/dev/docs/project_definition_docs_generator.py +0 -58
- snowflake/cli/app/dev/docs/project_definition_generate_json_schema.py +0 -227
- snowflake/cli/app/dev/docs/template_utils.py +0 -23
- snowflake/cli/app/dev/docs/templates/definition_description.rst.jinja2 +0 -38
- snowflake/cli/app/dev/docs/templates/overview.rst.jinja2 +0 -9
- snowflake/cli/app/dev/docs/templates/usage.rst.jinja2 +0 -57
- snowflake/cli/app/dev/pycharm_remote_debug.py +0 -46
- snowflake/cli/app/loggers.py +0 -199
- snowflake/cli/app/main_typer.py +0 -62
- snowflake/cli/app/printing.py +0 -181
- snowflake/cli/app/snow_connector.py +0 -243
- snowflake/cli/app/telemetry.py +0 -189
- snowflake/cli/plugins/__init__.py +0 -13
- snowflake/cli/plugins/connection/__init__.py +0 -13
- snowflake/cli/plugins/connection/commands.py +0 -330
- snowflake/cli/plugins/connection/plugin_spec.py +0 -30
- snowflake/cli/plugins/connection/util.py +0 -179
- snowflake/cli/plugins/cortex/__init__.py +0 -13
- snowflake/cli/plugins/cortex/commands.py +0 -327
- snowflake/cli/plugins/cortex/constants.py +0 -17
- snowflake/cli/plugins/cortex/manager.py +0 -189
- snowflake/cli/plugins/cortex/plugin_spec.py +0 -30
- snowflake/cli/plugins/cortex/types.py +0 -22
- snowflake/cli/plugins/git/__init__.py +0 -13
- snowflake/cli/plugins/git/commands.py +0 -305
- snowflake/cli/plugins/git/manager.py +0 -96
- snowflake/cli/plugins/git/plugin_spec.py +0 -30
- snowflake/cli/plugins/init/__init__.py +0 -13
- snowflake/cli/plugins/init/commands.py +0 -244
- snowflake/cli/plugins/init/plugin_spec.py +0 -30
- snowflake/cli/plugins/nativeapp/__init__.py +0 -13
- snowflake/cli/plugins/nativeapp/artifacts.py +0 -742
- snowflake/cli/plugins/nativeapp/codegen/__init__.py +0 -13
- snowflake/cli/plugins/nativeapp/codegen/artifact_processor.py +0 -91
- snowflake/cli/plugins/nativeapp/codegen/compiler.py +0 -130
- snowflake/cli/plugins/nativeapp/codegen/sandbox.py +0 -306
- snowflake/cli/plugins/nativeapp/codegen/setup/native_app_setup_processor.py +0 -172
- snowflake/cli/plugins/nativeapp/codegen/setup/setup_driver.py.source +0 -56
- snowflake/cli/plugins/nativeapp/codegen/snowpark/callback_source.py.jinja +0 -181
- snowflake/cli/plugins/nativeapp/codegen/snowpark/extension_function_utils.py +0 -217
- snowflake/cli/plugins/nativeapp/codegen/snowpark/models.py +0 -61
- snowflake/cli/plugins/nativeapp/codegen/snowpark/python_processor.py +0 -528
- snowflake/cli/plugins/nativeapp/commands.py +0 -439
- snowflake/cli/plugins/nativeapp/common_flags.py +0 -44
- snowflake/cli/plugins/nativeapp/constants.py +0 -27
- snowflake/cli/plugins/nativeapp/exceptions.py +0 -122
- snowflake/cli/plugins/nativeapp/feature_flags.py +0 -24
- snowflake/cli/plugins/nativeapp/init.py +0 -345
- snowflake/cli/plugins/nativeapp/manager.py +0 -823
- snowflake/cli/plugins/nativeapp/plugin_spec.py +0 -30
- snowflake/cli/plugins/nativeapp/policy.py +0 -50
- snowflake/cli/plugins/nativeapp/project_model.py +0 -195
- snowflake/cli/plugins/nativeapp/run_processor.py +0 -389
- snowflake/cli/plugins/nativeapp/teardown_processor.py +0 -301
- snowflake/cli/plugins/nativeapp/utils.py +0 -98
- snowflake/cli/plugins/nativeapp/v2_conversions/v2_to_v1_decorator.py +0 -135
- snowflake/cli/plugins/nativeapp/version/__init__.py +0 -13
- snowflake/cli/plugins/nativeapp/version/commands.py +0 -170
- snowflake/cli/plugins/nativeapp/version/version_processor.py +0 -362
- snowflake/cli/plugins/notebook/__init__.py +0 -13
- snowflake/cli/plugins/notebook/commands.py +0 -84
- snowflake/cli/plugins/notebook/exceptions.py +0 -20
- snowflake/cli/plugins/notebook/manager.py +0 -71
- snowflake/cli/plugins/notebook/plugin_spec.py +0 -30
- snowflake/cli/plugins/notebook/types.py +0 -16
- snowflake/cli/plugins/object/__init__.py +0 -13
- snowflake/cli/plugins/object/command_aliases.py +0 -94
- snowflake/cli/plugins/object/commands.py +0 -174
- snowflake/cli/plugins/object/common.py +0 -85
- snowflake/cli/plugins/object/manager.py +0 -96
- snowflake/cli/plugins/object/plugin_spec.py +0 -30
- snowflake/cli/plugins/object_stage_deprecated/__init__.py +0 -15
- snowflake/cli/plugins/object_stage_deprecated/commands.py +0 -122
- snowflake/cli/plugins/object_stage_deprecated/plugin_spec.py +0 -32
- snowflake/cli/plugins/snowpark/__init__.py +0 -13
- snowflake/cli/plugins/snowpark/commands.py +0 -548
- snowflake/cli/plugins/snowpark/common.py +0 -307
- snowflake/cli/plugins/snowpark/manager.py +0 -109
- snowflake/cli/plugins/snowpark/models.py +0 -156
- snowflake/cli/plugins/snowpark/package/__init__.py +0 -13
- snowflake/cli/plugins/snowpark/package/anaconda_packages.py +0 -233
- snowflake/cli/plugins/snowpark/package/commands.py +0 -256
- snowflake/cli/plugins/snowpark/package/manager.py +0 -43
- snowflake/cli/plugins/snowpark/package/utils.py +0 -26
- snowflake/cli/plugins/snowpark/package_utils.py +0 -354
- snowflake/cli/plugins/snowpark/plugin_spec.py +0 -30
- snowflake/cli/plugins/snowpark/snowpark_package_paths.py +0 -65
- snowflake/cli/plugins/snowpark/snowpark_shared.py +0 -95
- snowflake/cli/plugins/snowpark/zipper.py +0 -81
- snowflake/cli/plugins/spcs/__init__.py +0 -35
- snowflake/cli/plugins/spcs/common.py +0 -99
- snowflake/cli/plugins/spcs/compute_pool/__init__.py +0 -13
- snowflake/cli/plugins/spcs/compute_pool/commands.py +0 -240
- snowflake/cli/plugins/spcs/compute_pool/manager.py +0 -121
- snowflake/cli/plugins/spcs/image_registry/__init__.py +0 -13
- snowflake/cli/plugins/spcs/image_registry/commands.py +0 -65
- snowflake/cli/plugins/spcs/image_registry/manager.py +0 -105
- snowflake/cli/plugins/spcs/image_repository/__init__.py +0 -13
- snowflake/cli/plugins/spcs/image_repository/commands.py +0 -196
- snowflake/cli/plugins/spcs/image_repository/manager.py +0 -84
- snowflake/cli/plugins/spcs/jobs/__init__.py +0 -13
- snowflake/cli/plugins/spcs/jobs/commands.py +0 -78
- snowflake/cli/plugins/spcs/jobs/manager.py +0 -53
- snowflake/cli/plugins/spcs/plugin_spec.py +0 -30
- snowflake/cli/plugins/spcs/services/__init__.py +0 -13
- snowflake/cli/plugins/spcs/services/commands.py +0 -311
- snowflake/cli/plugins/spcs/services/manager.py +0 -170
- snowflake/cli/plugins/sql/__init__.py +0 -13
- snowflake/cli/plugins/sql/commands.py +0 -83
- snowflake/cli/plugins/sql/manager.py +0 -92
- snowflake/cli/plugins/sql/plugin_spec.py +0 -30
- snowflake/cli/plugins/sql/snowsql_templating.py +0 -28
- snowflake/cli/plugins/stage/__init__.py +0 -13
- snowflake/cli/plugins/stage/commands.py +0 -261
- snowflake/cli/plugins/stage/diff.py +0 -326
- snowflake/cli/plugins/stage/manager.py +0 -544
- snowflake/cli/plugins/stage/md5.py +0 -160
- snowflake/cli/plugins/stage/plugin_spec.py +0 -30
- snowflake/cli/plugins/streamlit/__init__.py +0 -13
- snowflake/cli/plugins/streamlit/commands.py +0 -186
- snowflake/cli/plugins/streamlit/manager.py +0 -222
- snowflake/cli/plugins/streamlit/plugin_spec.py +0 -30
- snowflake/cli/plugins/workspace/__init__.py +0 -13
- snowflake/cli/plugins/workspace/commands.py +0 -35
- snowflake/cli/plugins/workspace/plugin_spec.py +0 -30
- snowflake/cli/templates/default_snowpark/.gitignore +0 -4
- snowflake/cli/templates/default_snowpark/app/__init__.py +0 -0
- snowflake/cli/templates/default_snowpark/app/common.py +0 -2
- snowflake/cli/templates/default_snowpark/app/functions.py +0 -15
- snowflake/cli/templates/default_snowpark/app/procedures.py +0 -22
- snowflake/cli/templates/default_snowpark/requirements.txt +0 -1
- snowflake/cli/templates/default_snowpark/snowflake.yml +0 -23
- snowflake/cli/templates/default_streamlit/.gitignore +0 -4
- snowflake/cli/templates/default_streamlit/common/hello.py +0 -2
- snowflake/cli/templates/default_streamlit/environment.yml +0 -6
- snowflake/cli/templates/default_streamlit/pages/my_page.py +0 -3
- snowflake/cli/templates/default_streamlit/snowflake.yml +0 -10
- snowflake/cli/templates/default_streamlit/streamlit_app.py +0 -4
- snowflake_cli_labs-2.8.0rc1.dist-info/RECORD +0 -240
- snowflake_cli_labs-2.8.0rc1.dist-info/entry_points.txt +0 -2
- {snowflake_cli_labs-2.8.0rc1.dist-info → snowflake_cli_labs-2.8.2.dist-info}/WHEEL +0 -0
- {snowflake_cli_labs-2.8.0rc1.dist-info → snowflake_cli_labs-2.8.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,544 +0,0 @@
|
|
|
1
|
-
# Copyright (c) 2024 Snowflake Inc.
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
|
|
15
|
-
from __future__ import annotations
|
|
16
|
-
|
|
17
|
-
import fnmatch
|
|
18
|
-
import glob
|
|
19
|
-
import logging
|
|
20
|
-
import re
|
|
21
|
-
import sys
|
|
22
|
-
from contextlib import nullcontext
|
|
23
|
-
from dataclasses import dataclass
|
|
24
|
-
from os import path
|
|
25
|
-
from pathlib import Path
|
|
26
|
-
from textwrap import dedent
|
|
27
|
-
from typing import Dict, List, Optional, Union
|
|
28
|
-
|
|
29
|
-
from click import ClickException
|
|
30
|
-
from snowflake.cli.api.commands.flags import (
|
|
31
|
-
OnErrorType,
|
|
32
|
-
Variable,
|
|
33
|
-
parse_key_value_variables,
|
|
34
|
-
)
|
|
35
|
-
from snowflake.cli.api.console import cli_console
|
|
36
|
-
from snowflake.cli.api.constants import PYTHON_3_12
|
|
37
|
-
from snowflake.cli.api.identifiers import FQN
|
|
38
|
-
from snowflake.cli.api.project.util import to_string_literal
|
|
39
|
-
from snowflake.cli.api.secure_path import SecurePath
|
|
40
|
-
from snowflake.cli.api.sql_execution import SqlExecutionMixin
|
|
41
|
-
from snowflake.cli.api.utils.path_utils import path_resolver
|
|
42
|
-
from snowflake.cli.plugins.snowpark.package_utils import parse_requirements
|
|
43
|
-
from snowflake.connector import DictCursor, ProgrammingError
|
|
44
|
-
from snowflake.connector.cursor import SnowflakeCursor
|
|
45
|
-
|
|
46
|
-
if sys.version_info < PYTHON_3_12:
|
|
47
|
-
# Because Snowpark works only below 3.12 and to use @sproc Session must be imported here.
|
|
48
|
-
from snowflake.snowpark import Session
|
|
49
|
-
|
|
50
|
-
log = logging.getLogger(__name__)
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
# Characters a file URI may consist of to be sent to Snowflake unquoted;
# `StageManager._to_uri` wraps any URI that does not fully match in a string literal.
UNQUOTED_FILE_URI_REGEX = r"[\w/*?\-.=&{}$#[\]\"\\!@%^+:]+"
# Paths starting with this prefix are handled as user-stage paths
# (see `StageManager._stage_path_part_factory`).
USER_STAGE_PREFIX = "@~"
# File suffixes kept by `StageManager._filter_supported_files`.
EXECUTE_SUPPORTED_FILES_FORMATS = (
    ".sql",
    ".py",
)  # tuple to preserve order but it's a set
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
@dataclass
class StagePathParts:
    """
    Base container for the components of a stage path.

    `path`, `add_stage_prefix` and `get_directory_from_file_path` raise
    NotImplementedError here and are expected to be provided by subclasses.
    """

    directory: str
    stage: str
    stage_name: str
    is_directory: bool

    @staticmethod
    def get_directory(stage_path: str) -> str:
        """Return every component after the first one, joined with `/`."""
        components = Path(stage_path).parts
        return "/".join(components[1:])

    @property
    def path(self) -> str:
        """Path used to build standard stage paths; subclass responsibility."""
        raise NotImplementedError

    def add_stage_prefix(self, file_path: str) -> str:
        """Prefix `file_path` with the stage; subclass responsibility."""
        raise NotImplementedError

    def get_directory_from_file_path(self, file_path: str) -> List[str]:
        """Return the directory components of `file_path`; subclass responsibility."""
        raise NotImplementedError

    def get_full_stage_path(self, path: str):
        """Prepend the prefix of this stage's FQN (when it has one) to `path`."""
        prefix = FQN.from_stage(self.stage).prefix
        if not prefix:
            return path
        return prefix + "." + path

    def get_standard_stage_path(self) -> str:
        """Return `@<path>`, with a trailing `/` appended for directory paths."""
        path = self.path
        if self.is_directory and not path.endswith("/"):
            return f"@{path}/"
        return f"@{path}"

    def get_standard_stage_directory_path(self) -> str:
        """Return the standard stage path, always ending with `/`."""
        path = self.get_standard_stage_path()
        return path if path.endswith("/") else path + "/"
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
@dataclass
class DefaultStagePathParts(StagePathParts):
    """
    Path parts of a named stage.

    For `@db.schema.stage/dir`:
        stage      -> @db.schema.stage
        stage_name -> stage
        directory  -> dir
    For `@stage/dir`:
        stage      -> @stage
        stage_name -> stage
        directory  -> dir
    """

    def __init__(self, stage_path: str):
        self.directory = self.get_directory(stage_path)
        self.stage = StageManager.get_stage_from_path(stage_path)
        # The stage name is the last dot-separated piece, without a leading "@".
        last_piece = self.stage.split(".")[-1]
        self.stage_name = last_piece[1:] if last_piece.startswith("@") else last_piece
        self.is_directory = stage_path.endswith("/")

    @property
    def path(self) -> str:
        """Return `<stage_name>/<directory>` without doubling the separator."""
        separator = "" if self.stage_name.endswith("/") else "/"
        return f"{self.stage_name}{separator}{self.directory}"

    def add_stage_prefix(self, file_path: str) -> str:
        """Replace the first component of `file_path` with this stage."""
        stage_root = Path(self.stage).parts[0]
        remainder = "/".join(Path(file_path).parts[1:])
        return f"{stage_root}/{remainder}"

    def get_directory_from_file_path(self, file_path: str) -> List[str]:
        """
        Return the directory components of `file_path` that follow the first
        component and this object's own directory; the file name is dropped.
        """
        skipped = 1 + len(Path(self.directory).parts)
        return list(Path(file_path).parts[skipped:-1])
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
@dataclass
class UserStagePathParts(StagePathParts):
    """
    Path parts of the user stage.

    For `@~/dir`:
        stage      -> @~
        stage_name -> @~
        directory  -> dir
    """

    def __init__(self, stage_path: str):
        self.directory = self.get_directory(stage_path)
        self.stage = self.stage_name = "@~"
        self.is_directory = stage_path.endswith("/")

    @property
    def path(self) -> str:
        """Return the directory only; no stage component is included."""
        return str(self.directory)

    def add_stage_prefix(self, file_path: str) -> str:
        """Return `file_path` placed under the user stage."""
        return f"{self.stage}/{file_path}"

    def get_directory_from_file_path(self, file_path: str) -> List[str]:
        """
        Return the directory components of `file_path` that follow this
        object's own directory; the file name is dropped.
        """
        skipped = len(Path(self.directory).parts)
        return list(Path(file_path).parts[skipped:-1])
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
class StageManager(SqlExecutionMixin):
|
|
166
|
-
def __init__(self):
    """Run the parent initialiser and start without a Python execution procedure."""
    super().__init__()
    # Set by `execute` (via `_bootstrap_snowpark_execution_environment`) when a .py file is run.
    self._python_exe_procedure = None
|
|
169
|
-
|
|
170
|
-
@staticmethod
|
|
171
|
-
def get_standard_stage_prefix(name: str) -> str:
|
|
172
|
-
# Handle embedded stages
|
|
173
|
-
if name.startswith("snow://") or name.startswith("@"):
|
|
174
|
-
return name
|
|
175
|
-
|
|
176
|
-
return f"@{name}"
|
|
177
|
-
|
|
178
|
-
@staticmethod
|
|
179
|
-
def get_stage_from_path(path: str):
|
|
180
|
-
"""
|
|
181
|
-
Returns stage name from potential path on stage. For example
|
|
182
|
-
db.schema.stage/foo/bar -> db.schema.stage
|
|
183
|
-
"""
|
|
184
|
-
return Path(path).parts[0]
|
|
185
|
-
|
|
186
|
-
@staticmethod
|
|
187
|
-
def quote_stage_name(name: str) -> str:
|
|
188
|
-
if name.startswith("'") and name.endswith("'"):
|
|
189
|
-
return name # already quoted
|
|
190
|
-
|
|
191
|
-
standard_name = StageManager.get_standard_stage_prefix(name)
|
|
192
|
-
if standard_name.startswith("@") and not re.fullmatch(
|
|
193
|
-
r"@([\w./$])+", standard_name
|
|
194
|
-
):
|
|
195
|
-
return to_string_literal(standard_name)
|
|
196
|
-
|
|
197
|
-
return standard_name
|
|
198
|
-
|
|
199
|
-
def _to_uri(self, local_path: str):
    """
    Build the `file://` URI for `local_path`; the URI is returned as a string
    literal when it does not fully match UNQUOTED_FILE_URI_REGEX.
    """
    uri = f"file://{local_path}"
    can_stay_unquoted = re.fullmatch(UNQUOTED_FILE_URI_REGEX, uri) is not None
    return uri if can_stay_unquoted else to_string_literal(uri)
|
|
204
|
-
|
|
205
|
-
def list_files(self, stage_name: str, pattern: str | None = None) -> DictCursor:
    """
    Run `ls` on the given stage and return the resulting DictCursor.

    When `pattern` is given it is added to the query as a `pattern = '...'` clause.
    """
    prefixed_stage = self.get_standard_stage_prefix(stage_name)
    quoted_stage = self.quote_stage_name(prefixed_stage)
    pattern_clause = "" if pattern is None else f" pattern = '{pattern}'"
    return self._execute_query(
        f"ls {quoted_stage}{pattern_clause}", cursor_class=DictCursor
    )
|
|
211
|
-
|
|
212
|
-
@staticmethod
def _assure_is_existing_directory(path: Path) -> None:
    """Create `path` (including parents) if it is missing, then assert it is a directory."""
    target = SecurePath(path)
    if target.exists():
        target.assert_is_directory()
        return
    target.mkdir(parents=True)
    target.assert_is_directory()
|
|
218
|
-
|
|
219
|
-
def get(
    self, stage_path: str, dest_path: Path, parallel: int = 4
) -> SnowflakeCursor:
    """
    Run a `get` query that downloads `stage_path` into the local directory
    `dest_path`, creating that directory first when it does not exist.
    """
    stage_path = self.get_standard_stage_prefix(stage_path)
    self._assure_is_existing_directory(dest_path)
    source = self.quote_stage_name(stage_path)
    # The trailing slash makes the destination URI point at a directory.
    target = self._to_uri(f"{dest_path}/")
    return self._execute_query(f"get {source} {target} parallel={parallel}")
|
|
228
|
-
|
|
229
|
-
def get_recursive(
    self, stage_path: str, dest_path: Path, parallel: int = 4
) -> List[SnowflakeCursor]:
    """
    Run one `get` query per file listed under `stage_path`, placing each file
    in a sub-directory of `dest_path` built from the file's directory parts.
    Returns the cursors of all executed queries.
    """
    parts = self._stage_path_part_factory(stage_path)

    cursors = []
    for stage_file in self.iter_stage(stage_path):
        # Recreate the file's directory components below dest_path.
        local_dir = dest_path
        for segment in parts.get_directory_from_file_path(stage_file):
            local_dir = local_dir / segment
        self._assure_is_existing_directory(local_dir)

        source = self.quote_stage_name(parts.add_stage_prefix(stage_file))
        target = self._to_uri(f"{local_dir}/")
        cursors.append(
            self._execute_query(f"get {source} {target} parallel={parallel}")
        )

    return cursors
|
|
247
|
-
|
|
248
|
-
def put(
    self,
    local_path: Union[str, Path],
    stage_path: str,
    parallel: int = 4,
    overwrite: bool = False,
    role: Optional[str] = None,
    auto_compress: bool = False,
) -> SnowflakeCursor:
    """
    Upload a file from the user's system to a path on a Snowflake stage
    (the stage path includes the fully qualified stage name).
    When a role is given, the upload runs with that role temporarily in use,
    and the original role is restored for the commands that follow.
    """
    role_scope = self.use_role(role) if role else nullcontext()
    with role_scope:
        stage_path = self.get_standard_stage_prefix(stage_path)
        local_resolved_path = path_resolver(str(local_path))
        log.info("Uploading %s to %s", local_resolved_path, stage_path)
        source = self._to_uri(local_resolved_path)
        target = self.quote_stage_name(stage_path)
        options = f"auto_compress={str(auto_compress).lower()} parallel={parallel} overwrite={overwrite}"
        return self._execute_query(f"put {source} {target} {options}")
|
|
272
|
-
|
|
273
|
-
def copy_files(self, source_path: str, destination_path: str) -> SnowflakeCursor:
    """
    Run a `copy files into <destination> from <source>` query.

    Raises ClickException when the destination resolves to a user stage.
    """
    source_parts = self._stage_path_part_factory(source_path)
    destination_parts = self._stage_path_part_factory(destination_path)

    if isinstance(destination_parts, UserStagePathParts):
        raise ClickException(
            "Destination path cannot be a user stage. Please provide a named stage."
        )

    source = source_parts.get_standard_stage_path()
    # The destination is always passed in its directory form.
    destination = destination_parts.get_standard_stage_directory_path()
    log.info("Copying files from %s to %s", source, destination)
    return self._execute_query(f"copy files into {destination} from {source}")
|
|
287
|
-
|
|
288
|
-
def remove(
|
|
289
|
-
self, stage_name: str, path: str, role: Optional[str] = None
|
|
290
|
-
) -> SnowflakeCursor:
|
|
291
|
-
"""
|
|
292
|
-
This method will take a file path that exists on a Snowflake stage,
|
|
293
|
-
and remove it from the stage.
|
|
294
|
-
If provided with a role, then temporarily use this role to perform the operation above,
|
|
295
|
-
and switch back to the original role for the next commands to run.
|
|
296
|
-
"""
|
|
297
|
-
with self.use_role(role) if role else nullcontext():
|
|
298
|
-
stage_name = self.get_standard_stage_prefix(stage_name)
|
|
299
|
-
path = path if path.startswith("/") else "/" + path
|
|
300
|
-
quoted_stage_name = self.quote_stage_name(f"{stage_name}{path}")
|
|
301
|
-
return self._execute_query(f"remove {quoted_stage_name}")
|
|
302
|
-
|
|
303
|
-
def create(self, stage_name: str, comment: Optional[str] = None) -> SnowflakeCursor:
|
|
304
|
-
query = f"create stage if not exists {stage_name}"
|
|
305
|
-
if comment:
|
|
306
|
-
query += f" comment='{comment}'"
|
|
307
|
-
return self._execute_query(query)
|
|
308
|
-
|
|
309
|
-
def iter_stage(self, stage_path: str):
|
|
310
|
-
for file in self.list_files(stage_path).fetchall():
|
|
311
|
-
yield file["name"]
|
|
312
|
-
|
|
313
|
-
def execute(
    self,
    stage_path: str,
    on_error: OnErrorType,
    variables: Optional[List[str]] = None,
):
    """
    Execute the supported files on a stage that are selected by `stage_path`.

    `.py` files go through `_execute_python`, all other files through
    `_call_execute_immediate`. Returns the list of per-file results in
    execution order. Raises ClickException when no file matches.
    """
    stage_path_parts = self._stage_path_part_factory(stage_path)
    files_on_stage = self._get_files_list_from_stage(stage_path_parts)

    # Keep only the files selected by stage_path.
    matching_files = self._filter_files_list(stage_path_parts, files_on_stage)
    if not matching_files:
        raise ClickException(f"No files matched pattern '{stage_path}'")

    # Order by (directory, file name); path.split yields exactly that pair.
    ordered_files = sorted(matching_files, key=path.split)

    parsed_variables = parse_key_value_variables(variables)
    sql_variables = self._parse_execute_variables(parsed_variables)
    python_variables = {str(v.key): v.value for v in parsed_variables}

    # The Python execution environment is only prepared when a .py file will run.
    if any(name.endswith(".py") for name in ordered_files):
        self._python_exe_procedure = self._bootstrap_snowpark_execution_environment(
            stage_path_parts
        )

    results = []
    for file_path in ordered_files:
        file_stage_path = stage_path_parts.add_stage_prefix(file_path)
        if not file_path.endswith(".py"):
            outcome = self._call_execute_immediate(
                file_stage_path=file_stage_path,
                variables=sql_variables,
                on_error=on_error,
            )
        else:
            outcome = self._execute_python(
                file_stage_path=file_stage_path,
                on_error=on_error,
                variables=python_variables,
            )
        results.append(outcome)

    return results
|
|
360
|
-
|
|
361
|
-
def _get_files_list_from_stage(
|
|
362
|
-
self, stage_path_parts: StagePathParts, pattern: str | None = None
|
|
363
|
-
) -> List[str]:
|
|
364
|
-
files_list_result = self.list_files(
|
|
365
|
-
stage_path_parts.stage, pattern=pattern
|
|
366
|
-
).fetchall()
|
|
367
|
-
|
|
368
|
-
if not files_list_result:
|
|
369
|
-
raise ClickException(f"No files found on stage '{stage_path_parts.stage}'")
|
|
370
|
-
|
|
371
|
-
return [f["name"] for f in files_list_result]
|
|
372
|
-
|
|
373
|
-
def _filter_files_list(
    self, stage_path_parts: StagePathParts, files_on_stage: List[str]
) -> List[str]:
    """
    Narrow `files_on_stage` down to the supported files selected by the path.

    Without a directory part every supported file is returned. A path equal
    to a listed file selects that single file (ClickException if its
    extension is unsupported). Otherwise the lower-cased path is used as an
    fnmatch pattern, with `*` appended when it has no glob characters.
    """
    if not stage_path_parts.directory:
        return self._filter_supported_files(files_on_stage)

    stage_path = stage_path_parts.path.lower()

    # The path being present in the listing means one exact file was requested.
    if stage_path in files_on_stage:
        exact_match = self._filter_supported_files([stage_path])
        if not exact_match:
            raise ClickException(
                f"Invalid file extension, only {', '.join(EXECUTE_SUPPORTED_FILES_FORMATS)} files are allowed."
            )
        return exact_match

    # Glob patterns are used as given; anything else is treated as a directory prefix.
    glob_pattern = stage_path if glob.has_magic(stage_path) else f"{stage_path}*"
    return self._filter_supported_files(fnmatch.filter(files_on_stage, glob_pattern))
|
|
397
|
-
|
|
398
|
-
@staticmethod
def _filter_supported_files(files: List[str]) -> List[str]:
    """Return the entries of `files` whose suffix is in EXECUTE_SUPPORTED_FILES_FORMATS."""
    supported = []
    for candidate in files:
        if Path(candidate).suffix in EXECUTE_SUPPORTED_FILES_FORMATS:
            supported.append(candidate)
    return supported
|
|
401
|
-
|
|
402
|
-
@staticmethod
|
|
403
|
-
def _parse_execute_variables(variables: List[Variable]) -> Optional[str]:
|
|
404
|
-
if not variables:
|
|
405
|
-
return None
|
|
406
|
-
query_parameters = [f"{v.key}=>{v.value}" for v in variables]
|
|
407
|
-
return f" using ({', '.join(query_parameters)})"
|
|
408
|
-
|
|
409
|
-
@staticmethod
def _success_result(file: str):
    """Print a `SUCCESS - <file>` line to the console and return the file's result row."""
    result = {"File": file, "Status": "SUCCESS", "Error": None}
    cli_console.warning(f"SUCCESS - {file}")
    return result
|
|
413
|
-
|
|
414
|
-
@staticmethod
def _error_result(file: str, msg: str):
    """Print a `FAILURE - <file>` line to the console and return the file's result row with `msg` as error."""
    result = {"File": file, "Status": "FAILURE", "Error": msg}
    cli_console.warning(f"FAILURE - {file}")
    return result
|
|
418
|
-
|
|
419
|
-
@staticmethod
def _handle_execution_exception(on_error: OnErrorType, exception: Exception):
    """Re-raise `exception` when `on_error` is OnErrorType.BREAK; otherwise do nothing."""
    if on_error != OnErrorType.BREAK:
        return
    raise exception
|
|
423
|
-
|
|
424
|
-
def _call_execute_immediate(
    self,
    file_stage_path: str,
    variables: Optional[str],
    on_error: OnErrorType,
) -> Dict:
    """Run `execute immediate from <file_stage_path>` and report the outcome.

    `variables`, when given, is appended verbatim to the statement. A
    ProgrammingError is passed to `_handle_execution_exception` (which
    re-raises it for OnErrorType.BREAK) and otherwise turned into a failure
    row; a clean run yields a success row.
    """
    # An empty or missing suffix leaves the statement unchanged.
    statement = f"execute immediate from {file_stage_path}" + (variables or "")
    try:
        self._execute_query(statement)
        return StageManager._success_result(file=file_stage_path)
    except ProgrammingError as err:
        StageManager._handle_execution_exception(on_error=on_error, exception=err)
        return StageManager._error_result(file=file_stage_path, msg=err.msg)
|
|
439
|
-
|
|
440
|
-
@staticmethod
def _stage_path_part_factory(stage_path: str) -> StagePathParts:
    """Build the StagePathParts variant matching `stage_path`.

    The path is first normalized with `get_standard_stage_prefix`; paths
    starting with USER_STAGE_PREFIX get UserStagePathParts, everything else
    DefaultStagePathParts.
    """
    normalized = StageManager.get_standard_stage_prefix(stage_path)
    parts_cls = (
        UserStagePathParts
        if normalized.startswith(USER_STAGE_PREFIX)
        else DefaultStagePathParts
    )
    return parts_cls(normalized)
|
|
446
|
-
|
|
447
|
-
def _check_for_requirements_file(
    self, stage_path_parts: StagePathParts
) -> List[str]:
    """Return package names from the closest requirements.txt on stage.

    Candidate locations are the directory given by `stage_path_parts.path`
    and each of its parents, deepest first. The first candidate present on
    stage is downloaded to a temporary directory and parsed. An empty list
    is returned when the stage has no matching requirements file.
    """
    staged_requirements = self._get_files_list_from_stage(
        stage_path_parts, pattern=r".*requirements\.txt$"
    )
    if not staged_requirements:
        return []

    # Plain string handling instead of os.path / pathlib keeps "/" as the
    # separator on Windows as well.
    segments = stage_path_parts.path.split("/")
    candidates = [
        "/".join([*segments[:depth], "requirements.txt"])
        for depth in range(len(segments), 0, -1)
    ]

    # Deepest candidate wins; None means nothing on stage applies to this path.
    closest = next(
        (candidate for candidate in candidates if candidate in staged_requirements),
        None,
    )
    if closest is None:
        return []

    with SecurePath.temporary_directory() as tmp_dir:
        self.get(stage_path_parts.get_full_stage_path(closest), tmp_dir.path)
        parsed = parse_requirements(
            requirements_file=tmp_dir / "requirements.txt"
        )

    return [req.package_name for req in parsed]
|
|
490
|
-
|
|
491
|
-
def _bootstrap_snowpark_execution_environment(
    self, stage_path_parts: StagePathParts
):
    """Prepares Snowpark session for executing Python code remotely.

    Adds the Snowpark packages plus any packages found by
    `_check_for_requirements_file` to the session, then registers and
    returns a non-permanent stored procedure that executes a staged Python
    file.

    Raises ClickException when running on Python >= 3.12.
    """
    if sys.version_info >= PYTHON_3_12:
        raise ClickException(
            f"Executing python files is not supported in Python >= 3.12. Current version: {sys.version}"
        )

    from snowflake.snowpark.functions import sproc

    self.snowpark_session.add_packages("snowflake-snowpark-python")
    self.snowpark_session.add_packages("snowflake.core")
    requirements = self._check_for_requirements_file(stage_path_parts)
    self.snowpark_session.add_packages(*requirements)

    @sproc(is_permanent=False)
    def _python_execution_procedure(
        _: Session, file_path: str, variables: Dict | None = None
    ) -> None:
        """Snowpark session-scoped stored procedure to execute content of provided python file."""
        import json

        from snowflake.snowpark.files import SnowflakeFile

        with SnowflakeFile.open(file_path, require_scoped_url=False) as f:
            file_content: str = f.read()  # type: ignore

        # json.dumps(None) renders as `null`, which is not a Python name and
        # would make the generated prelude fail; fall back to an empty mapping.
        env_literal = json.dumps(variables or {})
        wrapper = dedent(
            f"""\
            import os
            os.environ.update({env_literal})
            """
        )

        # NOTE(security): the staged file content is executed verbatim, so
        # only files from trusted stages should be run through this procedure.
        exec(wrapper + file_content)

    return _python_execution_procedure
|
|
529
|
-
|
|
530
|
-
def _execute_python(
    self, file_stage_path: str, on_error: OnErrorType, variables: Dict
):
    """
    Executes Python file from stage using a Snowpark temporary procedure.

    The standardized stage path and `variables` are handed to the
    procedure stored in `self._python_exe_procedure`. A SnowparkSQLException
    is passed to `_handle_execution_exception` (which re-raises it for
    OnErrorType.BREAK) and otherwise reported as a failure row.
    """
    from snowflake.snowpark.exceptions import SnowparkSQLException

    try:
        standardized_path = self.get_standard_stage_prefix(file_stage_path)
        self._python_exe_procedure(standardized_path, variables)  # type: ignore
        return StageManager._success_result(file=file_stage_path)
    except SnowparkSQLException as err:
        StageManager._handle_execution_exception(on_error=on_error, exception=err)
        return StageManager._error_result(file=file_stage_path, msg=err.message)
|
|
@@ -1,160 +0,0 @@
|
|
|
1
|
-
# Copyright (c) 2024 Snowflake Inc.
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
|
|
15
|
-
from __future__ import annotations
|
|
16
|
-
|
|
17
|
-
import hashlib
|
|
18
|
-
import logging
|
|
19
|
-
import math
|
|
20
|
-
import os.path
|
|
21
|
-
import re
|
|
22
|
-
from pathlib import Path
|
|
23
|
-
from typing import List, Tuple
|
|
24
|
-
|
|
25
|
-
from click.exceptions import ClickException
|
|
26
|
-
from snowflake.cli.api.secure_path import UNLIMITED, SecurePath
|
|
27
|
-
from snowflake.connector.constants import S3_CHUNK_SIZE, S3_MAX_PARTS, S3_MIN_PART_SIZE
|
|
28
|
-
|
|
29
|
-
# Number of bytes in one mebibyte.
ONE_MEGABYTE = 1024 * 1024
# Upper bound on the size of a single read while hashing a file.
READ_BUFFER_BYTES = 1024 * 64
# Exactly 32 hexadecimal digits, either case.
MD5SUM_REGEX = r"^[A-Fa-f0-9]{32}$"
# 32 hexadecimal digits, a dash, then a decimal part count.
MULTIPART_MD5SUM_REGEX = r"^([A-Fa-f0-9]{32})-(\d+)$"

# Module-level logger.
log = logging.getLogger(__name__)
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
class UnknownMD5FormatError(ClickException):
    """Error carrying the checksum string whose md5 format was not recognized."""

    def __init__(self, md5: str):
        message = f"Unknown md5 format: {md5}"
        super().__init__(message)
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
def is_md5sum(checksum: str) -> bool:
    """
    Tell whether `checksum` has the shape of a plain md5sum,
    i.e. whether it matches MD5SUM_REGEX.
    """
    matched = re.match(MD5SUM_REGEX, checksum)
    return bool(matched)
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
def parse_multipart_md5sum(checksum: str) -> Tuple[str, int] | None:
    """
    Split a multi-part md5sum ("<md5>-<n>") into the tuple (md5, n).
    Returns None when `checksum` does not match MULTIPART_MD5SUM_REGEX.
    """
    found = re.match(MULTIPART_MD5SUM_REGEX, checksum)
    if not found:
        return None
    digest, part_count = found.group(1), found.group(2)
    return (digest, int(part_count))
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
def compute_md5sum(file: Path, chunk_size: int | None = None) -> str:
    """
    Returns a hexadecimal checksum for the file located at the given path.

    Without chunk_size (None or 0) the result is the plain md5 of the file.
    With a chunk_size, the file is hashed in chunks of that many bytes and
    the result is "<md5 of the concatenated chunk digests>-<chunk count>".
    An empty file always yields the plain md5 of no content.

    Raises ValueError if the path is not a regular file, or if chunk_size
    is negative for a non-empty file.
    """
    if not file.is_file():
        raise ValueError(
            "The provided file does not exist or not a (symlink to a) regular file"
        )

    # If the stage uses SNOWFLAKE_FULL encryption, this will fail to provide
    # a matching md5sum, even when the underlying file is the same, as we do
    # not have access to the encrypted file under checksum.

    file_size = os.path.getsize(file)
    if file_size == 0:
        # simple md5 with no content
        return hashlib.md5().hexdigest()

    if chunk_size is not None and chunk_size < 0:
        # A negative size would turn every read into "read the rest" while
        # the counters below move away from zero, so the loop would never end.
        raise ValueError(f"chunk_size must not be negative, got {chunk_size}")

    with SecurePath(file).open("rb", read_file_limit_mb=UNLIMITED) as f:
        md5s: List[hashlib._Hash] = []  # noqa: SLF001
        hasher = hashlib.md5()

        remains = file_size
        remains_in_chunk: int = min(chunk_size, remains) if chunk_size else remains
        while remains > 0:
            # Read in bounded pieces so large chunks are never held in memory.
            sz = min(READ_BUFFER_BYTES, remains_in_chunk)
            buf = f.read(sz)
            hasher.update(buf)
            remains_in_chunk -= sz
            remains -= sz
            if remains_in_chunk == 0:
                if not chunk_size:
                    # simple md5; only one chunk processed
                    return hasher.hexdigest()
                else:
                    # push the hash of this chunk + reset
                    md5s.append(hasher)
                    hasher = hashlib.md5()
                    remains_in_chunk = min(chunk_size, remains)

        # multi-part hash (e.g. aws)
        digests = b"".join(m.digest() for m in md5s)
        digests_md5 = hashlib.md5(digests)
        return f"{digests_md5.hexdigest()}-{len(md5s)}"
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
def file_matches_md5sum(local_file: Path, remote_md5: str | None) -> bool:
    """
    Try a few different md5sums to determine if a local file is identical
    to a file that has a given remote md5sum.

    Handles the multi-part md5sums generated by e.g. AWS S3, using values
    from the python connector to make educated guesses on chunk size.

    Assumes that upload time would dominate local hashing time.

    Returns False when no remote md5sum is available or no attempted
    checksum matches. Raises UnknownMD5FormatError when remote_md5 is
    neither a plain nor a multi-part md5sum.
    """
    if not remote_md5:
        # no hash available
        return False

    # Locally computed digests are lowercase hex, while the accepted formats
    # also allow uppercase digits; compare on a normalized value.
    expected = remote_md5.lower()

    if is_md5sum(remote_md5):
        # regular hash
        return compute_md5sum(local_file) == expected

    if md5_and_chunks := parse_multipart_md5sum(remote_md5):
        # multi-part hash (e.g. aws)
        (_, num_chunks) = md5_and_chunks
        if num_chunks < 1:
            # "<md5>-0" cannot describe any upload, and a zero part count
            # would divide by zero in the chunk size estimation below.
            log.debug("multi-part md5: %s != %s", remote_md5, local_file)
            return False

        file_size = os.path.getsize(local_file)

        # If this file uses the maximum number of parts supported by the cloud backend,
        # the chunk size is likely not a clean multiple of a megabyte. Try reverse engineering
        # from the file size first, then fall back to the usual detection method.
        # At time of writing this logic would trigger for files >= 80GiB (python connector)
        if num_chunks == S3_MAX_PARTS:
            chunk_size = max(math.ceil(file_size / S3_MAX_PARTS), S3_MIN_PART_SIZE)
            if compute_md5sum(local_file, chunk_size) == expected:
                return True

        # Estimates the chunk size the multi-part file must have been uploaded with
        # by trying chunk sizes that give the most evenly-sized chunks.
        #
        # First we'll try the chunk size that's a multiple of S3_CHUNK_SIZE (8mb) from
        # the python connector that results in num_chunks, then we'll do the same with
        # a smaller granularity (1mb) that is used by default in some AWS multi-part
        # upload implementations.
        #
        # We're working backwards from num_chunks here because it's the only value we know.
        for chunk_size_alignment in [S3_CHUNK_SIZE, ONE_MEGABYTE]:
            # +1 because we need at least one chunk when file_size < num_chunks * chunk_size_alignment
            # -1 because we don't want to add an extra chunk when file_size is an exact multiple of num_chunks * chunk_size_alignment
            multiplier = 1 + ((file_size - 1) // (num_chunks * chunk_size_alignment))
            chunk_size = multiplier * chunk_size_alignment
            if compute_md5sum(local_file, chunk_size) == expected:
                return True

        # we were unable to figure out the chunk size, or the files are different
        log.debug("multi-part md5: %s != %s", remote_md5, local_file)
        return False

    raise UnknownMD5FormatError(remote_md5)
|