snowflake-ml-python 1.4.1__py3-none-any.whl → 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +72 -31
- snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
- snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
- snowflake/ml/_internal/exceptions/error_codes.py +3 -0
- snowflake/ml/_internal/lineage/data_source.py +10 -0
- snowflake/ml/_internal/lineage/lineage_utils.py +95 -0
- snowflake/ml/_internal/telemetry.py +1 -0
- snowflake/ml/_internal/utils/identifier.py +1 -1
- snowflake/ml/_internal/utils/sql_identifier.py +14 -1
- snowflake/ml/dataset/__init__.py +11 -0
- snowflake/ml/dataset/dataset.py +455 -129
- snowflake/ml/dataset/dataset_factory.py +53 -0
- snowflake/ml/dataset/dataset_metadata.py +103 -0
- snowflake/ml/dataset/dataset_reader.py +199 -0
- snowflake/ml/feature_store/__init__.py +6 -0
- snowflake/ml/feature_store/access_manager.py +279 -0
- snowflake/ml/feature_store/feature_store.py +544 -358
- snowflake/ml/feature_store/feature_view.py +55 -16
- snowflake/ml/fileset/embedded_stage_fs.py +149 -0
- snowflake/ml/fileset/sfcfs.py +0 -4
- snowflake/ml/fileset/snowfs.py +160 -0
- snowflake/ml/fileset/stage_fs.py +25 -10
- snowflake/ml/model/__init__.py +2 -2
- snowflake/ml/model/_api.py +16 -1
- snowflake/ml/model/_client/model/model_impl.py +65 -31
- snowflake/ml/model/_client/model/model_version_impl.py +159 -2
- snowflake/ml/model/_client/ops/metadata_ops.py +27 -4
- snowflake/ml/model/_client/ops/model_ops.py +268 -83
- snowflake/ml/model/_client/sql/_base.py +34 -0
- snowflake/ml/model/_client/sql/model.py +42 -47
- snowflake/ml/model/_client/sql/model_version.py +164 -39
- snowflake/ml/model/_client/sql/stage.py +6 -32
- snowflake/ml/model/_client/sql/tag.py +32 -56
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
- snowflake/ml/model/_model_composer/model_composer.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +22 -0
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +11 -0
- snowflake/ml/model/_packager/model_env/model_env.py +41 -0
- snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +1 -5
- snowflake/ml/model/_packager/model_packager.py +0 -3
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
- snowflake/ml/modeling/_internal/model_trainer.py +7 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +50 -21
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +24 -2
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +340 -17
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -52
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -52
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -52
- snowflake/ml/modeling/cluster/birch.py +53 -52
- snowflake/ml/modeling/cluster/bisecting_k_means.py +53 -52
- snowflake/ml/modeling/cluster/dbscan.py +51 -52
- snowflake/ml/modeling/cluster/feature_agglomeration.py +53 -52
- snowflake/ml/modeling/cluster/k_means.py +53 -52
- snowflake/ml/modeling/cluster/mean_shift.py +51 -52
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +53 -52
- snowflake/ml/modeling/cluster/optics.py +51 -52
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -52
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -52
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -52
- snowflake/ml/modeling/compose/column_transformer.py +53 -52
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -52
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -52
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -52
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -52
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -52
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -52
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -52
- snowflake/ml/modeling/covariance/oas.py +51 -52
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -52
- snowflake/ml/modeling/decomposition/dictionary_learning.py +53 -52
- snowflake/ml/modeling/decomposition/factor_analysis.py +53 -52
- snowflake/ml/modeling/decomposition/fast_ica.py +53 -52
- snowflake/ml/modeling/decomposition/incremental_pca.py +53 -52
- snowflake/ml/modeling/decomposition/kernel_pca.py +53 -52
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +53 -52
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +53 -52
- snowflake/ml/modeling/decomposition/pca.py +53 -52
- snowflake/ml/modeling/decomposition/sparse_pca.py +53 -52
- snowflake/ml/modeling/decomposition/truncated_svd.py +53 -52
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +53 -52
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -52
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -52
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -52
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -52
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -52
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -52
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -52
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -52
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -52
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -52
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -52
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -52
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -52
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -52
- snowflake/ml/modeling/ensemble/stacking_regressor.py +53 -52
- snowflake/ml/modeling/ensemble/voting_classifier.py +53 -52
- snowflake/ml/modeling/ensemble/voting_regressor.py +53 -52
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +53 -52
- snowflake/ml/modeling/feature_selection/select_fdr.py +53 -52
- snowflake/ml/modeling/feature_selection/select_fpr.py +53 -52
- snowflake/ml/modeling/feature_selection/select_fwe.py +53 -52
- snowflake/ml/modeling/feature_selection/select_k_best.py +53 -52
- snowflake/ml/modeling/feature_selection/select_percentile.py +53 -52
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +53 -52
- snowflake/ml/modeling/feature_selection/variance_threshold.py +53 -52
- snowflake/ml/modeling/framework/base.py +64 -36
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -52
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -52
- snowflake/ml/modeling/impute/iterative_imputer.py +53 -52
- snowflake/ml/modeling/impute/knn_imputer.py +53 -52
- snowflake/ml/modeling/impute/missing_indicator.py +53 -52
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +53 -52
- snowflake/ml/modeling/kernel_approximation/nystroem.py +53 -52
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +53 -52
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +53 -52
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +53 -52
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -52
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -52
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -52
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -52
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -52
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -52
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -52
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -52
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -52
- snowflake/ml/modeling/linear_model/lars.py +51 -52
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -52
- snowflake/ml/modeling/linear_model/lasso.py +51 -52
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -52
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -52
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -52
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -52
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -52
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -52
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -52
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -52
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -52
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -52
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -52
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -52
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -52
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -52
- snowflake/ml/modeling/linear_model/perceptron.py +51 -52
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -52
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -52
- snowflake/ml/modeling/linear_model/ridge.py +51 -52
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -52
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -52
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -52
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -52
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -52
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -52
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -52
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -52
- snowflake/ml/modeling/manifold/isomap.py +53 -52
- snowflake/ml/modeling/manifold/mds.py +53 -52
- snowflake/ml/modeling/manifold/spectral_embedding.py +53 -52
- snowflake/ml/modeling/manifold/tsne.py +53 -52
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -52
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -52
- snowflake/ml/modeling/model_selection/grid_search_cv.py +21 -23
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +38 -20
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -52
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -52
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -52
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -52
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -52
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -52
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -52
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -52
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -52
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -52
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -52
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -52
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -52
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -52
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +53 -52
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -52
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -52
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +53 -52
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -52
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -52
- snowflake/ml/modeling/pipeline/pipeline.py +538 -36
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +12 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +53 -52
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -52
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -52
- snowflake/ml/modeling/svm/linear_svc.py +51 -52
- snowflake/ml/modeling/svm/linear_svr.py +51 -52
- snowflake/ml/modeling/svm/nu_svc.py +51 -52
- snowflake/ml/modeling/svm/nu_svr.py +51 -52
- snowflake/ml/modeling/svm/svc.py +51 -52
- snowflake/ml/modeling/svm/svr.py +51 -52
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -52
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -52
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -52
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -52
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -52
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -52
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -52
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -52
- snowflake/ml/registry/_manager/model_manager.py +36 -7
- snowflake/ml/registry/model_registry.py +3 -149
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.1.dist-info}/METADATA +112 -7
- {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.1.dist-info}/RECORD +216 -206
- snowflake/ml/registry/_artifact_manager.py +0 -156
- snowflake/ml/registry/artifact.py +0 -46
- {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.1.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.1.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.1.dist-info}/top_level.txt +0 -0
@@ -13,10 +13,6 @@ from packaging import requirements, specifiers, utils as packaging_utils, versio
|
|
13
13
|
|
14
14
|
import snowflake.connector
|
15
15
|
from snowflake.ml._internal import env as snowml_env
|
16
|
-
from snowflake.ml._internal.exceptions import (
|
17
|
-
error_codes,
|
18
|
-
exceptions as snowml_exceptions,
|
19
|
-
)
|
20
16
|
from snowflake.ml._internal.utils import query_result_checker
|
21
17
|
from snowflake.snowpark import context, exceptions, session
|
22
18
|
from snowflake.snowpark._internal import utils as snowpark_utils
|
@@ -237,6 +233,72 @@ def get_local_installed_version_of_pip_package(pip_req: requirements.Requirement
|
|
237
233
|
return new_pip_req
|
238
234
|
|
239
235
|
|
236
|
+
class IncorrectLocalEnvironmentError(Exception):
|
237
|
+
...
|
238
|
+
|
239
|
+
|
240
|
+
def validate_local_installed_version_of_pip_package(pip_req: requirements.Requirement) -> None:
|
241
|
+
"""Validate if the package is locally installed, and the local version meet the specifier of the requirements.
|
242
|
+
|
243
|
+
Args:
|
244
|
+
pip_req: A requirements.Requirement object showing the requirement.
|
245
|
+
|
246
|
+
Raises:
|
247
|
+
IncorrectLocalEnvironmentError: Raised when the local installation of the requested package cannot be found.
|
248
|
+
IncorrectLocalEnvironmentError: Raised when the locally installed version does not satisfy the requirement.
|
249
|
+
"""
|
250
|
+
try:
|
251
|
+
local_dist = importlib_metadata.distribution(pip_req.name)
|
252
|
+
local_dist_version = version.parse(local_dist.version)
|
253
|
+
except importlib_metadata.PackageNotFoundError:
|
254
|
+
raise IncorrectLocalEnvironmentError(f"Cannot find the local installation of the requested package {pip_req}.")
|
255
|
+
|
256
|
+
if not pip_req.specifier.contains(local_dist_version):
|
257
|
+
raise IncorrectLocalEnvironmentError(
|
258
|
+
f"The local installed version {local_dist_version} cannot meet the requirement {pip_req}."
|
259
|
+
)
|
260
|
+
|
261
|
+
|
262
|
+
CONDA_PKG_NAME_TO_PYPI_MAP = {"pytorch": "torch"}
|
263
|
+
|
264
|
+
|
265
|
+
def try_convert_conda_requirement_to_pip(conda_req: requirements.Requirement) -> requirements.Requirement:
|
266
|
+
"""Return a new requirements.Requirement object whose name has been attempted to convert to name in pypi from conda.
|
267
|
+
|
268
|
+
Args:
|
269
|
+
conda_req: A requirements.Requirement object showing the requirement in conda.
|
270
|
+
|
271
|
+
Returns:
|
272
|
+
A new requirements.Requirement object showing the requirement in pypi.
|
273
|
+
"""
|
274
|
+
pip_req = copy.deepcopy(conda_req)
|
275
|
+
pip_req.name = CONDA_PKG_NAME_TO_PYPI_MAP.get(conda_req.name, conda_req.name)
|
276
|
+
return pip_req
|
277
|
+
|
278
|
+
|
279
|
+
def validate_py_runtime_version(provided_py_version_str: str) -> None:
|
280
|
+
"""Validate the provided python version string with python version in current runtime.
|
281
|
+
Raises an error if the major or minor version differs.
|
282
|
+
|
283
|
+
Args:
|
284
|
+
provided_py_version_str: the provided python version string.
|
285
|
+
|
286
|
+
Raises:
|
287
|
+
IncorrectLocalEnvironmentError: Raised when the provided Python version has a different major or minor version than the current runtime.
|
288
|
+
"""
|
289
|
+
if provided_py_version_str != snowml_env.PYTHON_VERSION:
|
290
|
+
provided_py_version = version.parse(provided_py_version_str)
|
291
|
+
current_py_version = version.parse(snowml_env.PYTHON_VERSION)
|
292
|
+
if (
|
293
|
+
provided_py_version.major != current_py_version.major
|
294
|
+
or provided_py_version.minor != current_py_version.minor
|
295
|
+
):
|
296
|
+
raise IncorrectLocalEnvironmentError(
|
297
|
+
f"Requested python version is {provided_py_version_str} "
|
298
|
+
f"while current Python version is {snowml_env.PYTHON_VERSION}. "
|
299
|
+
)
|
300
|
+
|
301
|
+
|
240
302
|
def get_package_spec_with_supported_ops_only(req: requirements.Requirement) -> requirements.Requirement:
|
241
303
|
"""Get the package spec with supported ops only including ==, >=, <=, > and <
|
242
304
|
|
@@ -491,6 +553,9 @@ def load_conda_env_file(
|
|
491
553
|
A tuple of Dict of conda dependencies after validated, optional pip requirements if exist
|
492
554
|
and a string 'major.minor.patchlevel' of python version.
|
493
555
|
"""
|
556
|
+
if not path.exists():
|
557
|
+
return collections.defaultdict(list), None, None
|
558
|
+
|
494
559
|
with open(path, encoding="utf-8") as f:
|
495
560
|
env = yaml.safe_load(stream=f)
|
496
561
|
|
@@ -541,6 +606,9 @@ def load_requirements_file(path: pathlib.Path) -> List[requirements.Requirement]
|
|
541
606
|
Returns:
|
542
607
|
List of dependencies string after validated.
|
543
608
|
"""
|
609
|
+
if not path.exists():
|
610
|
+
return []
|
611
|
+
|
544
612
|
with open(path, encoding="utf-8") as f:
|
545
613
|
reqs = f.readlines()
|
546
614
|
|
@@ -568,33 +636,6 @@ def parse_python_version_string(dep: str) -> Optional[str]:
|
|
568
636
|
return None
|
569
637
|
|
570
638
|
|
571
|
-
def validate_py_runtime_version(provided_py_version_str: str) -> None:
|
572
|
-
"""Validate the provided python version string with python version in current runtime.
|
573
|
-
If the major or minor is different, errors out.
|
574
|
-
|
575
|
-
Args:
|
576
|
-
provided_py_version_str: the provided python version string.
|
577
|
-
|
578
|
-
Raises:
|
579
|
-
SnowflakeMLException: Raised when the provided python version has different major or minor.
|
580
|
-
"""
|
581
|
-
if provided_py_version_str != snowml_env.PYTHON_VERSION:
|
582
|
-
provided_py_version = version.parse(provided_py_version_str)
|
583
|
-
current_py_version = version.parse(snowml_env.PYTHON_VERSION)
|
584
|
-
if (
|
585
|
-
provided_py_version.major != current_py_version.major
|
586
|
-
or provided_py_version.minor != current_py_version.minor
|
587
|
-
):
|
588
|
-
raise snowml_exceptions.SnowflakeMLException(
|
589
|
-
error_code=error_codes.LOCAL_ENVIRONMENT_ERROR,
|
590
|
-
original_exception=RuntimeError(
|
591
|
-
f"Unable to load model which is saved with Python {provided_py_version_str} "
|
592
|
-
f"while current Python version is {snowml_env.PYTHON_VERSION}. "
|
593
|
-
"To load model metadata only, set meta_only to True."
|
594
|
-
),
|
595
|
-
)
|
596
|
-
|
597
|
-
|
598
639
|
def _find_conda_dep_spec(
|
599
640
|
conda_chan_deps: DefaultDict[str, List[requirements.Requirement]], pkg_name: str
|
600
641
|
) -> Optional[Tuple[str, requirements.Requirement]]:
|
@@ -0,0 +1,5 @@
|
|
1
|
+
DATASET_ALREADY_EXISTS = "Dataset {} already exists."
|
2
|
+
DATASET_VERSION_ALREADY_EXISTS = "Dataset {} version {} already exists."
|
3
|
+
|
4
|
+
DATASET_NOT_EXIST = "Dataset {} does not exist or is inaccessible."
|
5
|
+
DATASET_VERSION_NOT_EXIST = "Dataset {} version '{}' does not exist or is inaccessible."
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# Error codes from the Snowflake Python Connector.
|
2
|
+
ERRNO_OBJECT_ALREADY_EXISTS = "002002"
|
3
|
+
ERRNO_OBJECT_NOT_EXIST = "002043"
|
4
|
+
ERRNO_FILES_ALREADY_EXISTING = "001030"
|
5
|
+
ERRNO_VERSION_ALREADY_EXISTS = "092917"
|
6
|
+
ERRNO_DATASET_NOT_EXIST = "399019"
|
7
|
+
ERRNO_DATASET_VERSION_NOT_EXIST = "399012"
|
8
|
+
ERRNO_DATASET_VERSION_ALREADY_EXISTS = "399020"
|
9
|
+
|
10
|
+
|
11
|
+
class DatasetError(Exception):
|
12
|
+
"""Base class for other exceptions."""
|
13
|
+
|
14
|
+
|
15
|
+
class DatasetNotExistError(DatasetError):
|
16
|
+
"""Raised when the requested Dataset does not exist."""
|
17
|
+
|
18
|
+
|
19
|
+
class DatasetExistError(DatasetError):
|
20
|
+
"""Raised when there is already an existing Dataset with the same name and version in selected schema."""
|
21
|
+
|
22
|
+
|
23
|
+
class DatasetCannotDeleteError(DatasetError):
|
24
|
+
"""Raised when a Dataset is unable to get deleted."""
|
@@ -105,3 +105,6 @@ UNFEASIBLE_ENVIRONMENT_ERROR = "2502"
|
|
105
105
|
|
106
106
|
# Missing required client side dependency.
|
107
107
|
CLIENT_DEPENDENCY_MISSING_ERROR = "2511"
|
108
|
+
|
109
|
+
# The current client-side snowflake-ml-python version is outdated and may have forward-compatibility issues.
|
110
|
+
SNOWML_PACKAGE_OUTDATED = "2700"
|
@@ -0,0 +1,95 @@
|
|
1
|
+
import copy
|
2
|
+
import functools
|
3
|
+
from typing import Any, Callable, List
|
4
|
+
|
5
|
+
from snowflake import snowpark
|
6
|
+
from snowflake.ml._internal.lineage import data_source
|
7
|
+
|
8
|
+
DATA_SOURCES_ATTR = "_data_sources"
|
9
|
+
|
10
|
+
|
11
|
+
def _get_datasources(*args: Any) -> List[data_source.DataSource]:
|
12
|
+
"""Helper method for extracting data sources attribute from DataFrames in an argument list"""
|
13
|
+
result = []
|
14
|
+
for arg in args:
|
15
|
+
srcs = getattr(arg, DATA_SOURCES_ATTR, None)
|
16
|
+
if isinstance(srcs, list) and all(isinstance(s, data_source.DataSource) for s in srcs):
|
17
|
+
result += srcs
|
18
|
+
return result
|
19
|
+
|
20
|
+
|
21
|
+
def _wrap_func(
|
22
|
+
fn: Callable[..., snowpark.DataFrame], data_sources: List[data_source.DataSource]
|
23
|
+
) -> Callable[..., snowpark.DataFrame]:
|
24
|
+
"""Wrap a DataFrame transform function to propagate data_sources to derived DataFrames."""
|
25
|
+
|
26
|
+
@functools.wraps(fn)
|
27
|
+
def wrapped(*args: Any, **kwargs: Any) -> snowpark.DataFrame:
|
28
|
+
df = fn(*args, **kwargs)
|
29
|
+
patch_dataframe(df, data_sources=data_sources, inplace=True)
|
30
|
+
return df
|
31
|
+
|
32
|
+
return wrapped
|
33
|
+
|
34
|
+
|
35
|
+
def patch_dataframe(
|
36
|
+
df: snowpark.DataFrame, data_sources: List[data_source.DataSource], inplace: bool = False
|
37
|
+
) -> snowpark.DataFrame:
|
38
|
+
"""
|
39
|
+
Monkey patch a DataFrame to attach the provided data_sources as an attribute of the DataFrame.
|
40
|
+
Also patches the DataFrame's transformation functions to propagate the new data sources attribute to
|
41
|
+
derived DataFrames.
|
42
|
+
|
43
|
+
Args:
|
44
|
+
df: DataFrame to be patched
|
45
|
+
data_sources: List of data sources for the DataFrame
|
46
|
+
inplace: If True, patches the DataFrame in place. If False, patches a shallow copy of the DataFrame instead.
|
47
|
+
|
48
|
+
Returns:
|
49
|
+
Patched DataFrame
|
50
|
+
"""
|
51
|
+
# Instance-level monkey-patches
|
52
|
+
funcs = [
|
53
|
+
"_with_plan",
|
54
|
+
"_lateral",
|
55
|
+
"group_by",
|
56
|
+
"group_by_grouping_sets",
|
57
|
+
"cube",
|
58
|
+
"pivot",
|
59
|
+
"rollup",
|
60
|
+
"cache_result",
|
61
|
+
"_to_df", # RelationalGroupedDataFrame
|
62
|
+
]
|
63
|
+
if not inplace:
|
64
|
+
df = copy.copy(df)
|
65
|
+
setattr(df, DATA_SOURCES_ATTR, data_sources)
|
66
|
+
for func in funcs:
|
67
|
+
fn = getattr(df, func, None)
|
68
|
+
if fn is not None:
|
69
|
+
setattr(df, func, _wrap_func(fn, data_sources=data_sources))
|
70
|
+
return df
|
71
|
+
|
72
|
+
|
73
|
+
def _wrap_class_func(fn: Callable[..., snowpark.DataFrame]) -> Callable[..., snowpark.DataFrame]:
|
74
|
+
@functools.wraps(fn)
|
75
|
+
def wrapped(*args: Any, **kwargs: Any) -> snowpark.DataFrame:
|
76
|
+
df = fn(*args, **kwargs)
|
77
|
+
data_sources = _get_datasources(*args) + _get_datasources(*kwargs.values())
|
78
|
+
if data_sources:
|
79
|
+
patch_dataframe(df, data_sources, inplace=True)
|
80
|
+
return df
|
81
|
+
|
82
|
+
return wrapped
|
83
|
+
|
84
|
+
|
85
|
+
# Class-level monkey-patches
|
86
|
+
for klass, func_list in {
|
87
|
+
snowpark.DataFrame: [
|
88
|
+
"__copy__",
|
89
|
+
],
|
90
|
+
snowpark.RelationalGroupedDataFrame: [],
|
91
|
+
}.items():
|
92
|
+
assert isinstance(func_list, list) # mypy
|
93
|
+
for func in func_list:
|
94
|
+
fn = getattr(klass, func)
|
95
|
+
setattr(klass, func, _wrap_class_func(fn))
|
@@ -50,6 +50,7 @@ class TelemetryField(enum.Enum):
|
|
50
50
|
# types of telemetry
|
51
51
|
TYPE_FUNCTION_USAGE = "function_usage"
|
52
52
|
TYPE_SNOWML_SPCS_USAGE = "snowml_spcs_usage"
|
53
|
+
TYPE_SNOWML_PIPELINE_USAGE = "snowml_pipeline_usage"
|
53
54
|
# message keys for telemetry
|
54
55
|
KEY_PROJECT = "project"
|
55
56
|
KEY_SUBPROJECT = "subproject"
|
@@ -156,7 +156,7 @@ def parse_schema_level_object_identifier(
|
|
156
156
|
"""
|
157
157
|
res = _SF_SCHEMA_LEVEL_OBJECT_RE.fullmatch(path)
|
158
158
|
if not res:
|
159
|
-
raise ValueError(f"Invalid identifier. It should start with database.schema.
|
159
|
+
raise ValueError(f"Invalid identifier. It should start with database.schema.object. Getting {path}")
|
160
160
|
return (
|
161
161
|
res.group("db"),
|
162
162
|
res.group("schema"),
|
@@ -1,4 +1,4 @@
|
|
1
|
-
from typing import List
|
1
|
+
from typing import List, Optional, Tuple
|
2
2
|
|
3
3
|
from snowflake.ml._internal.utils import identifier
|
4
4
|
|
@@ -79,3 +79,16 @@ class SqlIdentifier(str):
|
|
79
79
|
|
80
80
|
def to_sql_identifiers(list_of_str: List[str], *, case_sensitive: bool = False) -> List[SqlIdentifier]:
|
81
81
|
return [SqlIdentifier(val, case_sensitive=case_sensitive) for val in list_of_str]
|
82
|
+
|
83
|
+
|
84
|
+
def parse_fully_qualified_name(
|
85
|
+
name: str,
|
86
|
+
) -> Tuple[Optional[SqlIdentifier], Optional[SqlIdentifier], SqlIdentifier]:
|
87
|
+
db, schema, object, _ = identifier.parse_schema_level_object_identifier(name)
|
88
|
+
|
89
|
+
assert name is not None, f"Unable parse the input name `{name}` as fully qualified."
|
90
|
+
return (
|
91
|
+
SqlIdentifier(db) if db else None,
|
92
|
+
SqlIdentifier(schema) if schema else None,
|
93
|
+
SqlIdentifier(object),
|
94
|
+
)
|
@@ -0,0 +1,11 @@
|
|
1
|
+
from .dataset import Dataset, DatasetVersion
|
2
|
+
from .dataset_factory import create_from_dataframe, load_dataset
|
3
|
+
from .dataset_reader import DatasetReader
|
4
|
+
|
5
|
+
__all__ = [
|
6
|
+
"Dataset",
|
7
|
+
"DatasetVersion",
|
8
|
+
"DatasetReader",
|
9
|
+
"create_from_dataframe",
|
10
|
+
"load_dataset",
|
11
|
+
]
|