snowflake-ml-python 1.4.1__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218) hide show
  1. snowflake/ml/_internal/env_utils.py +72 -31
  2. snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
  3. snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
  4. snowflake/ml/_internal/exceptions/error_codes.py +3 -0
  5. snowflake/ml/_internal/lineage/data_source.py +10 -0
  6. snowflake/ml/_internal/lineage/lineage_utils.py +95 -0
  7. snowflake/ml/_internal/telemetry.py +1 -0
  8. snowflake/ml/_internal/utils/identifier.py +1 -1
  9. snowflake/ml/_internal/utils/sql_identifier.py +14 -1
  10. snowflake/ml/dataset/__init__.py +11 -0
  11. snowflake/ml/dataset/dataset.py +455 -129
  12. snowflake/ml/dataset/dataset_factory.py +53 -0
  13. snowflake/ml/dataset/dataset_metadata.py +103 -0
  14. snowflake/ml/dataset/dataset_reader.py +199 -0
  15. snowflake/ml/feature_store/__init__.py +6 -0
  16. snowflake/ml/feature_store/access_manager.py +279 -0
  17. snowflake/ml/feature_store/feature_store.py +544 -358
  18. snowflake/ml/feature_store/feature_view.py +55 -16
  19. snowflake/ml/fileset/embedded_stage_fs.py +149 -0
  20. snowflake/ml/fileset/sfcfs.py +0 -4
  21. snowflake/ml/fileset/snowfs.py +160 -0
  22. snowflake/ml/fileset/stage_fs.py +25 -10
  23. snowflake/ml/model/__init__.py +2 -2
  24. snowflake/ml/model/_api.py +16 -1
  25. snowflake/ml/model/_client/model/model_impl.py +65 -31
  26. snowflake/ml/model/_client/model/model_version_impl.py +159 -2
  27. snowflake/ml/model/_client/ops/metadata_ops.py +27 -4
  28. snowflake/ml/model/_client/ops/model_ops.py +268 -83
  29. snowflake/ml/model/_client/sql/_base.py +34 -0
  30. snowflake/ml/model/_client/sql/model.py +42 -47
  31. snowflake/ml/model/_client/sql/model_version.py +164 -39
  32. snowflake/ml/model/_client/sql/stage.py +6 -32
  33. snowflake/ml/model/_client/sql/tag.py +32 -56
  34. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
  35. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
  36. snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
  37. snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
  38. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
  39. snowflake/ml/model/_model_composer/model_composer.py +22 -1
  40. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +22 -0
  41. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +11 -0
  42. snowflake/ml/model/_packager/model_env/model_env.py +41 -0
  43. snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -1
  44. snowflake/ml/model/_packager/model_meta/model_meta.py +1 -5
  45. snowflake/ml/model/_packager/model_packager.py +0 -3
  46. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
  47. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
  48. snowflake/ml/modeling/_internal/model_trainer.py +7 -0
  49. snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
  50. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +50 -21
  51. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +24 -2
  52. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +340 -17
  53. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -52
  54. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -52
  55. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -52
  56. snowflake/ml/modeling/cluster/birch.py +53 -52
  57. snowflake/ml/modeling/cluster/bisecting_k_means.py +53 -52
  58. snowflake/ml/modeling/cluster/dbscan.py +51 -52
  59. snowflake/ml/modeling/cluster/feature_agglomeration.py +53 -52
  60. snowflake/ml/modeling/cluster/k_means.py +53 -52
  61. snowflake/ml/modeling/cluster/mean_shift.py +51 -52
  62. snowflake/ml/modeling/cluster/mini_batch_k_means.py +53 -52
  63. snowflake/ml/modeling/cluster/optics.py +51 -52
  64. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -52
  65. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -52
  66. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -52
  67. snowflake/ml/modeling/compose/column_transformer.py +53 -52
  68. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -52
  69. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -52
  70. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -52
  71. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -52
  72. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -52
  73. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -52
  74. snowflake/ml/modeling/covariance/min_cov_det.py +51 -52
  75. snowflake/ml/modeling/covariance/oas.py +51 -52
  76. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -52
  77. snowflake/ml/modeling/decomposition/dictionary_learning.py +53 -52
  78. snowflake/ml/modeling/decomposition/factor_analysis.py +53 -52
  79. snowflake/ml/modeling/decomposition/fast_ica.py +53 -52
  80. snowflake/ml/modeling/decomposition/incremental_pca.py +53 -52
  81. snowflake/ml/modeling/decomposition/kernel_pca.py +53 -52
  82. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +53 -52
  83. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +53 -52
  84. snowflake/ml/modeling/decomposition/pca.py +53 -52
  85. snowflake/ml/modeling/decomposition/sparse_pca.py +53 -52
  86. snowflake/ml/modeling/decomposition/truncated_svd.py +53 -52
  87. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +53 -52
  88. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -52
  89. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -52
  90. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -52
  91. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -52
  92. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -52
  93. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -52
  94. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -52
  95. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -52
  96. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -52
  97. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -52
  98. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -52
  99. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -52
  100. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -52
  101. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -52
  102. snowflake/ml/modeling/ensemble/stacking_regressor.py +53 -52
  103. snowflake/ml/modeling/ensemble/voting_classifier.py +53 -52
  104. snowflake/ml/modeling/ensemble/voting_regressor.py +53 -52
  105. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +53 -52
  106. snowflake/ml/modeling/feature_selection/select_fdr.py +53 -52
  107. snowflake/ml/modeling/feature_selection/select_fpr.py +53 -52
  108. snowflake/ml/modeling/feature_selection/select_fwe.py +53 -52
  109. snowflake/ml/modeling/feature_selection/select_k_best.py +53 -52
  110. snowflake/ml/modeling/feature_selection/select_percentile.py +53 -52
  111. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +53 -52
  112. snowflake/ml/modeling/feature_selection/variance_threshold.py +53 -52
  113. snowflake/ml/modeling/framework/base.py +64 -36
  114. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -52
  115. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -52
  116. snowflake/ml/modeling/impute/iterative_imputer.py +53 -52
  117. snowflake/ml/modeling/impute/knn_imputer.py +53 -52
  118. snowflake/ml/modeling/impute/missing_indicator.py +53 -52
  119. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +53 -52
  120. snowflake/ml/modeling/kernel_approximation/nystroem.py +53 -52
  121. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +53 -52
  122. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +53 -52
  123. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +53 -52
  124. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -52
  125. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -52
  126. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -52
  127. snowflake/ml/modeling/linear_model/ard_regression.py +51 -52
  128. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -52
  129. snowflake/ml/modeling/linear_model/elastic_net.py +51 -52
  130. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -52
  131. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -52
  132. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -52
  133. snowflake/ml/modeling/linear_model/lars.py +51 -52
  134. snowflake/ml/modeling/linear_model/lars_cv.py +51 -52
  135. snowflake/ml/modeling/linear_model/lasso.py +51 -52
  136. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -52
  137. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -52
  138. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -52
  139. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -52
  140. snowflake/ml/modeling/linear_model/linear_regression.py +51 -52
  141. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -52
  142. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -52
  143. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -52
  144. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -52
  145. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -52
  146. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -52
  147. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -52
  148. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -52
  149. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -52
  150. snowflake/ml/modeling/linear_model/perceptron.py +51 -52
  151. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -52
  152. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -52
  153. snowflake/ml/modeling/linear_model/ridge.py +51 -52
  154. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -52
  155. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -52
  156. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -52
  157. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -52
  158. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -52
  159. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -52
  160. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -52
  161. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -52
  162. snowflake/ml/modeling/manifold/isomap.py +53 -52
  163. snowflake/ml/modeling/manifold/mds.py +53 -52
  164. snowflake/ml/modeling/manifold/spectral_embedding.py +53 -52
  165. snowflake/ml/modeling/manifold/tsne.py +53 -52
  166. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -52
  167. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -52
  168. snowflake/ml/modeling/model_selection/grid_search_cv.py +21 -23
  169. snowflake/ml/modeling/model_selection/randomized_search_cv.py +38 -20
  170. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -52
  171. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -52
  172. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -52
  173. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -52
  174. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -52
  175. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -52
  176. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -52
  177. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -52
  178. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -52
  179. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -52
  180. snowflake/ml/modeling/neighbors/kernel_density.py +51 -52
  181. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -52
  182. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -52
  183. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -52
  184. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +53 -52
  185. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -52
  186. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -52
  187. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +53 -52
  188. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -52
  189. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -52
  190. snowflake/ml/modeling/pipeline/pipeline.py +538 -36
  191. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +12 -0
  192. snowflake/ml/modeling/preprocessing/polynomial_features.py +53 -52
  193. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -52
  194. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -52
  195. snowflake/ml/modeling/svm/linear_svc.py +51 -52
  196. snowflake/ml/modeling/svm/linear_svr.py +51 -52
  197. snowflake/ml/modeling/svm/nu_svc.py +51 -52
  198. snowflake/ml/modeling/svm/nu_svr.py +51 -52
  199. snowflake/ml/modeling/svm/svc.py +51 -52
  200. snowflake/ml/modeling/svm/svr.py +51 -52
  201. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -52
  202. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -52
  203. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -52
  204. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -52
  205. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -52
  206. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -52
  207. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -52
  208. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -52
  209. snowflake/ml/registry/_manager/model_manager.py +36 -7
  210. snowflake/ml/registry/model_registry.py +3 -149
  211. snowflake/ml/version.py +1 -1
  212. {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.1.dist-info}/METADATA +112 -7
  213. {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.1.dist-info}/RECORD +216 -206
  214. snowflake/ml/registry/_artifact_manager.py +0 -156
  215. snowflake/ml/registry/artifact.py +0 -46
  216. {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.1.dist-info}/LICENSE.txt +0 -0
  217. {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.1.dist-info}/WHEEL +0 -0
  218. {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.1.dist-info}/top_level.txt +0 -0
@@ -13,10 +13,6 @@ from packaging import requirements, specifiers, utils as packaging_utils, versio
13
13
 
14
14
  import snowflake.connector
15
15
  from snowflake.ml._internal import env as snowml_env
16
- from snowflake.ml._internal.exceptions import (
17
- error_codes,
18
- exceptions as snowml_exceptions,
19
- )
20
16
  from snowflake.ml._internal.utils import query_result_checker
21
17
  from snowflake.snowpark import context, exceptions, session
22
18
  from snowflake.snowpark._internal import utils as snowpark_utils
@@ -237,6 +233,72 @@ def get_local_installed_version_of_pip_package(pip_req: requirements.Requirement
237
233
  return new_pip_req
238
234
 
239
235
 
236
class IncorrectLocalEnvironmentError(Exception):
    """Raised when the local Python environment does not satisfy a stated requirement."""
238
+
239
+
240
def validate_local_installed_version_of_pip_package(pip_req: requirements.Requirement) -> None:
    """Validate that a package is installed locally and that its version meets the requirement.

    Args:
        pip_req: A requirements.Requirement object describing the requirement.

    Raises:
        IncorrectLocalEnvironmentError: Raised when the local installation of the requested package cannot be found.
        IncorrectLocalEnvironmentError: Raised when the locally installed version does not meet the requirement.
    """
    try:
        local_dist = importlib_metadata.distribution(pip_req.name)
        local_dist_version = version.parse(local_dist.version)
    except importlib_metadata.PackageNotFoundError as e:
        # Chain explicitly so the traceback reports the failed lookup as the direct cause
        # instead of as a second error raised "during handling" of the first.
        raise IncorrectLocalEnvironmentError(
            f"Cannot find the local installation of the requested package {pip_req}."
        ) from e

    if not pip_req.specifier.contains(local_dist_version):
        raise IncorrectLocalEnvironmentError(
            f"The local installed version {local_dist_version} cannot meet the requirement {pip_req}."
        )
260
+
261
+
262
# Conda package names whose PyPI name differs; names not listed here are used unchanged.
CONDA_PKG_NAME_TO_PYPI_MAP = {"pytorch": "torch"}


def try_convert_conda_requirement_to_pip(conda_req: requirements.Requirement) -> requirements.Requirement:
    """Build a copy of a conda requirement whose name is translated to its PyPI name where one is known.

    The input requirement is left untouched; everything except the name is carried over
    by a deep copy.

    Args:
        conda_req: A requirements.Requirement object describing the requirement in conda.

    Returns:
        A new requirements.Requirement object describing the requirement in pypi.
    """
    # Fall back to the conda name when no mapping entry exists.
    pypi_name = CONDA_PKG_NAME_TO_PYPI_MAP.get(conda_req.name, conda_req.name)
    converted = copy.deepcopy(conda_req)
    converted.name = pypi_name
    return converted
277
+
278
+
279
def validate_py_runtime_version(provided_py_version_str: str) -> None:
    """Check a Python version string against the Python version of the current runtime.

    Only the major and minor components are compared, so a patch-level difference is accepted.

    Args:
        provided_py_version_str: the provided python version string.

    Raises:
        IncorrectLocalEnvironmentError: Raised when the provided python version has different major or minor.
    """
    runtime_py_version_str = snowml_env.PYTHON_VERSION
    # Identical strings need no parsing at all.
    if provided_py_version_str == runtime_py_version_str:
        return

    requested = version.parse(provided_py_version_str)
    running = version.parse(runtime_py_version_str)
    if (requested.major, requested.minor) == (running.major, running.minor):
        return

    raise IncorrectLocalEnvironmentError(
        f"Requested python version is {provided_py_version_str} "
        f"while current Python version is {snowml_env.PYTHON_VERSION}. "
    )
300
+
301
+
240
302
  def get_package_spec_with_supported_ops_only(req: requirements.Requirement) -> requirements.Requirement:
241
303
  """Get the package spec with supported ops only including ==, >=, <=, > and <
242
304
 
@@ -491,6 +553,9 @@ def load_conda_env_file(
491
553
  A tuple of Dict of conda dependencies after validated, optional pip requirements if exist
492
554
  and a string 'major.minor.patchlevel' of python version.
493
555
  """
556
+ if not path.exists():
557
+ return collections.defaultdict(list), None, None
558
+
494
559
  with open(path, encoding="utf-8") as f:
495
560
  env = yaml.safe_load(stream=f)
496
561
 
@@ -541,6 +606,9 @@ def load_requirements_file(path: pathlib.Path) -> List[requirements.Requirement]
541
606
  Returns:
542
607
  List of dependencies string after validated.
543
608
  """
609
+ if not path.exists():
610
+ return []
611
+
544
612
  with open(path, encoding="utf-8") as f:
545
613
  reqs = f.readlines()
546
614
 
@@ -568,33 +636,6 @@ def parse_python_version_string(dep: str) -> Optional[str]:
568
636
  return None
569
637
 
570
638
 
571
- def validate_py_runtime_version(provided_py_version_str: str) -> None:
572
- """Validate the provided python version string with python version in current runtime.
573
- If the major or minor is different, errors out.
574
-
575
- Args:
576
- provided_py_version_str: the provided python version string.
577
-
578
- Raises:
579
- SnowflakeMLException: Raised when the provided python version has different major or minor.
580
- """
581
- if provided_py_version_str != snowml_env.PYTHON_VERSION:
582
- provided_py_version = version.parse(provided_py_version_str)
583
- current_py_version = version.parse(snowml_env.PYTHON_VERSION)
584
- if (
585
- provided_py_version.major != current_py_version.major
586
- or provided_py_version.minor != current_py_version.minor
587
- ):
588
- raise snowml_exceptions.SnowflakeMLException(
589
- error_code=error_codes.LOCAL_ENVIRONMENT_ERROR,
590
- original_exception=RuntimeError(
591
- f"Unable to load model which is saved with Python {provided_py_version_str} "
592
- f"while current Python version is {snowml_env.PYTHON_VERSION}. "
593
- "To load model metadata only, set meta_only to True."
594
- ),
595
- )
596
-
597
-
598
639
  def _find_conda_dep_spec(
599
640
  conda_chan_deps: DefaultDict[str, List[requirements.Requirement]], pkg_name: str
600
641
  ) -> Optional[Tuple[str, requirements.Requirement]]:
@@ -0,0 +1,5 @@
1
+ DATASET_ALREADY_EXISTS = "Dataset {} already exists."
2
+ DATASET_VERSION_ALREADY_EXISTS = "Dataset {} version {} already exists."
3
+
4
+ DATASET_NOT_EXIST = "Dataset {} does not exist or is inaccessible."
5
+ DATASET_VERSION_NOT_EXIST = "Dataset {} version '{}' does not exist or is inaccessible."
@@ -0,0 +1,24 @@
1
+ # Error code from Snowflake Python Connector.
2
+ ERRNO_OBJECT_ALREADY_EXISTS = "002002"
3
+ ERRNO_OBJECT_NOT_EXIST = "002043"
4
+ ERRNO_FILES_ALREADY_EXISTING = "001030"
5
+ ERRNO_VERSION_ALREADY_EXISTS = "092917"
6
+ ERRNO_DATASET_NOT_EXIST = "399019"
7
+ ERRNO_DATASET_VERSION_NOT_EXIST = "399012"
8
+ ERRNO_DATASET_VERSION_ALREADY_EXISTS = "399020"
9
+
10
+
11
class DatasetError(Exception):
    """Base class for all Dataset-related exceptions defined in this module."""
13
+
14
+
15
+ class DatasetNotExistError(DatasetError):
16
+ """Raised when the requested Dataset does not exist."""
17
+
18
+
19
+ class DatasetExistError(DatasetError):
20
+ """Raised when there is already an existing Dataset with the same name and version in selected schema."""
21
+
22
+
23
+ class DatasetCannotDeleteError(DatasetError):
24
+ """Raised when a Dataset is unable to get deleted."""
@@ -105,3 +105,6 @@ UNFEASIBLE_ENVIRONMENT_ERROR = "2502"
105
105
 
106
106
  # Missing required client side dependency.
107
107
  CLIENT_DEPENDENCY_MISSING_ERROR = "2511"
108
+
109
+ # Current client side snowflake-ml-python version is outdated and may have forward compatibility issues
110
+ SNOWML_PACKAGE_OUTDATED = "2700"
@@ -0,0 +1,10 @@
1
+ import dataclasses
2
+ from typing import List, Optional
3
+
4
+
5
@dataclasses.dataclass(frozen=True)
class DataSource:
    """Immutable description of a data source, used for lineage tracking."""

    fully_qualified_name: str
    version: str
    url: str
    # Optional list of column names; None when not provided.
    # NOTE(review): presumably the columns to leave out when reading the source; confirm with consumers.
    exclude_cols: Optional[List[str]] = dataclasses.field(default=None)
@@ -0,0 +1,95 @@
1
+ import copy
2
+ import functools
3
+ from typing import Any, Callable, List
4
+
5
+ from snowflake import snowpark
6
+ from snowflake.ml._internal.lineage import data_source
7
+
8
+ DATA_SOURCES_ATTR = "_data_sources"
9
+
10
+
11
def _get_datasources(*args: Any) -> List[data_source.DataSource]:
    """Collect the data sources recorded on the given arguments.

    An argument contributes only when its data-sources attribute is a list made up
    entirely of DataSource instances; every other argument is ignored.
    """
    collected: List[data_source.DataSource] = []
    for candidate in args:
        attached = getattr(candidate, DATA_SOURCES_ATTR, None)
        if not isinstance(attached, list):
            continue
        if any(not isinstance(src, data_source.DataSource) for src in attached):
            continue
        collected.extend(attached)
    return collected
19
+
20
+
21
def _wrap_func(
    fn: Callable[..., snowpark.DataFrame], data_sources: List[data_source.DataSource]
) -> Callable[..., snowpark.DataFrame]:
    """Return a wrapper around fn that attaches data_sources to the DataFrame fn produces."""

    @functools.wraps(fn)
    def wrapped(*call_args: Any, **call_kwargs: Any) -> snowpark.DataFrame:
        derived = fn(*call_args, **call_kwargs)
        # inplace=True patches the returned object itself rather than a copy of it.
        patch_dataframe(derived, data_sources=data_sources, inplace=True)
        return derived

    return wrapped
33
+
34
+
35
def patch_dataframe(
    df: snowpark.DataFrame, data_sources: List[data_source.DataSource], inplace: bool = False
) -> snowpark.DataFrame:
    """
    Monkey patch a DataFrame to attach the provided data_sources as an attribute of the DataFrame.
    The DataFrame's transformation functions are patched as well, so that the data sources
    attribute is propagated to derived DataFrames.

    Args:
        df: DataFrame to be patched
        data_sources: List of data sources for the DataFrame
        inplace: If True, patches the DataFrame in-place. If False, patches a shallow copy of the DataFrame.

    Returns:
        Patched DataFrame
    """
    target = df if inplace else copy.copy(df)
    setattr(target, DATA_SOURCES_ATTR, data_sources)

    # Instance-level monkey-patches: only attributes that exist on the object are wrapped.
    for method_name in (
        "_with_plan",
        "_lateral",
        "group_by",
        "group_by_grouping_sets",
        "cube",
        "pivot",
        "rollup",
        "cache_result",
        "_to_df",  # RelationalGroupedDataFrame
    ):
        method = getattr(target, method_name, None)
        if method is None:
            continue
        setattr(target, method_name, _wrap_func(method, data_sources=data_sources))
    return target
71
+
72
+
73
def _wrap_class_func(fn: Callable[..., snowpark.DataFrame]) -> Callable[..., snowpark.DataFrame]:
    """Return a wrapper around fn that carries data sources from its arguments over to its result.

    Data sources are gathered from both positional and keyword arguments; the result is
    patched only when at least one data source was found.
    """

    @functools.wraps(fn)
    def wrapped(*call_args: Any, **call_kwargs: Any) -> snowpark.DataFrame:
        result = fn(*call_args, **call_kwargs)
        inherited = _get_datasources(*call_args) + _get_datasources(*call_kwargs.values())
        if inherited:
            patch_dataframe(result, inherited, inplace=True)
        return result

    return wrapped
83
+
84
+
85
# Class-level monkey-patches
# Executed when this module is imported: every method named below is replaced on the
# class itself with a wrapper built by _wrap_class_func, so the data sources found on the
# call arguments are attached to the DataFrame the method returns.
for klass, func_list in {
    snowpark.DataFrame: [
        "__copy__",
    ],
    # No class-level patches are registered for this class.
    snowpark.RelationalGroupedDataFrame: [],
}.items():
    assert isinstance(func_list, list)  # mypy
    for func in func_list:
        fn = getattr(klass, func)
        setattr(klass, func, _wrap_class_func(fn))
@@ -50,6 +50,7 @@ class TelemetryField(enum.Enum):
50
50
  # types of telemetry
51
51
  TYPE_FUNCTION_USAGE = "function_usage"
52
52
  TYPE_SNOWML_SPCS_USAGE = "snowml_spcs_usage"
53
+ TYPE_SNOWML_PIPELINE_USAGE = "snowml_pipeline_usage"
53
54
  # message keys for telemetry
54
55
  KEY_PROJECT = "project"
55
56
  KEY_SUBPROJECT = "subproject"
@@ -156,7 +156,7 @@ def parse_schema_level_object_identifier(
156
156
  """
157
157
  res = _SF_SCHEMA_LEVEL_OBJECT_RE.fullmatch(path)
158
158
  if not res:
159
- raise ValueError(f"Invalid identifier. It should start with database.schema.stage. Getting {path}")
159
+ raise ValueError(f"Invalid identifier. It should start with database.schema.object. Getting {path}")
160
160
  return (
161
161
  res.group("db"),
162
162
  res.group("schema"),
@@ -1,4 +1,4 @@
1
- from typing import List
1
+ from typing import List, Optional, Tuple
2
2
 
3
3
  from snowflake.ml._internal.utils import identifier
4
4
 
@@ -79,3 +79,16 @@ class SqlIdentifier(str):
79
79
 
80
80
  def to_sql_identifiers(list_of_str: List[str], *, case_sensitive: bool = False) -> List[SqlIdentifier]:
81
81
  return [SqlIdentifier(val, case_sensitive=case_sensitive) for val in list_of_str]
82
+
83
+
84
def parse_fully_qualified_name(
    name: str,
) -> Tuple[Optional[SqlIdentifier], Optional[SqlIdentifier], SqlIdentifier]:
    """Split an object name into database, schema and object SQL identifiers.

    Parsing is delegated to identifier.parse_schema_level_object_identifier; any trailing
    part it returns after the object name is discarded.

    Args:
        name: The object name to parse, in a form accepted by
            identifier.parse_schema_level_object_identifier.

    Returns:
        A (database, schema, object) tuple. Database and schema are None when the parser
        returned no value for them.

    Raises:
        ValueError: Raised when no object name could be extracted from the input.
    """
    db, schema, object_name, _ = identifier.parse_schema_level_object_identifier(name)

    # Validate the parsed object part. Checking `name` itself would be vacuous, since the
    # parser has already consumed it, and an explicit raise still runs under `python -O`.
    if object_name is None:
        raise ValueError(f"Unable to parse the input name `{name}` as fully qualified.")
    return (
        SqlIdentifier(db) if db else None,
        SqlIdentifier(schema) if schema else None,
        SqlIdentifier(object_name),
    )
@@ -0,0 +1,11 @@
1
+ from .dataset import Dataset, DatasetVersion
2
+ from .dataset_factory import create_from_dataframe, load_dataset
3
+ from .dataset_reader import DatasetReader
4
+
5
+ __all__ = [
6
+ "Dataset",
7
+ "DatasetVersion",
8
+ "DatasetReader",
9
+ "create_from_dataframe",
10
+ "load_dataset",
11
+ ]