snowflake-ml-python 1.4.1__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206) hide show
  1. snowflake/ml/_internal/env_utils.py +66 -31
  2. snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
  3. snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
  4. snowflake/ml/_internal/exceptions/error_codes.py +3 -0
  5. snowflake/ml/_internal/lineage/data_source.py +10 -0
  6. snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
  7. snowflake/ml/dataset/__init__.py +10 -0
  8. snowflake/ml/dataset/dataset.py +454 -129
  9. snowflake/ml/dataset/dataset_factory.py +53 -0
  10. snowflake/ml/dataset/dataset_metadata.py +103 -0
  11. snowflake/ml/dataset/dataset_reader.py +202 -0
  12. snowflake/ml/feature_store/feature_store.py +408 -282
  13. snowflake/ml/feature_store/feature_view.py +37 -8
  14. snowflake/ml/fileset/embedded_stage_fs.py +146 -0
  15. snowflake/ml/fileset/sfcfs.py +0 -4
  16. snowflake/ml/fileset/snowfs.py +159 -0
  17. snowflake/ml/fileset/stage_fs.py +1 -4
  18. snowflake/ml/model/__init__.py +2 -2
  19. snowflake/ml/model/_api.py +16 -1
  20. snowflake/ml/model/_client/model/model_impl.py +27 -0
  21. snowflake/ml/model/_client/model/model_version_impl.py +135 -0
  22. snowflake/ml/model/_client/ops/model_ops.py +137 -67
  23. snowflake/ml/model/_client/sql/model.py +16 -14
  24. snowflake/ml/model/_client/sql/model_version.py +109 -1
  25. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
  26. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
  27. snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
  28. snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
  29. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
  30. snowflake/ml/model/_model_composer/model_composer.py +22 -1
  31. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +22 -0
  32. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +11 -0
  33. snowflake/ml/model/_packager/model_env/model_env.py +41 -0
  34. snowflake/ml/model/_packager/model_meta/model_meta.py +1 -5
  35. snowflake/ml/model/_packager/model_packager.py +0 -3
  36. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
  37. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
  38. snowflake/ml/modeling/_internal/model_trainer.py +7 -0
  39. snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
  40. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +24 -2
  41. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
  42. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -52
  43. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -52
  44. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -52
  45. snowflake/ml/modeling/cluster/birch.py +53 -52
  46. snowflake/ml/modeling/cluster/bisecting_k_means.py +53 -52
  47. snowflake/ml/modeling/cluster/dbscan.py +51 -52
  48. snowflake/ml/modeling/cluster/feature_agglomeration.py +53 -52
  49. snowflake/ml/modeling/cluster/k_means.py +53 -52
  50. snowflake/ml/modeling/cluster/mean_shift.py +51 -52
  51. snowflake/ml/modeling/cluster/mini_batch_k_means.py +53 -52
  52. snowflake/ml/modeling/cluster/optics.py +51 -52
  53. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -52
  54. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -52
  55. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -52
  56. snowflake/ml/modeling/compose/column_transformer.py +53 -52
  57. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -52
  58. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -52
  59. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -52
  60. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -52
  61. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -52
  62. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -52
  63. snowflake/ml/modeling/covariance/min_cov_det.py +51 -52
  64. snowflake/ml/modeling/covariance/oas.py +51 -52
  65. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -52
  66. snowflake/ml/modeling/decomposition/dictionary_learning.py +53 -52
  67. snowflake/ml/modeling/decomposition/factor_analysis.py +53 -52
  68. snowflake/ml/modeling/decomposition/fast_ica.py +53 -52
  69. snowflake/ml/modeling/decomposition/incremental_pca.py +53 -52
  70. snowflake/ml/modeling/decomposition/kernel_pca.py +53 -52
  71. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +53 -52
  72. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +53 -52
  73. snowflake/ml/modeling/decomposition/pca.py +53 -52
  74. snowflake/ml/modeling/decomposition/sparse_pca.py +53 -52
  75. snowflake/ml/modeling/decomposition/truncated_svd.py +53 -52
  76. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +53 -52
  77. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -52
  78. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -52
  79. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -52
  80. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -52
  81. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -52
  82. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -52
  83. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -52
  84. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -52
  85. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -52
  86. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -52
  87. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -52
  88. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -52
  89. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -52
  90. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -52
  91. snowflake/ml/modeling/ensemble/stacking_regressor.py +53 -52
  92. snowflake/ml/modeling/ensemble/voting_classifier.py +53 -52
  93. snowflake/ml/modeling/ensemble/voting_regressor.py +53 -52
  94. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +53 -52
  95. snowflake/ml/modeling/feature_selection/select_fdr.py +53 -52
  96. snowflake/ml/modeling/feature_selection/select_fpr.py +53 -52
  97. snowflake/ml/modeling/feature_selection/select_fwe.py +53 -52
  98. snowflake/ml/modeling/feature_selection/select_k_best.py +53 -52
  99. snowflake/ml/modeling/feature_selection/select_percentile.py +53 -52
  100. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +53 -52
  101. snowflake/ml/modeling/feature_selection/variance_threshold.py +53 -52
  102. snowflake/ml/modeling/framework/base.py +63 -36
  103. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -52
  104. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -52
  105. snowflake/ml/modeling/impute/iterative_imputer.py +53 -52
  106. snowflake/ml/modeling/impute/knn_imputer.py +53 -52
  107. snowflake/ml/modeling/impute/missing_indicator.py +53 -52
  108. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +53 -52
  109. snowflake/ml/modeling/kernel_approximation/nystroem.py +53 -52
  110. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +53 -52
  111. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +53 -52
  112. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +53 -52
  113. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -52
  114. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -52
  115. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -52
  116. snowflake/ml/modeling/linear_model/ard_regression.py +51 -52
  117. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -52
  118. snowflake/ml/modeling/linear_model/elastic_net.py +51 -52
  119. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -52
  120. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -52
  121. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -52
  122. snowflake/ml/modeling/linear_model/lars.py +51 -52
  123. snowflake/ml/modeling/linear_model/lars_cv.py +51 -52
  124. snowflake/ml/modeling/linear_model/lasso.py +51 -52
  125. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -52
  126. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -52
  127. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -52
  128. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -52
  129. snowflake/ml/modeling/linear_model/linear_regression.py +51 -52
  130. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -52
  131. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -52
  132. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -52
  133. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -52
  134. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -52
  135. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -52
  136. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -52
  137. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -52
  138. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -52
  139. snowflake/ml/modeling/linear_model/perceptron.py +51 -52
  140. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -52
  141. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -52
  142. snowflake/ml/modeling/linear_model/ridge.py +51 -52
  143. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -52
  144. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -52
  145. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -52
  146. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -52
  147. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -52
  148. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -52
  149. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -52
  150. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -52
  151. snowflake/ml/modeling/manifold/isomap.py +53 -52
  152. snowflake/ml/modeling/manifold/mds.py +53 -52
  153. snowflake/ml/modeling/manifold/spectral_embedding.py +53 -52
  154. snowflake/ml/modeling/manifold/tsne.py +53 -52
  155. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -52
  156. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -52
  157. snowflake/ml/modeling/model_selection/grid_search_cv.py +21 -23
  158. snowflake/ml/modeling/model_selection/randomized_search_cv.py +38 -20
  159. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -52
  160. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -52
  161. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -52
  162. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -52
  163. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -52
  164. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -52
  165. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -52
  166. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -52
  167. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -52
  168. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -52
  169. snowflake/ml/modeling/neighbors/kernel_density.py +51 -52
  170. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -52
  171. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -52
  172. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -52
  173. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +53 -52
  174. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -52
  175. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -52
  176. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +53 -52
  177. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -52
  178. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -52
  179. snowflake/ml/modeling/pipeline/pipeline.py +514 -32
  180. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +12 -0
  181. snowflake/ml/modeling/preprocessing/polynomial_features.py +53 -52
  182. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -52
  183. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -52
  184. snowflake/ml/modeling/svm/linear_svc.py +51 -52
  185. snowflake/ml/modeling/svm/linear_svr.py +51 -52
  186. snowflake/ml/modeling/svm/nu_svc.py +51 -52
  187. snowflake/ml/modeling/svm/nu_svr.py +51 -52
  188. snowflake/ml/modeling/svm/svc.py +51 -52
  189. snowflake/ml/modeling/svm/svr.py +51 -52
  190. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -52
  191. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -52
  192. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -52
  193. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -52
  194. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -52
  195. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -52
  196. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -52
  197. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -52
  198. snowflake/ml/registry/model_registry.py +3 -149
  199. snowflake/ml/version.py +1 -1
  200. {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +63 -2
  201. {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.0.dist-info}/RECORD +204 -196
  202. snowflake/ml/registry/_artifact_manager.py +0 -156
  203. snowflake/ml/registry/artifact.py +0 -46
  204. {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
  205. {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
  206. {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0
@@ -13,10 +13,6 @@ from packaging import requirements, specifiers, utils as packaging_utils, versio
13
13
 
14
14
  import snowflake.connector
15
15
  from snowflake.ml._internal import env as snowml_env
16
- from snowflake.ml._internal.exceptions import (
17
- error_codes,
18
- exceptions as snowml_exceptions,
19
- )
20
16
  from snowflake.ml._internal.utils import query_result_checker
21
17
  from snowflake.snowpark import context, exceptions, session
22
18
  from snowflake.snowpark._internal import utils as snowpark_utils
@@ -237,6 +233,72 @@ def get_local_installed_version_of_pip_package(pip_req: requirements.Requirement
237
233
  return new_pip_req
238
234
 
239
235
 
236
+ class IncorrectLocalEnvironmentError(Exception):
237
+ ...
238
+
239
+
240
+ def validate_local_installed_version_of_pip_package(pip_req: requirements.Requirement) -> None:
241
+ """Validate if the package is locally installed, and the local version meet the specifier of the requirements.
242
+
243
+ Args:
244
+ pip_req: A requirements.Requirement object showing the requirement.
245
+
246
+ Raises:
247
+ IncorrectLocalEnvironmentError: Raised when the local installation of the requested package cannot be found.
248
+ IncorrectLocalEnvironmentError: Raised when the locally installed version does not meet the requirement.
249
+ """
250
+ try:
251
+ local_dist = importlib_metadata.distribution(pip_req.name)
252
+ local_dist_version = version.parse(local_dist.version)
253
+ except importlib_metadata.PackageNotFoundError:
254
+ raise IncorrectLocalEnvironmentError(f"Cannot find the local installation of the requested package {pip_req}.")
255
+
256
+ if not pip_req.specifier.contains(local_dist_version):
257
+ raise IncorrectLocalEnvironmentError(
258
+ f"The local installed version {local_dist_version} cannot meet the requirement {pip_req}."
259
+ )
260
+
261
+
262
+ CONDA_PKG_NAME_TO_PYPI_MAP = {"pytorch": "torch"}
263
+
264
+
265
+ def try_convert_conda_requirement_to_pip(conda_req: requirements.Requirement) -> requirements.Requirement:
266
+ """Return a new requirements.Requirement object whose name has been attempted to convert to name in pypi from conda.
267
+
268
+ Args:
269
+ conda_req: A requirements.Requirement object showing the requirement in conda.
270
+
271
+ Returns:
272
+ A new requirements.Requirement object showing the requirement in pypi.
273
+ """
274
+ pip_req = copy.deepcopy(conda_req)
275
+ pip_req.name = CONDA_PKG_NAME_TO_PYPI_MAP.get(conda_req.name, conda_req.name)
276
+ return pip_req
277
+
278
+
279
+ def validate_py_runtime_version(provided_py_version_str: str) -> None:
280
+ """Validate the provided python version string with python version in current runtime.
281
+ If the major or minor is different, errors out.
282
+
283
+ Args:
284
+ provided_py_version_str: the provided python version string.
285
+
286
+ Raises:
287
+ IncorrectLocalEnvironmentError: Raised when the provided python version has different major or minor.
288
+ """
289
+ if provided_py_version_str != snowml_env.PYTHON_VERSION:
290
+ provided_py_version = version.parse(provided_py_version_str)
291
+ current_py_version = version.parse(snowml_env.PYTHON_VERSION)
292
+ if (
293
+ provided_py_version.major != current_py_version.major
294
+ or provided_py_version.minor != current_py_version.minor
295
+ ):
296
+ raise IncorrectLocalEnvironmentError(
297
+ f"Requested python version is {provided_py_version_str} "
298
+ f"while current Python version is {snowml_env.PYTHON_VERSION}. "
299
+ )
300
+
301
+
240
302
  def get_package_spec_with_supported_ops_only(req: requirements.Requirement) -> requirements.Requirement:
241
303
  """Get the package spec with supported ops only including ==, >=, <=, > and <
242
304
 
@@ -568,33 +630,6 @@ def parse_python_version_string(dep: str) -> Optional[str]:
568
630
  return None
569
631
 
570
632
 
571
- def validate_py_runtime_version(provided_py_version_str: str) -> None:
572
- """Validate the provided python version string with python version in current runtime.
573
- If the major or minor is different, errors out.
574
-
575
- Args:
576
- provided_py_version_str: the provided python version string.
577
-
578
- Raises:
579
- SnowflakeMLException: Raised when the provided python version has different major or minor.
580
- """
581
- if provided_py_version_str != snowml_env.PYTHON_VERSION:
582
- provided_py_version = version.parse(provided_py_version_str)
583
- current_py_version = version.parse(snowml_env.PYTHON_VERSION)
584
- if (
585
- provided_py_version.major != current_py_version.major
586
- or provided_py_version.minor != current_py_version.minor
587
- ):
588
- raise snowml_exceptions.SnowflakeMLException(
589
- error_code=error_codes.LOCAL_ENVIRONMENT_ERROR,
590
- original_exception=RuntimeError(
591
- f"Unable to load model which is saved with Python {provided_py_version_str} "
592
- f"while current Python version is {snowml_env.PYTHON_VERSION}. "
593
- "To load model metadata only, set meta_only to True."
594
- ),
595
- )
596
-
597
-
598
633
  def _find_conda_dep_spec(
599
634
  conda_chan_deps: DefaultDict[str, List[requirements.Requirement]], pkg_name: str
600
635
  ) -> Optional[Tuple[str, requirements.Requirement]]:
@@ -0,0 +1,5 @@
1
+ DATASET_ALREADY_EXISTS = "Dataset {} already exists."
2
+ DATASET_VERSION_ALREADY_EXISTS = "Dataset {} version {} already exists."
3
+
4
+ DATASET_NOT_EXIST = "Dataset {} does not exist or is inaccessible."
5
+ DATASET_VERSION_NOT_EXIST = "Dataset {} version '{}' does not exist or is inaccessible."
@@ -0,0 +1,24 @@
1
+ # Error codes from the Snowflake Python Connector.
2
+ ERRNO_OBJECT_ALREADY_EXISTS = "002002"
3
+ ERRNO_OBJECT_NOT_EXIST = "002043"
4
+ ERRNO_FILES_ALREADY_EXISTING = "001030"
5
+ ERRNO_VERSION_ALREADY_EXISTS = "092917"
6
+ ERRNO_DATASET_NOT_EXIST = "399019"
7
+ ERRNO_DATASET_VERSION_NOT_EXIST = "399012"
8
+ ERRNO_DATASET_VERSION_ALREADY_EXISTS = "399020"
9
+
10
+
11
+ class DatasetError(Exception):
12
+ """Base class for other exceptions."""
13
+
14
+
15
+ class DatasetNotExistError(DatasetError):
16
+ """Raised when the requested Dataset does not exist."""
17
+
18
+
19
+ class DatasetExistError(DatasetError):
20
+ """Raised when there is already an existing Dataset with the same name and version in selected schema."""
21
+
22
+
23
+ class DatasetCannotDeleteError(DatasetError):
24
+ """Raised when a Dataset is unable to get deleted."""
@@ -105,3 +105,6 @@ UNFEASIBLE_ENVIRONMENT_ERROR = "2502"
105
105
 
106
106
  # Missing required client side dependency.
107
107
  CLIENT_DEPENDENCY_MISSING_ERROR = "2511"
108
+
109
+ # The current client-side snowflake-ml-python version is outdated and may have forward-compatibility issues.
110
+ SNOWML_PACKAGE_OUTDATED = "2700"
@@ -0,0 +1,10 @@
1
+ import dataclasses
2
+ from typing import List, Optional
3
+
4
+
5
+ @dataclasses.dataclass(frozen=True)
6
+ class DataSource:
7
+ fully_qualified_name: str
8
+ version: str
9
+ url: str
10
+ exclude_cols: Optional[List[str]] = None
@@ -0,0 +1,44 @@
1
+ import copy
2
+ from typing import List
3
+
4
+ from snowflake import snowpark
5
+ from snowflake.ml._internal.lineage import data_source
6
+
7
+
8
+ class DatasetDataFrame(snowpark.DataFrame):
9
+ """
10
+ Represents a lazily-evaluated dataset. It extends :class:`snowpark.DataFrame` so all
11
+ :class:`snowpark.DataFrame` operations can be applied to it. It holds additional information
12
+ related to the :class:`Dataset`.
13
+
14
+ It will be created by dataset.read.to_snowpark_dataframe() API and by the transformations
15
+ that produce a new dataframe.
16
+ """
17
+
18
+ @staticmethod
19
+ def from_dataframe(
20
+ df: snowpark.DataFrame, data_sources: List[data_source.DataSource], inplace: bool = False
21
+ ) -> "DatasetDataFrame":
22
+ """
23
+ Create a new DatasetDataFrame instance from a snowpark.DataFrame instance with
24
+ additional source information.
25
+
26
+ Args:
27
+ df (snowpark.DataFrame): The Snowpark DataFrame to be converted.
28
+ data_sources (List[DataSource]): A list of data sources to associate with the DataFrame.
29
+ inplace (bool): If True, modifies the DataFrame in place; otherwise, returns a new DatasetDataFrame.
30
+
31
+ Returns:
32
+ DatasetDataFrame: A new or modified DatasetDataFrame depending on the 'inplace' argument.
33
+ """
34
+ if not inplace:
35
+ df = copy.deepcopy(df)
36
+ df.__class__ = DatasetDataFrame
37
+ df._data_sources = data_sources # type:ignore[attr-defined]
38
+ return df # type: ignore[return-value]
39
+
40
+ def _get_sources(self) -> List[data_source.DataSource]:
41
+ """
42
+ Returns the data sources associated with the DataFrame.
43
+ """
44
+ return self._data_sources # type: ignore[no-any-return]
@@ -0,0 +1,10 @@
1
+ from .dataset import Dataset
2
+ from .dataset_factory import create_from_dataframe, load_dataset
3
+ from .dataset_reader import DatasetReader
4
+
5
+ __all__ = [
6
+ "Dataset",
7
+ "DatasetReader",
8
+ "create_from_dataframe",
9
+ "load_dataset",
10
+ ]