snowflake-ml-python 1.4.1__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218)
  1. snowflake/ml/_internal/env_utils.py +72 -31
  2. snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
  3. snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
  4. snowflake/ml/_internal/exceptions/error_codes.py +3 -0
  5. snowflake/ml/_internal/lineage/data_source.py +10 -0
  6. snowflake/ml/_internal/lineage/lineage_utils.py +95 -0
  7. snowflake/ml/_internal/telemetry.py +1 -0
  8. snowflake/ml/_internal/utils/identifier.py +1 -1
  9. snowflake/ml/_internal/utils/sql_identifier.py +14 -1
  10. snowflake/ml/dataset/__init__.py +11 -0
  11. snowflake/ml/dataset/dataset.py +455 -129
  12. snowflake/ml/dataset/dataset_factory.py +53 -0
  13. snowflake/ml/dataset/dataset_metadata.py +103 -0
  14. snowflake/ml/dataset/dataset_reader.py +199 -0
  15. snowflake/ml/feature_store/__init__.py +6 -0
  16. snowflake/ml/feature_store/access_manager.py +279 -0
  17. snowflake/ml/feature_store/feature_store.py +544 -358
  18. snowflake/ml/feature_store/feature_view.py +55 -16
  19. snowflake/ml/fileset/embedded_stage_fs.py +149 -0
  20. snowflake/ml/fileset/sfcfs.py +0 -4
  21. snowflake/ml/fileset/snowfs.py +160 -0
  22. snowflake/ml/fileset/stage_fs.py +25 -10
  23. snowflake/ml/model/__init__.py +2 -2
  24. snowflake/ml/model/_api.py +16 -1
  25. snowflake/ml/model/_client/model/model_impl.py +65 -31
  26. snowflake/ml/model/_client/model/model_version_impl.py +159 -2
  27. snowflake/ml/model/_client/ops/metadata_ops.py +27 -4
  28. snowflake/ml/model/_client/ops/model_ops.py +268 -83
  29. snowflake/ml/model/_client/sql/_base.py +34 -0
  30. snowflake/ml/model/_client/sql/model.py +42 -47
  31. snowflake/ml/model/_client/sql/model_version.py +164 -39
  32. snowflake/ml/model/_client/sql/stage.py +6 -32
  33. snowflake/ml/model/_client/sql/tag.py +32 -56
  34. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
  35. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
  36. snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
  37. snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
  38. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
  39. snowflake/ml/model/_model_composer/model_composer.py +22 -1
  40. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +22 -0
  41. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +11 -0
  42. snowflake/ml/model/_packager/model_env/model_env.py +41 -0
  43. snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -1
  44. snowflake/ml/model/_packager/model_meta/model_meta.py +1 -5
  45. snowflake/ml/model/_packager/model_packager.py +0 -3
  46. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
  47. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
  48. snowflake/ml/modeling/_internal/model_trainer.py +7 -0
  49. snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
  50. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +50 -21
  51. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +24 -2
  52. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +340 -17
  53. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -52
  54. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -52
  55. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -52
  56. snowflake/ml/modeling/cluster/birch.py +53 -52
  57. snowflake/ml/modeling/cluster/bisecting_k_means.py +53 -52
  58. snowflake/ml/modeling/cluster/dbscan.py +51 -52
  59. snowflake/ml/modeling/cluster/feature_agglomeration.py +53 -52
  60. snowflake/ml/modeling/cluster/k_means.py +53 -52
  61. snowflake/ml/modeling/cluster/mean_shift.py +51 -52
  62. snowflake/ml/modeling/cluster/mini_batch_k_means.py +53 -52
  63. snowflake/ml/modeling/cluster/optics.py +51 -52
  64. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -52
  65. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -52
  66. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -52
  67. snowflake/ml/modeling/compose/column_transformer.py +53 -52
  68. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -52
  69. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -52
  70. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -52
  71. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -52
  72. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -52
  73. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -52
  74. snowflake/ml/modeling/covariance/min_cov_det.py +51 -52
  75. snowflake/ml/modeling/covariance/oas.py +51 -52
  76. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -52
  77. snowflake/ml/modeling/decomposition/dictionary_learning.py +53 -52
  78. snowflake/ml/modeling/decomposition/factor_analysis.py +53 -52
  79. snowflake/ml/modeling/decomposition/fast_ica.py +53 -52
  80. snowflake/ml/modeling/decomposition/incremental_pca.py +53 -52
  81. snowflake/ml/modeling/decomposition/kernel_pca.py +53 -52
  82. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +53 -52
  83. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +53 -52
  84. snowflake/ml/modeling/decomposition/pca.py +53 -52
  85. snowflake/ml/modeling/decomposition/sparse_pca.py +53 -52
  86. snowflake/ml/modeling/decomposition/truncated_svd.py +53 -52
  87. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +53 -52
  88. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -52
  89. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -52
  90. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -52
  91. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -52
  92. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -52
  93. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -52
  94. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -52
  95. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -52
  96. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -52
  97. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -52
  98. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -52
  99. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -52
  100. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -52
  101. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -52
  102. snowflake/ml/modeling/ensemble/stacking_regressor.py +53 -52
  103. snowflake/ml/modeling/ensemble/voting_classifier.py +53 -52
  104. snowflake/ml/modeling/ensemble/voting_regressor.py +53 -52
  105. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +53 -52
  106. snowflake/ml/modeling/feature_selection/select_fdr.py +53 -52
  107. snowflake/ml/modeling/feature_selection/select_fpr.py +53 -52
  108. snowflake/ml/modeling/feature_selection/select_fwe.py +53 -52
  109. snowflake/ml/modeling/feature_selection/select_k_best.py +53 -52
  110. snowflake/ml/modeling/feature_selection/select_percentile.py +53 -52
  111. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +53 -52
  112. snowflake/ml/modeling/feature_selection/variance_threshold.py +53 -52
  113. snowflake/ml/modeling/framework/base.py +64 -36
  114. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -52
  115. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -52
  116. snowflake/ml/modeling/impute/iterative_imputer.py +53 -52
  117. snowflake/ml/modeling/impute/knn_imputer.py +53 -52
  118. snowflake/ml/modeling/impute/missing_indicator.py +53 -52
  119. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +53 -52
  120. snowflake/ml/modeling/kernel_approximation/nystroem.py +53 -52
  121. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +53 -52
  122. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +53 -52
  123. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +53 -52
  124. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -52
  125. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -52
  126. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -52
  127. snowflake/ml/modeling/linear_model/ard_regression.py +51 -52
  128. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -52
  129. snowflake/ml/modeling/linear_model/elastic_net.py +51 -52
  130. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -52
  131. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -52
  132. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -52
  133. snowflake/ml/modeling/linear_model/lars.py +51 -52
  134. snowflake/ml/modeling/linear_model/lars_cv.py +51 -52
  135. snowflake/ml/modeling/linear_model/lasso.py +51 -52
  136. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -52
  137. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -52
  138. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -52
  139. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -52
  140. snowflake/ml/modeling/linear_model/linear_regression.py +51 -52
  141. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -52
  142. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -52
  143. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -52
  144. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -52
  145. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -52
  146. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -52
  147. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -52
  148. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -52
  149. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -52
  150. snowflake/ml/modeling/linear_model/perceptron.py +51 -52
  151. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -52
  152. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -52
  153. snowflake/ml/modeling/linear_model/ridge.py +51 -52
  154. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -52
  155. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -52
  156. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -52
  157. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -52
  158. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -52
  159. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -52
  160. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -52
  161. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -52
  162. snowflake/ml/modeling/manifold/isomap.py +53 -52
  163. snowflake/ml/modeling/manifold/mds.py +53 -52
  164. snowflake/ml/modeling/manifold/spectral_embedding.py +53 -52
  165. snowflake/ml/modeling/manifold/tsne.py +53 -52
  166. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -52
  167. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -52
  168. snowflake/ml/modeling/model_selection/grid_search_cv.py +21 -23
  169. snowflake/ml/modeling/model_selection/randomized_search_cv.py +38 -20
  170. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -52
  171. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -52
  172. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -52
  173. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -52
  174. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -52
  175. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -52
  176. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -52
  177. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -52
  178. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -52
  179. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -52
  180. snowflake/ml/modeling/neighbors/kernel_density.py +51 -52
  181. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -52
  182. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -52
  183. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -52
  184. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +53 -52
  185. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -52
  186. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -52
  187. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +53 -52
  188. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -52
  189. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -52
  190. snowflake/ml/modeling/pipeline/pipeline.py +538 -36
  191. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +12 -0
  192. snowflake/ml/modeling/preprocessing/polynomial_features.py +53 -52
  193. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -52
  194. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -52
  195. snowflake/ml/modeling/svm/linear_svc.py +51 -52
  196. snowflake/ml/modeling/svm/linear_svr.py +51 -52
  197. snowflake/ml/modeling/svm/nu_svc.py +51 -52
  198. snowflake/ml/modeling/svm/nu_svr.py +51 -52
  199. snowflake/ml/modeling/svm/svc.py +51 -52
  200. snowflake/ml/modeling/svm/svr.py +51 -52
  201. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -52
  202. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -52
  203. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -52
  204. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -52
  205. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -52
  206. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -52
  207. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -52
  208. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -52
  209. snowflake/ml/registry/_manager/model_manager.py +36 -7
  210. snowflake/ml/registry/model_registry.py +3 -149
  211. snowflake/ml/version.py +1 -1
  212. {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.1.dist-info}/METADATA +112 -7
  213. {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.1.dist-info}/RECORD +216 -206
  214. snowflake/ml/registry/_artifact_manager.py +0 -156
  215. snowflake/ml/registry/artifact.py +0 -46
  216. {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.1.dist-info}/LICENSE.txt +0 -0
  217. {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.1.dist-info}/WHEEL +0 -0
  218. {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.1.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,12 @@
  #!/usr/bin/env python3
+ import inspect
+ import os
+ import posixpath
+ import tempfile
  from itertools import chain
  from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union

+ import cloudpickle as cp
  import numpy as np
  import pandas as pd
  from sklearn import __version__ as skversion, pipeline
@@ -10,14 +15,20 @@ from sklearn.preprocessing import FunctionTransformer
  from sklearn.utils import metaestimators

  from snowflake import snowpark
- from snowflake.ml._internal import telemetry
+ from snowflake.ml._internal import file_utils, telemetry
  from snowflake.ml._internal.exceptions import error_codes, exceptions
- from snowflake.ml._internal.utils import snowpark_dataframe_utils
+ from snowflake.ml._internal.utils import snowpark_dataframe_utils, temp_file_utils
  from snowflake.ml.model.model_signature import ModelSignature, _infer_signature
+ from snowflake.ml.modeling._internal.model_transformer_builder import (
+     ModelTransformerBuilder,
+ )
  from snowflake.ml.modeling.framework import _utils, base
+ from snowflake.snowpark import Session, functions as F
+ from snowflake.snowpark._internal import utils as snowpark_utils

  _PROJECT = "ModelDevelopment"
  _SUBPROJECT = "Framework"
+ IN_ML_RUNTIME_ENV_VAR = "IN_SPCS_ML_RUNTIME"


  def _final_step_has(attr: str) -> Callable[..., bool]:
@@ -104,7 +115,7 @@ class Pipeline(base.BaseTransformer):
          self._feature_names_in: List[np.ndarray[Any, np.dtype[Any]]] = []
          self._n_features_in: List[int] = []
          self._transformers_to_input_indices: Dict[str, List[int]] = {}
-         self._is_convertible_to_sklearn = True
+         self._modifies_label_or_sample_weight = True

          self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None

@@ -113,6 +124,11 @@ class Pipeline(base.BaseTransformer):
              if isinstance(obj, base.BaseTransformer):
                  deps = deps | set(obj._get_dependencies())
          self._deps = list(deps)
+         self._sklearn_object = None
+         self.label_cols = self._get_label_cols()
+         self._is_convertible_to_sklearn = self._is_convertible_to_sklearn_object()
+
+         self._send_pipeline_configuration_telemetry()

      @staticmethod
      def _is_estimator(obj: object) -> bool:
@@ -147,6 +163,33 @@ class Pipeline(base.BaseTransformer):
          self._n_features_in = []
          self._transformers_to_input_indices = {}

+     def _is_convertible_to_sklearn_object(self) -> bool:
+         """Checks if the pipeline can be converted to a native sklearn pipeline.
+         - We cannot create an sklearn pipeline if its label or sample weight columns are
+           modified in the pipeline.
+         - We cannot create an sklearn pipeline if any of its steps cannot be converted to an sklearn object.
+         - We cannot create an sklearn pipeline if input columns are specified in any step other than
+           the first step.
+
+         Returns:
+             True if the pipeline can be converted to a native sklearn pipeline, else false.
+         """
+         if self._is_pipeline_modifying_label_or_sample_weight():
+             return False
+
+         # Check that nested pipelines can be converted to sklearn.
+         for _, base_estimator in self.steps:
+             if hasattr(base_estimator, "_is_convertible_to_sklearn_object"):
+                 if not base_estimator._is_convertible_to_sklearn_object():
+                     return False
+
+         # Check that no step after the first has 'input columns' set.
+         for _, base_estimator in self.steps[1:]:
+             if base_estimator.get_input_cols():
+                 # We only want falsy values - None and [].
+                 return False
+         return True
+
      def _is_pipeline_modifying_label_or_sample_weight(self) -> bool:
          """
          Checks if pipeline is modifying label or sample_weight columns.
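
To make the new convertibility rules concrete, here is a minimal sketch of the third rule; the column names are hypothetical, and it assumes the usual input_cols/output_cols/label_cols keyword arguments on the modeling classes:

# Convertible: only the first step declares input columns.
from snowflake.ml.modeling.linear_model import LinearRegression
from snowflake.ml.modeling.pipeline import Pipeline
from snowflake.ml.modeling.preprocessing import StandardScaler

convertible = Pipeline(steps=[
    ("scale", StandardScaler(input_cols=["FEATURE"], output_cols=["FEATURE_SCALED"])),
    ("model", LinearRegression(label_cols=["TARGET"])),
])

# Not convertible: a step after the first declares its own input columns,
# so the pipeline cannot be trained in the ML Runtime or exported to sklearn.
not_convertible = Pipeline(steps=[
    ("scale", StandardScaler(input_cols=["FEATURE"], output_cols=["FEATURE_SCALED"])),
    ("model", LinearRegression(input_cols=["FEATURE_SCALED"], label_cols=["TARGET"])),
])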
@@ -188,7 +231,7 @@ class Pipeline(base.BaseTransformer):
          return [c for c in columns if c not in target_cols]

      def _append_step_feature_consumption_info(self, step_name: str, all_cols: List[str], input_cols: List[str]) -> None:
-         if self._is_convertible_to_sklearn:
+         if self._modifies_label_or_sample_weight:
              all_cols = self._get_sanitized_list_of_columns(all_cols)
              self._feature_names_in.append(np.asarray(all_cols, dtype=object))
              self._n_features_in.append(len(all_cols))
@@ -208,33 +251,173 @@ class Pipeline(base.BaseTransformer):
          self, dataset: Union[snowpark.DataFrame, pd.DataFrame]
      ) -> Union[snowpark.DataFrame, pd.DataFrame]:
          self._reset()
-         self._is_convertible_to_sklearn = not self._is_pipeline_modifying_label_or_sample_weight()
+         self._modifies_label_or_sample_weight = not self._is_pipeline_modifying_label_or_sample_weight()
          transformed_dataset = dataset
          for name, trans in self._get_transformers():
              self._append_step_feature_consumption_info(
                  step_name=name, all_cols=transformed_dataset.columns[:], input_cols=trans.get_input_cols()
              )
-             if has_callable_attr(trans, "fit_transform"):
-                 transformed_dataset = trans.fit_transform(transformed_dataset)
-             else:
-                 trans.fit(transformed_dataset)
-                 transformed_dataset = trans.transform(transformed_dataset)
+             trans.fit(transformed_dataset)
+             transformed_dataset = trans.transform(transformed_dataset)

          return transformed_dataset

+     def _upload_model_to_stage(self, stage_name: str, estimator: object, session: Session) -> Tuple[str, str]:
+         """
+         Util method to pickle and upload the model to a temp Snowflake stage.
+
+         Args:
+             stage_name: Stage name to save model.
+             estimator: The pipeline estimator itself.
+             session: Session object.
+
+         Returns:
+             A tuple containing the stage file path for the pickled input model for training and the location to
+             store trained models (response from the training sproc).
+         """
+         # Create a temp file and dump the transform to that file.
+         local_transform_file_name = temp_file_utils.get_temp_file_path()
+         with open(local_transform_file_name, mode="w+b") as local_transform_file:
+             cp.dump(estimator, local_transform_file)
+
+         # Use posixpath to construct stage paths.
+         stage_transform_file_name = posixpath.join(stage_name, os.path.basename(local_transform_file_name))
+         stage_result_file_name = posixpath.join(stage_name, os.path.basename(local_transform_file_name))
+
+         # Put locally serialized transform on stage.
+         session.file.put(
+             local_transform_file_name,
+             stage_transform_file_name,
+             auto_compress=False,
+             overwrite=True,
+         )
+
+         temp_file_utils.cleanup_temp_files([local_transform_file_name])
+         return (stage_transform_file_name, stage_result_file_name)
+
+     def _fit_snowpark_dataframe_within_one_sproc(self, session: Session, dataset: snowpark.DataFrame) -> None:
+         # Extract queries that generated the dataframe. We will need to pass them to the fit procedure.
+         sql_queries = dataset.queries["queries"]
+
+         # Zip the current snowml package.
+         with tempfile.TemporaryDirectory() as tmpdir:
+             snowml_zip_module_filename = os.path.join(tmpdir, "snowflake-ml-python.zip")
+             file_utils.zip_python_package(snowml_zip_module_filename, "snowflake.ml")
+             imports = [snowml_zip_module_filename]
+
+             sproc_name = snowpark_utils.random_name_for_temp_object(snowpark_utils.TempObjectType.PROCEDURE)
+             required_deps = self._deps
+             sproc_statement_params = telemetry.get_function_usage_statement_params(
+                 project=_PROJECT,
+                 subproject="PIPELINE",
+                 function_name=telemetry.get_statement_params_full_func_name(
+                     inspect.currentframe(), self.__class__.__name__
+                 ),
+                 api_calls=[F.sproc],
+             )
+             transform_stage_name = snowpark_utils.random_name_for_temp_object(snowpark_utils.TempObjectType.STAGE)
+             stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
+             session.sql(stage_creation_query).collect()
+             (stage_estimator_file_name, stage_result_file_name) = self._upload_model_to_stage(
+                 transform_stage_name, self, session
+             )
+
+             def pipeline_within_one_sproc(
+                 session: Session,
+                 sql_queries: List[str],
+                 stage_estimator_file_name: str,
+                 stage_result_file_name: str,
+                 sproc_statement_params: Dict[str, str],
+             ) -> str:
+                 import os
+
+                 import cloudpickle as cp
+                 import pandas as pd
+
+                 for query in sql_queries[:-1]:
+                     _ = session.sql(query).collect(statement_params=sproc_statement_params)
+                 sp_df = session.sql(sql_queries[-1])
+                 df: pd.DataFrame = sp_df.to_pandas(statement_params=sproc_statement_params)
+                 df.columns = sp_df.columns
+
+                 local_estimator_file_name = temp_file_utils.get_temp_file_path()
+
+                 session.file.get(stage_estimator_file_name, local_estimator_file_name)
+
+                 local_estimator_file_path = os.path.join(
+                     local_estimator_file_name, os.listdir(local_estimator_file_name)[0]
+                 )
+                 with open(local_estimator_file_path, mode="r+b") as local_estimator_file_obj:
+                     estimator = cp.load(local_estimator_file_obj)
+
+                 estimator.fit(df)
+
+                 local_result_file_name = temp_file_utils.get_temp_file_path()
+
+                 with open(local_result_file_name, mode="w+b") as local_result_file_obj:
+                     cp.dump(estimator, local_result_file_obj)
+
+                 session.file.put(
+                     local_result_file_name,
+                     stage_result_file_name,
+                     auto_compress=False,
+                     overwrite=True,
+                     statement_params=sproc_statement_params,
+                 )
+
+                 return str(os.path.basename(local_result_file_name))
+
+             session.sproc.register(
+                 func=pipeline_within_one_sproc,
+                 is_permanent=False,
+                 name=sproc_name,
+                 packages=required_deps,  # type: ignore[arg-type]
+                 replace=True,
+                 session=session,
+                 anonymous=True,
+                 imports=imports,  # type: ignore[arg-type]
+                 statement_params=sproc_statement_params,
+             )
+
+             sproc_export_file_name: str = pipeline_within_one_sproc(
+                 session,
+                 sql_queries,
+                 stage_estimator_file_name,
+                 stage_result_file_name,
+                 sproc_statement_params,
+             )
+
+             local_result_file_name = temp_file_utils.get_temp_file_path()
+             session.file.get(
+                 posixpath.join(stage_estimator_file_name, sproc_export_file_name),
+                 local_result_file_name,
+                 statement_params=sproc_statement_params,
+             )
+
+             with open(os.path.join(local_result_file_name, sproc_export_file_name), mode="r+b") as result_file_obj:
+                 fit_estimator = cp.load(result_file_obj)
+
+             temp_file_utils.cleanup_temp_files([local_result_file_name])
+             for key, val in vars(fit_estimator).items():
+                 setattr(self, key, val)
+
      @telemetry.send_api_usage_telemetry(
          project=_PROJECT,
          subproject=_SUBPROJECT,
      )
-     def fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> "Pipeline":
+     def fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame], squash: Optional[bool] = False) -> "Pipeline":
          """
          Fit the entire pipeline using the dataset.

          Args:
              dataset: Input dataset.
+             squash: Run the whole pipeline within a stored procedure.

          Returns:
              Fitted pipeline.
+
+         Raises:
+             ValueError: A pipeline incompatible with sklearn is used on MLRS.
          """

          self._validate_steps()
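
Aside: the squash path above hinges on a cloudpickle-to-stage round trip. A condensed, self-contained sketch of that pattern (the stage name is hypothetical and an open Snowpark session is assumed; this is not the exact library code):

import os
import posixpath
import tempfile

import cloudpickle as cp

def stage_pickle(session, obj, stage_name: str = "@MY_TEMP_STAGE") -> str:
    """Serialize obj locally, upload it to a stage, and return the stage path."""
    local_path = os.path.join(tempfile.mkdtemp(), "estimator.pkl")
    with open(local_path, "wb") as f:
        cp.dump(obj, f)
    # Uncompressed upload keeps the staged basename identical to the local one,
    # which is what lets the stored procedure locate the file later.
    stage_path = posixpath.join(stage_name, os.path.basename(local_path))
    session.file.put(local_path, stage_path, auto_compress=False, overwrite=True)
    return stage_path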
@@ -243,19 +426,33 @@ class Pipeline(base.BaseTransformer):
              if isinstance(dataset, snowpark.DataFrame)
              else dataset
          )
-         transformed_dataset = self._fit_transform_dataset(dataset)

-         estimator = self._get_estimator()
-         if estimator:
-             all_cols = transformed_dataset.columns[:]
-             estimator[1].fit(transformed_dataset)
+         if self._can_be_trained_in_ml_runtime(dataset):
+             if not self._is_convertible_to_sklearn:
+                 raise ValueError("This pipeline cannot be converted to an sklearn pipeline.")
+             self._fit_ml_runtime(dataset)

-             self._append_step_feature_consumption_info(
-                 step_name=estimator[0], all_cols=all_cols, input_cols=estimator[1].get_input_cols()
-             )
+         elif squash and isinstance(dataset, snowpark.DataFrame):
+             session = dataset._session
+             assert session is not None
+             self._fit_snowpark_dataframe_within_one_sproc(session=session, dataset=dataset)
+
+         else:
+             transformed_dataset = self._fit_transform_dataset(dataset)
+
+             estimator = self._get_estimator()
+             if estimator:
+                 all_cols = transformed_dataset.columns[:]
+                 estimator[1].fit(transformed_dataset)
+
+                 self._append_step_feature_consumption_info(
+                     step_name=estimator[0], all_cols=all_cols, input_cols=estimator[1].get_input_cols()
+                 )
+
+             self._generate_model_signatures(dataset=dataset)

-         self._generate_model_signatures(dataset=dataset)
          self._is_fitted = True
+
          return self

      @metaestimators.available_if(_final_step_has("transform"))  # type: ignore[misc]
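
Usage-wise, the new parameter is opt-in and only takes effect for Snowpark DataFrame inputs; a hedged sketch (table name hypothetical, pipe being any snowflake.ml Pipeline):

df = session.table("TRAINING_DATA")  # assumes an open Snowpark session
pipe.fit(df)               # unchanged default: one round trip per step
pipe.fit(df, squash=True)  # whole fit runs inside a single anonymous sproc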
@@ -280,6 +477,22 @@ class Pipeline(base.BaseTransformer):
              else dataset
          )

+         if self._sklearn_object is not None:
+             handler = ModelTransformerBuilder.build(
+                 dataset=dataset,
+                 estimator=self._sklearn_object,
+                 class_name="Pipeline",
+                 subproject="",
+                 autogenerated=False,
+             )
+             return handler.batch_inference(
+                 inference_method="transform",
+                 input_cols=self.input_cols if self.input_cols else self._infer_input_cols(dataset),
+                 expected_output_cols=self._infer_output_cols(),
+                 session=dataset._session,
+                 dependencies=self._deps,
+             )
+
          transformed_dataset = self._transform_dataset(dataset=dataset)
          estimator = self._get_estimator()
          if estimator:
@@ -389,8 +602,32 @@ class Pipeline(base.BaseTransformer):

          Returns:
              Output dataset.
+
+         Raises:
+             ValueError: An sklearn object has not been fit and stored before calling this function.
          """
-         return self._invoke_estimator_func("predict", dataset)
+         if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
+             if self._sklearn_object is None:
+                 raise ValueError("Model must be fit before inference.")
+
+             expected_output_cols = self._infer_output_cols()
+             handler = ModelTransformerBuilder.build(
+                 dataset=dataset,
+                 estimator=self._sklearn_object,
+                 class_name="Pipeline",
+                 subproject="",
+                 autogenerated=False,
+             )
+             return handler.batch_inference(
+                 inference_method="predict",
+                 input_cols=self.input_cols if self.input_cols else self._infer_input_cols(dataset),
+                 expected_output_cols=expected_output_cols,
+                 session=dataset._session,
+                 dependencies=self._deps,
+             )
+
+         else:
+             return self._invoke_estimator_func("predict", dataset)

      @metaestimators.available_if(_final_step_has("score_samples"))  # type: ignore[misc]
      @telemetry.send_api_usage_telemetry(
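
The same gate is repeated for score_samples, predict_proba, predict_log_proba, and score below. Schematically, each method reduces to this pattern (a simplification; _batch_inference stands in for the ModelTransformerBuilder plumbing and is not a real method):

import os

def _dispatch(self, method_name, dataset):
    if os.environ.get("IN_SPCS_ML_RUNTIME"):
        # ML Runtime: run batch inference against the fitted sklearn object.
        if self._sklearn_object is None:
            raise ValueError("Model must be fit before inference.")
        return self._batch_inference(method_name, dataset)  # hypothetical helper
    # Everywhere else: the pre-1.5 per-estimator code path.
    return self._invoke_estimator_func(method_name, dataset)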
@@ -408,8 +645,32 @@ class Pipeline(base.BaseTransformer):

          Returns:
              Output dataset.
+
+         Raises:
+             ValueError: An sklearn object has not been fit before calling this function.
          """
-         return self._invoke_estimator_func("score_samples", dataset)
+
+         if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
+             if self._sklearn_object is None:
+                 raise ValueError("Model must be fit before inference.")
+
+             expected_output_cols = self._get_output_column_names("score_samples")
+             handler = ModelTransformerBuilder.build(
+                 dataset=dataset,
+                 estimator=self._sklearn_object,
+                 class_name="Pipeline",
+                 subproject="",
+                 autogenerated=False,
+             )
+             return handler.batch_inference(
+                 inference_method="score_samples",
+                 input_cols=self.input_cols if self.input_cols else self._infer_input_cols(dataset),
+                 expected_output_cols=expected_output_cols,
+                 session=dataset._session,
+                 dependencies=self._deps,
+             )
+         else:
+             return self._invoke_estimator_func("score_samples", dataset)

      @metaestimators.available_if(_final_step_has("predict_proba"))  # type: ignore[misc]
      @telemetry.send_api_usage_telemetry(
@@ -427,8 +688,32 @@ class Pipeline(base.BaseTransformer):

          Returns:
              Output dataset.
+
+         Raises:
+             ValueError: An sklearn object has not been fit before calling this function.
          """
-         return self._invoke_estimator_func("predict_proba", dataset)
+
+         if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
+             if self._sklearn_object is None:
+                 raise ValueError("Model must be fit before inference.")
+             expected_output_cols = self._get_output_column_names("predict_proba")
+
+             handler = ModelTransformerBuilder.build(
+                 dataset=dataset,
+                 estimator=self._sklearn_object,
+                 class_name="Pipeline",
+                 subproject="",
+                 autogenerated=False,
+             )
+             return handler.batch_inference(
+                 inference_method="predict_proba",
+                 input_cols=self.input_cols if self.input_cols else self._infer_input_cols(dataset),
+                 expected_output_cols=expected_output_cols,
+                 session=dataset._session,
+                 dependencies=self._deps,
+             )
+         else:
+             return self._invoke_estimator_func("predict_proba", dataset)

      @metaestimators.available_if(_final_step_has("predict_log_proba"))  # type: ignore[misc]
      @telemetry.send_api_usage_telemetry(
@@ -447,8 +732,31 @@ class Pipeline(base.BaseTransformer):

          Returns:
              Output dataset.
+
+         Raises:
+             ValueError: An sklearn object has not been fit before calling this function.
          """
-         return self._invoke_estimator_func("predict_log_proba", dataset)
+         if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
+             if self._sklearn_object is None:
+                 raise ValueError("Model must be fit before inference.")
+
+             expected_output_cols = self._get_output_column_names("predict_log_proba")
+             handler = ModelTransformerBuilder.build(
+                 dataset=dataset,
+                 estimator=self._sklearn_object,
+                 class_name="Pipeline",
+                 subproject="",
+                 autogenerated=False,
+             )
+             return handler.batch_inference(
+                 inference_method="predict_log_proba",
+                 input_cols=self.input_cols if self.input_cols else self._infer_input_cols(dataset),
+                 expected_output_cols=expected_output_cols,
+                 session=dataset._session,
+                 dependencies=self._deps,
+             )
+         else:
+             return self._invoke_estimator_func("predict_log_proba", dataset)

      @metaestimators.available_if(_final_step_has("score"))  # type: ignore[misc]
      @telemetry.send_api_usage_telemetry(
@@ -464,8 +772,30 @@ class Pipeline(base.BaseTransformer):

          Returns:
              Output dataset.
+
+         Raises:
+             ValueError: An sklearn object has not been fit before calling this function.
          """
-         return self._invoke_estimator_func("score", dataset)
+
+         if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
+             if self._sklearn_object is None:
+                 raise ValueError("Model must be fit before scoring.")
+             handler = ModelTransformerBuilder.build(
+                 dataset=dataset,
+                 estimator=self._sklearn_object,
+                 class_name="Pipeline",
+                 subproject="",
+                 autogenerated=False,
+             )
+             return handler.score(
+                 input_cols=self._infer_input_cols(),
+                 label_cols=self._get_label_cols(),
+                 session=dataset._session,
+                 dependencies=self._deps,
+                 score_sproc_imports=[],
+             )
+         else:
+             return self._invoke_estimator_func("score", dataset)

      def _invoke_estimator_func(
          self, func_name: str, dataset: Union[snowpark.DataFrame, pd.DataFrame]
@@ -495,15 +825,6 @@ class Pipeline(base.BaseTransformer):
          res: snowpark.DataFrame = getattr(estimator[1], func_name)(transformed_dataset)
          return res

-     def _create_unfitted_sklearn_object(self) -> pipeline.Pipeline:
-         sksteps = []
-         for step in self.steps:
-             if isinstance(step[1], base.BaseTransformer):
-                 sksteps.append(tuple([step[0], _utils.to_native_format(step[1])]))
-             else:
-                 sksteps.append(tuple([step[0], step[1]]))
-         return pipeline.Pipeline(steps=sksteps)
-
      def _construct_fitted_column_transformer_object(
          self,
          step_name_in_pipeline: str,
@@ -562,15 +883,134 @@ class Pipeline(base.BaseTransformer):
          ct._name_to_fitted_passthrough = {step_name_in_ct: ft}
          return ct

+     def _fit_ml_runtime(self, dataset: snowpark.DataFrame) -> None:
+         """Train the pipeline in the ML Runtime.
+
+         Args:
+             dataset: The training Snowpark dataframe.
+
+         Raises:
+             ModuleNotFoundError: The ML Runtime Client is not installed.
+         """
+         try:
+             from snowflake.ml.runtime import MLRuntimeClient
+         except ModuleNotFoundError as e:
+             # The snowflake.ml.runtime module should always be present when
+             # the env var IN_SPCS_ML_RUNTIME is present.
+             raise ModuleNotFoundError("ML Runtime Python Client is not installed.") from e
+
+         client = MLRuntimeClient()
+         ml_runtime_compatible_pipeline = self._create_unfitted_sklearn_object()
+
+         label_cols = self._get_label_cols()
+         all_df_cols = dataset.columns
+         input_cols = [col for col in all_df_cols if col not in label_cols]
+
+         trained_pipeline = client.train(
+             estimator=ml_runtime_compatible_pipeline,
+             dataset=dataset,
+             input_cols=input_cols,
+             label_cols=label_cols,
+             sample_weight_col=self.sample_weight_col,
+         )
+
+         self._sklearn_object = trained_pipeline
+
+     def _get_label_cols(self) -> List[str]:
+         """Util function to get the label columns from the pipeline.
+         The label column is only present in the estimator.
+
+         Returns:
+             List of label columns, or empty list if no label cols.
+         """
+         label_cols = []
+         estimator = self._get_estimator()
+         if estimator is not None:
+             label_cols = estimator[1].get_label_cols()
+
+         return label_cols
+
+     def _can_be_trained_in_ml_runtime(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> bool:
+         """A utility function to determine if the pipeline can be pushed down to the ML Runtime for training.
+         Currently, this is true if:
+         - The training dataset is a snowpark dataframe,
+         - The IN_SPCS_ML_RUNTIME environment variable is present, and
+         - The pipeline can be converted to an sklearn pipeline.
+
+         Args:
+             dataset: The training dataset.
+
+         Returns:
+             True if the dataset can be fit in the ml runtime, else false.
+         """
+         if not isinstance(dataset, snowpark.DataFrame):
+             return False
+
+         if not os.environ.get(IN_ML_RUNTIME_ENV_VAR):
+             return False
+
+         return self._is_convertible_to_sklearn
+
+     @staticmethod
+     def _wrap_transformer_in_column_transformer(
+         transformer_name: str, transformer: base.BaseTransformer
+     ) -> ColumnTransformer:
+         """A helper function to convert a transformer object to an sklearn object and wrap it in an sklearn
+         ColumnTransformer.
+
+         Args:
+             transformer_name: Name of the transformer to be wrapped.
+             transformer: The transformer object to be wrapped.
+
+         Returns:
+             A column transformer sklearn object that uses the input columns from the initial snowpark ml transformer.
+         """
+         column_transformer = ColumnTransformer(
+             transformers=[(transformer_name, Pipeline._get_native_object(transformer), transformer.get_input_cols())],
+             remainder="passthrough",
+         )
+         return column_transformer
+
+     def _create_unfitted_sklearn_object(self) -> pipeline.Pipeline:
+         """Create a sklearn pipeline from the current snowml pipeline.
+         ColumnTransformers are used to wrap transformers, as their input columns can be specified
+         as a subset of the pipeline's input columns.
+
+         Returns:
+             An unfit pipeline that can be fit using the ML runtime client.
+         """
+
+         sklearn_pipeline_steps = []
+
+         first_step_name, first_step_object = self.steps[0]
+
+         # Only the first step can have the input_cols field not None/empty.
+         if first_step_object.get_input_cols():
+             first_step_column_transformer = Pipeline._wrap_transformer_in_column_transformer(
+                 first_step_name, first_step_object
+             )
+             first_step_skl = (first_step_name, first_step_column_transformer)
+         else:
+             first_step_skl = (first_step_name, Pipeline._get_native_object(first_step_object))
+
+         sklearn_pipeline_steps.append(first_step_skl)
+
+         for step_name, step_object in self.steps[1:]:
+             skl_step = (step_name, Pipeline._get_native_object(step_object))
+             sklearn_pipeline_steps.append(skl_step)
+
+         return pipeline.Pipeline(sklearn_pipeline_steps)
+
      def _create_sklearn_object(self) -> pipeline.Pipeline:
          if not self._is_fitted:
              return self._create_unfitted_sklearn_object()

-         if not self._is_convertible_to_sklearn:
+         if not self._modifies_label_or_sample_weight:
              raise exceptions.SnowflakeMLException(
                  error_code=error_codes.METHOD_NOT_ALLOWED,
                  original_exception=ValueError(
-                     "The pipeline can't be converted to SKLearn equivalent because it processing label or "
+                     "The pipeline can't be converted to SKLearn equivalent because it modifies label or "
                      "sample_weight columns as part of pipeline preprocessing steps which is not allowed in SKLearn."
                  ),
              )
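
The resulting sklearn object for a two-step pipeline whose first step names its input columns would look roughly like this (pure sklearn, column name hypothetical):

from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

skl = Pipeline(steps=[
    # First step wrapped so it only sees its declared input columns;
    # all remaining columns are passed through untouched.
    ("scale", ColumnTransformer(
        transformers=[("scale", StandardScaler(), ["FEATURE"])],
        remainder="passthrough",
    )),
    ("model", LinearRegression()),
])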
@@ -631,3 +1071,65 @@ class Pipeline(base.BaseTransformer):
                  original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
              )
          return self._model_signature_dict
+
+     @staticmethod
+     def _get_native_object(estimator: base.BaseEstimator) -> object:
+         """A helper function to get the native (sklearn, xgboost, or lightgbm)
+         object from a snowpark ml estimator.
+         TODO - better type hinting - is there a common base class for all xgb/lgbm estimators?
+
+         Args:
+             estimator: The estimator from which to derive the native object.
+
+         Returns:
+             A native estimator object.
+
+         Raises:
+             ValueError: The estimator is not an sklearn, xgboost, or lightgbm estimator.
+         """
+         methods = ["to_sklearn", "to_xgboost", "to_lightgbm"]
+         for method_name in methods:
+             if hasattr(estimator, method_name):
+                 try:
+                     result = getattr(estimator, method_name)()
+                     return result
+                 except exceptions.SnowflakeMLException:
+                     pass  # Do nothing and continue to the next method.
+         raise ValueError("The estimator must be an sklearn, xgboost, or lightgbm estimator.")
+
+     def to_sklearn(self) -> pipeline.Pipeline:
+         """Returns an sklearn Pipeline representing the object, if possible.
+
+         Returns:
+             Previously fit sklearn Pipeline if present, else an unfit pipeline.
+
+         Raises:
+             ValueError: The pipeline cannot be represented as an sklearn pipeline.
+         """
+         if self._is_fitted:
+             if self._sklearn_object is not None:
+                 return self._sklearn_object
+             else:
+                 return self._create_sklearn_object()
+         else:
+             if self._is_convertible_to_sklearn:
+                 return self._create_unfitted_sklearn_object()
+             else:
+                 raise ValueError("This pipeline cannot be converted to an sklearn pipeline.")
+
+     def _send_pipeline_configuration_telemetry(self) -> None:
+         """Track information about the pipeline setup. Currently, we want to track:
+         - Whether the pipeline is convertible to an sklearn pipeline
+         - Whether the pipeline is being used in the SPCS ML Runtime.
+         """
+
+         telemetry_data = {
+             "pipeline_is_convertible_to_sklearn": self._is_convertible_to_sklearn,
+             "in_spcs_ml_runtime": bool(os.environ.get(IN_ML_RUNTIME_ENV_VAR)),
+         }
+         telemetry.send_custom_usage(
+             project=_PROJECT,
+             subproject=_SUBPROJECT,
+             telemetry_type=telemetry.TelemetryField.TYPE_SNOWML_PIPELINE_USAGE.value,
+             data=telemetry_data,
+         )
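
Net effect of the new to_sklearn() (a summary, not library documentation): after an ML Runtime fit it returns the already-fitted sklearn object; after a regular fit it reconstructs an equivalent sklearn pipeline; before any fit it attempts an unfitted conversion and raises ValueError when the convertibility rules above are not met.

skl_pipe = pipe.to_sklearn()  # pipe: any fitted or convertible snowflake.ml Pipeline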