snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (211)
  1. snowflake/ml/_internal/file_utils.py +3 -3
  2. snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
  3. snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
  4. snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
  5. snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
  6. snowflake/ml/_internal/telemetry.py +11 -2
  7. snowflake/ml/_internal/utils/formatting.py +1 -1
  8. snowflake/ml/feature_store/feature_store.py +15 -106
  9. snowflake/ml/fileset/sfcfs.py +4 -3
  10. snowflake/ml/fileset/stage_fs.py +18 -0
  11. snowflake/ml/model/_api.py +9 -9
  12. snowflake/ml/model/_client/model/model_version_impl.py +20 -15
  13. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
  14. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
  15. snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
  16. snowflake/ml/model/_model_composer/model_composer.py +10 -8
  17. snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
  18. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
  19. snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
  20. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
  21. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  22. snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
  23. snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
  24. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
  25. snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
  26. snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
  27. snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
  28. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
  29. snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
  30. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
  31. snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
  32. snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
  33. snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
  34. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  35. snowflake/ml/model/_packager/model_packager.py +8 -6
  36. snowflake/ml/model/custom_model.py +3 -1
  37. snowflake/ml/model/type_hints.py +13 -0
  38. snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
  39. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
  40. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
  41. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
  42. snowflake/ml/modeling/_internal/model_specifications.py +3 -1
  43. snowflake/ml/modeling/_internal/model_trainer.py +2 -2
  44. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
  45. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
  46. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
  47. snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
  48. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
  49. snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
  51. snowflake/ml/modeling/cluster/birch.py +33 -61
  52. snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
  53. snowflake/ml/modeling/cluster/dbscan.py +33 -61
  54. snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
  55. snowflake/ml/modeling/cluster/k_means.py +33 -61
  56. snowflake/ml/modeling/cluster/mean_shift.py +33 -61
  57. snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
  58. snowflake/ml/modeling/cluster/optics.py +33 -61
  59. snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
  60. snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
  61. snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
  62. snowflake/ml/modeling/compose/column_transformer.py +33 -61
  63. snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
  64. snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
  65. snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
  66. snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
  67. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
  68. snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
  69. snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
  70. snowflake/ml/modeling/covariance/oas.py +33 -61
  71. snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
  72. snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
  73. snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
  74. snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
  75. snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
  76. snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
  77. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
  78. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
  79. snowflake/ml/modeling/decomposition/pca.py +33 -61
  80. snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
  81. snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
  82. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
  83. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
  84. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
  85. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
  86. snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
  87. snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
  88. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
  89. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
  90. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
  91. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
  92. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
  93. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
  94. snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
  95. snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
  96. snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
  97. snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
  98. snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
  99. snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
  100. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
  101. snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
  102. snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
  103. snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
  104. snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
  105. snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
  106. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
  107. snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
  108. snowflake/ml/modeling/framework/base.py +55 -5
  109. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
  110. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
  111. snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
  112. snowflake/ml/modeling/impute/knn_imputer.py +33 -61
  113. snowflake/ml/modeling/impute/missing_indicator.py +33 -61
  114. snowflake/ml/modeling/impute/simple_imputer.py +4 -15
  115. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
  116. snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
  117. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
  118. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
  119. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
  120. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
  121. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
  122. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
  123. snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
  124. snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
  125. snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
  126. snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
  127. snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
  128. snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
  129. snowflake/ml/modeling/linear_model/lars.py +33 -61
  130. snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
  131. snowflake/ml/modeling/linear_model/lasso.py +33 -61
  132. snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
  133. snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
  134. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
  135. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
  136. snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
  137. snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
  138. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
  140. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
  141. snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
  142. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
  143. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
  144. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
  145. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
  146. snowflake/ml/modeling/linear_model/perceptron.py +33 -61
  147. snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
  148. snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
  149. snowflake/ml/modeling/linear_model/ridge.py +33 -61
  150. snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
  151. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
  152. snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
  153. snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
  154. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
  155. snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
  156. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
  157. snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
  158. snowflake/ml/modeling/manifold/isomap.py +33 -61
  159. snowflake/ml/modeling/manifold/mds.py +33 -61
  160. snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
  161. snowflake/ml/modeling/manifold/tsne.py +33 -61
  162. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
  163. snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
  164. snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
  165. snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
  166. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
  167. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
  168. snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
  169. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
  170. snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
  171. snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
  172. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
  173. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
  174. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
  175. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
  176. snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
  177. snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
  178. snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
  179. snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
  180. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
  181. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
  182. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
  183. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
  184. snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
  185. snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
  186. snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
  187. snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
  188. snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
  189. snowflake/ml/modeling/svm/linear_svc.py +33 -61
  190. snowflake/ml/modeling/svm/linear_svr.py +33 -61
  191. snowflake/ml/modeling/svm/nu_svc.py +33 -61
  192. snowflake/ml/modeling/svm/nu_svr.py +33 -61
  193. snowflake/ml/modeling/svm/svc.py +33 -61
  194. snowflake/ml/modeling/svm/svr.py +33 -61
  195. snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
  196. snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
  197. snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
  198. snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
  199. snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
  200. snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
  201. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
  202. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
  203. snowflake/ml/registry/_manager/model_manager.py +6 -2
  204. snowflake/ml/registry/model_registry.py +100 -27
  205. snowflake/ml/registry/registry.py +6 -2
  206. snowflake/ml/version.py +1 -1
  207. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
  208. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
  209. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
  210. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
  211. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
@@ -287,18 +287,24 @@ class EllipticEnvelope(BaseTransformer):
287
287
  self._get_model_signatures(dataset)
288
288
  return self
289
289
 
290
- def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
291
- if self._drop_input_cols:
292
- return []
293
- else:
294
- return list(set(dataset.columns) - set(self.output_cols))
295
-
296
290
  def _batch_inference_validate_snowpark(
297
291
  self,
298
292
  dataset: DataFrame,
299
293
  inference_method: str,
300
294
  ) -> List[str]:
301
- """Util method to run validate that batch inference can be run on a snowpark dataframe.
295
+ """Util method to run validate that batch inference can be run on a snowpark dataframe and
296
+ return the available package that exists in the snowflake anaconda channel
297
+
298
+ Args:
299
+ dataset: snowpark dataframe
300
+ inference_method: the inference method such as predict, score...
301
+
302
+ Raises:
303
+ SnowflakeMLException: If the estimator is not fitted, raise error
304
+ SnowflakeMLException: If the session is None, raise error
305
+
306
+ Returns:
307
+ A list of available package that exists in the snowflake anaconda channel
302
308
  """
303
309
  if not self._is_fitted:
304
310
  raise exceptions.SnowflakeMLException(
@@ -372,7 +378,7 @@ class EllipticEnvelope(BaseTransformer):
372
378
  transform_kwargs = dict(
373
379
  session = dataset._session,
374
380
  dependencies = self._deps,
375
- pass_through_cols = self._get_pass_through_columns(dataset),
381
+ drop_input_cols = self._drop_input_cols,
376
382
  expected_output_cols_type = expected_type_inferred,
377
383
  )
378
384
 
@@ -432,16 +438,16 @@ class EllipticEnvelope(BaseTransformer):
432
438
  # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
433
439
  # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
434
440
  # each row containing a list of values.
435
- expected_dtype = "ARRAY"
441
+ expected_dtype = "array"
436
442
 
437
443
  # If we were unable to assign a type to this transform in the factory, infer the type here.
438
444
  if expected_dtype == "":
439
- # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
445
+ # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
440
446
  if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
441
- expected_dtype = "ARRAY"
442
- # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
447
+ expected_dtype = "array"
448
+ # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
443
449
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
444
- expected_dtype = "ARRAY"
450
+ expected_dtype = "array"
445
451
  else:
446
452
  output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
447
453
  # We can only infer the output types from the input types if the following two statemetns are true:
@@ -459,7 +465,7 @@ class EllipticEnvelope(BaseTransformer):
459
465
  transform_kwargs = dict(
460
466
  session = dataset._session,
461
467
  dependencies = self._deps,
462
- pass_through_cols = self._get_pass_through_columns(dataset),
468
+ drop_input_cols = self._drop_input_cols,
463
469
  expected_output_cols_type = expected_dtype,
464
470
  )
465
471
 
@@ -512,7 +518,7 @@ class EllipticEnvelope(BaseTransformer):
512
518
  subproject=_SUBPROJECT,
513
519
  )
514
520
  output_result, fitted_estimator = model_trainer.train_fit_predict(
515
- pass_through_columns=self._get_pass_through_columns(dataset),
521
+ drop_input_cols=self._drop_input_cols,
516
522
  expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
517
523
  )
518
524
  self._sklearn_object = fitted_estimator
@@ -530,44 +536,6 @@ class EllipticEnvelope(BaseTransformer):
530
536
  assert self._sklearn_object is not None
531
537
  return self._sklearn_object.embedding_
532
538
 
533
-
534
- def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
535
- """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
536
- Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
537
- """
538
- output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
539
- if output_cols:
540
- output_cols = [
541
- identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
542
- for c in output_cols
543
- ]
544
- elif getattr(self._sklearn_object, "classes_", None) is None:
545
- output_cols = [output_cols_prefix]
546
- elif self._sklearn_object is not None:
547
- classes = self._sklearn_object.classes_
548
- if isinstance(classes, numpy.ndarray):
549
- output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
550
- elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
551
- # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
552
- output_cols = []
553
- for i, cl in enumerate(classes):
554
- # For binary classification, there is only one output column for each class
555
- # ndarray as the two classes are complementary.
556
- if len(cl) == 2:
557
- output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
558
- else:
559
- output_cols.extend([
560
- f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
561
- ])
562
- else:
563
- output_cols = []
564
-
565
- # Make sure column names are valid snowflake identifiers.
566
- assert output_cols is not None # Make MyPy happy
567
- rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
568
-
569
- return rv
570
-
571
539
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
572
540
  @telemetry.send_api_usage_telemetry(
573
541
  project=_PROJECT,
@@ -607,7 +575,7 @@ class EllipticEnvelope(BaseTransformer):
607
575
  transform_kwargs = dict(
608
576
  session=dataset._session,
609
577
  dependencies=self._deps,
610
- pass_through_cols=self._get_pass_through_columns(dataset),
578
+ drop_input_cols = self._drop_input_cols,
611
579
  expected_output_cols_type="float",
612
580
  )
613
581
 
@@ -672,7 +640,7 @@ class EllipticEnvelope(BaseTransformer):
672
640
  transform_kwargs = dict(
673
641
  session=dataset._session,
674
642
  dependencies=self._deps,
675
- pass_through_cols=self._get_pass_through_columns(dataset),
643
+ drop_input_cols = self._drop_input_cols,
676
644
  expected_output_cols_type="float",
677
645
  )
678
646
  elif isinstance(dataset, pd.DataFrame):
@@ -735,7 +703,7 @@ class EllipticEnvelope(BaseTransformer):
735
703
  transform_kwargs = dict(
736
704
  session=dataset._session,
737
705
  dependencies=self._deps,
738
- pass_through_cols=self._get_pass_through_columns(dataset),
706
+ drop_input_cols = self._drop_input_cols,
739
707
  expected_output_cols_type="float",
740
708
  )
741
709
 
@@ -802,7 +770,7 @@ class EllipticEnvelope(BaseTransformer):
802
770
  transform_kwargs = dict(
803
771
  session=dataset._session,
804
772
  dependencies=self._deps,
805
- pass_through_cols=self._get_pass_through_columns(dataset),
773
+ drop_input_cols = self._drop_input_cols,
806
774
  expected_output_cols_type="float",
807
775
  )
808
776
 
@@ -858,13 +826,17 @@ class EllipticEnvelope(BaseTransformer):
858
826
  transform_kwargs: ScoreKwargsTypedDict = dict()
859
827
 
860
828
  if isinstance(dataset, DataFrame):
829
+ self._deps = self._batch_inference_validate_snowpark(
830
+ dataset=dataset,
831
+ inference_method="score",
832
+ )
861
833
  selected_cols = self._get_active_columns()
862
834
  if len(selected_cols) > 0:
863
835
  dataset = dataset.select(selected_cols)
864
836
  assert isinstance(dataset._session, Session) # keep mypy happy
865
837
  transform_kwargs = dict(
866
838
  session=dataset._session,
867
- dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
839
+ dependencies=["snowflake-snowpark-python"] + self._deps,
868
840
  score_sproc_imports=['sklearn'],
869
841
  )
870
842
  elif isinstance(dataset, pd.DataFrame):
@@ -938,9 +910,9 @@ class EllipticEnvelope(BaseTransformer):
938
910
  transform_kwargs = dict(
939
911
  session = dataset._session,
940
912
  dependencies = self._deps,
941
- pass_through_cols = self._get_pass_through_columns(dataset),
942
- expected_output_cols_type = "array",
943
- n_neighbors = n_neighbors,
913
+ drop_input_cols = self._drop_input_cols,
914
+ expected_output_cols_type="array",
915
+ n_neighbors = n_neighbors,
944
916
  return_distance = return_distance
945
917
  )
946
918
  elif isinstance(dataset, pd.DataFrame):
@@ -263,18 +263,24 @@ class EmpiricalCovariance(BaseTransformer):
263
263
  self._get_model_signatures(dataset)
264
264
  return self
265
265
 
266
- def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
267
- if self._drop_input_cols:
268
- return []
269
- else:
270
- return list(set(dataset.columns) - set(self.output_cols))
271
-
272
266
  def _batch_inference_validate_snowpark(
273
267
  self,
274
268
  dataset: DataFrame,
275
269
  inference_method: str,
276
270
  ) -> List[str]:
277
- """Util method to run validate that batch inference can be run on a snowpark dataframe.
271
+ """Util method to run validate that batch inference can be run on a snowpark dataframe and
272
+ return the available package that exists in the snowflake anaconda channel
273
+
274
+ Args:
275
+ dataset: snowpark dataframe
276
+ inference_method: the inference method such as predict, score...
277
+
278
+ Raises:
279
+ SnowflakeMLException: If the estimator is not fitted, raise error
280
+ SnowflakeMLException: If the session is None, raise error
281
+
282
+ Returns:
283
+ A list of available package that exists in the snowflake anaconda channel
278
284
  """
279
285
  if not self._is_fitted:
280
286
  raise exceptions.SnowflakeMLException(
@@ -346,7 +352,7 @@ class EmpiricalCovariance(BaseTransformer):
346
352
  transform_kwargs = dict(
347
353
  session = dataset._session,
348
354
  dependencies = self._deps,
349
- pass_through_cols = self._get_pass_through_columns(dataset),
355
+ drop_input_cols = self._drop_input_cols,
350
356
  expected_output_cols_type = expected_type_inferred,
351
357
  )
352
358
 
@@ -406,16 +412,16 @@ class EmpiricalCovariance(BaseTransformer):
406
412
  # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
407
413
  # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
408
414
  # each row containing a list of values.
409
- expected_dtype = "ARRAY"
415
+ expected_dtype = "array"
410
416
 
411
417
  # If we were unable to assign a type to this transform in the factory, infer the type here.
412
418
  if expected_dtype == "":
413
- # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
419
+ # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
414
420
  if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
415
- expected_dtype = "ARRAY"
416
- # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
421
+ expected_dtype = "array"
422
+ # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
417
423
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
418
- expected_dtype = "ARRAY"
424
+ expected_dtype = "array"
419
425
  else:
420
426
  output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
421
427
  # We can only infer the output types from the input types if the following two statemetns are true:
@@ -433,7 +439,7 @@ class EmpiricalCovariance(BaseTransformer):
433
439
  transform_kwargs = dict(
434
440
  session = dataset._session,
435
441
  dependencies = self._deps,
436
- pass_through_cols = self._get_pass_through_columns(dataset),
442
+ drop_input_cols = self._drop_input_cols,
437
443
  expected_output_cols_type = expected_dtype,
438
444
  )
439
445
 
@@ -484,7 +490,7 @@ class EmpiricalCovariance(BaseTransformer):
484
490
  subproject=_SUBPROJECT,
485
491
  )
486
492
  output_result, fitted_estimator = model_trainer.train_fit_predict(
487
- pass_through_columns=self._get_pass_through_columns(dataset),
493
+ drop_input_cols=self._drop_input_cols,
488
494
  expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
489
495
  )
490
496
  self._sklearn_object = fitted_estimator
@@ -502,44 +508,6 @@ class EmpiricalCovariance(BaseTransformer):
502
508
  assert self._sklearn_object is not None
503
509
  return self._sklearn_object.embedding_
504
510
 
505
-
506
- def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
507
- """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
508
- Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
509
- """
510
- output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
511
- if output_cols:
512
- output_cols = [
513
- identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
514
- for c in output_cols
515
- ]
516
- elif getattr(self._sklearn_object, "classes_", None) is None:
517
- output_cols = [output_cols_prefix]
518
- elif self._sklearn_object is not None:
519
- classes = self._sklearn_object.classes_
520
- if isinstance(classes, numpy.ndarray):
521
- output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
522
- elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
523
- # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
524
- output_cols = []
525
- for i, cl in enumerate(classes):
526
- # For binary classification, there is only one output column for each class
527
- # ndarray as the two classes are complementary.
528
- if len(cl) == 2:
529
- output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
530
- else:
531
- output_cols.extend([
532
- f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
533
- ])
534
- else:
535
- output_cols = []
536
-
537
- # Make sure column names are valid snowflake identifiers.
538
- assert output_cols is not None # Make MyPy happy
539
- rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
540
-
541
- return rv
542
-
543
511
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
544
512
  @telemetry.send_api_usage_telemetry(
545
513
  project=_PROJECT,
@@ -579,7 +547,7 @@ class EmpiricalCovariance(BaseTransformer):
579
547
  transform_kwargs = dict(
580
548
  session=dataset._session,
581
549
  dependencies=self._deps,
582
- pass_through_cols=self._get_pass_through_columns(dataset),
550
+ drop_input_cols = self._drop_input_cols,
583
551
  expected_output_cols_type="float",
584
552
  )
585
553
 
@@ -644,7 +612,7 @@ class EmpiricalCovariance(BaseTransformer):
644
612
  transform_kwargs = dict(
645
613
  session=dataset._session,
646
614
  dependencies=self._deps,
647
- pass_through_cols=self._get_pass_through_columns(dataset),
615
+ drop_input_cols = self._drop_input_cols,
648
616
  expected_output_cols_type="float",
649
617
  )
650
618
  elif isinstance(dataset, pd.DataFrame):
@@ -705,7 +673,7 @@ class EmpiricalCovariance(BaseTransformer):
705
673
  transform_kwargs = dict(
706
674
  session=dataset._session,
707
675
  dependencies=self._deps,
708
- pass_through_cols=self._get_pass_through_columns(dataset),
676
+ drop_input_cols = self._drop_input_cols,
709
677
  expected_output_cols_type="float",
710
678
  )
711
679
 
@@ -770,7 +738,7 @@ class EmpiricalCovariance(BaseTransformer):
770
738
  transform_kwargs = dict(
771
739
  session=dataset._session,
772
740
  dependencies=self._deps,
773
- pass_through_cols=self._get_pass_through_columns(dataset),
741
+ drop_input_cols = self._drop_input_cols,
774
742
  expected_output_cols_type="float",
775
743
  )
776
744
 
@@ -826,13 +794,17 @@ class EmpiricalCovariance(BaseTransformer):
826
794
  transform_kwargs: ScoreKwargsTypedDict = dict()
827
795
 
828
796
  if isinstance(dataset, DataFrame):
797
+ self._deps = self._batch_inference_validate_snowpark(
798
+ dataset=dataset,
799
+ inference_method="score",
800
+ )
829
801
  selected_cols = self._get_active_columns()
830
802
  if len(selected_cols) > 0:
831
803
  dataset = dataset.select(selected_cols)
832
804
  assert isinstance(dataset._session, Session) # keep mypy happy
833
805
  transform_kwargs = dict(
834
806
  session=dataset._session,
835
- dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
807
+ dependencies=["snowflake-snowpark-python"] + self._deps,
836
808
  score_sproc_imports=['sklearn'],
837
809
  )
838
810
  elif isinstance(dataset, pd.DataFrame):
@@ -906,9 +878,9 @@ class EmpiricalCovariance(BaseTransformer):
906
878
  transform_kwargs = dict(
907
879
  session = dataset._session,
908
880
  dependencies = self._deps,
909
- pass_through_cols = self._get_pass_through_columns(dataset),
910
- expected_output_cols_type = "array",
911
- n_neighbors = n_neighbors,
881
+ drop_input_cols = self._drop_input_cols,
882
+ expected_output_cols_type="array",
883
+ n_neighbors = n_neighbors,
912
884
  return_distance = return_distance
913
885
  )
914
886
  elif isinstance(dataset, pd.DataFrame):
@@ -311,18 +311,24 @@ class GraphicalLasso(BaseTransformer):
311
311
  self._get_model_signatures(dataset)
312
312
  return self
313
313
 
314
- def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
315
- if self._drop_input_cols:
316
- return []
317
- else:
318
- return list(set(dataset.columns) - set(self.output_cols))
319
-
320
314
  def _batch_inference_validate_snowpark(
321
315
  self,
322
316
  dataset: DataFrame,
323
317
  inference_method: str,
324
318
  ) -> List[str]:
325
- """Util method to run validate that batch inference can be run on a snowpark dataframe.
319
+ """Util method to validate that batch inference can be run on a snowpark dataframe and
320
+ return the available packages that exist in the snowflake anaconda channel
321
+
322
+ Args:
323
+ dataset: snowpark dataframe
324
+ inference_method: the inference method such as predict, score...
325
+
326
+ Raises:
327
+ SnowflakeMLException: If the estimator is not fitted, raise error
328
+ SnowflakeMLException: If the session is None, raise error
329
+
330
+ Returns:
331
+ A list of available packages that exist in the snowflake anaconda channel
326
332
  """
327
333
  if not self._is_fitted:
328
334
  raise exceptions.SnowflakeMLException(
@@ -394,7 +400,7 @@ class GraphicalLasso(BaseTransformer):
394
400
  transform_kwargs = dict(
395
401
  session = dataset._session,
396
402
  dependencies = self._deps,
397
- pass_through_cols = self._get_pass_through_columns(dataset),
403
+ drop_input_cols = self._drop_input_cols,
398
404
  expected_output_cols_type = expected_type_inferred,
399
405
  )
400
406
 
@@ -454,16 +460,16 @@ class GraphicalLasso(BaseTransformer):
454
460
  # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
455
461
  # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
456
462
  # each row containing a list of values.
457
- expected_dtype = "ARRAY"
463
+ expected_dtype = "array"
458
464
 
459
465
  # If we were unable to assign a type to this transform in the factory, infer the type here.
460
466
  if expected_dtype == "":
461
- # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
467
+ # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
462
468
  if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
463
- expected_dtype = "ARRAY"
464
- # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
469
+ expected_dtype = "array"
470
+ # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
465
471
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
466
- expected_dtype = "ARRAY"
472
+ expected_dtype = "array"
467
473
  else:
468
474
  output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
469
475
  # We can only infer the output types from the input types if the following two statements are true:
@@ -481,7 +487,7 @@ class GraphicalLasso(BaseTransformer):
481
487
  transform_kwargs = dict(
482
488
  session = dataset._session,
483
489
  dependencies = self._deps,
484
- pass_through_cols = self._get_pass_through_columns(dataset),
490
+ drop_input_cols = self._drop_input_cols,
485
491
  expected_output_cols_type = expected_dtype,
486
492
  )
487
493
 
@@ -532,7 +538,7 @@ class GraphicalLasso(BaseTransformer):
532
538
  subproject=_SUBPROJECT,
533
539
  )
534
540
  output_result, fitted_estimator = model_trainer.train_fit_predict(
535
- pass_through_columns=self._get_pass_through_columns(dataset),
541
+ drop_input_cols=self._drop_input_cols,
536
542
  expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
537
543
  )
538
544
  self._sklearn_object = fitted_estimator
@@ -550,44 +556,6 @@ class GraphicalLasso(BaseTransformer):
550
556
  assert self._sklearn_object is not None
551
557
  return self._sklearn_object.embedding_
552
558
 
553
-
554
- def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
555
- """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
556
- Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
557
- """
558
- output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
559
- if output_cols:
560
- output_cols = [
561
- identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
562
- for c in output_cols
563
- ]
564
- elif getattr(self._sklearn_object, "classes_", None) is None:
565
- output_cols = [output_cols_prefix]
566
- elif self._sklearn_object is not None:
567
- classes = self._sklearn_object.classes_
568
- if isinstance(classes, numpy.ndarray):
569
- output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
570
- elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
571
- # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
572
- output_cols = []
573
- for i, cl in enumerate(classes):
574
- # For binary classification, there is only one output column for each class
575
- # ndarray as the two classes are complementary.
576
- if len(cl) == 2:
577
- output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
578
- else:
579
- output_cols.extend([
580
- f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
581
- ])
582
- else:
583
- output_cols = []
584
-
585
- # Make sure column names are valid snowflake identifiers.
586
- assert output_cols is not None # Make MyPy happy
587
- rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
588
-
589
- return rv
590
-
591
559
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
592
560
  @telemetry.send_api_usage_telemetry(
593
561
  project=_PROJECT,
@@ -627,7 +595,7 @@ class GraphicalLasso(BaseTransformer):
627
595
  transform_kwargs = dict(
628
596
  session=dataset._session,
629
597
  dependencies=self._deps,
630
- pass_through_cols=self._get_pass_through_columns(dataset),
598
+ drop_input_cols = self._drop_input_cols,
631
599
  expected_output_cols_type="float",
632
600
  )
633
601
 
@@ -692,7 +660,7 @@ class GraphicalLasso(BaseTransformer):
692
660
  transform_kwargs = dict(
693
661
  session=dataset._session,
694
662
  dependencies=self._deps,
695
- pass_through_cols=self._get_pass_through_columns(dataset),
663
+ drop_input_cols = self._drop_input_cols,
696
664
  expected_output_cols_type="float",
697
665
  )
698
666
  elif isinstance(dataset, pd.DataFrame):
@@ -753,7 +721,7 @@ class GraphicalLasso(BaseTransformer):
753
721
  transform_kwargs = dict(
754
722
  session=dataset._session,
755
723
  dependencies=self._deps,
756
- pass_through_cols=self._get_pass_through_columns(dataset),
724
+ drop_input_cols = self._drop_input_cols,
757
725
  expected_output_cols_type="float",
758
726
  )
759
727
 
@@ -818,7 +786,7 @@ class GraphicalLasso(BaseTransformer):
818
786
  transform_kwargs = dict(
819
787
  session=dataset._session,
820
788
  dependencies=self._deps,
821
- pass_through_cols=self._get_pass_through_columns(dataset),
789
+ drop_input_cols = self._drop_input_cols,
822
790
  expected_output_cols_type="float",
823
791
  )
824
792
 
@@ -874,13 +842,17 @@ class GraphicalLasso(BaseTransformer):
874
842
  transform_kwargs: ScoreKwargsTypedDict = dict()
875
843
 
876
844
  if isinstance(dataset, DataFrame):
845
+ self._deps = self._batch_inference_validate_snowpark(
846
+ dataset=dataset,
847
+ inference_method="score",
848
+ )
877
849
  selected_cols = self._get_active_columns()
878
850
  if len(selected_cols) > 0:
879
851
  dataset = dataset.select(selected_cols)
880
852
  assert isinstance(dataset._session, Session) # keep mypy happy
881
853
  transform_kwargs = dict(
882
854
  session=dataset._session,
883
- dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
855
+ dependencies=["snowflake-snowpark-python"] + self._deps,
884
856
  score_sproc_imports=['sklearn'],
885
857
  )
886
858
  elif isinstance(dataset, pd.DataFrame):
@@ -954,9 +926,9 @@ class GraphicalLasso(BaseTransformer):
954
926
  transform_kwargs = dict(
955
927
  session = dataset._session,
956
928
  dependencies = self._deps,
957
- pass_through_cols = self._get_pass_through_columns(dataset),
958
- expected_output_cols_type = "array",
959
- n_neighbors = n_neighbors,
929
+ drop_input_cols = self._drop_input_cols,
930
+ expected_output_cols_type="array",
931
+ n_neighbors = n_neighbors,
960
932
  return_distance = return_distance
961
933
  )
962
934
  elif isinstance(dataset, pd.DataFrame):