snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to one of the supported public registries. It is provided for informational purposes only.
Files changed (211)
  1. snowflake/ml/_internal/file_utils.py +3 -3
  2. snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
  3. snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
  4. snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
  5. snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
  6. snowflake/ml/_internal/telemetry.py +11 -2
  7. snowflake/ml/_internal/utils/formatting.py +1 -1
  8. snowflake/ml/feature_store/feature_store.py +15 -106
  9. snowflake/ml/fileset/sfcfs.py +4 -3
  10. snowflake/ml/fileset/stage_fs.py +18 -0
  11. snowflake/ml/model/_api.py +9 -9
  12. snowflake/ml/model/_client/model/model_version_impl.py +20 -15
  13. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
  14. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
  15. snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
  16. snowflake/ml/model/_model_composer/model_composer.py +10 -8
  17. snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
  18. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
  19. snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
  20. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
  21. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  22. snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
  23. snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
  24. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
  25. snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
  26. snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
  27. snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
  28. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
  29. snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
  30. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
  31. snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
  32. snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
  33. snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
  34. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  35. snowflake/ml/model/_packager/model_packager.py +8 -6
  36. snowflake/ml/model/custom_model.py +3 -1
  37. snowflake/ml/model/type_hints.py +13 -0
  38. snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
  39. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
  40. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
  41. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
  42. snowflake/ml/modeling/_internal/model_specifications.py +3 -1
  43. snowflake/ml/modeling/_internal/model_trainer.py +2 -2
  44. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
  45. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
  46. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
  47. snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
  48. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
  49. snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
  51. snowflake/ml/modeling/cluster/birch.py +33 -61
  52. snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
  53. snowflake/ml/modeling/cluster/dbscan.py +33 -61
  54. snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
  55. snowflake/ml/modeling/cluster/k_means.py +33 -61
  56. snowflake/ml/modeling/cluster/mean_shift.py +33 -61
  57. snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
  58. snowflake/ml/modeling/cluster/optics.py +33 -61
  59. snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
  60. snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
  61. snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
  62. snowflake/ml/modeling/compose/column_transformer.py +33 -61
  63. snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
  64. snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
  65. snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
  66. snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
  67. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
  68. snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
  69. snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
  70. snowflake/ml/modeling/covariance/oas.py +33 -61
  71. snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
  72. snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
  73. snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
  74. snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
  75. snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
  76. snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
  77. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
  78. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
  79. snowflake/ml/modeling/decomposition/pca.py +33 -61
  80. snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
  81. snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
  82. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
  83. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
  84. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
  85. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
  86. snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
  87. snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
  88. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
  89. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
  90. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
  91. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
  92. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
  93. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
  94. snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
  95. snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
  96. snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
  97. snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
  98. snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
  99. snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
  100. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
  101. snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
  102. snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
  103. snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
  104. snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
  105. snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
  106. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
  107. snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
  108. snowflake/ml/modeling/framework/base.py +55 -5
  109. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
  110. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
  111. snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
  112. snowflake/ml/modeling/impute/knn_imputer.py +33 -61
  113. snowflake/ml/modeling/impute/missing_indicator.py +33 -61
  114. snowflake/ml/modeling/impute/simple_imputer.py +4 -15
  115. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
  116. snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
  117. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
  118. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
  119. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
  120. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
  121. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
  122. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
  123. snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
  124. snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
  125. snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
  126. snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
  127. snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
  128. snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
  129. snowflake/ml/modeling/linear_model/lars.py +33 -61
  130. snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
  131. snowflake/ml/modeling/linear_model/lasso.py +33 -61
  132. snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
  133. snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
  134. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
  135. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
  136. snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
  137. snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
  138. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
  140. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
  141. snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
  142. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
  143. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
  144. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
  145. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
  146. snowflake/ml/modeling/linear_model/perceptron.py +33 -61
  147. snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
  148. snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
  149. snowflake/ml/modeling/linear_model/ridge.py +33 -61
  150. snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
  151. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
  152. snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
  153. snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
  154. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
  155. snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
  156. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
  157. snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
  158. snowflake/ml/modeling/manifold/isomap.py +33 -61
  159. snowflake/ml/modeling/manifold/mds.py +33 -61
  160. snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
  161. snowflake/ml/modeling/manifold/tsne.py +33 -61
  162. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
  163. snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
  164. snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
  165. snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
  166. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
  167. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
  168. snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
  169. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
  170. snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
  171. snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
  172. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
  173. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
  174. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
  175. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
  176. snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
  177. snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
  178. snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
  179. snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
  180. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
  181. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
  182. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
  183. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
  184. snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
  185. snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
  186. snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
  187. snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
  188. snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
  189. snowflake/ml/modeling/svm/linear_svc.py +33 -61
  190. snowflake/ml/modeling/svm/linear_svr.py +33 -61
  191. snowflake/ml/modeling/svm/nu_svc.py +33 -61
  192. snowflake/ml/modeling/svm/nu_svr.py +33 -61
  193. snowflake/ml/modeling/svm/svc.py +33 -61
  194. snowflake/ml/modeling/svm/svr.py +33 -61
  195. snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
  196. snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
  197. snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
  198. snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
  199. snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
  200. snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
  201. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
  202. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
  203. snowflake/ml/registry/_manager/model_manager.py +6 -2
  204. snowflake/ml/registry/model_registry.py +100 -27
  205. snowflake/ml/registry/registry.py +6 -2
  206. snowflake/ml/version.py +1 -1
  207. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
  208. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
  209. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
  210. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
  211. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
@@ -337,18 +337,24 @@ class GraphicalLassoCV(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
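
The hunk above is the core of a mechanical refactor repeated across roughly 200 generated estimator wrappers (the recurring +33 -61 entries in the file list): the client-side helper _get_pass_through_columns is deleted, and call sites now forward the boolean drop_input_cols flag so the batch-inference handler can derive the pass-through set where the concrete column list is known. A minimal, runnable sketch of what the deleted helper computed (the standalone function name here is hypothetical):

    from typing import List

    def pass_through_columns(dataset_columns: List[str], output_cols: List[str],
                             drop_input_cols: bool) -> List[str]:
        # Mirrors the deleted _get_pass_through_columns: keep nothing when
        # inputs are dropped, else every input column not already an output.
        if drop_input_cols:
            return []
        return list(set(dataset_columns) - set(output_cols))

    print(pass_through_columns(["F1", "F2", "OUT"], ["OUT"], drop_input_cols=False))
    # ['F1', 'F2'] (order not guaranteed: set difference)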
@@ -420,7 +426,7 @@ class GraphicalLassoCV(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -480,16 +486,16 @@ class GraphicalLassoCV(BaseTransformer):
         # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
         # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
         # each row containing a list of values.
-        expected_dtype = "ARRAY"
+        expected_dtype = "array"
 
         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
             if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "ARRAY"
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
             elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statemetns are true:
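
For context on the "ARRAY" to "array" casing change above: the wrappers emit a single output column whose rows hold lists whenever the estimator produces more values than there are output columns, and this string is the type tag later passed along as expected_output_cols_type. A self-contained sketch of the inference rule (FakeKMeans is illustrative only, not a snowflake-ml class):

    class FakeKMeans:
        n_clusters = 8  # stands in for self._sklearn_object

    est = FakeKMeans()
    output_cols = ["CLUSTER"]
    expected_dtype = ""
    if hasattr(est, "n_clusters") and getattr(est, "n_clusters") != len(output_cols):
        # Eight per-cluster values squeezed into one column: each row holds a list.
        expected_dtype = "array"
    print(expected_dtype)  # -> array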
@@ -507,7 +513,7 @@ class GraphicalLassoCV(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -558,7 +564,7 @@ class GraphicalLassoCV(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
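
The train_fit_predict call site above implies a matching signature change in the trainer protocol (snowflake/ml/modeling/_internal/model_trainer.py and transformer_protocols.py each change by two lines in this release). A hypothetical reconstruction, inferred from this call site alone and not from the real protocol definition:

    from typing import Any, List, Protocol, Tuple

    class ModelTrainerSketch(Protocol):
        # 1.4.0: drop_input_cols: bool replaces pass_through_columns: List[str].
        def train_fit_predict(
            self,
            drop_input_cols: bool,
            expected_output_cols_list: List[str],
        ) -> Tuple[Any, Any]:  # (output_result, fitted_estimator)
            ...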
@@ -576,44 +582,6 @@ class GraphicalLassoCV(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-            Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
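
The deleted _get_output_column_names above is easiest to read through a worked example. A condensed, runnable sketch of its naming behavior (Snowflake identifier resolution via the identifier module is omitted):

    import numpy

    def output_column_names(prefix: str, classes) -> list:
        # Condensed from the deleted method.
        if classes is None:
            return [prefix]                      # not a classifier
        if isinstance(classes, numpy.ndarray):
            return [f"{prefix}{c}" for c in classes.tolist()]
        if isinstance(classes, list) and classes and isinstance(classes[0], numpy.ndarray):
            cols = []                            # multioutput estimator
            for i, cl in enumerate(classes):
                if len(cl) == 2:                 # binary: the two classes are complementary
                    cols.append(f"{prefix}{i}_{cl[0]}")
                else:
                    cols.extend(f"{prefix}{i}_{c}" for c in cl.tolist())
            return cols
        return []

    print(output_column_names("PREDICT_PROBA_", numpy.array([0, 1])))
    # ['PREDICT_PROBA_0', 'PREDICT_PROBA_1']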
@@ -653,7 +621,7 @@ class GraphicalLassoCV(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -718,7 +686,7 @@ class GraphicalLassoCV(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -779,7 +747,7 @@ class GraphicalLassoCV(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -844,7 +812,7 @@ class GraphicalLassoCV(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -900,13 +868,17 @@ class GraphicalLassoCV(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
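
Net effect of the score() hunk above: the dependency list is now resolved at call time through the same validation path used by the predict methods, instead of the previous self._get_dependencies() lookup. A condensed sketch of the new Snowpark branch, with names taken from the diff and the surrounding class machinery elided:

    def score_snowpark_branch(self, dataset):
        # New in 1.4.0: validate fitted state and session, and resolve which
        # dependencies exist in the Snowflake Anaconda channel.
        self._deps = self._batch_inference_validate_snowpark(
            dataset=dataset,
            inference_method="score",
        )
        selected_cols = self._get_active_columns()
        if len(selected_cols) > 0:
            dataset = dataset.select(selected_cols)
        return dict(
            session=dataset._session,
            dependencies=["snowflake-snowpark-python"] + self._deps,
            score_sproc_imports=["sklearn"],
        )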
@@ -980,9 +952,9 @@ class GraphicalLassoCV(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -270,18 +270,24 @@ class LedoitWolf(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -353,7 +359,7 @@ class LedoitWolf(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -413,16 +419,16 @@ class LedoitWolf(BaseTransformer):
         # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
         # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
         # each row containing a list of values.
-        expected_dtype = "ARRAY"
+        expected_dtype = "array"
 
         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
             if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "ARRAY"
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
             elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statemetns are true:
@@ -440,7 +446,7 @@ class LedoitWolf(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -491,7 +497,7 @@ class LedoitWolf(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -509,44 +515,6 @@ class LedoitWolf(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-            Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -586,7 +554,7 @@ class LedoitWolf(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -651,7 +619,7 @@ class LedoitWolf(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -712,7 +680,7 @@ class LedoitWolf(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -777,7 +745,7 @@ class LedoitWolf(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -833,13 +801,17 @@ class LedoitWolf(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
        if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -913,9 +885,9 @@ class LedoitWolf(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -282,18 +282,24 @@ class MinCovDet(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -365,7 +371,7 @@ class MinCovDet(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -425,16 +431,16 @@ class MinCovDet(BaseTransformer):
         # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
         # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
         # each row containing a list of values.
-        expected_dtype = "ARRAY"
+        expected_dtype = "array"
 
         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
             if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "ARRAY"
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
             elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statemetns are true:
@@ -452,7 +458,7 @@ class MinCovDet(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -503,7 +509,7 @@ class MinCovDet(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -521,44 +527,6 @@ class MinCovDet(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-            Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -598,7 +566,7 @@ class MinCovDet(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -663,7 +631,7 @@ class MinCovDet(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -724,7 +692,7 @@ class MinCovDet(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -789,7 +757,7 @@ class MinCovDet(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -845,13 +813,17 @@ class MinCovDet(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -925,9 +897,9 @@ class MinCovDet(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):