snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211)
  1. snowflake/ml/_internal/file_utils.py +3 -3
  2. snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
  3. snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
  4. snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
  5. snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
  6. snowflake/ml/_internal/telemetry.py +11 -2
  7. snowflake/ml/_internal/utils/formatting.py +1 -1
  8. snowflake/ml/feature_store/feature_store.py +15 -106
  9. snowflake/ml/fileset/sfcfs.py +4 -3
  10. snowflake/ml/fileset/stage_fs.py +18 -0
  11. snowflake/ml/model/_api.py +9 -9
  12. snowflake/ml/model/_client/model/model_version_impl.py +20 -15
  13. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
  14. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
  15. snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
  16. snowflake/ml/model/_model_composer/model_composer.py +10 -8
  17. snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
  18. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
  19. snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
  20. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
  21. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  22. snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
  23. snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
  24. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
  25. snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
  26. snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
  27. snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
  28. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
  29. snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
  30. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
  31. snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
  32. snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
  33. snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
  34. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  35. snowflake/ml/model/_packager/model_packager.py +8 -6
  36. snowflake/ml/model/custom_model.py +3 -1
  37. snowflake/ml/model/type_hints.py +13 -0
  38. snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
  39. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
  40. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
  41. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
  42. snowflake/ml/modeling/_internal/model_specifications.py +3 -1
  43. snowflake/ml/modeling/_internal/model_trainer.py +2 -2
  44. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
  45. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
  46. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
  47. snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
  48. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
  49. snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
  51. snowflake/ml/modeling/cluster/birch.py +33 -61
  52. snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
  53. snowflake/ml/modeling/cluster/dbscan.py +33 -61
  54. snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
  55. snowflake/ml/modeling/cluster/k_means.py +33 -61
  56. snowflake/ml/modeling/cluster/mean_shift.py +33 -61
  57. snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
  58. snowflake/ml/modeling/cluster/optics.py +33 -61
  59. snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
  60. snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
  61. snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
  62. snowflake/ml/modeling/compose/column_transformer.py +33 -61
  63. snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
  64. snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
  65. snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
  66. snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
  67. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
  68. snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
  69. snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
  70. snowflake/ml/modeling/covariance/oas.py +33 -61
  71. snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
  72. snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
  73. snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
  74. snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
  75. snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
  76. snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
  77. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
  78. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
  79. snowflake/ml/modeling/decomposition/pca.py +33 -61
  80. snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
  81. snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
  82. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
  83. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
  84. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
  85. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
  86. snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
  87. snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
  88. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
  89. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
  90. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
  91. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
  92. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
  93. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
  94. snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
  95. snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
  96. snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
  97. snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
  98. snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
  99. snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
  100. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
  101. snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
  102. snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
  103. snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
  104. snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
  105. snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
  106. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
  107. snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
  108. snowflake/ml/modeling/framework/base.py +55 -5
  109. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
  110. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
  111. snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
  112. snowflake/ml/modeling/impute/knn_imputer.py +33 -61
  113. snowflake/ml/modeling/impute/missing_indicator.py +33 -61
  114. snowflake/ml/modeling/impute/simple_imputer.py +4 -15
  115. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
  116. snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
  117. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
  118. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
  119. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
  120. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
  121. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
  122. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
  123. snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
  124. snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
  125. snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
  126. snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
  127. snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
  128. snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
  129. snowflake/ml/modeling/linear_model/lars.py +33 -61
  130. snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
  131. snowflake/ml/modeling/linear_model/lasso.py +33 -61
  132. snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
  133. snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
  134. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
  135. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
  136. snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
  137. snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
  138. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
  140. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
  141. snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
  142. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
  143. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
  144. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
  145. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
  146. snowflake/ml/modeling/linear_model/perceptron.py +33 -61
  147. snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
  148. snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
  149. snowflake/ml/modeling/linear_model/ridge.py +33 -61
  150. snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
  151. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
  152. snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
  153. snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
  154. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
  155. snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
  156. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
  157. snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
  158. snowflake/ml/modeling/manifold/isomap.py +33 -61
  159. snowflake/ml/modeling/manifold/mds.py +33 -61
  160. snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
  161. snowflake/ml/modeling/manifold/tsne.py +33 -61
  162. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
  163. snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
  164. snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
  165. snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
  166. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
  167. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
  168. snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
  169. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
  170. snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
  171. snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
  172. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
  173. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
  174. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
  175. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
  176. snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
  177. snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
  178. snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
  179. snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
  180. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
  181. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
  182. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
  183. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
  184. snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
  185. snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
  186. snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
  187. snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
  188. snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
  189. snowflake/ml/modeling/svm/linear_svc.py +33 -61
  190. snowflake/ml/modeling/svm/linear_svr.py +33 -61
  191. snowflake/ml/modeling/svm/nu_svc.py +33 -61
  192. snowflake/ml/modeling/svm/nu_svr.py +33 -61
  193. snowflake/ml/modeling/svm/svc.py +33 -61
  194. snowflake/ml/modeling/svm/svr.py +33 -61
  195. snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
  196. snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
  197. snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
  198. snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
  199. snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
  200. snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
  201. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
  202. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
  203. snowflake/ml/registry/_manager/model_manager.py +6 -2
  204. snowflake/ml/registry/model_registry.py +100 -27
  205. snowflake/ml/registry/registry.py +6 -2
  206. snowflake/ml/version.py +1 -1
  207. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
  208. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
  209. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
  210. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
  211. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
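
The hunks below show the same refactor applied across the `snowflake/ml/modeling` estimators (the many `+33 -61` entries above): the per-estimator `_get_pass_through_columns` and `_get_output_column_names` helpers are deleted (the latter presumably consolidated into the shared `snowflake/ml/modeling/framework/base.py`, which grows by 55 lines in this release), and the inference handlers now receive the boolean `drop_input_cols` flag instead of a precomputed `pass_through_cols` list. A minimal sketch of the old versus new call shape follows; the standalone helper mirrors the deleted method, while the surrounding variable names are hypothetical stand-ins, not the package's API:

    from typing import List


    def pass_through_columns(dataset_columns: List[str], output_cols: List[str],
                             drop_input_cols: bool) -> List[str]:
        # Mirrors the deleted _get_pass_through_columns: before 1.4.0, every
        # estimator derived this list itself and handed it to the handler.
        if drop_input_cols:
            return []
        return list(set(dataset_columns) - set(output_cols))


    # Before (1.3.0): the caller precomputes the pass-through list.
    old_transform_kwargs = dict(
        pass_through_cols=pass_through_columns(["F1", "F2", "OUTPUT"], ["OUTPUT"],
                                               drop_input_cols=False),
        expected_output_cols_type="float",
    )

    # After (1.4.0): the caller forwards only the flag; the shared handler
    # (snowpark_handlers.py in the list above) derives the columns itself.
    new_transform_kwargs = dict(
        drop_input_cols=False,
        expected_output_cols_type="float",
    )
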
snowflake/ml/modeling/linear_model/elastic_net_cv.py

@@ -365,18 +365,24 @@ class ElasticNetCV(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -450,7 +456,7 @@ class ElasticNetCV(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -510,16 +516,16 @@ class ElasticNetCV(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"
 
         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
             if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "ARRAY"
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
             elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statemetns are true:
@@ -537,7 +543,7 @@ class ElasticNetCV(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -588,7 +594,7 @@ class ElasticNetCV(BaseTransformer):
                 subproject=_SUBPROJECT,
             )
             output_result, fitted_estimator = model_trainer.train_fit_predict(
-                pass_through_columns=self._get_pass_through_columns(dataset),
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
             )
             self._sklearn_object = fitted_estimator
@@ -606,44 +612,6 @@ class ElasticNetCV(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -683,7 +651,7 @@ class ElasticNetCV(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -748,7 +716,7 @@ class ElasticNetCV(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -809,7 +777,7 @@ class ElasticNetCV(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -874,7 +842,7 @@ class ElasticNetCV(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -930,13 +898,17 @@ class ElasticNetCV(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -1010,9 +982,9 @@ class ElasticNetCV(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
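
GammaRegressor and HuberRegressor below receive the identical hunks. The other recurring edit is visible in the `score` hunk above: `_batch_inference_validate_snowpark` now also returns the packages resolvable in the Snowflake Anaconda channel, and `score` caches that list in `self._deps` instead of calling `self._get_dependencies()`. A rough sketch of that contract, assuming validation and package resolution behave as the new docstring describes (all names except the exception semantics are hypothetical stand-ins):

    from typing import List


    class SnowflakeMLException(Exception):
        """Stand-in for snowflake.ml exceptions.SnowflakeMLException."""


    def batch_inference_validate_snowpark(is_fitted: bool, session: object,
                                          dependencies: List[str]) -> List[str]:
        # Sketch of the expanded contract: raise if the estimator is unfitted
        # or the session is missing, then return the dependency list validated
        # against the Snowflake Anaconda channel.
        if not is_fitted:
            raise SnowflakeMLException("estimator is not fitted")
        if session is None:
            raise SnowflakeMLException("session is None")
        # The real method would filter `dependencies` down to packages that
        # exist in the Anaconda channel; here they pass through unchanged.
        return dependencies


    # score() now reuses the validated list rather than re-deriving it:
    deps = batch_inference_validate_snowpark(True, object(), ["scikit-learn"])
    score_dependencies = ["snowflake-snowpark-python"] + deps
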
snowflake/ml/modeling/linear_model/gamma_regressor.py

@@ -310,18 +310,24 @@ class GammaRegressor(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -395,7 +401,7 @@ class GammaRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -455,16 +461,16 @@ class GammaRegressor(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"
 
         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
             if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "ARRAY"
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
             elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statemetns are true:
@@ -482,7 +488,7 @@ class GammaRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -533,7 +539,7 @@ class GammaRegressor(BaseTransformer):
                 subproject=_SUBPROJECT,
             )
             output_result, fitted_estimator = model_trainer.train_fit_predict(
-                pass_through_columns=self._get_pass_through_columns(dataset),
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
             )
             self._sklearn_object = fitted_estimator
@@ -551,44 +557,6 @@ class GammaRegressor(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -628,7 +596,7 @@ class GammaRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -693,7 +661,7 @@ class GammaRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -754,7 +722,7 @@ class GammaRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -819,7 +787,7 @@ class GammaRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -875,13 +843,17 @@ class GammaRegressor(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -955,9 +927,9 @@ class GammaRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
snowflake/ml/modeling/linear_model/huber_regressor.py

@@ -293,18 +293,24 @@ class HuberRegressor(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -378,7 +384,7 @@ class HuberRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -438,16 +444,16 @@ class HuberRegressor(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"
 
         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
             if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "ARRAY"
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
             elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statemetns are true:
@@ -465,7 +471,7 @@ class HuberRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -516,7 +522,7 @@ class HuberRegressor(BaseTransformer):
                 subproject=_SUBPROJECT,
             )
             output_result, fitted_estimator = model_trainer.train_fit_predict(
-                pass_through_columns=self._get_pass_through_columns(dataset),
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
             )
             self._sklearn_object = fitted_estimator
@@ -534,44 +540,6 @@ class HuberRegressor(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -611,7 +579,7 @@ class HuberRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -676,7 +644,7 @@ class HuberRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -737,7 +705,7 @@ class HuberRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -802,7 +770,7 @@ class HuberRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -858,13 +826,17 @@ class HuberRegressor(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
        if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -938,9 +910,9 @@ class HuberRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):