snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (211)
  1. snowflake/ml/_internal/file_utils.py +3 -3
  2. snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
  3. snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
  4. snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
  5. snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
  6. snowflake/ml/_internal/telemetry.py +11 -2
  7. snowflake/ml/_internal/utils/formatting.py +1 -1
  8. snowflake/ml/feature_store/feature_store.py +15 -106
  9. snowflake/ml/fileset/sfcfs.py +4 -3
  10. snowflake/ml/fileset/stage_fs.py +18 -0
  11. snowflake/ml/model/_api.py +9 -9
  12. snowflake/ml/model/_client/model/model_version_impl.py +20 -15
  13. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
  14. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
  15. snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
  16. snowflake/ml/model/_model_composer/model_composer.py +10 -8
  17. snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
  18. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
  19. snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
  20. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
  21. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  22. snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
  23. snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
  24. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
  25. snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
  26. snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
  27. snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
  28. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
  29. snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
  30. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
  31. snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
  32. snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
  33. snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
  34. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  35. snowflake/ml/model/_packager/model_packager.py +8 -6
  36. snowflake/ml/model/custom_model.py +3 -1
  37. snowflake/ml/model/type_hints.py +13 -0
  38. snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
  39. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
  40. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
  41. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
  42. snowflake/ml/modeling/_internal/model_specifications.py +3 -1
  43. snowflake/ml/modeling/_internal/model_trainer.py +2 -2
  44. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
  45. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
  46. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
  47. snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
  48. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
  49. snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
  51. snowflake/ml/modeling/cluster/birch.py +33 -61
  52. snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
  53. snowflake/ml/modeling/cluster/dbscan.py +33 -61
  54. snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
  55. snowflake/ml/modeling/cluster/k_means.py +33 -61
  56. snowflake/ml/modeling/cluster/mean_shift.py +33 -61
  57. snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
  58. snowflake/ml/modeling/cluster/optics.py +33 -61
  59. snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
  60. snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
  61. snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
  62. snowflake/ml/modeling/compose/column_transformer.py +33 -61
  63. snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
  64. snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
  65. snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
  66. snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
  67. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
  68. snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
  69. snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
  70. snowflake/ml/modeling/covariance/oas.py +33 -61
  71. snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
  72. snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
  73. snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
  74. snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
  75. snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
  76. snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
  77. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
  78. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
  79. snowflake/ml/modeling/decomposition/pca.py +33 -61
  80. snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
  81. snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
  82. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
  83. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
  84. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
  85. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
  86. snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
  87. snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
  88. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
  89. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
  90. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
  91. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
  92. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
  93. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
  94. snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
  95. snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
  96. snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
  97. snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
  98. snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
  99. snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
  100. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
  101. snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
  102. snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
  103. snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
  104. snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
  105. snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
  106. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
  107. snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
  108. snowflake/ml/modeling/framework/base.py +55 -5
  109. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
  110. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
  111. snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
  112. snowflake/ml/modeling/impute/knn_imputer.py +33 -61
  113. snowflake/ml/modeling/impute/missing_indicator.py +33 -61
  114. snowflake/ml/modeling/impute/simple_imputer.py +4 -15
  115. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
  116. snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
  117. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
  118. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
  119. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
  120. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
  121. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
  122. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
  123. snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
  124. snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
  125. snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
  126. snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
  127. snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
  128. snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
  129. snowflake/ml/modeling/linear_model/lars.py +33 -61
  130. snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
  131. snowflake/ml/modeling/linear_model/lasso.py +33 -61
  132. snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
  133. snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
  134. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
  135. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
  136. snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
  137. snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
  138. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
  140. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
  141. snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
  142. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
  143. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
  144. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
  145. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
  146. snowflake/ml/modeling/linear_model/perceptron.py +33 -61
  147. snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
  148. snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
  149. snowflake/ml/modeling/linear_model/ridge.py +33 -61
  150. snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
  151. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
  152. snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
  153. snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
  154. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
  155. snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
  156. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
  157. snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
  158. snowflake/ml/modeling/manifold/isomap.py +33 -61
  159. snowflake/ml/modeling/manifold/mds.py +33 -61
  160. snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
  161. snowflake/ml/modeling/manifold/tsne.py +33 -61
  162. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
  163. snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
  164. snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
  165. snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
  166. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
  167. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
  168. snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
  169. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
  170. snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
  171. snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
  172. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
  173. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
  174. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
  175. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
  176. snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
  177. snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
  178. snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
  179. snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
  180. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
  181. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
  182. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
  183. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
  184. snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
  185. snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
  186. snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
  187. snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
  188. snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
  189. snowflake/ml/modeling/svm/linear_svc.py +33 -61
  190. snowflake/ml/modeling/svm/linear_svr.py +33 -61
  191. snowflake/ml/modeling/svm/nu_svc.py +33 -61
  192. snowflake/ml/modeling/svm/nu_svr.py +33 -61
  193. snowflake/ml/modeling/svm/svc.py +33 -61
  194. snowflake/ml/modeling/svm/svr.py +33 -61
  195. snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
  196. snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
  197. snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
  198. snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
  199. snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
  200. snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
  201. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
  202. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
  203. snowflake/ml/registry/_manager/model_manager.py +6 -2
  204. snowflake/ml/registry/model_registry.py +100 -27
  205. snowflake/ml/registry/registry.py +6 -2
  206. snowflake/ml/version.py +1 -1
  207. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
  208. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
  209. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
  210. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
  211. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
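
The three diffs reproduced below (SpectralCoclustering, ColumnTransformer, TransformedTargetRegressor) are representative of the generated-estimator change applied across the ~200 modeling files listed above: each wrapper stops computing pass-through columns itself and instead forwards a drop_input_cols flag to the batch-inference layer. A minimal sketch of the removed 1.3.0 helper, reconstructed from the deleted lines (the surrounding class here is hypothetical, not part of the package):

from typing import List

class PassThroughSketch:
    """Hypothetical stand-in for a 1.3.0 generated estimator wrapper."""

    def __init__(self, dataset_columns: List[str], output_cols: List[str], drop_input_cols: bool) -> None:
        self.dataset_columns = dataset_columns  # columns of the input dataset
        self.output_cols = output_cols          # columns the estimator will add
        self._drop_input_cols = drop_input_cols

    def _get_pass_through_columns(self) -> List[str]:
        # Removed in 1.4.0: pass nothing through when inputs are dropped,
        # otherwise pass through every column the estimator does not produce.
        if self._drop_input_cols:
            return []
        return list(set(self.dataset_columns) - set(self.output_cols))

# 1.4.0 call sites replace the computed list with the raw flag:
#     transform_kwargs = dict(..., drop_input_cols=self._drop_input_cols, ...)
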
snowflake/ml/modeling/cluster/spectral_coclustering.py
@@ -301,18 +301,24 @@ class SpectralCoclustering(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -384,7 +390,7 @@ class SpectralCoclustering(BaseTransformer):
            transform_kwargs = dict(
                session = dataset._session,
                dependencies = self._deps,
-               pass_through_cols = self._get_pass_through_columns(dataset),
+               drop_input_cols = self._drop_input_cols,
                expected_output_cols_type = expected_type_inferred,
            )
 
@@ -444,16 +450,16 @@ class SpectralCoclustering(BaseTransformer):
            # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
            # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
            # each row containing a list of values.
-           expected_dtype = "ARRAY"
+           expected_dtype = "array"
 
        # If we were unable to assign a type to this transform in the factory, infer the type here.
        if expected_dtype == "":
-           # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+           # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-               expected_dtype = "ARRAY"
-           # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+               expected_dtype = "array"
+           # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-               expected_dtype = "ARRAY"
+               expected_dtype = "array"
            else:
                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                # We can only infer the output types from the input types if the following two statemetns are true:
@@ -471,7 +477,7 @@ class SpectralCoclustering(BaseTransformer):
            transform_kwargs = dict(
                session = dataset._session,
                dependencies = self._deps,
-               pass_through_cols = self._get_pass_through_columns(dataset),
+               drop_input_cols = self._drop_input_cols,
                expected_output_cols_type = expected_dtype,
            )
 
@@ -522,7 +528,7 @@ class SpectralCoclustering(BaseTransformer):
            subproject=_SUBPROJECT,
        )
        output_result, fitted_estimator = model_trainer.train_fit_predict(
-           pass_through_columns=self._get_pass_through_columns(dataset),
+           drop_input_cols=self._drop_input_cols,
            expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
        )
        self._sklearn_object = fitted_estimator
@@ -540,44 +546,6 @@ class SpectralCoclustering(BaseTransformer):
        assert self._sklearn_object is not None
        return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -617,7 +585,7 @@ class SpectralCoclustering(BaseTransformer):
            transform_kwargs = dict(
                session=dataset._session,
                dependencies=self._deps,
-               pass_through_cols=self._get_pass_through_columns(dataset),
+               drop_input_cols = self._drop_input_cols,
                expected_output_cols_type="float",
            )
 
@@ -682,7 +650,7 @@ class SpectralCoclustering(BaseTransformer):
            transform_kwargs = dict(
                session=dataset._session,
                dependencies=self._deps,
-               pass_through_cols=self._get_pass_through_columns(dataset),
+               drop_input_cols = self._drop_input_cols,
                expected_output_cols_type="float",
            )
        elif isinstance(dataset, pd.DataFrame):
@@ -743,7 +711,7 @@ class SpectralCoclustering(BaseTransformer):
            transform_kwargs = dict(
                session=dataset._session,
                dependencies=self._deps,
-               pass_through_cols=self._get_pass_through_columns(dataset),
+               drop_input_cols = self._drop_input_cols,
                expected_output_cols_type="float",
            )
 
@@ -808,7 +776,7 @@ class SpectralCoclustering(BaseTransformer):
            transform_kwargs = dict(
                session=dataset._session,
                dependencies=self._deps,
-               pass_through_cols=self._get_pass_through_columns(dataset),
+               drop_input_cols = self._drop_input_cols,
                expected_output_cols_type="float",
            )
 
@@ -862,13 +830,17 @@ class SpectralCoclustering(BaseTransformer):
        transform_kwargs: ScoreKwargsTypedDict = dict()
 
        if isinstance(dataset, DataFrame):
+           self._deps = self._batch_inference_validate_snowpark(
+               dataset=dataset,
+               inference_method="score",
+           )
            selected_cols = self._get_active_columns()
            if len(selected_cols) > 0:
                dataset = dataset.select(selected_cols)
            assert isinstance(dataset._session, Session)  # keep mypy happy
            transform_kwargs = dict(
                session=dataset._session,
-               dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+               dependencies=["snowflake-snowpark-python"] + self._deps,
                score_sproc_imports=['sklearn'],
            )
        elif isinstance(dataset, pd.DataFrame):
@@ -942,9 +914,9 @@ class SpectralCoclustering(BaseTransformer):
            transform_kwargs = dict(
                session = dataset._session,
                dependencies = self._deps,
-               pass_through_cols = self._get_pass_through_columns(dataset),
-               expected_output_cols_type = "array",
-               n_neighbors = n_neighbors,
+               drop_input_cols = self._drop_input_cols,
+               expected_output_cols_type="array",
+               n_neighbors = n_neighbors,
                return_distance = return_distance
            )
        elif isinstance(dataset, pd.DataFrame):
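
ColumnTransformer and TransformedTargetRegressor below carry these same hunks. The one behavioral addition is in score(): the Snowpark path now calls _batch_inference_validate_snowpark and uses the validated package list as the sproc dependencies instead of the estimator's declared dependencies. A condensed, runnable sketch of that contract, assuming the channel lookup is injected (the real lookup against the Snowflake Anaconda channel is not shown in this diff):

from typing import Callable, List, Optional

def batch_inference_validate_sketch(
    is_fitted: bool,
    session: Optional[object],
    declared_deps: List[str],
    inference_method: str,
    channel_lookup: Callable[[List[str]], List[str]] = lambda deps: list(deps),
) -> List[str]:
    """Mirrors the documented 1.4.0 contract: validate preconditions, then
    return the packages available in the Snowflake Anaconda channel.
    `channel_lookup` is a hypothetical stand-in for the real lookup."""
    if not is_fitted:
        raise RuntimeError(f"Estimator must be fitted before calling {inference_method}.")
    if session is None:
        raise RuntimeError("Dataset is not backed by a Snowpark session.")
    return channel_lookup(declared_deps)

# Per the score() hunk, the result is cached and prepended to the sproc deps:
#     self._deps = self._batch_inference_validate_snowpark(dataset=dataset, inference_method="score")
#     dependencies = ["snowflake-snowpark-python"] + self._deps
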
snowflake/ml/modeling/compose/column_transformer.py
@@ -331,18 +331,24 @@ class ColumnTransformer(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -414,7 +420,7 @@ class ColumnTransformer(BaseTransformer):
            transform_kwargs = dict(
                session = dataset._session,
                dependencies = self._deps,
-               pass_through_cols = self._get_pass_through_columns(dataset),
+               drop_input_cols = self._drop_input_cols,
                expected_output_cols_type = expected_type_inferred,
            )
 
@@ -476,16 +482,16 @@ class ColumnTransformer(BaseTransformer):
            # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
            # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
            # each row containing a list of values.
-           expected_dtype = "ARRAY"
+           expected_dtype = "array"
 
        # If we were unable to assign a type to this transform in the factory, infer the type here.
        if expected_dtype == "":
-           # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+           # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-               expected_dtype = "ARRAY"
-           # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+               expected_dtype = "array"
+           # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-               expected_dtype = "ARRAY"
+               expected_dtype = "array"
            else:
                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                # We can only infer the output types from the input types if the following two statemetns are true:
@@ -503,7 +509,7 @@ class ColumnTransformer(BaseTransformer):
            transform_kwargs = dict(
                session = dataset._session,
                dependencies = self._deps,
-               pass_through_cols = self._get_pass_through_columns(dataset),
+               drop_input_cols = self._drop_input_cols,
                expected_output_cols_type = expected_dtype,
            )
 
@@ -554,7 +560,7 @@ class ColumnTransformer(BaseTransformer):
            subproject=_SUBPROJECT,
        )
        output_result, fitted_estimator = model_trainer.train_fit_predict(
-           pass_through_columns=self._get_pass_through_columns(dataset),
+           drop_input_cols=self._drop_input_cols,
            expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
        )
        self._sklearn_object = fitted_estimator
@@ -572,44 +578,6 @@ class ColumnTransformer(BaseTransformer):
        assert self._sklearn_object is not None
        return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -649,7 +617,7 @@ class ColumnTransformer(BaseTransformer):
            transform_kwargs = dict(
                session=dataset._session,
                dependencies=self._deps,
-               pass_through_cols=self._get_pass_through_columns(dataset),
+               drop_input_cols = self._drop_input_cols,
                expected_output_cols_type="float",
            )
 
@@ -714,7 +682,7 @@ class ColumnTransformer(BaseTransformer):
            transform_kwargs = dict(
                session=dataset._session,
                dependencies=self._deps,
-               pass_through_cols=self._get_pass_through_columns(dataset),
+               drop_input_cols = self._drop_input_cols,
                expected_output_cols_type="float",
            )
        elif isinstance(dataset, pd.DataFrame):
@@ -775,7 +743,7 @@ class ColumnTransformer(BaseTransformer):
            transform_kwargs = dict(
                session=dataset._session,
                dependencies=self._deps,
-               pass_through_cols=self._get_pass_through_columns(dataset),
+               drop_input_cols = self._drop_input_cols,
                expected_output_cols_type="float",
            )
 
@@ -840,7 +808,7 @@ class ColumnTransformer(BaseTransformer):
            transform_kwargs = dict(
                session=dataset._session,
                dependencies=self._deps,
-               pass_through_cols=self._get_pass_through_columns(dataset),
+               drop_input_cols = self._drop_input_cols,
                expected_output_cols_type="float",
            )
 
@@ -894,13 +862,17 @@ class ColumnTransformer(BaseTransformer):
        transform_kwargs: ScoreKwargsTypedDict = dict()
 
        if isinstance(dataset, DataFrame):
+           self._deps = self._batch_inference_validate_snowpark(
+               dataset=dataset,
+               inference_method="score",
+           )
            selected_cols = self._get_active_columns()
            if len(selected_cols) > 0:
                dataset = dataset.select(selected_cols)
            assert isinstance(dataset._session, Session)  # keep mypy happy
            transform_kwargs = dict(
                session=dataset._session,
-               dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+               dependencies=["snowflake-snowpark-python"] + self._deps,
                score_sproc_imports=['sklearn'],
            )
        elif isinstance(dataset, pd.DataFrame):
@@ -974,9 +946,9 @@ class ColumnTransformer(BaseTransformer):
            transform_kwargs = dict(
                session = dataset._session,
                dependencies = self._deps,
-               pass_through_cols = self._get_pass_through_columns(dataset),
-               expected_output_cols_type = "array",
-               n_neighbors = n_neighbors,
+               drop_input_cols = self._drop_input_cols,
+               expected_output_cols_type="array",
+               n_neighbors = n_neighbors,
                return_distance = return_distance
            )
        elif isinstance(dataset, pd.DataFrame):
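
Every generated wrapper also loses its private copy of _get_output_column_names. Given the +55/-5 change to snowflake/ml/modeling/framework/base.py in the file list, the helper was most likely hoisted into the shared BaseTransformer rather than dropped; that is an inference, since this diff only shows the deletions. A standalone condensation of the removed logic, with the Snowflake identifier normalization omitted:

from typing import List, Optional, Union
import numpy

def output_column_names_sketch(
    output_cols_prefix: str,
    classes: Optional[Union[numpy.ndarray, List[numpy.ndarray]]],
) -> List[str]:
    """Condensed from the deleted helper: derive output column names for
    predict_proba()/decision_function() from an estimator's classes_."""
    if classes is None:
        # Not a classifier: a single column named by the prefix.
        return [output_cols_prefix]
    if isinstance(classes, numpy.ndarray):
        return [f"{output_cols_prefix}{c}" for c in classes.tolist()]
    if isinstance(classes, list) and classes and isinstance(classes[0], numpy.ndarray):
        # Multioutput estimator: classes_ is a list of ndarrays.
        cols: List[str] = []
        for i, cl in enumerate(classes):
            if len(cl) == 2:
                # Binary output: one column per ndarray, classes are complementary.
                cols.append(f"{output_cols_prefix}{i}_{cl[0]}")
            else:
                cols.extend(f"{output_cols_prefix}{i}_{c}" for c in cl.tolist())
        return cols
    return []

# Example: a binary classifier with classes_ == numpy.array([0, 1]) yields
# ["predict_proba_0", "predict_proba_1"] for output_cols_prefix="predict_proba_".
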
snowflake/ml/modeling/compose/transformed_target_regressor.py
@@ -292,18 +292,24 @@ class TransformedTargetRegressor(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -377,7 +383,7 @@ class TransformedTargetRegressor(BaseTransformer):
            transform_kwargs = dict(
                session = dataset._session,
                dependencies = self._deps,
-               pass_through_cols = self._get_pass_through_columns(dataset),
+               drop_input_cols = self._drop_input_cols,
                expected_output_cols_type = expected_type_inferred,
            )
 
@@ -437,16 +443,16 @@ class TransformedTargetRegressor(BaseTransformer):
            # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
            # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
            # each row containing a list of values.
-           expected_dtype = "ARRAY"
+           expected_dtype = "array"
 
        # If we were unable to assign a type to this transform in the factory, infer the type here.
        if expected_dtype == "":
-           # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+           # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-               expected_dtype = "ARRAY"
-           # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+               expected_dtype = "array"
+           # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-               expected_dtype = "ARRAY"
+               expected_dtype = "array"
            else:
                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                # We can only infer the output types from the input types if the following two statemetns are true:
@@ -464,7 +470,7 @@ class TransformedTargetRegressor(BaseTransformer):
            transform_kwargs = dict(
                session = dataset._session,
                dependencies = self._deps,
-               pass_through_cols = self._get_pass_through_columns(dataset),
+               drop_input_cols = self._drop_input_cols,
                expected_output_cols_type = expected_dtype,
            )
 
@@ -515,7 +521,7 @@ class TransformedTargetRegressor(BaseTransformer):
            subproject=_SUBPROJECT,
        )
        output_result, fitted_estimator = model_trainer.train_fit_predict(
-           pass_through_columns=self._get_pass_through_columns(dataset),
+           drop_input_cols=self._drop_input_cols,
            expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
        )
        self._sklearn_object = fitted_estimator
@@ -533,44 +539,6 @@ class TransformedTargetRegressor(BaseTransformer):
        assert self._sklearn_object is not None
        return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -610,7 +578,7 @@ class TransformedTargetRegressor(BaseTransformer):
            transform_kwargs = dict(
                session=dataset._session,
                dependencies=self._deps,
-               pass_through_cols=self._get_pass_through_columns(dataset),
+               drop_input_cols = self._drop_input_cols,
                expected_output_cols_type="float",
            )
 
@@ -675,7 +643,7 @@ class TransformedTargetRegressor(BaseTransformer):
            transform_kwargs = dict(
                session=dataset._session,
                dependencies=self._deps,
-               pass_through_cols=self._get_pass_through_columns(dataset),
+               drop_input_cols = self._drop_input_cols,
                expected_output_cols_type="float",
            )
        elif isinstance(dataset, pd.DataFrame):
@@ -736,7 +704,7 @@ class TransformedTargetRegressor(BaseTransformer):
            transform_kwargs = dict(
                session=dataset._session,
                dependencies=self._deps,
-               pass_through_cols=self._get_pass_through_columns(dataset),
+               drop_input_cols = self._drop_input_cols,
                expected_output_cols_type="float",
            )
 
@@ -801,7 +769,7 @@ class TransformedTargetRegressor(BaseTransformer):
            transform_kwargs = dict(
                session=dataset._session,
                dependencies=self._deps,
-               pass_through_cols=self._get_pass_through_columns(dataset),
+               drop_input_cols = self._drop_input_cols,
                expected_output_cols_type="float",
            )
 
@@ -857,13 +825,17 @@ class TransformedTargetRegressor(BaseTransformer):
        transform_kwargs: ScoreKwargsTypedDict = dict()
 
        if isinstance(dataset, DataFrame):
+           self._deps = self._batch_inference_validate_snowpark(
+               dataset=dataset,
+               inference_method="score",
+           )
            selected_cols = self._get_active_columns()
            if len(selected_cols) > 0:
                dataset = dataset.select(selected_cols)
            assert isinstance(dataset._session, Session)  # keep mypy happy
            transform_kwargs = dict(
                session=dataset._session,
-               dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+               dependencies=["snowflake-snowpark-python"] + self._deps,
                score_sproc_imports=['sklearn'],
            )
        elif isinstance(dataset, pd.DataFrame):
@@ -937,9 +909,9 @@ class TransformedTargetRegressor(BaseTransformer):
            transform_kwargs = dict(
                session = dataset._session,
                dependencies = self._deps,
-               pass_through_cols = self._get_pass_through_columns(dataset),
-               expected_output_cols_type = "array",
-               n_neighbors = n_neighbors,
+               drop_input_cols = self._drop_input_cols,
+               expected_output_cols_type="array",
+               n_neighbors = n_neighbors,
                return_distance = return_distance
            )
        elif isinstance(dataset, pd.DataFrame):
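
One constant change shared by all three files is easy to miss: the expected output type string drops from "ARRAY" to lowercase "array", presumably to match the casing the inference handlers compare against (the consumer side is not shown in this diff). The fallback branch that infers the type when the factory could not assign one, condensed into a runnable sketch:

from typing import Any, List

def infer_expected_dtype_sketch(sklearn_object: Any, output_cols: List[str]) -> str:
    """Condensed from the shared hunk: clustering or decomposition estimators
    whose output column count differs from n_clusters/n_components produce
    one "array" column per row; otherwise return "" so the caller falls back
    to signature-based inference (elided here)."""
    if hasattr(sklearn_object, "n_clusters") and getattr(sklearn_object, "n_clusters") != len(output_cols):
        return "array"
    if hasattr(sklearn_object, "n_components") and getattr(sklearn_object, "n_components") != len(output_cols):
        return "array"
    return ""
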