snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211)
  1. snowflake/ml/_internal/file_utils.py +3 -3
  2. snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
  3. snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
  4. snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
  5. snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
  6. snowflake/ml/_internal/telemetry.py +11 -2
  7. snowflake/ml/_internal/utils/formatting.py +1 -1
  8. snowflake/ml/feature_store/feature_store.py +15 -106
  9. snowflake/ml/fileset/sfcfs.py +4 -3
  10. snowflake/ml/fileset/stage_fs.py +18 -0
  11. snowflake/ml/model/_api.py +9 -9
  12. snowflake/ml/model/_client/model/model_version_impl.py +20 -15
  13. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
  14. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
  15. snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
  16. snowflake/ml/model/_model_composer/model_composer.py +10 -8
  17. snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
  18. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
  19. snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
  20. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
  21. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  22. snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
  23. snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
  24. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
  25. snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
  26. snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
  27. snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
  28. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
  29. snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
  30. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
  31. snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
  32. snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
  33. snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
  34. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  35. snowflake/ml/model/_packager/model_packager.py +8 -6
  36. snowflake/ml/model/custom_model.py +3 -1
  37. snowflake/ml/model/type_hints.py +13 -0
  38. snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
  39. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
  40. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
  41. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
  42. snowflake/ml/modeling/_internal/model_specifications.py +3 -1
  43. snowflake/ml/modeling/_internal/model_trainer.py +2 -2
  44. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
  45. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
  46. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
  47. snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
  48. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
  49. snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
  51. snowflake/ml/modeling/cluster/birch.py +33 -61
  52. snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
  53. snowflake/ml/modeling/cluster/dbscan.py +33 -61
  54. snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
  55. snowflake/ml/modeling/cluster/k_means.py +33 -61
  56. snowflake/ml/modeling/cluster/mean_shift.py +33 -61
  57. snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
  58. snowflake/ml/modeling/cluster/optics.py +33 -61
  59. snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
  60. snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
  61. snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
  62. snowflake/ml/modeling/compose/column_transformer.py +33 -61
  63. snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
  64. snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
  65. snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
  66. snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
  67. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
  68. snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
  69. snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
  70. snowflake/ml/modeling/covariance/oas.py +33 -61
  71. snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
  72. snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
  73. snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
  74. snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
  75. snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
  76. snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
  77. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
  78. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
  79. snowflake/ml/modeling/decomposition/pca.py +33 -61
  80. snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
  81. snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
  82. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
  83. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
  84. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
  85. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
  86. snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
  87. snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
  88. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
  89. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
  90. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
  91. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
  92. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
  93. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
  94. snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
  95. snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
  96. snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
  97. snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
  98. snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
  99. snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
  100. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
  101. snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
  102. snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
  103. snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
  104. snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
  105. snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
  106. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
  107. snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
  108. snowflake/ml/modeling/framework/base.py +55 -5
  109. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
  110. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
  111. snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
  112. snowflake/ml/modeling/impute/knn_imputer.py +33 -61
  113. snowflake/ml/modeling/impute/missing_indicator.py +33 -61
  114. snowflake/ml/modeling/impute/simple_imputer.py +4 -15
  115. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
  116. snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
  117. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
  118. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
  119. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
  120. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
  121. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
  122. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
  123. snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
  124. snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
  125. snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
  126. snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
  127. snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
  128. snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
  129. snowflake/ml/modeling/linear_model/lars.py +33 -61
  130. snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
  131. snowflake/ml/modeling/linear_model/lasso.py +33 -61
  132. snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
  133. snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
  134. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
  135. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
  136. snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
  137. snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
  138. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
  140. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
  141. snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
  142. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
  143. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
  144. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
  145. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
  146. snowflake/ml/modeling/linear_model/perceptron.py +33 -61
  147. snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
  148. snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
  149. snowflake/ml/modeling/linear_model/ridge.py +33 -61
  150. snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
  151. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
  152. snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
  153. snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
  154. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
  155. snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
  156. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
  157. snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
  158. snowflake/ml/modeling/manifold/isomap.py +33 -61
  159. snowflake/ml/modeling/manifold/mds.py +33 -61
  160. snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
  161. snowflake/ml/modeling/manifold/tsne.py +33 -61
  162. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
  163. snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
  164. snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
  165. snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
  166. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
  167. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
  168. snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
  169. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
  170. snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
  171. snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
  172. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
  173. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
  174. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
  175. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
  176. snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
  177. snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
  178. snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
  179. snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
  180. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
  181. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
  182. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
  183. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
  184. snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
  185. snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
  186. snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
  187. snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
  188. snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
  189. snowflake/ml/modeling/svm/linear_svc.py +33 -61
  190. snowflake/ml/modeling/svm/linear_svr.py +33 -61
  191. snowflake/ml/modeling/svm/nu_svc.py +33 -61
  192. snowflake/ml/modeling/svm/nu_svr.py +33 -61
  193. snowflake/ml/modeling/svm/svc.py +33 -61
  194. snowflake/ml/modeling/svm/svr.py +33 -61
  195. snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
  196. snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
  197. snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
  198. snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
  199. snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
  200. snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
  201. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
  202. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
  203. snowflake/ml/registry/_manager/model_manager.py +6 -2
  204. snowflake/ml/registry/model_registry.py +100 -27
  205. snowflake/ml/registry/registry.py +6 -2
  206. snowflake/ml/version.py +1 -1
  207. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
  208. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
  209. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
  210. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
  211. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
@@ -310,18 +310,24 @@ class PoissonRegressor(BaseTransformer):
310
310
  self._get_model_signatures(dataset)
311
311
  return self
312
312
 
313
- def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
314
- if self._drop_input_cols:
315
- return []
316
- else:
317
- return list(set(dataset.columns) - set(self.output_cols))
318
-
319
313
  def _batch_inference_validate_snowpark(
320
314
  self,
321
315
  dataset: DataFrame,
322
316
  inference_method: str,
323
317
  ) -> List[str]:
324
- """Util method to run validate that batch inference can be run on a snowpark dataframe.
318
+ """Util method to run validate that batch inference can be run on a snowpark dataframe and
319
+ return the available package that exists in the snowflake anaconda channel
320
+
321
+ Args:
322
+ dataset: snowpark dataframe
323
+ inference_method: the inference method such as predict, score...
324
+
325
+ Raises:
326
+ SnowflakeMLException: If the estimator is not fitted, raise error
327
+ SnowflakeMLException: If the session is None, raise error
328
+
329
+ Returns:
330
+ A list of available package that exists in the snowflake anaconda channel
325
331
  """
326
332
  if not self._is_fitted:
327
333
  raise exceptions.SnowflakeMLException(
@@ -395,7 +401,7 @@ class PoissonRegressor(BaseTransformer):
395
401
  transform_kwargs = dict(
396
402
  session = dataset._session,
397
403
  dependencies = self._deps,
398
- pass_through_cols = self._get_pass_through_columns(dataset),
404
+ drop_input_cols = self._drop_input_cols,
399
405
  expected_output_cols_type = expected_type_inferred,
400
406
  )
401
407
 
@@ -455,16 +461,16 @@ class PoissonRegressor(BaseTransformer):
455
461
  # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
456
462
  # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
457
463
  # each row containing a list of values.
458
- expected_dtype = "ARRAY"
464
+ expected_dtype = "array"
459
465
 
460
466
  # If we were unable to assign a type to this transform in the factory, infer the type here.
461
467
  if expected_dtype == "":
462
- # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
468
+ # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
463
469
  if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
464
- expected_dtype = "ARRAY"
465
- # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
470
+ expected_dtype = "array"
471
+ # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
466
472
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
467
- expected_dtype = "ARRAY"
473
+ expected_dtype = "array"
468
474
  else:
469
475
  output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
470
476
  # We can only infer the output types from the input types if the following two statemetns are true:
@@ -482,7 +488,7 @@ class PoissonRegressor(BaseTransformer):
482
488
  transform_kwargs = dict(
483
489
  session = dataset._session,
484
490
  dependencies = self._deps,
485
- pass_through_cols = self._get_pass_through_columns(dataset),
491
+ drop_input_cols = self._drop_input_cols,
486
492
  expected_output_cols_type = expected_dtype,
487
493
  )
488
494
 
@@ -533,7 +539,7 @@ class PoissonRegressor(BaseTransformer):
533
539
  subproject=_SUBPROJECT,
534
540
  )
535
541
  output_result, fitted_estimator = model_trainer.train_fit_predict(
536
- pass_through_columns=self._get_pass_through_columns(dataset),
542
+ drop_input_cols=self._drop_input_cols,
537
543
  expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
538
544
  )
539
545
  self._sklearn_object = fitted_estimator
@@ -551,44 +557,6 @@ class PoissonRegressor(BaseTransformer):
551
557
  assert self._sklearn_object is not None
552
558
  return self._sklearn_object.embedding_
553
559
 
554
-
555
- def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
556
- """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
557
- Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
558
- """
559
- output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
560
- if output_cols:
561
- output_cols = [
562
- identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
563
- for c in output_cols
564
- ]
565
- elif getattr(self._sklearn_object, "classes_", None) is None:
566
- output_cols = [output_cols_prefix]
567
- elif self._sklearn_object is not None:
568
- classes = self._sklearn_object.classes_
569
- if isinstance(classes, numpy.ndarray):
570
- output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
571
- elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
572
- # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
573
- output_cols = []
574
- for i, cl in enumerate(classes):
575
- # For binary classification, there is only one output column for each class
576
- # ndarray as the two classes are complementary.
577
- if len(cl) == 2:
578
- output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
579
- else:
580
- output_cols.extend([
581
- f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
582
- ])
583
- else:
584
- output_cols = []
585
-
586
- # Make sure column names are valid snowflake identifiers.
587
- assert output_cols is not None # Make MyPy happy
588
- rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
589
-
590
- return rv
591
-
592
560
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
593
561
  @telemetry.send_api_usage_telemetry(
594
562
  project=_PROJECT,
@@ -628,7 +596,7 @@ class PoissonRegressor(BaseTransformer):
628
596
  transform_kwargs = dict(
629
597
  session=dataset._session,
630
598
  dependencies=self._deps,
631
- pass_through_cols=self._get_pass_through_columns(dataset),
599
+ drop_input_cols = self._drop_input_cols,
632
600
  expected_output_cols_type="float",
633
601
  )
634
602
 
@@ -693,7 +661,7 @@ class PoissonRegressor(BaseTransformer):
693
661
  transform_kwargs = dict(
694
662
  session=dataset._session,
695
663
  dependencies=self._deps,
696
- pass_through_cols=self._get_pass_through_columns(dataset),
664
+ drop_input_cols = self._drop_input_cols,
697
665
  expected_output_cols_type="float",
698
666
  )
699
667
  elif isinstance(dataset, pd.DataFrame):
@@ -754,7 +722,7 @@ class PoissonRegressor(BaseTransformer):
754
722
  transform_kwargs = dict(
755
723
  session=dataset._session,
756
724
  dependencies=self._deps,
757
- pass_through_cols=self._get_pass_through_columns(dataset),
725
+ drop_input_cols = self._drop_input_cols,
758
726
  expected_output_cols_type="float",
759
727
  )
760
728
 
@@ -819,7 +787,7 @@ class PoissonRegressor(BaseTransformer):
819
787
  transform_kwargs = dict(
820
788
  session=dataset._session,
821
789
  dependencies=self._deps,
822
- pass_through_cols=self._get_pass_through_columns(dataset),
790
+ drop_input_cols = self._drop_input_cols,
823
791
  expected_output_cols_type="float",
824
792
  )
825
793
 
@@ -875,13 +843,17 @@ class PoissonRegressor(BaseTransformer):
875
843
  transform_kwargs: ScoreKwargsTypedDict = dict()
876
844
 
877
845
  if isinstance(dataset, DataFrame):
846
+ self._deps = self._batch_inference_validate_snowpark(
847
+ dataset=dataset,
848
+ inference_method="score",
849
+ )
878
850
  selected_cols = self._get_active_columns()
879
851
  if len(selected_cols) > 0:
880
852
  dataset = dataset.select(selected_cols)
881
853
  assert isinstance(dataset._session, Session) # keep mypy happy
882
854
  transform_kwargs = dict(
883
855
  session=dataset._session,
884
- dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
856
+ dependencies=["snowflake-snowpark-python"] + self._deps,
885
857
  score_sproc_imports=['sklearn'],
886
858
  )
887
859
  elif isinstance(dataset, pd.DataFrame):
@@ -955,9 +927,9 @@ class PoissonRegressor(BaseTransformer):
955
927
  transform_kwargs = dict(
956
928
  session = dataset._session,
957
929
  dependencies = self._deps,
958
- pass_through_cols = self._get_pass_through_columns(dataset),
959
- expected_output_cols_type = "array",
960
- n_neighbors = n_neighbors,
930
+ drop_input_cols = self._drop_input_cols,
931
+ expected_output_cols_type="array",
932
+ n_neighbors = n_neighbors,
961
933
  return_distance = return_distance
962
934
  )
963
935
  elif isinstance(dataset, pd.DataFrame):
@@ -366,18 +366,24 @@ class RANSACRegressor(BaseTransformer):
366
366
  self._get_model_signatures(dataset)
367
367
  return self
368
368
 
369
- def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
370
- if self._drop_input_cols:
371
- return []
372
- else:
373
- return list(set(dataset.columns) - set(self.output_cols))
374
-
375
369
  def _batch_inference_validate_snowpark(
376
370
  self,
377
371
  dataset: DataFrame,
378
372
  inference_method: str,
379
373
  ) -> List[str]:
380
- """Util method to run validate that batch inference can be run on a snowpark dataframe.
374
+ """Util method to run validate that batch inference can be run on a snowpark dataframe and
375
+ return the available package that exists in the snowflake anaconda channel
376
+
377
+ Args:
378
+ dataset: snowpark dataframe
379
+ inference_method: the inference method such as predict, score...
380
+
381
+ Raises:
382
+ SnowflakeMLException: If the estimator is not fitted, raise error
383
+ SnowflakeMLException: If the session is None, raise error
384
+
385
+ Returns:
386
+ A list of available package that exists in the snowflake anaconda channel
381
387
  """
382
388
  if not self._is_fitted:
383
389
  raise exceptions.SnowflakeMLException(
@@ -451,7 +457,7 @@ class RANSACRegressor(BaseTransformer):
451
457
  transform_kwargs = dict(
452
458
  session = dataset._session,
453
459
  dependencies = self._deps,
454
- pass_through_cols = self._get_pass_through_columns(dataset),
460
+ drop_input_cols = self._drop_input_cols,
455
461
  expected_output_cols_type = expected_type_inferred,
456
462
  )
457
463
 
@@ -511,16 +517,16 @@ class RANSACRegressor(BaseTransformer):
511
517
  # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
512
518
  # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
513
519
  # each row containing a list of values.
514
- expected_dtype = "ARRAY"
520
+ expected_dtype = "array"
515
521
 
516
522
  # If we were unable to assign a type to this transform in the factory, infer the type here.
517
523
  if expected_dtype == "":
518
- # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
524
+ # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
519
525
  if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
520
- expected_dtype = "ARRAY"
521
- # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
526
+ expected_dtype = "array"
527
+ # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
522
528
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
523
- expected_dtype = "ARRAY"
529
+ expected_dtype = "array"
524
530
  else:
525
531
  output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
526
532
  # We can only infer the output types from the input types if the following two statemetns are true:
@@ -538,7 +544,7 @@ class RANSACRegressor(BaseTransformer):
538
544
  transform_kwargs = dict(
539
545
  session = dataset._session,
540
546
  dependencies = self._deps,
541
- pass_through_cols = self._get_pass_through_columns(dataset),
547
+ drop_input_cols = self._drop_input_cols,
542
548
  expected_output_cols_type = expected_dtype,
543
549
  )
544
550
 
@@ -589,7 +595,7 @@ class RANSACRegressor(BaseTransformer):
589
595
  subproject=_SUBPROJECT,
590
596
  )
591
597
  output_result, fitted_estimator = model_trainer.train_fit_predict(
592
- pass_through_columns=self._get_pass_through_columns(dataset),
598
+ drop_input_cols=self._drop_input_cols,
593
599
  expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
594
600
  )
595
601
  self._sklearn_object = fitted_estimator
@@ -607,44 +613,6 @@ class RANSACRegressor(BaseTransformer):
607
613
  assert self._sklearn_object is not None
608
614
  return self._sklearn_object.embedding_
609
615
 
610
-
611
- def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
612
- """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
613
- Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
614
- """
615
- output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
616
- if output_cols:
617
- output_cols = [
618
- identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
619
- for c in output_cols
620
- ]
621
- elif getattr(self._sklearn_object, "classes_", None) is None:
622
- output_cols = [output_cols_prefix]
623
- elif self._sklearn_object is not None:
624
- classes = self._sklearn_object.classes_
625
- if isinstance(classes, numpy.ndarray):
626
- output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
627
- elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
628
- # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
629
- output_cols = []
630
- for i, cl in enumerate(classes):
631
- # For binary classification, there is only one output column for each class
632
- # ndarray as the two classes are complementary.
633
- if len(cl) == 2:
634
- output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
635
- else:
636
- output_cols.extend([
637
- f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
638
- ])
639
- else:
640
- output_cols = []
641
-
642
- # Make sure column names are valid snowflake identifiers.
643
- assert output_cols is not None # Make MyPy happy
644
- rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
645
-
646
- return rv
647
-
648
616
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
649
617
  @telemetry.send_api_usage_telemetry(
650
618
  project=_PROJECT,
@@ -684,7 +652,7 @@ class RANSACRegressor(BaseTransformer):
684
652
  transform_kwargs = dict(
685
653
  session=dataset._session,
686
654
  dependencies=self._deps,
687
- pass_through_cols=self._get_pass_through_columns(dataset),
655
+ drop_input_cols = self._drop_input_cols,
688
656
  expected_output_cols_type="float",
689
657
  )
690
658
 
@@ -749,7 +717,7 @@ class RANSACRegressor(BaseTransformer):
749
717
  transform_kwargs = dict(
750
718
  session=dataset._session,
751
719
  dependencies=self._deps,
752
- pass_through_cols=self._get_pass_through_columns(dataset),
720
+ drop_input_cols = self._drop_input_cols,
753
721
  expected_output_cols_type="float",
754
722
  )
755
723
  elif isinstance(dataset, pd.DataFrame):
@@ -810,7 +778,7 @@ class RANSACRegressor(BaseTransformer):
810
778
  transform_kwargs = dict(
811
779
  session=dataset._session,
812
780
  dependencies=self._deps,
813
- pass_through_cols=self._get_pass_through_columns(dataset),
781
+ drop_input_cols = self._drop_input_cols,
814
782
  expected_output_cols_type="float",
815
783
  )
816
784
 
@@ -875,7 +843,7 @@ class RANSACRegressor(BaseTransformer):
875
843
  transform_kwargs = dict(
876
844
  session=dataset._session,
877
845
  dependencies=self._deps,
878
- pass_through_cols=self._get_pass_through_columns(dataset),
846
+ drop_input_cols = self._drop_input_cols,
879
847
  expected_output_cols_type="float",
880
848
  )
881
849
 
@@ -931,13 +899,17 @@ class RANSACRegressor(BaseTransformer):
931
899
  transform_kwargs: ScoreKwargsTypedDict = dict()
932
900
 
933
901
  if isinstance(dataset, DataFrame):
902
+ self._deps = self._batch_inference_validate_snowpark(
903
+ dataset=dataset,
904
+ inference_method="score",
905
+ )
934
906
  selected_cols = self._get_active_columns()
935
907
  if len(selected_cols) > 0:
936
908
  dataset = dataset.select(selected_cols)
937
909
  assert isinstance(dataset._session, Session) # keep mypy happy
938
910
  transform_kwargs = dict(
939
911
  session=dataset._session,
940
- dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
912
+ dependencies=["snowflake-snowpark-python"] + self._deps,
941
913
  score_sproc_imports=['sklearn'],
942
914
  )
943
915
  elif isinstance(dataset, pd.DataFrame):
@@ -1011,9 +983,9 @@ class RANSACRegressor(BaseTransformer):
1011
983
  transform_kwargs = dict(
1012
984
  session = dataset._session,
1013
985
  dependencies = self._deps,
1014
- pass_through_cols = self._get_pass_through_columns(dataset),
1015
- expected_output_cols_type = "array",
1016
- n_neighbors = n_neighbors,
986
+ drop_input_cols = self._drop_input_cols,
987
+ expected_output_cols_type="array",
988
+ n_neighbors = n_neighbors,
1017
989
  return_distance = return_distance
1018
990
  )
1019
991
  elif isinstance(dataset, pd.DataFrame):
@@ -358,18 +358,24 @@ class Ridge(BaseTransformer):
358
358
  self._get_model_signatures(dataset)
359
359
  return self
360
360
 
361
- def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
362
- if self._drop_input_cols:
363
- return []
364
- else:
365
- return list(set(dataset.columns) - set(self.output_cols))
366
-
367
361
  def _batch_inference_validate_snowpark(
368
362
  self,
369
363
  dataset: DataFrame,
370
364
  inference_method: str,
371
365
  ) -> List[str]:
372
- """Util method to run validate that batch inference can be run on a snowpark dataframe.
366
+ """Util method to run validate that batch inference can be run on a snowpark dataframe and
367
+ return the available package that exists in the snowflake anaconda channel
368
+
369
+ Args:
370
+ dataset: snowpark dataframe
371
+ inference_method: the inference method such as predict, score...
372
+
373
+ Raises:
374
+ SnowflakeMLException: If the estimator is not fitted, raise error
375
+ SnowflakeMLException: If the session is None, raise error
376
+
377
+ Returns:
378
+ A list of available package that exists in the snowflake anaconda channel
373
379
  """
374
380
  if not self._is_fitted:
375
381
  raise exceptions.SnowflakeMLException(
@@ -443,7 +449,7 @@ class Ridge(BaseTransformer):
443
449
  transform_kwargs = dict(
444
450
  session = dataset._session,
445
451
  dependencies = self._deps,
446
- pass_through_cols = self._get_pass_through_columns(dataset),
452
+ drop_input_cols = self._drop_input_cols,
447
453
  expected_output_cols_type = expected_type_inferred,
448
454
  )
449
455
 
@@ -503,16 +509,16 @@ class Ridge(BaseTransformer):
503
509
  # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
504
510
  # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
505
511
  # each row containing a list of values.
506
- expected_dtype = "ARRAY"
512
+ expected_dtype = "array"
507
513
 
508
514
  # If we were unable to assign a type to this transform in the factory, infer the type here.
509
515
  if expected_dtype == "":
510
- # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
516
+ # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
511
517
  if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
512
- expected_dtype = "ARRAY"
513
- # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
518
+ expected_dtype = "array"
519
+ # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
514
520
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
515
- expected_dtype = "ARRAY"
521
+ expected_dtype = "array"
516
522
  else:
517
523
  output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
518
524
  # We can only infer the output types from the input types if the following two statemetns are true:
@@ -530,7 +536,7 @@ class Ridge(BaseTransformer):
530
536
  transform_kwargs = dict(
531
537
  session = dataset._session,
532
538
  dependencies = self._deps,
533
- pass_through_cols = self._get_pass_through_columns(dataset),
539
+ drop_input_cols = self._drop_input_cols,
534
540
  expected_output_cols_type = expected_dtype,
535
541
  )
536
542
 
@@ -581,7 +587,7 @@ class Ridge(BaseTransformer):
581
587
  subproject=_SUBPROJECT,
582
588
  )
583
589
  output_result, fitted_estimator = model_trainer.train_fit_predict(
584
- pass_through_columns=self._get_pass_through_columns(dataset),
590
+ drop_input_cols=self._drop_input_cols,
585
591
  expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
586
592
  )
587
593
  self._sklearn_object = fitted_estimator
@@ -599,44 +605,6 @@ class Ridge(BaseTransformer):
599
605
  assert self._sklearn_object is not None
600
606
  return self._sklearn_object.embedding_
601
607
 
602
-
603
- def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
604
- """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
605
- Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
606
- """
607
- output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
608
- if output_cols:
609
- output_cols = [
610
- identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
611
- for c in output_cols
612
- ]
613
- elif getattr(self._sklearn_object, "classes_", None) is None:
614
- output_cols = [output_cols_prefix]
615
- elif self._sklearn_object is not None:
616
- classes = self._sklearn_object.classes_
617
- if isinstance(classes, numpy.ndarray):
618
- output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
619
- elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
620
- # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
621
- output_cols = []
622
- for i, cl in enumerate(classes):
623
- # For binary classification, there is only one output column for each class
624
- # ndarray as the two classes are complementary.
625
- if len(cl) == 2:
626
- output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
627
- else:
628
- output_cols.extend([
629
- f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
630
- ])
631
- else:
632
- output_cols = []
633
-
634
- # Make sure column names are valid snowflake identifiers.
635
- assert output_cols is not None # Make MyPy happy
636
- rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
637
-
638
- return rv
639
-
640
608
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
641
609
  @telemetry.send_api_usage_telemetry(
642
610
  project=_PROJECT,
@@ -676,7 +644,7 @@ class Ridge(BaseTransformer):
676
644
  transform_kwargs = dict(
677
645
  session=dataset._session,
678
646
  dependencies=self._deps,
679
- pass_through_cols=self._get_pass_through_columns(dataset),
647
+ drop_input_cols = self._drop_input_cols,
680
648
  expected_output_cols_type="float",
681
649
  )
682
650
 
@@ -741,7 +709,7 @@ class Ridge(BaseTransformer):
741
709
  transform_kwargs = dict(
742
710
  session=dataset._session,
743
711
  dependencies=self._deps,
744
- pass_through_cols=self._get_pass_through_columns(dataset),
712
+ drop_input_cols = self._drop_input_cols,
745
713
  expected_output_cols_type="float",
746
714
  )
747
715
  elif isinstance(dataset, pd.DataFrame):
@@ -802,7 +770,7 @@ class Ridge(BaseTransformer):
802
770
  transform_kwargs = dict(
803
771
  session=dataset._session,
804
772
  dependencies=self._deps,
805
- pass_through_cols=self._get_pass_through_columns(dataset),
773
+ drop_input_cols = self._drop_input_cols,
806
774
  expected_output_cols_type="float",
807
775
  )
808
776
 
@@ -867,7 +835,7 @@ class Ridge(BaseTransformer):
867
835
  transform_kwargs = dict(
868
836
  session=dataset._session,
869
837
  dependencies=self._deps,
870
- pass_through_cols=self._get_pass_through_columns(dataset),
838
+ drop_input_cols = self._drop_input_cols,
871
839
  expected_output_cols_type="float",
872
840
  )
873
841
 
@@ -923,13 +891,17 @@ class Ridge(BaseTransformer):
923
891
  transform_kwargs: ScoreKwargsTypedDict = dict()
924
892
 
925
893
  if isinstance(dataset, DataFrame):
894
+ self._deps = self._batch_inference_validate_snowpark(
895
+ dataset=dataset,
896
+ inference_method="score",
897
+ )
926
898
  selected_cols = self._get_active_columns()
927
899
  if len(selected_cols) > 0:
928
900
  dataset = dataset.select(selected_cols)
929
901
  assert isinstance(dataset._session, Session) # keep mypy happy
930
902
  transform_kwargs = dict(
931
903
  session=dataset._session,
932
- dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
904
+ dependencies=["snowflake-snowpark-python"] + self._deps,
933
905
  score_sproc_imports=['sklearn'],
934
906
  )
935
907
  elif isinstance(dataset, pd.DataFrame):
@@ -1003,9 +975,9 @@ class Ridge(BaseTransformer):
1003
975
  transform_kwargs = dict(
1004
976
  session = dataset._session,
1005
977
  dependencies = self._deps,
1006
- pass_through_cols = self._get_pass_through_columns(dataset),
1007
- expected_output_cols_type = "array",
1008
- n_neighbors = n_neighbors,
978
+ drop_input_cols = self._drop_input_cols,
979
+ expected_output_cols_type="array",
980
+ n_neighbors = n_neighbors,
1009
981
  return_distance = return_distance
1010
982
  )
1011
983
  elif isinstance(dataset, pd.DataFrame):