snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211)
  1. snowflake/ml/_internal/file_utils.py +3 -3
  2. snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
  3. snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
  4. snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
  5. snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
  6. snowflake/ml/_internal/telemetry.py +11 -2
  7. snowflake/ml/_internal/utils/formatting.py +1 -1
  8. snowflake/ml/feature_store/feature_store.py +15 -106
  9. snowflake/ml/fileset/sfcfs.py +4 -3
  10. snowflake/ml/fileset/stage_fs.py +18 -0
  11. snowflake/ml/model/_api.py +9 -9
  12. snowflake/ml/model/_client/model/model_version_impl.py +20 -15
  13. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
  14. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
  15. snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
  16. snowflake/ml/model/_model_composer/model_composer.py +10 -8
  17. snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
  18. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
  19. snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
  20. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
  21. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  22. snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
  23. snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
  24. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
  25. snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
  26. snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
  27. snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
  28. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
  29. snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
  30. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
  31. snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
  32. snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
  33. snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
  34. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  35. snowflake/ml/model/_packager/model_packager.py +8 -6
  36. snowflake/ml/model/custom_model.py +3 -1
  37. snowflake/ml/model/type_hints.py +13 -0
  38. snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
  39. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
  40. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
  41. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
  42. snowflake/ml/modeling/_internal/model_specifications.py +3 -1
  43. snowflake/ml/modeling/_internal/model_trainer.py +2 -2
  44. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
  45. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
  46. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
  47. snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
  48. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
  49. snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
  51. snowflake/ml/modeling/cluster/birch.py +33 -61
  52. snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
  53. snowflake/ml/modeling/cluster/dbscan.py +33 -61
  54. snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
  55. snowflake/ml/modeling/cluster/k_means.py +33 -61
  56. snowflake/ml/modeling/cluster/mean_shift.py +33 -61
  57. snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
  58. snowflake/ml/modeling/cluster/optics.py +33 -61
  59. snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
  60. snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
  61. snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
  62. snowflake/ml/modeling/compose/column_transformer.py +33 -61
  63. snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
  64. snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
  65. snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
  66. snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
  67. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
  68. snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
  69. snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
  70. snowflake/ml/modeling/covariance/oas.py +33 -61
  71. snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
  72. snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
  73. snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
  74. snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
  75. snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
  76. snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
  77. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
  78. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
  79. snowflake/ml/modeling/decomposition/pca.py +33 -61
  80. snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
  81. snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
  82. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
  83. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
  84. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
  85. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
  86. snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
  87. snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
  88. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
  89. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
  90. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
  91. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
  92. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
  93. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
  94. snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
  95. snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
  96. snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
  97. snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
  98. snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
  99. snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
  100. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
  101. snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
  102. snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
  103. snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
  104. snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
  105. snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
  106. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
  107. snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
  108. snowflake/ml/modeling/framework/base.py +55 -5
  109. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
  110. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
  111. snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
  112. snowflake/ml/modeling/impute/knn_imputer.py +33 -61
  113. snowflake/ml/modeling/impute/missing_indicator.py +33 -61
  114. snowflake/ml/modeling/impute/simple_imputer.py +4 -15
  115. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
  116. snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
  117. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
  118. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
  119. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
  120. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
  121. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
  122. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
  123. snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
  124. snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
  125. snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
  126. snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
  127. snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
  128. snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
  129. snowflake/ml/modeling/linear_model/lars.py +33 -61
  130. snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
  131. snowflake/ml/modeling/linear_model/lasso.py +33 -61
  132. snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
  133. snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
  134. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
  135. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
  136. snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
  137. snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
  138. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
  140. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
  141. snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
  142. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
  143. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
  144. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
  145. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
  146. snowflake/ml/modeling/linear_model/perceptron.py +33 -61
  147. snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
  148. snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
  149. snowflake/ml/modeling/linear_model/ridge.py +33 -61
  150. snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
  151. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
  152. snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
  153. snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
  154. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
  155. snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
  156. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
  157. snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
  158. snowflake/ml/modeling/manifold/isomap.py +33 -61
  159. snowflake/ml/modeling/manifold/mds.py +33 -61
  160. snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
  161. snowflake/ml/modeling/manifold/tsne.py +33 -61
  162. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
  163. snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
  164. snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
  165. snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
  166. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
  167. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
  168. snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
  169. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
  170. snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
  171. snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
  172. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
  173. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
  174. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
  175. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
  176. snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
  177. snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
  178. snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
  179. snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
  180. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
  181. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
  182. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
  183. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
  184. snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
  185. snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
  186. snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
  187. snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
  188. snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
  189. snowflake/ml/modeling/svm/linear_svc.py +33 -61
  190. snowflake/ml/modeling/svm/linear_svr.py +33 -61
  191. snowflake/ml/modeling/svm/nu_svc.py +33 -61
  192. snowflake/ml/modeling/svm/nu_svr.py +33 -61
  193. snowflake/ml/modeling/svm/svc.py +33 -61
  194. snowflake/ml/modeling/svm/svr.py +33 -61
  195. snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
  196. snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
  197. snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
  198. snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
  199. snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
  200. snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
  201. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
  202. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
  203. snowflake/ml/registry/_manager/model_manager.py +6 -2
  204. snowflake/ml/registry/model_registry.py +100 -27
  205. snowflake/ml/registry/registry.py +6 -2
  206. snowflake/ml/version.py +1 -1
  207. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
  208. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
  209. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
  210. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
  211. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
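
Items 2-5 above add a human-readable ID generator backed by two new 128-entry word lists (adjectives.txt and animals.txt; a 128-entry list encodes exactly 7 bits per word). The generator's actual API is not shown in this diff, so the following is only a conceptual sketch of the word-list technique, with invented word lists, ID format, and function name:

    import secrets

    # Stand-ins for the package's adjectives.txt / animals.txt (128 entries each).
    ADJECTIVES = ["brave", "calm", "eager", "fuzzy"]
    ANIMALS = ["otter", "panda", "heron", "lynx"]

    def generate_hrid(num_bits: int = 8) -> str:
        """Compose an easy-to-pronounce ID: adjective_animal_number."""
        adjective = secrets.choice(ADJECTIVES)
        animal = secrets.choice(ANIMALS)
        number = secrets.randbelow(2 ** num_bits)  # numeric suffix for extra entropy
        return f"{adjective}_{animal}_{number}"

    print(generate_hrid())  # e.g. 'calm_otter_42'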
snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61

@@ -312,18 +312,24 @@ class FactorAnalysis(BaseTransformer):
         self._get_model_signatures(dataset)
         return self

-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -395,7 +401,7 @@ class FactorAnalysis(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )

@@ -457,16 +463,16 @@ class FactorAnalysis(BaseTransformer):
         # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
         # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
         # each row containing a list of values.
-        expected_dtype = "ARRAY"
+        expected_dtype = "array"

         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
             if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "ARRAY"
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
             elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statemetns are true:
@@ -484,7 +490,7 @@ class FactorAnalysis(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )

@@ -535,7 +541,7 @@ class FactorAnalysis(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -553,44 +559,6 @@ class FactorAnalysis(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_

-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-            Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -630,7 +598,7 @@ class FactorAnalysis(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -695,7 +663,7 @@ class FactorAnalysis(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -756,7 +724,7 @@ class FactorAnalysis(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -823,7 +791,7 @@ class FactorAnalysis(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -879,13 +847,17 @@ class FactorAnalysis(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()

         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -959,9 +931,9 @@ class FactorAnalysis(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
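
The recurring change in the hunks above, repeated across every generated estimator in this release, removes the per-estimator helper _get_pass_through_columns() and instead forwards the boolean drop_input_cols flag so the inference handler decides which columns to pass through. A minimal standalone sketch of the equivalent logic (the helper name below is hypothetical; the set arithmetic mirrors the removed method):

    from typing import List, Sequence

    def pass_through_columns(
        dataset_columns: Sequence[str],
        output_cols: Sequence[str],
        drop_input_cols: bool,
    ) -> List[str]:
        """Columns the handler should carry through inference unchanged."""
        if drop_input_cols:
            return []
        # Same set difference as the removed method: anything the model
        # does not produce is passed through.
        return list(set(dataset_columns) - set(output_cols))

    print(sorted(pass_through_columns(["F1", "F2", "OUTPUT"], ["OUTPUT"], False)))  # ['F1', 'F2']
    print(pass_through_columns(["F1", "F2", "OUTPUT"], ["OUTPUT"], True))           # []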
snowflake/ml/modeling/decomposition/fast_ica.py +33 -61

@@ -330,18 +330,24 @@ class FastICA(BaseTransformer):
         self._get_model_signatures(dataset)
         return self

-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -413,7 +419,7 @@ class FastICA(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )

@@ -475,16 +481,16 @@ class FastICA(BaseTransformer):
         # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
         # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
         # each row containing a list of values.
-        expected_dtype = "ARRAY"
+        expected_dtype = "array"

         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
             if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "ARRAY"
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
             elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statemetns are true:
@@ -502,7 +508,7 @@ class FastICA(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )

@@ -553,7 +559,7 @@ class FastICA(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -571,44 +577,6 @@ class FastICA(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_

-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-            Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -648,7 +616,7 @@ class FastICA(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -713,7 +681,7 @@ class FastICA(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -774,7 +742,7 @@ class FastICA(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -839,7 +807,7 @@ class FastICA(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -893,13 +861,17 @@ class FastICA(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()

         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -973,9 +945,9 @@ class FastICA(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
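
The score() hunks also change where dependencies come from: instead of self._get_dependencies(), score() now calls _batch_inference_validate_snowpark() up front and caches the packages it validates against the Snowflake Anaconda channel in self._deps. A simplified, runnable sketch of that control flow, with stand-in classes and a generic error type in place of the real Snowpark session and SnowflakeMLException machinery:

    from typing import Any, Dict, List, Optional

    class FakeSnowparkDataFrame:
        """Stand-in for snowpark.DataFrame; only what this sketch touches."""
        def __init__(self, session: Optional[object]) -> None:
            self._session = session

    class EstimatorSketch:
        def __init__(self) -> None:
            self._is_fitted = True
            self._deps: List[str] = []

        def _batch_inference_validate_snowpark(
            self, dataset: FakeSnowparkDataFrame, inference_method: str
        ) -> List[str]:
            # Per the new docstring: raise if the estimator is unfitted or the
            # session is None; otherwise return the packages confirmed to exist
            # in the Snowflake Anaconda channel (an illustrative constant here).
            if not self._is_fitted:
                raise RuntimeError(f"call fit() before {inference_method}()")
            if dataset._session is None:
                raise RuntimeError("dataset has no Snowpark session")
            return ["scikit-learn"]

        def score(self, dataset: FakeSnowparkDataFrame) -> Dict[str, Any]:
            # New in 1.4.0: validate and cache deps before building sproc kwargs.
            self._deps = self._batch_inference_validate_snowpark(dataset, "score")
            return dict(
                session=dataset._session,
                dependencies=["snowflake-snowpark-python"] + self._deps,
                score_sproc_imports=["sklearn"],
            )

    kwargs = EstimatorSketch().score(FakeSnowparkDataFrame(session=object()))
    print(kwargs["dependencies"])  # ['snowflake-snowpark-python', 'scikit-learn']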
snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61

@@ -282,18 +282,24 @@ class IncrementalPCA(BaseTransformer):
         self._get_model_signatures(dataset)
         return self

-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -365,7 +371,7 @@ class IncrementalPCA(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )

@@ -427,16 +433,16 @@ class IncrementalPCA(BaseTransformer):
         # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
         # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
         # each row containing a list of values.
-        expected_dtype = "ARRAY"
+        expected_dtype = "array"

         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
             if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "ARRAY"
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
             elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statemetns are true:
@@ -454,7 +460,7 @@ class IncrementalPCA(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )

@@ -505,7 +511,7 @@ class IncrementalPCA(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -523,44 +529,6 @@ class IncrementalPCA(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_

-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-            Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -600,7 +568,7 @@ class IncrementalPCA(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -665,7 +633,7 @@ class IncrementalPCA(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -726,7 +694,7 @@ class IncrementalPCA(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -791,7 +759,7 @@ class IncrementalPCA(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -845,13 +813,17 @@ class IncrementalPCA(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()

         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -925,9 +897,9 @@ class IncrementalPCA(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
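
Each file above also drops its private copy of _get_output_column_names(), which named one output column per class for predict_proba()-style methods (framework/base.py, item 108, grows by +55 lines in the same release and is a plausible new home, though that move is not shown here). A standalone sketch of the removed naming logic, minus the Snowflake identifier handling:

    from typing import List, Optional
    import numpy

    def output_column_names(prefix: str, classes: Optional[object]) -> List[str]:
        """Mirror of the removed logic: one column per class, else just the prefix."""
        if classes is None:
            return [prefix]  # not a classifier: a single output column
        if isinstance(classes, numpy.ndarray):
            return [f"{prefix}{c}" for c in classes.tolist()]
        if isinstance(classes, list) and classes and isinstance(classes[0], numpy.ndarray):
            # Multioutput estimator: classes_ is a list of ndarrays.
            cols: List[str] = []
            for i, cl in enumerate(classes):
                if len(cl) == 2:
                    # Binary output: one column suffices, the classes are complementary.
                    cols.append(f"{prefix}{i}_{cl[0]}")
                else:
                    cols.extend(f"{prefix}{i}_{c}" for c in cl.tolist())
            return cols
        return []

    print(output_column_names("PREDICT_PROBA_", numpy.array([0, 1, 2])))
    # ['PREDICT_PROBA_0', 'PREDICT_PROBA_1', 'PREDICT_PROBA_2']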