snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only.
Files changed (211)
  1. snowflake/ml/_internal/file_utils.py +3 -3
  2. snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
  3. snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
  4. snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
  5. snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
  6. snowflake/ml/_internal/telemetry.py +11 -2
  7. snowflake/ml/_internal/utils/formatting.py +1 -1
  8. snowflake/ml/feature_store/feature_store.py +15 -106
  9. snowflake/ml/fileset/sfcfs.py +4 -3
  10. snowflake/ml/fileset/stage_fs.py +18 -0
  11. snowflake/ml/model/_api.py +9 -9
  12. snowflake/ml/model/_client/model/model_version_impl.py +20 -15
  13. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
  14. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
  15. snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
  16. snowflake/ml/model/_model_composer/model_composer.py +10 -8
  17. snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
  18. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
  19. snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
  20. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
  21. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  22. snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
  23. snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
  24. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
  25. snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
  26. snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
  27. snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
  28. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
  29. snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
  30. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
  31. snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
  32. snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
  33. snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
  34. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  35. snowflake/ml/model/_packager/model_packager.py +8 -6
  36. snowflake/ml/model/custom_model.py +3 -1
  37. snowflake/ml/model/type_hints.py +13 -0
  38. snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
  39. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
  40. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
  41. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
  42. snowflake/ml/modeling/_internal/model_specifications.py +3 -1
  43. snowflake/ml/modeling/_internal/model_trainer.py +2 -2
  44. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
  45. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
  46. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
  47. snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
  48. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
  49. snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
  51. snowflake/ml/modeling/cluster/birch.py +33 -61
  52. snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
  53. snowflake/ml/modeling/cluster/dbscan.py +33 -61
  54. snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
  55. snowflake/ml/modeling/cluster/k_means.py +33 -61
  56. snowflake/ml/modeling/cluster/mean_shift.py +33 -61
  57. snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
  58. snowflake/ml/modeling/cluster/optics.py +33 -61
  59. snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
  60. snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
  61. snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
  62. snowflake/ml/modeling/compose/column_transformer.py +33 -61
  63. snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
  64. snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
  65. snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
  66. snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
  67. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
  68. snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
  69. snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
  70. snowflake/ml/modeling/covariance/oas.py +33 -61
  71. snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
  72. snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
  73. snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
  74. snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
  75. snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
  76. snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
  77. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
  78. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
  79. snowflake/ml/modeling/decomposition/pca.py +33 -61
  80. snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
  81. snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
  82. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
  83. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
  84. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
  85. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
  86. snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
  87. snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
  88. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
  89. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
  90. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
  91. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
  92. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
  93. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
  94. snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
  95. snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
  96. snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
  97. snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
  98. snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
  99. snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
  100. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
  101. snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
  102. snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
  103. snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
  104. snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
  105. snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
  106. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
  107. snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
  108. snowflake/ml/modeling/framework/base.py +55 -5
  109. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
  110. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
  111. snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
  112. snowflake/ml/modeling/impute/knn_imputer.py +33 -61
  113. snowflake/ml/modeling/impute/missing_indicator.py +33 -61
  114. snowflake/ml/modeling/impute/simple_imputer.py +4 -15
  115. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
  116. snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
  117. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
  118. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
  119. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
  120. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
  121. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
  122. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
  123. snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
  124. snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
  125. snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
  126. snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
  127. snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
  128. snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
  129. snowflake/ml/modeling/linear_model/lars.py +33 -61
  130. snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
  131. snowflake/ml/modeling/linear_model/lasso.py +33 -61
  132. snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
  133. snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
  134. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
  135. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
  136. snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
  137. snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
  138. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
  140. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
  141. snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
  142. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
  143. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
  144. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
  145. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
  146. snowflake/ml/modeling/linear_model/perceptron.py +33 -61
  147. snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
  148. snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
  149. snowflake/ml/modeling/linear_model/ridge.py +33 -61
  150. snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
  151. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
  152. snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
  153. snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
  154. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
  155. snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
  156. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
  157. snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
  158. snowflake/ml/modeling/manifold/isomap.py +33 -61
  159. snowflake/ml/modeling/manifold/mds.py +33 -61
  160. snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
  161. snowflake/ml/modeling/manifold/tsne.py +33 -61
  162. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
  163. snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
  164. snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
  165. snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
  166. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
  167. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
  168. snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
  169. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
  170. snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
  171. snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
  172. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
  173. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
  174. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
  175. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
  176. snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
  177. snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
  178. snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
  179. snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
  180. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
  181. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
  182. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
  183. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
  184. snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
  185. snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
  186. snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
  187. snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
  188. snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
  189. snowflake/ml/modeling/svm/linear_svc.py +33 -61
  190. snowflake/ml/modeling/svm/linear_svr.py +33 -61
  191. snowflake/ml/modeling/svm/nu_svc.py +33 -61
  192. snowflake/ml/modeling/svm/nu_svr.py +33 -61
  193. snowflake/ml/modeling/svm/svc.py +33 -61
  194. snowflake/ml/modeling/svm/svr.py +33 -61
  195. snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
  196. snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
  197. snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
  198. snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
  199. snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
  200. snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
  201. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
  202. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
  203. snowflake/ml/registry/_manager/model_manager.py +6 -2
  204. snowflake/ml/registry/model_registry.py +100 -27
  205. snowflake/ml/registry/registry.py +6 -2
  206. snowflake/ml/version.py +1 -1
  207. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
  208. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
  209. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
  210. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
  211. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
snowflake/ml/modeling/manifold/mds.py

@@ -322,18 +322,24 @@ class MDS(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -405,7 +411,7 @@ class MDS(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -465,16 +471,16 @@ class MDS(BaseTransformer):
         # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
         # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
         # each row containing a list of values.
-        expected_dtype = "ARRAY"
+        expected_dtype = "array"
 
         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
             if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "ARRAY"
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
             elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statemetns are true:
@@ -492,7 +498,7 @@ class MDS(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -543,7 +549,7 @@ class MDS(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -561,44 +567,6 @@ class MDS(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -638,7 +606,7 @@ class MDS(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -703,7 +671,7 @@ class MDS(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -764,7 +732,7 @@ class MDS(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -829,7 +797,7 @@ class MDS(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -883,13 +851,17 @@ class MDS(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -963,9 +935,9 @@ class MDS(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
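
The change repeated throughout these estimator files replaces the per-estimator _get_pass_through_columns helper with the plain _drop_input_cols flag, moving the decision about which columns survive inference into the shared handler layer. Below is a minimal standalone sketch contrasting the two behaviors; the handler-side function and its names are illustrative assumptions, not the library's actual API.

from typing import List


# Pre-1.4.0 behavior, removed in this diff: each estimator computed its own
# pass-through column list from the dataset columns and configured output_cols.
def get_pass_through_columns(dataset_columns: List[str], output_cols: List[str],
                             drop_input_cols: bool) -> List[str]:
    if drop_input_cols:
        return []
    return list(set(dataset_columns) - set(output_cols))


# 1.4.0 behavior: the estimator forwards only the drop_input_cols flag, and a
# shared inference handler decides what to keep. Hypothetical sketch only.
def handler_result_columns(dataset_columns: List[str], input_cols: List[str],
                           drop_input_cols: bool) -> List[str]:
    if drop_input_cols:
        return [c for c in dataset_columns if c not in input_cols]
    return list(dataset_columns)


print(sorted(get_pass_through_columns(["F1", "F2", "OUT"], ["OUT"], False)))
# ['F1', 'F2']  (set difference is unordered, so sorted for stable display)
print(handler_result_columns(["F1", "F2", "OUT"], ["F1", "F2"], True)))  # noqa: see below
# ['OUT']

(The same substitution, pass_through_cols to drop_input_cols, appears in every transform_kwargs block of the two files that follow.)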
snowflake/ml/modeling/manifold/spectral_embedding.py

@@ -324,18 +324,24 @@ class SpectralEmbedding(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -407,7 +413,7 @@ class SpectralEmbedding(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -467,16 +473,16 @@ class SpectralEmbedding(BaseTransformer):
         # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
         # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
         # each row containing a list of values.
-        expected_dtype = "ARRAY"
+        expected_dtype = "array"
 
         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
             if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "ARRAY"
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
             elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statemetns are true:
@@ -494,7 +500,7 @@ class SpectralEmbedding(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -545,7 +551,7 @@ class SpectralEmbedding(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -563,44 +569,6 @@ class SpectralEmbedding(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -640,7 +608,7 @@ class SpectralEmbedding(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -705,7 +673,7 @@ class SpectralEmbedding(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -766,7 +734,7 @@ class SpectralEmbedding(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -831,7 +799,7 @@ class SpectralEmbedding(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -885,13 +853,17 @@ class SpectralEmbedding(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
        if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -965,9 +937,9 @@ class SpectralEmbedding(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
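
The _get_output_column_names helper deleted from each of these classes encoded the naming scheme for per-class output columns in predict_proba()-style results. For reference, here is a simplified, self-contained rendering of that scheme; Snowflake identifier resolution and quoting, handled by identifier.* in the original, is deliberately omitted.

from typing import List, Optional, Sequence, Union

import numpy as np


def output_column_names(prefix: str,
                        classes: Optional[Union[np.ndarray, Sequence[np.ndarray]]]) -> List[str]:
    # `classes` mirrors sklearn's `classes_` attribute: None for a
    # non-classifier, an ndarray for a single-output classifier, or a list
    # of ndarrays for a multioutput classifier.
    if classes is None:
        # Not a classifier: a single output column named by the prefix alone.
        return [prefix]
    if isinstance(classes, np.ndarray):
        return [f"{prefix}{c}" for c in classes.tolist()]
    cols: List[str] = []
    for i, cl in enumerate(classes):
        if len(cl) == 2:
            # Binary output i: one column suffices, the two classes are complementary.
            cols.append(f"{prefix}{i}_{cl[0]}")
        else:
            cols.extend(f"{prefix}{i}_{c}" for c in cl.tolist())
    return cols


print(output_column_names("PREDICT_PROBA_", np.array([0, 1, 2])))
# ['PREDICT_PROBA_0', 'PREDICT_PROBA_1', 'PREDICT_PROBA_2']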
snowflake/ml/modeling/manifold/tsne.py

@@ -383,18 +383,24 @@ class TSNE(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -466,7 +472,7 @@ class TSNE(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -526,16 +532,16 @@ class TSNE(BaseTransformer):
         # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
         # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
         # each row containing a list of values.
-        expected_dtype = "ARRAY"
+        expected_dtype = "array"
 
         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
             if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "ARRAY"
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statemetns are true:
@@ -553,7 +559,7 @@ class TSNE(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -604,7 +610,7 @@ class TSNE(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -622,44 +628,6 @@ class TSNE(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -699,7 +667,7 @@ class TSNE(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -764,7 +732,7 @@ class TSNE(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -825,7 +793,7 @@ class TSNE(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -890,7 +858,7 @@ class TSNE(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -944,13 +912,17 @@ class TSNE(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -1024,9 +996,9 @@ class TSNE(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
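
The last recurring change is in the Snowpark branch of score(): each class now calls _batch_inference_validate_snowpark up front, so the dependency list sent with the scoring stored procedure is the validated set of packages resolvable from the Snowflake Anaconda channel rather than the raw output of self._get_dependencies(). A compact sketch of that flow; the channel contents and the resolution rule below are illustrative assumptions, only the validation contract comes from the new docstring.

from typing import List, Optional, Sequence

# Illustrative stand-in for what the Snowflake Anaconda channel can resolve.
_CHANNEL = {"scikit-learn", "numpy", "xgboost", "snowflake-snowpark-python"}


class Estimator:
    def __init__(self, deps: Sequence[str], fitted: bool = True) -> None:
        self._deps: List[str] = list(deps)
        self._is_fitted = fitted

    def _batch_inference_validate_snowpark(self, session: Optional[object],
                                           inference_method: str) -> List[str]:
        # Contract from the new docstring: raise if the estimator is unfitted
        # or the session is missing; otherwise return the dependency packages
        # available in the channel.
        if not self._is_fitted:
            raise RuntimeError(f"Estimator must be fitted before {inference_method}().")
        if session is None:
            raise RuntimeError("Snowpark DataFrame has no active session.")
        return [d for d in self._deps if d.split("==")[0] in _CHANNEL]

    def score_dependencies(self, session: Optional[object]) -> List[str]:
        # 1.4.0 behavior: validate first, then prepend the Snowpark client,
        # mirroring dependencies=["snowflake-snowpark-python"] + self._deps.
        self._deps = self._batch_inference_validate_snowpark(session, "score")
        return ["snowflake-snowpark-python"] + self._deps


print(Estimator(["scikit-learn==1.3", "somepkg==0.1"]).score_dependencies(session=object()))
# ['snowflake-snowpark-python', 'scikit-learn==1.3']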