snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211)
  1. snowflake/ml/_internal/file_utils.py +3 -3
  2. snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
  3. snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
  4. snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
  5. snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
  6. snowflake/ml/_internal/telemetry.py +11 -2
  7. snowflake/ml/_internal/utils/formatting.py +1 -1
  8. snowflake/ml/feature_store/feature_store.py +15 -106
  9. snowflake/ml/fileset/sfcfs.py +4 -3
  10. snowflake/ml/fileset/stage_fs.py +18 -0
  11. snowflake/ml/model/_api.py +9 -9
  12. snowflake/ml/model/_client/model/model_version_impl.py +20 -15
  13. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
  14. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
  15. snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
  16. snowflake/ml/model/_model_composer/model_composer.py +10 -8
  17. snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
  18. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
  19. snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
  20. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
  21. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  22. snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
  23. snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
  24. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
  25. snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
  26. snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
  27. snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
  28. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
  29. snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
  30. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
  31. snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
  32. snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
  33. snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
  34. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  35. snowflake/ml/model/_packager/model_packager.py +8 -6
  36. snowflake/ml/model/custom_model.py +3 -1
  37. snowflake/ml/model/type_hints.py +13 -0
  38. snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
  39. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
  40. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
  41. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
  42. snowflake/ml/modeling/_internal/model_specifications.py +3 -1
  43. snowflake/ml/modeling/_internal/model_trainer.py +2 -2
  44. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
  45. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
  46. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
  47. snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
  48. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
  49. snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
  51. snowflake/ml/modeling/cluster/birch.py +33 -61
  52. snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
  53. snowflake/ml/modeling/cluster/dbscan.py +33 -61
  54. snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
  55. snowflake/ml/modeling/cluster/k_means.py +33 -61
  56. snowflake/ml/modeling/cluster/mean_shift.py +33 -61
  57. snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
  58. snowflake/ml/modeling/cluster/optics.py +33 -61
  59. snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
  60. snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
  61. snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
  62. snowflake/ml/modeling/compose/column_transformer.py +33 -61
  63. snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
  64. snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
  65. snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
  66. snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
  67. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
  68. snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
  69. snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
  70. snowflake/ml/modeling/covariance/oas.py +33 -61
  71. snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
  72. snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
  73. snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
  74. snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
  75. snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
  76. snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
  77. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
  78. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
  79. snowflake/ml/modeling/decomposition/pca.py +33 -61
  80. snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
  81. snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
  82. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
  83. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
  84. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
  85. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
  86. snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
  87. snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
  88. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
  89. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
  90. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
  91. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
  92. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
  93. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
  94. snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
  95. snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
  96. snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
  97. snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
  98. snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
  99. snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
  100. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
  101. snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
  102. snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
  103. snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
  104. snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
  105. snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
  106. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
  107. snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
  108. snowflake/ml/modeling/framework/base.py +55 -5
  109. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
  110. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
  111. snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
  112. snowflake/ml/modeling/impute/knn_imputer.py +33 -61
  113. snowflake/ml/modeling/impute/missing_indicator.py +33 -61
  114. snowflake/ml/modeling/impute/simple_imputer.py +4 -15
  115. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
  116. snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
  117. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
  118. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
  119. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
  120. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
  121. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
  122. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
  123. snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
  124. snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
  125. snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
  126. snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
  127. snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
  128. snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
  129. snowflake/ml/modeling/linear_model/lars.py +33 -61
  130. snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
  131. snowflake/ml/modeling/linear_model/lasso.py +33 -61
  132. snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
  133. snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
  134. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
  135. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
  136. snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
  137. snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
  138. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
  140. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
  141. snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
  142. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
  143. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
  144. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
  145. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
  146. snowflake/ml/modeling/linear_model/perceptron.py +33 -61
  147. snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
  148. snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
  149. snowflake/ml/modeling/linear_model/ridge.py +33 -61
  150. snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
  151. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
  152. snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
  153. snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
  154. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
  155. snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
  156. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
  157. snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
  158. snowflake/ml/modeling/manifold/isomap.py +33 -61
  159. snowflake/ml/modeling/manifold/mds.py +33 -61
  160. snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
  161. snowflake/ml/modeling/manifold/tsne.py +33 -61
  162. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
  163. snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
  164. snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
  165. snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
  166. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
  167. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
  168. snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
  169. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
  170. snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
  171. snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
  172. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
  173. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
  174. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
  175. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
  176. snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
  177. snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
  178. snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
  179. snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
  180. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
  181. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
  182. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
  183. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
  184. snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
  185. snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
  186. snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
  187. snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
  188. snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
  189. snowflake/ml/modeling/svm/linear_svc.py +33 -61
  190. snowflake/ml/modeling/svm/linear_svr.py +33 -61
  191. snowflake/ml/modeling/svm/nu_svc.py +33 -61
  192. snowflake/ml/modeling/svm/nu_svr.py +33 -61
  193. snowflake/ml/modeling/svm/svc.py +33 -61
  194. snowflake/ml/modeling/svm/svr.py +33 -61
  195. snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
  196. snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
  197. snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
  198. snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
  199. snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
  200. snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
  201. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
  202. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
  203. snowflake/ml/registry/_manager/model_manager.py +6 -2
  204. snowflake/ml/registry/model_registry.py +100 -27
  205. snowflake/ml/registry/registry.py +6 -2
  206. snowflake/ml/version.py +1 -1
  207. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
  208. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
  209. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
  210. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
  211. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py
@@ -318,18 +318,24 @@ class LinearDiscriminantAnalysis(BaseTransformer):
         self._get_model_signatures(dataset)
         return self

-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to validate that batch inference can be run on a Snowpark DataFrame.
+        """Util method to validate that batch inference can be run on a Snowpark DataFrame and
+        return the available packages that exist in the Snowflake Anaconda channel.
+
+        Args:
+            dataset: Snowpark DataFrame.
+            inference_method: The inference method, e.g. predict or score.
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted.
+            SnowflakeMLException: If the session is None.
+
+        Returns:
+            A list of available packages that exist in the Snowflake Anaconda channel.
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
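
Across every estimator in these files, 1.4.0 changes _batch_inference_validate_snowpark from a pure validity check into a method that also returns the dependency list resolved against the Snowflake Anaconda channel. A minimal, self-contained sketch of that contract; the exception type and the channel lookup here are stand-ins for the package's internal SnowflakeMLException and session-side query:

    from typing import List, Optional, Sequence

    class SnowflakeMLException(Exception):  # stand-in for snowflake.ml's exception type
        pass

    def batch_inference_validate(
        is_fitted: bool,
        session: Optional[object],
        declared_deps: Sequence[str],
        channel_packages: Sequence[str],  # stand-in for the Anaconda-channel query result
        inference_method: str,
    ) -> List[str]:
        if not is_fitted:
            raise SnowflakeMLException(f"Estimator must be fitted before {inference_method}().")
        if session is None:
            raise SnowflakeMLException("A Snowpark session is required for batch inference.")
        # New in 1.4.0: also report which declared dependencies the channel can satisfy.
        available = set(channel_packages)
        return [d for d in declared_deps if d in available]

    print(batch_inference_validate(True, object(), ["scikit-learn", "custom-pkg"], ["scikit-learn"], "predict"))
    # -> ['scikit-learn']
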
@@ -403,7 +409,7 @@ class LinearDiscriminantAnalysis(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )

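The pass_through_cols to drop_input_cols substitution above recurs through the rest of these files: rather than each estimator materializing the pass-through column list per call, it now forwards the boolean flag and lets the inference handler derive the list itself. A sketch of the equivalent handler-side logic, taken directly from the helper removed in the first hunk (the standalone function name is hypothetical):

    from typing import List, Sequence

    def pass_through_columns(
        dataset_columns: Sequence[str], output_cols: Sequence[str], drop_input_cols: bool
    ) -> List[str]:
        """Replicates the removed _get_pass_through_columns helper on the handler side."""
        if drop_input_cols:
            return []
        return list(set(dataset_columns) - set(output_cols))

    # Inputs that are not output columns pass through unless drop_input_cols is set.
    print(sorted(pass_through_columns(["A", "B", "OUTPUT_PRED"], ["OUTPUT_PRED"], False)))  # ['A', 'B']
    print(pass_through_columns(["A", "B", "OUTPUT_PRED"], ["OUTPUT_PRED"], True))           # []
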
@@ -465,16 +471,16 @@ class LinearDiscriminantAnalysis(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"

             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY"
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
-                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY"
+                    expected_dtype = "array"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
+                    expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statements are true:
@@ -492,7 +498,7 @@ class LinearDiscriminantAnalysis(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )

@@ -543,7 +549,7 @@ class LinearDiscriminantAnalysis(BaseTransformer):
                 subproject=_SUBPROJECT,
             )
             output_result, fitted_estimator = model_trainer.train_fit_predict(
-                pass_through_columns=self._get_pass_through_columns(dataset),
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
             )
             self._sklearn_object = fitted_estimator
@@ -561,44 +567,6 @@ class LinearDiscriminantAnalysis(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_

-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """Returns the list of output columns for functions such as predict_proba() and decision_function().
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
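
The _get_output_column_names removal above recurs across all estimator files; the per-class naming logic appears to be consolidated into shared code rather than dropped (cf. snowflake/ml/modeling/framework/base.py +55 -5 in the file list, though this diff does not show the destination). For reference, the core naming branches of the removed helper, restated standalone with the Snowflake identifier normalization omitted:

    from typing import List, Optional, Sequence, Union

    import numpy

    def output_column_names(prefix: str, classes: Optional[Union[numpy.ndarray, list]]) -> List[str]:
        """Mirrors the removed helper's branches (identifier normalization omitted)."""
        if classes is None:
            return [prefix]  # not a classifier: single output column
        if isinstance(classes, numpy.ndarray):
            return [f"{prefix}{c}" for c in classes.tolist()]
        if isinstance(classes, list) and classes and isinstance(classes[0], numpy.ndarray):
            cols: List[str] = []
            for i, cl in enumerate(classes):  # multioutput: one ndarray of classes per output
                if len(cl) == 2:  # binary output: the two classes are complementary, one column
                    cols.append(f"{prefix}{i}_{cl[0]}")
                else:
                    cols.extend(f"{prefix}{i}_{c}" for c in cl.tolist())
            return cols
        return []

    print(output_column_names("PREDICT_PROBA_", numpy.array([0, 1, 2])))
    # -> ['PREDICT_PROBA_0', 'PREDICT_PROBA_1', 'PREDICT_PROBA_2']
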
@@ -640,7 +608,7 @@ class LinearDiscriminantAnalysis(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -707,7 +675,7 @@ class LinearDiscriminantAnalysis(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -770,7 +738,7 @@ class LinearDiscriminantAnalysis(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -835,7 +803,7 @@ class LinearDiscriminantAnalysis(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -891,13 +859,17 @@ class LinearDiscriminantAnalysis(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()

         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
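
The score hunk ties the two changes together: the Snowpark branch now runs _batch_inference_validate_snowpark first and reuses its channel-validated result through self._deps, where 1.3.0 re-read the raw declared dependencies. A toy illustration of that caching pattern (the validation filter is a stand-in; telemetry and column selection are elided):

    from typing import List

    def validate_against_channel(declared: List[str]) -> List[str]:
        """Stand-in for the Anaconda-channel check performed during validation."""
        return [d for d in declared if d != "not-on-anaconda"]

    class Estimator:
        def __init__(self, declared: List[str]) -> None:
            self._declared = declared
            self._deps: List[str] = []

        def _batch_inference_validate_snowpark(self, inference_method: str) -> List[str]:
            return validate_against_channel(self._declared)

        def score_dependencies(self) -> List[str]:
            # 1.4.0: validate once, then reuse the validated list via self._deps.
            self._deps = self._batch_inference_validate_snowpark("score")
            return ["snowflake-snowpark-python"] + self._deps

    print(Estimator(["scikit-learn", "not-on-anaconda"]).score_dependencies())
    # -> ['snowflake-snowpark-python', 'scikit-learn']
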
@@ -971,9 +943,9 @@ class LinearDiscriminantAnalysis(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py
@@ -280,18 +280,24 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
         self._get_model_signatures(dataset)
         return self

-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to validate that batch inference can be run on a Snowpark DataFrame.
+        """Util method to validate that batch inference can be run on a Snowpark DataFrame and
+        return the available packages that exist in the Snowflake Anaconda channel.
+
+        Args:
+            dataset: Snowpark DataFrame.
+            inference_method: The inference method, e.g. predict or score.
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted.
+            SnowflakeMLException: If the session is None.
+
+        Returns:
+            A list of available packages that exist in the Snowflake Anaconda channel.
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -365,7 +371,7 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )

@@ -425,16 +431,16 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"

             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY"
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
-                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY"
+                    expected_dtype = "array"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
+                    expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statements are true:
@@ -452,7 +458,7 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )

@@ -503,7 +509,7 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
                 subproject=_SUBPROJECT,
             )
             output_result, fitted_estimator = model_trainer.train_fit_predict(
-                pass_through_columns=self._get_pass_through_columns(dataset),
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
             )
             self._sklearn_object = fitted_estimator
@@ -521,44 +527,6 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_

-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """Returns the list of output columns for functions such as predict_proba() and decision_function().
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -600,7 +568,7 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -667,7 +635,7 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -730,7 +698,7 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -795,7 +763,7 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -851,13 +819,17 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()

         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -931,9 +903,9 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
snowflake/ml/modeling/ensemble/ada_boost_classifier.py
@@ -305,18 +305,24 @@ class AdaBoostClassifier(BaseTransformer):
         self._get_model_signatures(dataset)
         return self

-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to validate that batch inference can be run on a Snowpark DataFrame.
+        """Util method to validate that batch inference can be run on a Snowpark DataFrame and
+        return the available packages that exist in the Snowflake Anaconda channel.
+
+        Args:
+            dataset: Snowpark DataFrame.
+            inference_method: The inference method, e.g. predict or score.
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted.
+            SnowflakeMLException: If the session is None.
+
+        Returns:
+            A list of available packages that exist in the Snowflake Anaconda channel.
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -390,7 +396,7 @@ class AdaBoostClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )

@@ -450,16 +456,16 @@ class AdaBoostClassifier(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"

             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "ARRAY"
+                # If this is a clustering transformer and the number of output columns does not equal the number of clusters, the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
-                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "ARRAY"
+                    expected_dtype = "array"
+                # If this is a decomposition transformer and the number of output columns does not equal the number of components, the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
+                    expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statements are true:
@@ -477,7 +483,7 @@ class AdaBoostClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )

@@ -528,7 +534,7 @@ class AdaBoostClassifier(BaseTransformer):
                 subproject=_SUBPROJECT,
             )
             output_result, fitted_estimator = model_trainer.train_fit_predict(
-                pass_through_columns=self._get_pass_through_columns(dataset),
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
             )
             self._sklearn_object = fitted_estimator
@@ -546,44 +552,6 @@ class AdaBoostClassifier(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_

-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """Returns the list of output columns for functions such as predict_proba() and decision_function().
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -625,7 +593,7 @@ class AdaBoostClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -692,7 +660,7 @@ class AdaBoostClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -755,7 +723,7 @@ class AdaBoostClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -820,7 +788,7 @@ class AdaBoostClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -876,13 +844,17 @@ class AdaBoostClassifier(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()

         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -956,9 +928,9 @@ class AdaBoostClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):