snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211)
  1. snowflake/ml/_internal/file_utils.py +3 -3
  2. snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
  3. snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
  4. snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
  5. snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
  6. snowflake/ml/_internal/telemetry.py +11 -2
  7. snowflake/ml/_internal/utils/formatting.py +1 -1
  8. snowflake/ml/feature_store/feature_store.py +15 -106
  9. snowflake/ml/fileset/sfcfs.py +4 -3
  10. snowflake/ml/fileset/stage_fs.py +18 -0
  11. snowflake/ml/model/_api.py +9 -9
  12. snowflake/ml/model/_client/model/model_version_impl.py +20 -15
  13. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
  14. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
  15. snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
  16. snowflake/ml/model/_model_composer/model_composer.py +10 -8
  17. snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
  18. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
  19. snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
  20. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
  21. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  22. snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
  23. snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
  24. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
  25. snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
  26. snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
  27. snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
  28. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
  29. snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
  30. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
  31. snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
  32. snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
  33. snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
  34. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  35. snowflake/ml/model/_packager/model_packager.py +8 -6
  36. snowflake/ml/model/custom_model.py +3 -1
  37. snowflake/ml/model/type_hints.py +13 -0
  38. snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
  39. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
  40. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
  41. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
  42. snowflake/ml/modeling/_internal/model_specifications.py +3 -1
  43. snowflake/ml/modeling/_internal/model_trainer.py +2 -2
  44. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
  45. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
  46. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
  47. snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
  48. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
  49. snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
  51. snowflake/ml/modeling/cluster/birch.py +33 -61
  52. snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
  53. snowflake/ml/modeling/cluster/dbscan.py +33 -61
  54. snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
  55. snowflake/ml/modeling/cluster/k_means.py +33 -61
  56. snowflake/ml/modeling/cluster/mean_shift.py +33 -61
  57. snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
  58. snowflake/ml/modeling/cluster/optics.py +33 -61
  59. snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
  60. snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
  61. snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
  62. snowflake/ml/modeling/compose/column_transformer.py +33 -61
  63. snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
  64. snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
  65. snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
  66. snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
  67. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
  68. snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
  69. snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
  70. snowflake/ml/modeling/covariance/oas.py +33 -61
  71. snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
  72. snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
  73. snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
  74. snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
  75. snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
  76. snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
  77. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
  78. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
  79. snowflake/ml/modeling/decomposition/pca.py +33 -61
  80. snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
  81. snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
  82. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
  83. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
  84. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
  85. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
  86. snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
  87. snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
  88. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
  89. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
  90. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
  91. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
  92. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
  93. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
  94. snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
  95. snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
  96. snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
  97. snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
  98. snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
  99. snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
  100. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
  101. snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
  102. snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
  103. snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
  104. snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
  105. snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
  106. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
  107. snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
  108. snowflake/ml/modeling/framework/base.py +55 -5
  109. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
  110. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
  111. snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
  112. snowflake/ml/modeling/impute/knn_imputer.py +33 -61
  113. snowflake/ml/modeling/impute/missing_indicator.py +33 -61
  114. snowflake/ml/modeling/impute/simple_imputer.py +4 -15
  115. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
  116. snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
  117. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
  118. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
  119. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
  120. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
  121. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
  122. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
  123. snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
  124. snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
  125. snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
  126. snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
  127. snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
  128. snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
  129. snowflake/ml/modeling/linear_model/lars.py +33 -61
  130. snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
  131. snowflake/ml/modeling/linear_model/lasso.py +33 -61
  132. snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
  133. snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
  134. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
  135. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
  136. snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
  137. snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
  138. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
  140. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
  141. snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
  142. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
  143. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
  144. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
  145. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
  146. snowflake/ml/modeling/linear_model/perceptron.py +33 -61
  147. snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
  148. snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
  149. snowflake/ml/modeling/linear_model/ridge.py +33 -61
  150. snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
  151. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
  152. snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
  153. snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
  154. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
  155. snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
  156. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
  157. snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
  158. snowflake/ml/modeling/manifold/isomap.py +33 -61
  159. snowflake/ml/modeling/manifold/mds.py +33 -61
  160. snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
  161. snowflake/ml/modeling/manifold/tsne.py +33 -61
  162. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
  163. snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
  164. snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
  165. snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
  166. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
  167. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
  168. snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
  169. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
  170. snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
  171. snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
  172. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
  173. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
  174. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
  175. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
  176. snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
  177. snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
  178. snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
  179. snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
  180. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
  181. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
  182. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
  183. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
  184. snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
  185. snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
  186. snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
  187. snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
  188. snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
  189. snowflake/ml/modeling/svm/linear_svc.py +33 -61
  190. snowflake/ml/modeling/svm/linear_svr.py +33 -61
  191. snowflake/ml/modeling/svm/nu_svc.py +33 -61
  192. snowflake/ml/modeling/svm/nu_svr.py +33 -61
  193. snowflake/ml/modeling/svm/svc.py +33 -61
  194. snowflake/ml/modeling/svm/svr.py +33 -61
  195. snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
  196. snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
  197. snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
  198. snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
  199. snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
  200. snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
  201. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
  202. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
  203. snowflake/ml/registry/_manager/model_manager.py +6 -2
  204. snowflake/ml/registry/model_registry.py +100 -27
  205. snowflake/ml/registry/registry.py +6 -2
  206. snowflake/ml/version.py +1 -1
  207. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
  208. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
  209. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
  210. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
  211. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
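The diff excerpts below cover NuSVR, SVC, and SVR, but the same mechanical refactor explains the repeated +33 -61 deltas across the generated modeling estimators: each class drops its private _get_pass_through_columns and _get_output_column_names helpers, batch-inference calls pass drop_input_cols=self._drop_input_cols instead of a precomputed pass_through_cols list, and score() resolves its dependencies through _batch_inference_validate_snowpark. As a hypothetical sketch (not the actual 1.4.0 handler code; the shared logic presumably moved into framework/base.py and the reworked snowpark handlers, both of which change in this release), the old pass-through list can be recovered from the new flag like this:

    # Hypothetical sketch: recovering the old pass-through column list
    # from the new drop_input_cols flag. Names are illustrative only.
    from typing import List

    def pass_through_columns(
        dataset_columns: List[str],
        output_cols: List[str],
        drop_input_cols: bool,
    ) -> List[str]:
        if drop_input_cols:
            return []
        # The same set arithmetic the removed per-estimator helper used.
        return list(set(dataset_columns) - set(output_cols))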
@@ -321,18 +321,24 @@ class NuSVR(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -406,7 +412,7 @@ class NuSVR(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -466,16 +472,16 @@ class NuSVR(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"
 
         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
             if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "ARRAY"
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
             elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statemetns are true:
@@ -493,7 +499,7 @@ class NuSVR(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -544,7 +550,7 @@ class NuSVR(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -562,44 +568,6 @@ class NuSVR(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-            Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -639,7 +607,7 @@ class NuSVR(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -704,7 +672,7 @@ class NuSVR(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -765,7 +733,7 @@ class NuSVR(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -830,7 +798,7 @@ class NuSVR(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -886,13 +854,17 @@ class NuSVR(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -966,9 +938,9 @@ class NuSVR(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
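The score() hunk above also changes where dependencies come from: instead of self._get_dependencies(), score() now calls _batch_inference_validate_snowpark first, whose expanded docstring says it returns the packages available in the snowflake anaconda channel, and caches the result in self._deps. A minimal sketch of that flow, using only names visible in the diff (the package-resolution internals are not part of this excerpt):

    # Minimal sketch of the new score() dependency flow.
    def score_dependencies(estimator, dataset) -> list:
        # Validate that batch inference can run, and cache the packages
        # resolved against the Snowflake Anaconda channel.
        estimator._deps = estimator._batch_inference_validate_snowpark(
            dataset=dataset,
            inference_method="score",
        )
        return ["snowflake-snowpark-python"] + estimator._deps

The identical change appears in the SVC and SVR hunks that follow.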
@@ -363,18 +363,24 @@ class SVC(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -448,7 +454,7 @@ class SVC(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -508,16 +514,16 @@ class SVC(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"
 
         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
             if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "ARRAY"
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
             elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statemetns are true:
@@ -535,7 +541,7 @@ class SVC(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -586,7 +592,7 @@ class SVC(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -604,44 +610,6 @@ class SVC(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-            Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -683,7 +651,7 @@ class SVC(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -750,7 +718,7 @@ class SVC(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -813,7 +781,7 @@ class SVC(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
            )
 
@@ -878,7 +846,7 @@ class SVC(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -934,13 +902,17 @@ class SVC(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
        if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -1014,9 +986,9 @@ class SVC(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -324,18 +324,24 @@ class SVR(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -409,7 +415,7 @@ class SVR(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -469,16 +475,16 @@ class SVR(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"
 
         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
             if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "ARRAY"
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
             elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statemetns are true:
@@ -496,7 +502,7 @@ class SVR(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -547,7 +553,7 @@ class SVR(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -565,44 +571,6 @@ class SVR(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-            Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -642,7 +610,7 @@ class SVR(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -707,7 +675,7 @@ class SVR(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -768,7 +736,7 @@ class SVR(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -833,7 +801,7 @@ class SVR(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -889,13 +857,17 @@ class SVR(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -969,9 +941,9 @@ class SVR(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
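For reference, the _get_output_column_names helper deleted from all three classes above derived classifier output column names from the fitted estimator's classes_ attribute. Below is a condensed, runnable rendition of its core naming logic, based on the deleted lines (Snowflake identifier resolution and renaming omitted); its removal alongside the +55 lines in snowflake/ml/modeling/framework/base.py suggests the logic was centralized rather than dropped, though this excerpt does not show the destination:

    # Condensed rendition of the removed helper's naming logic.
    from typing import List
    import numpy

    def output_column_names(prefix: str, classes=None) -> List[str]:
        if classes is None:
            # Not a classifier: a single column named by the prefix.
            return [prefix]
        if isinstance(classes, numpy.ndarray):
            return [f"{prefix}{c}" for c in classes.tolist()]
        if isinstance(classes, list) and classes and isinstance(classes[0], numpy.ndarray):
            # Multioutput estimator: classes_ is a list of ndarrays.
            cols: List[str] = []
            for i, cl in enumerate(classes):
                if len(cl) == 2:
                    # Binary output: one column suffices, the classes are complementary.
                    cols.append(f"{prefix}{i}_{cl[0]}")
                else:
                    cols.extend(f"{prefix}{i}_{c}" for c in cl.tolist())
            return cols
        return []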