snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211)
  1. snowflake/ml/_internal/file_utils.py +3 -3
  2. snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
  3. snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
  4. snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
  5. snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
  6. snowflake/ml/_internal/telemetry.py +11 -2
  7. snowflake/ml/_internal/utils/formatting.py +1 -1
  8. snowflake/ml/feature_store/feature_store.py +15 -106
  9. snowflake/ml/fileset/sfcfs.py +4 -3
  10. snowflake/ml/fileset/stage_fs.py +18 -0
  11. snowflake/ml/model/_api.py +9 -9
  12. snowflake/ml/model/_client/model/model_version_impl.py +20 -15
  13. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
  14. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
  15. snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
  16. snowflake/ml/model/_model_composer/model_composer.py +10 -8
  17. snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
  18. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
  19. snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
  20. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
  21. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  22. snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
  23. snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
  24. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
  25. snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
  26. snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
  27. snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
  28. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
  29. snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
  30. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
  31. snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
  32. snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
  33. snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
  34. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  35. snowflake/ml/model/_packager/model_packager.py +8 -6
  36. snowflake/ml/model/custom_model.py +3 -1
  37. snowflake/ml/model/type_hints.py +13 -0
  38. snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
  39. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
  40. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
  41. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
  42. snowflake/ml/modeling/_internal/model_specifications.py +3 -1
  43. snowflake/ml/modeling/_internal/model_trainer.py +2 -2
  44. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
  45. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
  46. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
  47. snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
  48. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
  49. snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
  51. snowflake/ml/modeling/cluster/birch.py +33 -61
  52. snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
  53. snowflake/ml/modeling/cluster/dbscan.py +33 -61
  54. snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
  55. snowflake/ml/modeling/cluster/k_means.py +33 -61
  56. snowflake/ml/modeling/cluster/mean_shift.py +33 -61
  57. snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
  58. snowflake/ml/modeling/cluster/optics.py +33 -61
  59. snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
  60. snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
  61. snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
  62. snowflake/ml/modeling/compose/column_transformer.py +33 -61
  63. snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
  64. snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
  65. snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
  66. snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
  67. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
  68. snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
  69. snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
  70. snowflake/ml/modeling/covariance/oas.py +33 -61
  71. snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
  72. snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
  73. snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
  74. snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
  75. snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
  76. snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
  77. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
  78. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
  79. snowflake/ml/modeling/decomposition/pca.py +33 -61
  80. snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
  81. snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
  82. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
  83. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
  84. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
  85. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
  86. snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
  87. snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
  88. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
  89. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
  90. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
  91. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
  92. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
  93. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
  94. snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
  95. snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
  96. snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
  97. snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
  98. snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
  99. snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
  100. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
  101. snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
  102. snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
  103. snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
  104. snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
  105. snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
  106. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
  107. snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
  108. snowflake/ml/modeling/framework/base.py +55 -5
  109. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
  110. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
  111. snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
  112. snowflake/ml/modeling/impute/knn_imputer.py +33 -61
  113. snowflake/ml/modeling/impute/missing_indicator.py +33 -61
  114. snowflake/ml/modeling/impute/simple_imputer.py +4 -15
  115. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
  116. snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
  117. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
  118. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
  119. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
  120. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
  121. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
  122. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
  123. snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
  124. snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
  125. snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
  126. snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
  127. snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
  128. snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
  129. snowflake/ml/modeling/linear_model/lars.py +33 -61
  130. snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
  131. snowflake/ml/modeling/linear_model/lasso.py +33 -61
  132. snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
  133. snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
  134. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
  135. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
  136. snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
  137. snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
  138. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
  140. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
  141. snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
  142. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
  143. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
  144. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
  145. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
  146. snowflake/ml/modeling/linear_model/perceptron.py +33 -61
  147. snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
  148. snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
  149. snowflake/ml/modeling/linear_model/ridge.py +33 -61
  150. snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
  151. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
  152. snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
  153. snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
  154. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
  155. snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
  156. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
  157. snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
  158. snowflake/ml/modeling/manifold/isomap.py +33 -61
  159. snowflake/ml/modeling/manifold/mds.py +33 -61
  160. snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
  161. snowflake/ml/modeling/manifold/tsne.py +33 -61
  162. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
  163. snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
  164. snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
  165. snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
  166. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
  167. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
  168. snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
  169. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
  170. snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
  171. snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
  172. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
  173. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
  174. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
  175. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
  176. snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
  177. snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
  178. snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
  179. snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
  180. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
  181. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
  182. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
  183. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
  184. snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
  185. snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
  186. snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
  187. snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
  188. snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
  189. snowflake/ml/modeling/svm/linear_svc.py +33 -61
  190. snowflake/ml/modeling/svm/linear_svr.py +33 -61
  191. snowflake/ml/modeling/svm/nu_svc.py +33 -61
  192. snowflake/ml/modeling/svm/nu_svr.py +33 -61
  193. snowflake/ml/modeling/svm/svc.py +33 -61
  194. snowflake/ml/modeling/svm/svr.py +33 -61
  195. snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
  196. snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
  197. snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
  198. snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
  199. snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
  200. snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
  201. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
  202. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
  203. snowflake/ml/registry/_manager/model_manager.py +6 -2
  204. snowflake/ml/registry/model_registry.py +100 -27
  205. snowflake/ml/registry/registry.py +6 -2
  206. snowflake/ml/version.py +1 -1
  207. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
  208. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
  209. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
  210. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
  211. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
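
The hunks below illustrate the recurring +33/-61 refactor that accounts for most of the modeling files listed above, using snowflake/ml/modeling/svm/linear_svc.py, linear_svr.py, and nu_svc.py as representative examples.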
@@ -354,18 +354,24 @@ class LinearSVC(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
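
For context on the removal above: the generated estimators no longer compute pass-through columns themselves. As the call-site hunks below show, every `pass_through_cols=self._get_pass_through_columns(dataset)` argument becomes `drop_input_cols=self._drop_input_cols`, and `train_fit_predict` likewise takes `drop_input_cols` instead of `pass_through_columns`, so the handlers derive the pass-through set internally. A minimal sketch of the behavior the removed helper implemented, written as a standalone function (the function and parameter names here are illustrative, not package API):

    from typing import List

    def pass_through_columns(dataset_columns: List[str], output_cols: List[str], drop_input_cols: bool) -> List[str]:
        # Mirrors the removed _get_pass_through_columns: when inputs are
        # dropped, nothing is carried through to the output DataFrame.
        if drop_input_cols:
            return []
        # Otherwise every column that is not an output column passes through.
        return list(set(dataset_columns) - set(output_cols))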
@@ -439,7 +445,7 @@ class LinearSVC(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -499,16 +505,16 @@ class LinearSVC(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"
 
             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
@@ -526,7 +532,7 @@ class LinearSVC(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -577,7 +583,7 @@ class LinearSVC(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -595,44 +601,6 @@ class LinearSVC(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
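
The `_get_output_column_names` helper removed above generated the per-class output column names for predict_proba() and decision_function() results. A small sketch of the naming scheme it implemented for the common case where `classes_` is an ndarray (the prefix and class values below are illustrative only):

    import numpy

    classes = numpy.array([0, 1, 2])   # stand-in for a fitted classifier's classes_
    prefix = "predict_proba_"          # stand-in for output_cols_prefix
    cols = [f"{prefix}{str(c)}" for c in classes.tolist()]
    print(cols)  # ['predict_proba_0', 'predict_proba_1', 'predict_proba_2']

For multioutput estimators, where `classes_` is a list of ndarrays, the removed body additionally encoded the output index and collapsed binary outputs to a single column, as the deleted lines above show.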
@@ -672,7 +640,7 @@ class LinearSVC(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -737,7 +705,7 @@ class LinearSVC(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -800,7 +768,7 @@ class LinearSVC(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -865,7 +833,7 @@ class LinearSVC(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -921,13 +889,17 @@ class LinearSVC(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
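
In the score() hunk above, the dependency list for the scoring stored procedure is now resolved through `_batch_inference_validate_snowpark`, which per its new docstring returns the packages available in the Snowflake Anaconda channel, rather than being taken verbatim from `self._get_dependencies()`. A condensed sketch of the new flow, assuming `transformer` is a fitted estimator and `dataset` a Snowpark DataFrame (the wrapper function is illustrative, not package API):

    def build_score_kwargs(transformer, dataset):
        # Validate up front that batch scoring can run on this DataFrame and
        # collect the Anaconda-channel-resolved dependency list.
        deps = transformer._batch_inference_validate_snowpark(dataset=dataset, inference_method="score")
        return dict(
            session=dataset._session,
            dependencies=["snowflake-snowpark-python"] + deps,
            score_sproc_imports=["sklearn"],
        )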
@@ -1001,9 +973,9 @@ class LinearSVC(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -326,18 +326,24 @@ class LinearSVR(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -411,7 +417,7 @@ class LinearSVR(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -471,16 +477,16 @@ class LinearSVR(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"
 
             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
@@ -498,7 +504,7 @@ class LinearSVR(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -549,7 +555,7 @@ class LinearSVR(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -567,44 +573,6 @@ class LinearSVR(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -644,7 +612,7 @@ class LinearSVR(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -709,7 +677,7 @@ class LinearSVR(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -770,7 +738,7 @@ class LinearSVR(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -835,7 +803,7 @@ class LinearSVR(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -891,13 +859,17 @@ class LinearSVR(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -971,9 +943,9 @@ class LinearSVR(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -360,18 +360,24 @@ class NuSVC(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -445,7 +451,7 @@ class NuSVC(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -505,16 +511,16 @@ class NuSVC(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"
 
             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
@@ -532,7 +538,7 @@ class NuSVC(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -583,7 +589,7 @@ class NuSVC(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -601,44 +607,6 @@ class NuSVC(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -680,7 +648,7 @@ class NuSVC(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -747,7 +715,7 @@ class NuSVC(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -810,7 +778,7 @@ class NuSVC(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -875,7 +843,7 @@ class NuSVC(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
            )
 
@@ -931,13 +899,17 @@ class NuSVC(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
        if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -1011,9 +983,9 @@ class NuSVC(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):