snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211)
  1. snowflake/ml/_internal/file_utils.py +3 -3
  2. snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
  3. snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
  4. snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
  5. snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
  6. snowflake/ml/_internal/telemetry.py +11 -2
  7. snowflake/ml/_internal/utils/formatting.py +1 -1
  8. snowflake/ml/feature_store/feature_store.py +15 -106
  9. snowflake/ml/fileset/sfcfs.py +4 -3
  10. snowflake/ml/fileset/stage_fs.py +18 -0
  11. snowflake/ml/model/_api.py +9 -9
  12. snowflake/ml/model/_client/model/model_version_impl.py +20 -15
  13. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
  14. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
  15. snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
  16. snowflake/ml/model/_model_composer/model_composer.py +10 -8
  17. snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
  18. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
  19. snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
  20. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
  21. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  22. snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
  23. snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
  24. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
  25. snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
  26. snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
  27. snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
  28. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
  29. snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
  30. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
  31. snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
  32. snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
  33. snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
  34. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  35. snowflake/ml/model/_packager/model_packager.py +8 -6
  36. snowflake/ml/model/custom_model.py +3 -1
  37. snowflake/ml/model/type_hints.py +13 -0
  38. snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
  39. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
  40. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
  41. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
  42. snowflake/ml/modeling/_internal/model_specifications.py +3 -1
  43. snowflake/ml/modeling/_internal/model_trainer.py +2 -2
  44. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
  45. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
  46. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
  47. snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
  48. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
  49. snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
  51. snowflake/ml/modeling/cluster/birch.py +33 -61
  52. snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
  53. snowflake/ml/modeling/cluster/dbscan.py +33 -61
  54. snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
  55. snowflake/ml/modeling/cluster/k_means.py +33 -61
  56. snowflake/ml/modeling/cluster/mean_shift.py +33 -61
  57. snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
  58. snowflake/ml/modeling/cluster/optics.py +33 -61
  59. snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
  60. snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
  61. snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
  62. snowflake/ml/modeling/compose/column_transformer.py +33 -61
  63. snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
  64. snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
  65. snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
  66. snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
  67. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
  68. snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
  69. snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
  70. snowflake/ml/modeling/covariance/oas.py +33 -61
  71. snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
  72. snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
  73. snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
  74. snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
  75. snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
  76. snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
  77. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
  78. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
  79. snowflake/ml/modeling/decomposition/pca.py +33 -61
  80. snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
  81. snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
  82. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
  83. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
  84. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
  85. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
  86. snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
  87. snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
  88. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
  89. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
  90. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
  91. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
  92. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
  93. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
  94. snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
  95. snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
  96. snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
  97. snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
  98. snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
  99. snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
  100. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
  101. snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
  102. snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
  103. snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
  104. snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
  105. snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
  106. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
  107. snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
  108. snowflake/ml/modeling/framework/base.py +55 -5
  109. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
  110. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
  111. snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
  112. snowflake/ml/modeling/impute/knn_imputer.py +33 -61
  113. snowflake/ml/modeling/impute/missing_indicator.py +33 -61
  114. snowflake/ml/modeling/impute/simple_imputer.py +4 -15
  115. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
  116. snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
  117. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
  118. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
  119. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
  120. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
  121. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
  122. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
  123. snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
  124. snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
  125. snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
  126. snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
  127. snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
  128. snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
  129. snowflake/ml/modeling/linear_model/lars.py +33 -61
  130. snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
  131. snowflake/ml/modeling/linear_model/lasso.py +33 -61
  132. snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
  133. snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
  134. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
  135. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
  136. snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
  137. snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
  138. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
  140. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
  141. snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
  142. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
  143. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
  144. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
  145. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
  146. snowflake/ml/modeling/linear_model/perceptron.py +33 -61
  147. snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
  148. snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
  149. snowflake/ml/modeling/linear_model/ridge.py +33 -61
  150. snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
  151. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
  152. snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
  153. snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
  154. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
  155. snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
  156. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
  157. snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
  158. snowflake/ml/modeling/manifold/isomap.py +33 -61
  159. snowflake/ml/modeling/manifold/mds.py +33 -61
  160. snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
  161. snowflake/ml/modeling/manifold/tsne.py +33 -61
  162. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
  163. snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
  164. snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
  165. snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
  166. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
  167. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
  168. snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
  169. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
  170. snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
  171. snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
  172. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
  173. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
  174. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
  175. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
  176. snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
  177. snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
  178. snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
  179. snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
  180. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
  181. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
  182. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
  183. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
  184. snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
  185. snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
  186. snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
  187. snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
  188. snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
  189. snowflake/ml/modeling/svm/linear_svc.py +33 -61
  190. snowflake/ml/modeling/svm/linear_svr.py +33 -61
  191. snowflake/ml/modeling/svm/nu_svc.py +33 -61
  192. snowflake/ml/modeling/svm/nu_svr.py +33 -61
  193. snowflake/ml/modeling/svm/svc.py +33 -61
  194. snowflake/ml/modeling/svm/svr.py +33 -61
  195. snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
  196. snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
  197. snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
  198. snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
  199. snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
  200. snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
  201. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
  202. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
  203. snowflake/ml/registry/_manager/model_manager.py +6 -2
  204. snowflake/ml/registry/model_registry.py +100 -27
  205. snowflake/ml/registry/registry.py +6 -2
  206. snowflake/ml/version.py +1 -1
  207. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
  208. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
  209. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
  210. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
  211. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
@@ -362,18 +362,24 @@ class PassiveAggressiveClassifier(BaseTransformer):
362
362
  self._get_model_signatures(dataset)
363
363
  return self
364
364
 
365
- def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
366
- if self._drop_input_cols:
367
- return []
368
- else:
369
- return list(set(dataset.columns) - set(self.output_cols))
370
-
371
365
  def _batch_inference_validate_snowpark(
372
366
  self,
373
367
  dataset: DataFrame,
374
368
  inference_method: str,
375
369
  ) -> List[str]:
376
- """Util method to run validate that batch inference can be run on a snowpark dataframe.
370
+ """Util method to run validate that batch inference can be run on a snowpark dataframe and
371
+ return the available package that exists in the snowflake anaconda channel
372
+
373
+ Args:
374
+ dataset: snowpark dataframe
375
+ inference_method: the inference method such as predict, score...
376
+
377
+ Raises:
378
+ SnowflakeMLException: If the estimator is not fitted, raise error
379
+ SnowflakeMLException: If the session is None, raise error
380
+
381
+ Returns:
382
+ A list of available package that exists in the snowflake anaconda channel
377
383
  """
378
384
  if not self._is_fitted:
379
385
  raise exceptions.SnowflakeMLException(
@@ -447,7 +453,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
447
453
  transform_kwargs = dict(
448
454
  session = dataset._session,
449
455
  dependencies = self._deps,
450
- pass_through_cols = self._get_pass_through_columns(dataset),
456
+ drop_input_cols = self._drop_input_cols,
451
457
  expected_output_cols_type = expected_type_inferred,
452
458
  )
453
459
 
@@ -507,16 +513,16 @@ class PassiveAggressiveClassifier(BaseTransformer):
507
513
  # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
508
514
  # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
509
515
  # each row containing a list of values.
510
- expected_dtype = "ARRAY"
516
+ expected_dtype = "array"
511
517
 
512
518
  # If we were unable to assign a type to this transform in the factory, infer the type here.
513
519
  if expected_dtype == "":
514
- # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
520
+ # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
515
521
  if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
516
- expected_dtype = "ARRAY"
517
- # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
522
+ expected_dtype = "array"
523
+ # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
518
524
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
519
- expected_dtype = "ARRAY"
525
+ expected_dtype = "array"
520
526
  else:
521
527
  output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
522
528
  # We can only infer the output types from the input types if the following two statemetns are true:
@@ -534,7 +540,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
534
540
  transform_kwargs = dict(
535
541
  session = dataset._session,
536
542
  dependencies = self._deps,
537
- pass_through_cols = self._get_pass_through_columns(dataset),
543
+ drop_input_cols = self._drop_input_cols,
538
544
  expected_output_cols_type = expected_dtype,
539
545
  )
540
546
 
@@ -585,7 +591,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
585
591
  subproject=_SUBPROJECT,
586
592
  )
587
593
  output_result, fitted_estimator = model_trainer.train_fit_predict(
588
- pass_through_columns=self._get_pass_through_columns(dataset),
594
+ drop_input_cols=self._drop_input_cols,
589
595
  expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
590
596
  )
591
597
  self._sklearn_object = fitted_estimator
@@ -603,44 +609,6 @@ class PassiveAggressiveClassifier(BaseTransformer):
603
609
  assert self._sklearn_object is not None
604
610
  return self._sklearn_object.embedding_
605
611
 
606
-
607
- def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
608
- """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
609
- Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
610
- """
611
- output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
612
- if output_cols:
613
- output_cols = [
614
- identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
615
- for c in output_cols
616
- ]
617
- elif getattr(self._sklearn_object, "classes_", None) is None:
618
- output_cols = [output_cols_prefix]
619
- elif self._sklearn_object is not None:
620
- classes = self._sklearn_object.classes_
621
- if isinstance(classes, numpy.ndarray):
622
- output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
623
- elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
624
- # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
625
- output_cols = []
626
- for i, cl in enumerate(classes):
627
- # For binary classification, there is only one output column for each class
628
- # ndarray as the two classes are complementary.
629
- if len(cl) == 2:
630
- output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
631
- else:
632
- output_cols.extend([
633
- f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
634
- ])
635
- else:
636
- output_cols = []
637
-
638
- # Make sure column names are valid snowflake identifiers.
639
- assert output_cols is not None # Make MyPy happy
640
- rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
641
-
642
- return rv
643
-
644
612
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
645
613
  @telemetry.send_api_usage_telemetry(
646
614
  project=_PROJECT,
@@ -680,7 +648,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
680
648
  transform_kwargs = dict(
681
649
  session=dataset._session,
682
650
  dependencies=self._deps,
683
- pass_through_cols=self._get_pass_through_columns(dataset),
651
+ drop_input_cols = self._drop_input_cols,
684
652
  expected_output_cols_type="float",
685
653
  )
686
654
 
@@ -745,7 +713,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
745
713
  transform_kwargs = dict(
746
714
  session=dataset._session,
747
715
  dependencies=self._deps,
748
- pass_through_cols=self._get_pass_through_columns(dataset),
716
+ drop_input_cols = self._drop_input_cols,
749
717
  expected_output_cols_type="float",
750
718
  )
751
719
  elif isinstance(dataset, pd.DataFrame):
@@ -808,7 +776,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
808
776
  transform_kwargs = dict(
809
777
  session=dataset._session,
810
778
  dependencies=self._deps,
811
- pass_through_cols=self._get_pass_through_columns(dataset),
779
+ drop_input_cols = self._drop_input_cols,
812
780
  expected_output_cols_type="float",
813
781
  )
814
782
 
@@ -873,7 +841,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
873
841
  transform_kwargs = dict(
874
842
  session=dataset._session,
875
843
  dependencies=self._deps,
876
- pass_through_cols=self._get_pass_through_columns(dataset),
844
+ drop_input_cols = self._drop_input_cols,
877
845
  expected_output_cols_type="float",
878
846
  )
879
847
 
@@ -929,13 +897,17 @@ class PassiveAggressiveClassifier(BaseTransformer):
929
897
  transform_kwargs: ScoreKwargsTypedDict = dict()
930
898
 
931
899
  if isinstance(dataset, DataFrame):
900
+ self._deps = self._batch_inference_validate_snowpark(
901
+ dataset=dataset,
902
+ inference_method="score",
903
+ )
932
904
  selected_cols = self._get_active_columns()
933
905
  if len(selected_cols) > 0:
934
906
  dataset = dataset.select(selected_cols)
935
907
  assert isinstance(dataset._session, Session) # keep mypy happy
936
908
  transform_kwargs = dict(
937
909
  session=dataset._session,
938
- dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
910
+ dependencies=["snowflake-snowpark-python"] + self._deps,
939
911
  score_sproc_imports=['sklearn'],
940
912
  )
941
913
  elif isinstance(dataset, pd.DataFrame):
@@ -1009,9 +981,9 @@ class PassiveAggressiveClassifier(BaseTransformer):
1009
981
  transform_kwargs = dict(
1010
982
  session = dataset._session,
1011
983
  dependencies = self._deps,
1012
- pass_through_cols = self._get_pass_through_columns(dataset),
1013
- expected_output_cols_type = "array",
1014
- n_neighbors = n_neighbors,
984
+ drop_input_cols = self._drop_input_cols,
985
+ expected_output_cols_type="array",
986
+ n_neighbors = n_neighbors,
1015
987
  return_distance = return_distance
1016
988
  )
1017
989
  elif isinstance(dataset, pd.DataFrame):
@@ -348,18 +348,24 @@ class PassiveAggressiveRegressor(BaseTransformer):
348
348
  self._get_model_signatures(dataset)
349
349
  return self
350
350
 
351
- def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
352
- if self._drop_input_cols:
353
- return []
354
- else:
355
- return list(set(dataset.columns) - set(self.output_cols))
356
-
357
351
  def _batch_inference_validate_snowpark(
358
352
  self,
359
353
  dataset: DataFrame,
360
354
  inference_method: str,
361
355
  ) -> List[str]:
362
- """Util method to run validate that batch inference can be run on a snowpark dataframe.
356
+ """Util method to run validate that batch inference can be run on a snowpark dataframe and
357
+ return the available package that exists in the snowflake anaconda channel
358
+
359
+ Args:
360
+ dataset: snowpark dataframe
361
+ inference_method: the inference method such as predict, score...
362
+
363
+ Raises:
364
+ SnowflakeMLException: If the estimator is not fitted, raise error
365
+ SnowflakeMLException: If the session is None, raise error
366
+
367
+ Returns:
368
+ A list of available package that exists in the snowflake anaconda channel
363
369
  """
364
370
  if not self._is_fitted:
365
371
  raise exceptions.SnowflakeMLException(
@@ -433,7 +439,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
433
439
  transform_kwargs = dict(
434
440
  session = dataset._session,
435
441
  dependencies = self._deps,
436
- pass_through_cols = self._get_pass_through_columns(dataset),
442
+ drop_input_cols = self._drop_input_cols,
437
443
  expected_output_cols_type = expected_type_inferred,
438
444
  )
439
445
 
@@ -493,16 +499,16 @@ class PassiveAggressiveRegressor(BaseTransformer):
493
499
  # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
494
500
  # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
495
501
  # each row containing a list of values.
496
- expected_dtype = "ARRAY"
502
+ expected_dtype = "array"
497
503
 
498
504
  # If we were unable to assign a type to this transform in the factory, infer the type here.
499
505
  if expected_dtype == "":
500
- # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
506
+ # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
501
507
  if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
502
- expected_dtype = "ARRAY"
503
- # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
508
+ expected_dtype = "array"
509
+ # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
504
510
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
505
- expected_dtype = "ARRAY"
511
+ expected_dtype = "array"
506
512
  else:
507
513
  output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
508
514
  # We can only infer the output types from the input types if the following two statemetns are true:
@@ -520,7 +526,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
520
526
  transform_kwargs = dict(
521
527
  session = dataset._session,
522
528
  dependencies = self._deps,
523
- pass_through_cols = self._get_pass_through_columns(dataset),
529
+ drop_input_cols = self._drop_input_cols,
524
530
  expected_output_cols_type = expected_dtype,
525
531
  )
526
532
 
@@ -571,7 +577,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
571
577
  subproject=_SUBPROJECT,
572
578
  )
573
579
  output_result, fitted_estimator = model_trainer.train_fit_predict(
574
- pass_through_columns=self._get_pass_through_columns(dataset),
580
+ drop_input_cols=self._drop_input_cols,
575
581
  expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
576
582
  )
577
583
  self._sklearn_object = fitted_estimator
@@ -589,44 +595,6 @@ class PassiveAggressiveRegressor(BaseTransformer):
589
595
  assert self._sklearn_object is not None
590
596
  return self._sklearn_object.embedding_
591
597
 
592
-
593
- def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
594
- """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
595
- Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
596
- """
597
- output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
598
- if output_cols:
599
- output_cols = [
600
- identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
601
- for c in output_cols
602
- ]
603
- elif getattr(self._sklearn_object, "classes_", None) is None:
604
- output_cols = [output_cols_prefix]
605
- elif self._sklearn_object is not None:
606
- classes = self._sklearn_object.classes_
607
- if isinstance(classes, numpy.ndarray):
608
- output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
609
- elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
610
- # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
611
- output_cols = []
612
- for i, cl in enumerate(classes):
613
- # For binary classification, there is only one output column for each class
614
- # ndarray as the two classes are complementary.
615
- if len(cl) == 2:
616
- output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
617
- else:
618
- output_cols.extend([
619
- f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
620
- ])
621
- else:
622
- output_cols = []
623
-
624
- # Make sure column names are valid snowflake identifiers.
625
- assert output_cols is not None # Make MyPy happy
626
- rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
627
-
628
- return rv
629
-
630
598
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
631
599
  @telemetry.send_api_usage_telemetry(
632
600
  project=_PROJECT,
@@ -666,7 +634,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
666
634
  transform_kwargs = dict(
667
635
  session=dataset._session,
668
636
  dependencies=self._deps,
669
- pass_through_cols=self._get_pass_through_columns(dataset),
637
+ drop_input_cols = self._drop_input_cols,
670
638
  expected_output_cols_type="float",
671
639
  )
672
640
 
@@ -731,7 +699,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
731
699
  transform_kwargs = dict(
732
700
  session=dataset._session,
733
701
  dependencies=self._deps,
734
- pass_through_cols=self._get_pass_through_columns(dataset),
702
+ drop_input_cols = self._drop_input_cols,
735
703
  expected_output_cols_type="float",
736
704
  )
737
705
  elif isinstance(dataset, pd.DataFrame):
@@ -792,7 +760,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
792
760
  transform_kwargs = dict(
793
761
  session=dataset._session,
794
762
  dependencies=self._deps,
795
- pass_through_cols=self._get_pass_through_columns(dataset),
763
+ drop_input_cols = self._drop_input_cols,
796
764
  expected_output_cols_type="float",
797
765
  )
798
766
 
@@ -857,7 +825,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
857
825
  transform_kwargs = dict(
858
826
  session=dataset._session,
859
827
  dependencies=self._deps,
860
- pass_through_cols=self._get_pass_through_columns(dataset),
828
+ drop_input_cols = self._drop_input_cols,
861
829
  expected_output_cols_type="float",
862
830
  )
863
831
 
@@ -913,13 +881,17 @@ class PassiveAggressiveRegressor(BaseTransformer):
913
881
  transform_kwargs: ScoreKwargsTypedDict = dict()
914
882
 
915
883
  if isinstance(dataset, DataFrame):
884
+ self._deps = self._batch_inference_validate_snowpark(
885
+ dataset=dataset,
886
+ inference_method="score",
887
+ )
916
888
  selected_cols = self._get_active_columns()
917
889
  if len(selected_cols) > 0:
918
890
  dataset = dataset.select(selected_cols)
919
891
  assert isinstance(dataset._session, Session) # keep mypy happy
920
892
  transform_kwargs = dict(
921
893
  session=dataset._session,
922
- dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
894
+ dependencies=["snowflake-snowpark-python"] + self._deps,
923
895
  score_sproc_imports=['sklearn'],
924
896
  )
925
897
  elif isinstance(dataset, pd.DataFrame):
@@ -993,9 +965,9 @@ class PassiveAggressiveRegressor(BaseTransformer):
993
965
  transform_kwargs = dict(
994
966
  session = dataset._session,
995
967
  dependencies = self._deps,
996
- pass_through_cols = self._get_pass_through_columns(dataset),
997
- expected_output_cols_type = "array",
998
- n_neighbors = n_neighbors,
968
+ drop_input_cols = self._drop_input_cols,
969
+ expected_output_cols_type="array",
970
+ n_neighbors = n_neighbors,
999
971
  return_distance = return_distance
1000
972
  )
1001
973
  elif isinstance(dataset, pd.DataFrame):
@@ -361,18 +361,24 @@ class Perceptron(BaseTransformer):
361
361
  self._get_model_signatures(dataset)
362
362
  return self
363
363
 
364
- def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
365
- if self._drop_input_cols:
366
- return []
367
- else:
368
- return list(set(dataset.columns) - set(self.output_cols))
369
-
370
364
  def _batch_inference_validate_snowpark(
371
365
  self,
372
366
  dataset: DataFrame,
373
367
  inference_method: str,
374
368
  ) -> List[str]:
375
- """Util method to run validate that batch inference can be run on a snowpark dataframe.
369
+ """Util method to validate that batch inference can be run on a snowpark dataframe and
370
+ return the available packages that exist in the snowflake anaconda channel
371
+
372
+ Args:
373
+ dataset: snowpark dataframe
374
+ inference_method: the inference method such as predict, score...
375
+
376
+ Raises:
377
+ SnowflakeMLException: If the estimator is not fitted, raise error
378
+ SnowflakeMLException: If the session is None, raise error
379
+
380
+ Returns:
381
+ A list of available packages that exist in the snowflake anaconda channel
376
382
  """
377
383
  if not self._is_fitted:
378
384
  raise exceptions.SnowflakeMLException(
@@ -446,7 +452,7 @@ class Perceptron(BaseTransformer):
446
452
  transform_kwargs = dict(
447
453
  session = dataset._session,
448
454
  dependencies = self._deps,
449
- pass_through_cols = self._get_pass_through_columns(dataset),
455
+ drop_input_cols = self._drop_input_cols,
450
456
  expected_output_cols_type = expected_type_inferred,
451
457
  )
452
458
 
@@ -506,16 +512,16 @@ class Perceptron(BaseTransformer):
506
512
  # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
507
513
  # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
508
514
  # each row containing a list of values.
509
- expected_dtype = "ARRAY"
515
+ expected_dtype = "array"
510
516
 
511
517
  # If we were unable to assign a type to this transform in the factory, infer the type here.
512
518
  if expected_dtype == "":
513
- # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
519
+ # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
514
520
  if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
515
- expected_dtype = "ARRAY"
516
- # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
521
+ expected_dtype = "array"
522
+ # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
517
523
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
518
- expected_dtype = "ARRAY"
524
+ expected_dtype = "array"
519
525
  else:
520
526
  output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
521
527
  # We can only infer the output types from the input types if the following two statements are true:
@@ -533,7 +539,7 @@ class Perceptron(BaseTransformer):
533
539
  transform_kwargs = dict(
534
540
  session = dataset._session,
535
541
  dependencies = self._deps,
536
- pass_through_cols = self._get_pass_through_columns(dataset),
542
+ drop_input_cols = self._drop_input_cols,
537
543
  expected_output_cols_type = expected_dtype,
538
544
  )
539
545
 
@@ -584,7 +590,7 @@ class Perceptron(BaseTransformer):
584
590
  subproject=_SUBPROJECT,
585
591
  )
586
592
  output_result, fitted_estimator = model_trainer.train_fit_predict(
587
- pass_through_columns=self._get_pass_through_columns(dataset),
593
+ drop_input_cols=self._drop_input_cols,
588
594
  expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
589
595
  )
590
596
  self._sklearn_object = fitted_estimator
@@ -602,44 +608,6 @@ class Perceptron(BaseTransformer):
602
608
  assert self._sklearn_object is not None
603
609
  return self._sklearn_object.embedding_
604
610
 
605
-
606
- def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
607
- """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
608
- Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
609
- """
610
- output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
611
- if output_cols:
612
- output_cols = [
613
- identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
614
- for c in output_cols
615
- ]
616
- elif getattr(self._sklearn_object, "classes_", None) is None:
617
- output_cols = [output_cols_prefix]
618
- elif self._sklearn_object is not None:
619
- classes = self._sklearn_object.classes_
620
- if isinstance(classes, numpy.ndarray):
621
- output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
622
- elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
623
- # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
624
- output_cols = []
625
- for i, cl in enumerate(classes):
626
- # For binary classification, there is only one output column for each class
627
- # ndarray as the two classes are complementary.
628
- if len(cl) == 2:
629
- output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
630
- else:
631
- output_cols.extend([
632
- f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
633
- ])
634
- else:
635
- output_cols = []
636
-
637
- # Make sure column names are valid snowflake identifiers.
638
- assert output_cols is not None # Make MyPy happy
639
- rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
640
-
641
- return rv
642
-
643
611
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
644
612
  @telemetry.send_api_usage_telemetry(
645
613
  project=_PROJECT,
@@ -679,7 +647,7 @@ class Perceptron(BaseTransformer):
679
647
  transform_kwargs = dict(
680
648
  session=dataset._session,
681
649
  dependencies=self._deps,
682
- pass_through_cols=self._get_pass_through_columns(dataset),
650
+ drop_input_cols = self._drop_input_cols,
683
651
  expected_output_cols_type="float",
684
652
  )
685
653
 
@@ -744,7 +712,7 @@ class Perceptron(BaseTransformer):
744
712
  transform_kwargs = dict(
745
713
  session=dataset._session,
746
714
  dependencies=self._deps,
747
- pass_through_cols=self._get_pass_through_columns(dataset),
715
+ drop_input_cols = self._drop_input_cols,
748
716
  expected_output_cols_type="float",
749
717
  )
750
718
  elif isinstance(dataset, pd.DataFrame):
@@ -807,7 +775,7 @@ class Perceptron(BaseTransformer):
807
775
  transform_kwargs = dict(
808
776
  session=dataset._session,
809
777
  dependencies=self._deps,
810
- pass_through_cols=self._get_pass_through_columns(dataset),
778
+ drop_input_cols = self._drop_input_cols,
811
779
  expected_output_cols_type="float",
812
780
  )
813
781
 
@@ -872,7 +840,7 @@ class Perceptron(BaseTransformer):
872
840
  transform_kwargs = dict(
873
841
  session=dataset._session,
874
842
  dependencies=self._deps,
875
- pass_through_cols=self._get_pass_through_columns(dataset),
843
+ drop_input_cols = self._drop_input_cols,
876
844
  expected_output_cols_type="float",
877
845
  )
878
846
 
@@ -928,13 +896,17 @@ class Perceptron(BaseTransformer):
928
896
  transform_kwargs: ScoreKwargsTypedDict = dict()
929
897
 
930
898
  if isinstance(dataset, DataFrame):
899
+ self._deps = self._batch_inference_validate_snowpark(
900
+ dataset=dataset,
901
+ inference_method="score",
902
+ )
931
903
  selected_cols = self._get_active_columns()
932
904
  if len(selected_cols) > 0:
933
905
  dataset = dataset.select(selected_cols)
934
906
  assert isinstance(dataset._session, Session) # keep mypy happy
935
907
  transform_kwargs = dict(
936
908
  session=dataset._session,
937
- dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
909
+ dependencies=["snowflake-snowpark-python"] + self._deps,
938
910
  score_sproc_imports=['sklearn'],
939
911
  )
940
912
  elif isinstance(dataset, pd.DataFrame):
@@ -1008,9 +980,9 @@ class Perceptron(BaseTransformer):
1008
980
  transform_kwargs = dict(
1009
981
  session = dataset._session,
1010
982
  dependencies = self._deps,
1011
- pass_through_cols = self._get_pass_through_columns(dataset),
1012
- expected_output_cols_type = "array",
1013
- n_neighbors = n_neighbors,
983
+ drop_input_cols = self._drop_input_cols,
984
+ expected_output_cols_type="array",
985
+ n_neighbors = n_neighbors,
1014
986
  return_distance = return_distance
1015
987
  )
1016
988
  elif isinstance(dataset, pd.DataFrame):