snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211)
  1. snowflake/ml/_internal/file_utils.py +3 -3
  2. snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
  3. snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
  4. snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
  5. snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
  6. snowflake/ml/_internal/telemetry.py +11 -2
  7. snowflake/ml/_internal/utils/formatting.py +1 -1
  8. snowflake/ml/feature_store/feature_store.py +15 -106
  9. snowflake/ml/fileset/sfcfs.py +4 -3
  10. snowflake/ml/fileset/stage_fs.py +18 -0
  11. snowflake/ml/model/_api.py +9 -9
  12. snowflake/ml/model/_client/model/model_version_impl.py +20 -15
  13. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
  14. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
  15. snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
  16. snowflake/ml/model/_model_composer/model_composer.py +10 -8
  17. snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
  18. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
  19. snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
  20. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
  21. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  22. snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
  23. snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
  24. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
  25. snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
  26. snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
  27. snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
  28. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
  29. snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
  30. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
  31. snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
  32. snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
  33. snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
  34. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  35. snowflake/ml/model/_packager/model_packager.py +8 -6
  36. snowflake/ml/model/custom_model.py +3 -1
  37. snowflake/ml/model/type_hints.py +13 -0
  38. snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
  39. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
  40. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
  41. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
  42. snowflake/ml/modeling/_internal/model_specifications.py +3 -1
  43. snowflake/ml/modeling/_internal/model_trainer.py +2 -2
  44. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
  45. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
  46. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
  47. snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
  48. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
  49. snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
  51. snowflake/ml/modeling/cluster/birch.py +33 -61
  52. snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
  53. snowflake/ml/modeling/cluster/dbscan.py +33 -61
  54. snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
  55. snowflake/ml/modeling/cluster/k_means.py +33 -61
  56. snowflake/ml/modeling/cluster/mean_shift.py +33 -61
  57. snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
  58. snowflake/ml/modeling/cluster/optics.py +33 -61
  59. snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
  60. snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
  61. snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
  62. snowflake/ml/modeling/compose/column_transformer.py +33 -61
  63. snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
  64. snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
  65. snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
  66. snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
  67. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
  68. snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
  69. snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
  70. snowflake/ml/modeling/covariance/oas.py +33 -61
  71. snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
  72. snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
  73. snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
  74. snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
  75. snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
  76. snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
  77. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
  78. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
  79. snowflake/ml/modeling/decomposition/pca.py +33 -61
  80. snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
  81. snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
  82. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
  83. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
  84. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
  85. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
  86. snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
  87. snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
  88. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
  89. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
  90. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
  91. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
  92. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
  93. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
  94. snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
  95. snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
  96. snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
  97. snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
  98. snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
  99. snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
  100. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
  101. snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
  102. snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
  103. snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
  104. snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
  105. snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
  106. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
  107. snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
  108. snowflake/ml/modeling/framework/base.py +55 -5
  109. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
  110. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
  111. snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
  112. snowflake/ml/modeling/impute/knn_imputer.py +33 -61
  113. snowflake/ml/modeling/impute/missing_indicator.py +33 -61
  114. snowflake/ml/modeling/impute/simple_imputer.py +4 -15
  115. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
  116. snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
  117. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
  118. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
  119. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
  120. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
  121. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
  122. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
  123. snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
  124. snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
  125. snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
  126. snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
  127. snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
  128. snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
  129. snowflake/ml/modeling/linear_model/lars.py +33 -61
  130. snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
  131. snowflake/ml/modeling/linear_model/lasso.py +33 -61
  132. snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
  133. snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
  134. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
  135. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
  136. snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
  137. snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
  138. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
  140. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
  141. snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
  142. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
  143. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
  144. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
  145. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
  146. snowflake/ml/modeling/linear_model/perceptron.py +33 -61
  147. snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
  148. snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
  149. snowflake/ml/modeling/linear_model/ridge.py +33 -61
  150. snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
  151. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
  152. snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
  153. snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
  154. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
  155. snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
  156. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
  157. snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
  158. snowflake/ml/modeling/manifold/isomap.py +33 -61
  159. snowflake/ml/modeling/manifold/mds.py +33 -61
  160. snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
  161. snowflake/ml/modeling/manifold/tsne.py +33 -61
  162. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
  163. snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
  164. snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
  165. snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
  166. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
  167. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
  168. snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
  169. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
  170. snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
  171. snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
  172. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
  173. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
  174. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
  175. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
  176. snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
  177. snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
  178. snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
  179. snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
  180. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
  181. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
  182. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
  183. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
  184. snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
  185. snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
  186. snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
  187. snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
  188. snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
  189. snowflake/ml/modeling/svm/linear_svc.py +33 -61
  190. snowflake/ml/modeling/svm/linear_svr.py +33 -61
  191. snowflake/ml/modeling/svm/nu_svc.py +33 -61
  192. snowflake/ml/modeling/svm/nu_svr.py +33 -61
  193. snowflake/ml/modeling/svm/svc.py +33 -61
  194. snowflake/ml/modeling/svm/svr.py +33 -61
  195. snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
  196. snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
  197. snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
  198. snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
  199. snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
  200. snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
  201. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
  202. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
  203. snowflake/ml/registry/_manager/model_manager.py +6 -2
  204. snowflake/ml/registry/model_registry.py +100 -27
  205. snowflake/ml/registry/registry.py +6 -2
  206. snowflake/ml/version.py +1 -1
  207. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
  208. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
  209. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
  210. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
  211. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
--- a/snowflake/ml/modeling/naive_bayes/complement_nb.py
+++ b/snowflake/ml/modeling/naive_bayes/complement_nb.py
@@ -283,18 +283,24 @@ class ComplementNB(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -368,7 +374,7 @@ class ComplementNB(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -428,16 +434,16 @@ class ComplementNB(BaseTransformer):
                 # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
                 # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
                 # each row containing a list of values.
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
 
             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
@@ -455,7 +461,7 @@ class ComplementNB(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -506,7 +512,7 @@ class ComplementNB(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -524,44 +530,6 @@ class ComplementNB(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -603,7 +571,7 @@ class ComplementNB(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -670,7 +638,7 @@ class ComplementNB(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -731,7 +699,7 @@ class ComplementNB(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -796,7 +764,7 @@ class ComplementNB(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -852,13 +820,17 @@ class ComplementNB(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -932,9 +904,9 @@ class ComplementNB(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
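
The change repeated across these hunks swaps the per-estimator _get_pass_through_columns helper for the plain _drop_input_cols flag, leaving column bookkeeping to the Snowpark inference handler. A minimal standalone sketch of the before/after shapes (pass_through_columns and the literal kwargs below are illustrative, not the library's API):

from typing import List

# Before (1.3.0): each generated estimator computed the pass-through
# columns itself on every inference call.
def pass_through_columns(dataset_columns: List[str], output_cols: List[str], drop_input_cols: bool) -> List[str]:
    if drop_input_cols:
        return []
    return list(set(dataset_columns) - set(output_cols))

# After (1.4.0): the estimator forwards only the boolean flag; the
# handler decides which columns to carry through to the output.
transform_kwargs = dict(
    drop_input_cols=False,  # replaces: pass_through_cols=pass_through_columns(...)
    expected_output_cols_type="float",
)

print(pass_through_columns(["A", "B", "OUTPUT"], ["OUTPUT"], drop_input_cols=False))
# -> ['A', 'B'] (order not guaranteed; it comes from a set difference)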
--- a/snowflake/ml/modeling/naive_bayes/gaussian_nb.py
+++ b/snowflake/ml/modeling/naive_bayes/gaussian_nb.py
@@ -264,18 +264,24 @@ class GaussianNB(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -349,7 +355,7 @@ class GaussianNB(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -409,16 +415,16 @@ class GaussianNB(BaseTransformer):
                 # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
                 # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
                 # each row containing a list of values.
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
 
             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
@@ -436,7 +442,7 @@ class GaussianNB(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -487,7 +493,7 @@ class GaussianNB(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -505,44 +511,6 @@ class GaussianNB(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -584,7 +552,7 @@ class GaussianNB(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -651,7 +619,7 @@ class GaussianNB(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -712,7 +680,7 @@ class GaussianNB(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -777,7 +745,7 @@ class GaussianNB(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
            )
 
@@ -833,13 +801,17 @@ class GaussianNB(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
            )
         elif isinstance(dataset, pd.DataFrame):
@@ -913,9 +885,9 @@ class GaussianNB(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
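
Per the new docstring, _batch_inference_validate_snowpark now doubles as dependency resolution: it validates that batch inference can run and returns the packages available in the Snowflake Anaconda channel, which the 1.4.0 score() path caches on self._deps. A rough sketch of that control flow, with the hypothetical validate_batch_inference standing in for the real method:

from typing import List, Optional

def validate_batch_inference(is_fitted: bool, session: Optional[object], requested: List[str]) -> List[str]:
    # Hypothetical stand-in for _batch_inference_validate_snowpark: raise if
    # scoring cannot run, otherwise return the resolvable package list. The
    # real method consults the Snowflake Anaconda channel; this one just echoes.
    if not is_fitted:
        raise RuntimeError("estimator is not fitted")
    if session is None:
        raise RuntimeError("session is None")
    return requested

# 1.4.0 pattern in score(): cache the validated packages on the estimator,
# then reuse them when building the stored-procedure dependency list.
deps = validate_batch_inference(is_fitted=True, session=object(), requested=["scikit-learn"])
dependencies = ["snowflake-snowpark-python"] + deps  # was: ... + self._get_dependencies()
print(dependencies)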
--- a/snowflake/ml/modeling/naive_bayes/multinomial_nb.py
+++ b/snowflake/ml/modeling/naive_bayes/multinomial_nb.py
@@ -277,18 +277,24 @@ class MultinomialNB(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -362,7 +368,7 @@ class MultinomialNB(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -422,16 +428,16 @@ class MultinomialNB(BaseTransformer):
                 # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
                 # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
                 # each row containing a list of values.
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
 
             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
@@ -449,7 +455,7 @@ class MultinomialNB(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -500,7 +506,7 @@ class MultinomialNB(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -518,44 +524,6 @@ class MultinomialNB(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -597,7 +565,7 @@ class MultinomialNB(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -664,7 +632,7 @@ class MultinomialNB(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -725,7 +693,7 @@ class MultinomialNB(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -790,7 +758,7 @@ class MultinomialNB(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
            )
 
@@ -846,13 +814,17 @@ class MultinomialNB(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
        if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -926,9 +898,9 @@ class MultinomialNB(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
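
For reference, the _get_output_column_names helper deleted from each estimator mapped a classifier's classes_ attribute to output column names; the file list suggests the logic was consolidated into the shared snowflake/ml/modeling/framework/base.py (+55 -5), though that file's hunks are not shown in this view. A simplified sketch of the deleted behavior, without the Snowflake identifier handling:

from typing import List, Optional

import numpy

def output_column_names(output_cols_prefix: str, classes: Optional[object]) -> List[str]:
    # Simplified re-implementation of the removed helper; names and branching
    # follow the deleted code above, minus identifier resolution.
    if classes is None:
        # Not a classifier: the prefix itself is the single output column.
        return [output_cols_prefix]
    if isinstance(classes, numpy.ndarray):
        return [f"{output_cols_prefix}{c}" for c in classes.tolist()]
    if isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
        # Multioutput estimator: classes_ is a list of ndarrays.
        cols: List[str] = []
        for i, cl in enumerate(classes):
            if len(cl) == 2:
                # Binary output: one column per class ndarray, since the two
                # classes are complementary.
                cols.append(f"{output_cols_prefix}{i}_{cl[0]}")
            else:
                cols.extend(f"{output_cols_prefix}{i}_{c}" for c in cl.tolist())
        return cols
    return []

# For a three-class estimator, predict_proba columns become:
print(output_column_names("PREDICT_PROBA_", numpy.array([0, 1, 2])))
# ['PREDICT_PROBA_0', 'PREDICT_PROBA_1', 'PREDICT_PROBA_2']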