snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211)
  1. snowflake/ml/_internal/file_utils.py +3 -3
  2. snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
  3. snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
  4. snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
  5. snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
  6. snowflake/ml/_internal/telemetry.py +11 -2
  7. snowflake/ml/_internal/utils/formatting.py +1 -1
  8. snowflake/ml/feature_store/feature_store.py +15 -106
  9. snowflake/ml/fileset/sfcfs.py +4 -3
  10. snowflake/ml/fileset/stage_fs.py +18 -0
  11. snowflake/ml/model/_api.py +9 -9
  12. snowflake/ml/model/_client/model/model_version_impl.py +20 -15
  13. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
  14. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
  15. snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
  16. snowflake/ml/model/_model_composer/model_composer.py +10 -8
  17. snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
  18. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
  19. snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
  20. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
  21. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  22. snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
  23. snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
  24. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
  25. snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
  26. snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
  27. snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
  28. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
  29. snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
  30. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
  31. snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
  32. snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
  33. snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
  34. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  35. snowflake/ml/model/_packager/model_packager.py +8 -6
  36. snowflake/ml/model/custom_model.py +3 -1
  37. snowflake/ml/model/type_hints.py +13 -0
  38. snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
  39. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
  40. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
  41. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
  42. snowflake/ml/modeling/_internal/model_specifications.py +3 -1
  43. snowflake/ml/modeling/_internal/model_trainer.py +2 -2
  44. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
  45. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
  46. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
  47. snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
  48. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
  49. snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
  51. snowflake/ml/modeling/cluster/birch.py +33 -61
  52. snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
  53. snowflake/ml/modeling/cluster/dbscan.py +33 -61
  54. snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
  55. snowflake/ml/modeling/cluster/k_means.py +33 -61
  56. snowflake/ml/modeling/cluster/mean_shift.py +33 -61
  57. snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
  58. snowflake/ml/modeling/cluster/optics.py +33 -61
  59. snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
  60. snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
  61. snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
  62. snowflake/ml/modeling/compose/column_transformer.py +33 -61
  63. snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
  64. snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
  65. snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
  66. snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
  67. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
  68. snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
  69. snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
  70. snowflake/ml/modeling/covariance/oas.py +33 -61
  71. snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
  72. snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
  73. snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
  74. snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
  75. snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
  76. snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
  77. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
  78. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
  79. snowflake/ml/modeling/decomposition/pca.py +33 -61
  80. snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
  81. snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
  82. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
  83. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
  84. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
  85. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
  86. snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
  87. snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
  88. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
  89. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
  90. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
  91. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
  92. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
  93. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
  94. snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
  95. snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
  96. snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
  97. snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
  98. snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
  99. snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
  100. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
  101. snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
  102. snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
  103. snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
  104. snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
  105. snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
  106. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
  107. snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
  108. snowflake/ml/modeling/framework/base.py +55 -5
  109. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
  110. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
  111. snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
  112. snowflake/ml/modeling/impute/knn_imputer.py +33 -61
  113. snowflake/ml/modeling/impute/missing_indicator.py +33 -61
  114. snowflake/ml/modeling/impute/simple_imputer.py +4 -15
  115. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
  116. snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
  117. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
  118. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
  119. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
  120. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
  121. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
  122. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
  123. snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
  124. snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
  125. snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
  126. snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
  127. snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
  128. snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
  129. snowflake/ml/modeling/linear_model/lars.py +33 -61
  130. snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
  131. snowflake/ml/modeling/linear_model/lasso.py +33 -61
  132. snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
  133. snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
  134. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
  135. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
  136. snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
  137. snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
  138. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
  140. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
  141. snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
  142. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
  143. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
  144. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
  145. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
  146. snowflake/ml/modeling/linear_model/perceptron.py +33 -61
  147. snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
  148. snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
  149. snowflake/ml/modeling/linear_model/ridge.py +33 -61
  150. snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
  151. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
  152. snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
  153. snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
  154. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
  155. snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
  156. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
  157. snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
  158. snowflake/ml/modeling/manifold/isomap.py +33 -61
  159. snowflake/ml/modeling/manifold/mds.py +33 -61
  160. snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
  161. snowflake/ml/modeling/manifold/tsne.py +33 -61
  162. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
  163. snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
  164. snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
  165. snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
  166. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
  167. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
  168. snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
  169. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
  170. snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
  171. snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
  172. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
  173. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
  174. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
  175. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
  176. snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
  177. snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
  178. snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
  179. snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
  180. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
  181. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
  182. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
  183. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
  184. snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
  185. snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
  186. snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
  187. snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
  188. snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
  189. snowflake/ml/modeling/svm/linear_svc.py +33 -61
  190. snowflake/ml/modeling/svm/linear_svr.py +33 -61
  191. snowflake/ml/modeling/svm/nu_svc.py +33 -61
  192. snowflake/ml/modeling/svm/nu_svr.py +33 -61
  193. snowflake/ml/modeling/svm/svc.py +33 -61
  194. snowflake/ml/modeling/svm/svr.py +33 -61
  195. snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
  196. snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
  197. snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
  198. snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
  199. snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
  200. snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
  201. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
  202. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
  203. snowflake/ml/registry/_manager/model_manager.py +6 -2
  204. snowflake/ml/registry/model_registry.py +100 -27
  205. snowflake/ml/registry/registry.py +6 -2
  206. snowflake/ml/version.py +1 -1
  207. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
  208. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
  209. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
  210. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
  211. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
@@ -293,18 +293,24 @@ class BernoulliRBM(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to validate that batch inference can be run on a snowpark dataframe.
+        """Util method to validate that batch inference can be run on a snowpark dataframe and
+        return the available packages that exist in the snowflake anaconda channel.
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method, such as predict or score
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted.
+            SnowflakeMLException: If the session is None.
+
+        Returns:
+            A list of available packages that exist in the snowflake anaconda channel.
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
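The contract change here: validation and dependency resolution are now a single call, and callers keep the returned package list (the score hunk further down stores it on self._deps). A minimal standalone sketch of that contract, with a stub in place of the real Anaconda-channel lookup (everything except the method name and signature is illustrative):

from typing import List

class ValidatorSketch:
    """Toy stand-in; the real method also verifies that dataset._session is live."""
    _is_fitted = True

    def _batch_inference_validate_snowpark(self, dataset: object, inference_method: str) -> List[str]:
        if not self._is_fitted:
            # The real code raises SnowflakeMLException here.
            raise RuntimeError("estimator not fitted; call fit() first")
        # The real code resolves which dependency pins exist in the
        # Snowflake Anaconda channel; this stub returns a fixed list.
        return ["scikit-learn==1.3.2", "numpy==1.24.3"]

deps = ValidatorSketch()._batch_inference_validate_snowpark(dataset=None, inference_method="predict")
print(deps)  # callers hold onto this list as self._deps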
@@ -376,7 +382,7 @@ class BernoulliRBM(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -438,16 +444,16 @@ class BernoulliRBM(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"
 
             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
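The type literal is now the lowercase "array", matching the spelling the other inference kwargs already use (see expected_output_cols_type="array" in the kneighbors hunk further down). Pulled out as a pure function, the fallback above reads as follows (a sketch; n_clusters and n_components are attributes of the wrapped scikit-learn object):

def infer_expected_dtype(sklearn_object: object, output_cols: list) -> str:
    """Sketch of the fallback above: when the estimator emits a different
    number of values than there are output columns, each row is packed into
    one list-valued column, hence the "array" type string."""
    n_clusters = getattr(sklearn_object, "n_clusters", None)
    n_components = getattr(sklearn_object, "n_components", None)
    if n_clusters is not None and n_clusters != len(output_cols):
        return "array"
    if n_components is not None and n_components != len(output_cols):
        return "array"
    return ""  # caller falls back to inferring from the input signature

class FakeKMeans:  # illustrative stand-in for a clustering estimator
    n_clusters = 8

print(infer_expected_dtype(FakeKMeans(), ["CLUSTER"]))  # "array"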
@@ -465,7 +471,7 @@ class BernoulliRBM(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -516,7 +522,7 @@ class BernoulliRBM(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
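Both call sites above replace a precomputed column list with a single flag; the handler now derives the pass-through columns itself. The deleted helper makes the equivalence concrete (re-implemented standalone for illustration):

from typing import List

def pass_through_columns(dataset_columns: List[str], output_cols: List[str], drop_input_cols: bool) -> List[str]:
    # Mirrors the removed _get_pass_through_columns: drop everything, or keep
    # every column that is not an output column.
    if drop_input_cols:
        return []
    return list(set(dataset_columns) - set(output_cols))

# 1.3.0 shipped this list inside transform_kwargs; 1.4.0 ships only the flag
# and leaves the set arithmetic to the inference handler.
print(sorted(pass_through_columns(["F1", "F2", "PREDICTION"], ["PREDICTION"], drop_input_cols=False)))
# ['F1', 'F2']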
@@ -534,44 +540,6 @@ class BernoulliRBM(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-            Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-        else:
-            output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
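The naming helper removed here from the generated wrapper is easiest to grasp from its outputs. A condensed, standalone re-run of its logic, minus the Snowflake identifier normalization:

import numpy

def output_column_names(prefix: str, classes) -> list:
    # Condensed from the removed _get_output_column_names.
    if classes is None:
        return [prefix]  # regressors, transformers: one column
    if isinstance(classes, numpy.ndarray):
        return [f"{prefix}{c}" for c in classes.tolist()]  # one column per class
    if isinstance(classes, list) and classes and isinstance(classes[0], numpy.ndarray):
        cols = []  # multioutput estimator: classes_ is a list of ndarrays
        for i, cl in enumerate(classes):
            if len(cl) == 2:  # binary output: the two classes are complementary
                cols.append(f"{prefix}{i}_{cl[0]}")
            else:
                cols.extend(f"{prefix}{i}_{c}" for c in cl.tolist())
        return cols
    return []

print(output_column_names("PREDICT_PROBA_", numpy.array([0, 1, 2])))
# ['PREDICT_PROBA_0', 'PREDICT_PROBA_1', 'PREDICT_PROBA_2']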
@@ -611,7 +579,7 @@ class BernoulliRBM(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -676,7 +644,7 @@ class BernoulliRBM(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -737,7 +705,7 @@ class BernoulliRBM(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -804,7 +772,7 @@ class BernoulliRBM(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -858,13 +826,17 @@ class BernoulliRBM(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
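The score path previously built its dependency list with self._get_dependencies() and never touched the validator; after this change the Snowpark branch funnels through the same validate-then-cache step as the other inference paths. A minimal runnable sketch of the resulting pattern (the stub validator stands in for self._batch_inference_validate_snowpark):

from typing import Callable, Dict, List

def build_score_kwargs(session: object, validate: Callable[[], List[str]]) -> Dict[str, object]:
    """Sketch of the new score() Snowpark branch: validation and dependency
    resolution are one step, and the validated packages feed the sproc deps."""
    deps = validate()  # stands in for self._batch_inference_validate_snowpark(...)
    return {
        "session": session,
        "dependencies": ["snowflake-snowpark-python"] + deps,
        "score_sproc_imports": ["sklearn"],
    }

# Example with a stub validator that "resolves" two pinned packages:
print(build_score_kwargs(session=None, validate=lambda: ["scikit-learn==1.3.2", "numpy==1.24.3"]))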
@@ -938,9 +910,9 @@ class BernoulliRBM(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
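The kneighbors path keeps "array" as the output type because each input row yields a list of neighbor indices (and optionally distances), not a scalar. A plain scikit-learn illustration of that shape, independent of the wrapper:

import numpy as np
from sklearn.neighbors import NearestNeighbors

X = np.array([[0.0], [1.0], [2.0], [3.0]])
nn = NearestNeighbors(n_neighbors=2).fit(X)
distances, indices = nn.kneighbors(X, return_distance=True)
# Row 0's nearest two neighbors are itself and point 1: list-valued output,
# hence expected_output_cols_type="array" in the hunk above.
print(indices[0], distances[0])  # [0 1] [0. 1.]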
@@ -448,18 +448,24 @@ class MLPClassifier(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to validate that batch inference can be run on a snowpark dataframe.
+        """Util method to validate that batch inference can be run on a snowpark dataframe and
+        return the available packages that exist in the snowflake anaconda channel.
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method, such as predict or score
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted.
+            SnowflakeMLException: If the session is None.
+
+        Returns:
+            A list of available packages that exist in the snowflake anaconda channel.
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -533,7 +539,7 @@ class MLPClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -593,16 +599,16 @@ class MLPClassifier(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"
 
             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
@@ -620,7 +626,7 @@ class MLPClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -671,7 +677,7 @@ class MLPClassifier(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -689,44 +695,6 @@ class MLPClassifier(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-            Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-        else:
-            output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -768,7 +736,7 @@ class MLPClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -835,7 +803,7 @@ class MLPClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -896,7 +864,7 @@ class MLPClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -961,7 +929,7 @@ class MLPClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -1017,13 +985,17 @@ class MLPClassifier(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -1097,9 +1069,9 @@ class MLPClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -444,18 +444,24 @@ class MLPRegressor(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
        inference_method: str,
     ) -> List[str]:
-        """Util method to validate that batch inference can be run on a snowpark dataframe.
+        """Util method to validate that batch inference can be run on a snowpark dataframe and
+        return the available packages that exist in the snowflake anaconda channel.
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method, such as predict or score
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted.
+            SnowflakeMLException: If the session is None.
+
+        Returns:
+            A list of available packages that exist in the snowflake anaconda channel.
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -529,7 +535,7 @@ class MLPRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -589,16 +595,16 @@ class MLPRegressor(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"
 
             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statements are true:
@@ -616,7 +622,7 @@ class MLPRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -667,7 +673,7 @@ class MLPRegressor(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -685,44 +691,6 @@ class MLPRegressor(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-            Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-        else:
-            output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -762,7 +730,7 @@ class MLPRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -827,7 +795,7 @@ class MLPRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -888,7 +856,7 @@ class MLPRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -953,7 +921,7 @@ class MLPRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -1009,13 +977,17 @@ class MLPRegressor(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -1089,9 +1061,9 @@ class MLPRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
            )
         elif isinstance(dataset, pd.DataFrame):
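Taken together with the file list, the same rewrite lands in every generated estimator (the roughly 200 files at +33 -61): the private helpers _get_pass_through_columns and _get_output_column_names disappear from the wrappers, and the handler kwargs switch from pass_through_cols to drop_input_cols. A hedged probe for any code that reached into these privates (discouraged, but it happens):

from snowflake.ml.modeling.neural_network import MLPRegressor

est = MLPRegressor()
# Feature-detect rather than assuming a package version: the helper exists in
# the 1.3.0 generated wrappers and is gone in 1.4.0.
if hasattr(est, "_get_pass_through_columns"):
    print("pre-1.4.0 generated wrapper")
else:
    print("1.4.0+ generated wrapper")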