snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (211)
  1. snowflake/ml/_internal/file_utils.py +3 -3
  2. snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
  3. snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
  4. snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
  5. snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
  6. snowflake/ml/_internal/telemetry.py +11 -2
  7. snowflake/ml/_internal/utils/formatting.py +1 -1
  8. snowflake/ml/feature_store/feature_store.py +15 -106
  9. snowflake/ml/fileset/sfcfs.py +4 -3
  10. snowflake/ml/fileset/stage_fs.py +18 -0
  11. snowflake/ml/model/_api.py +9 -9
  12. snowflake/ml/model/_client/model/model_version_impl.py +20 -15
  13. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
  14. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
  15. snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
  16. snowflake/ml/model/_model_composer/model_composer.py +10 -8
  17. snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
  18. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
  19. snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
  20. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
  21. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  22. snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
  23. snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
  24. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
  25. snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
  26. snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
  27. snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
  28. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
  29. snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
  30. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
  31. snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
  32. snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
  33. snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
  34. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  35. snowflake/ml/model/_packager/model_packager.py +8 -6
  36. snowflake/ml/model/custom_model.py +3 -1
  37. snowflake/ml/model/type_hints.py +13 -0
  38. snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
  39. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
  40. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
  41. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
  42. snowflake/ml/modeling/_internal/model_specifications.py +3 -1
  43. snowflake/ml/modeling/_internal/model_trainer.py +2 -2
  44. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
  45. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
  46. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
  47. snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
  48. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
  49. snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
  51. snowflake/ml/modeling/cluster/birch.py +33 -61
  52. snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
  53. snowflake/ml/modeling/cluster/dbscan.py +33 -61
  54. snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
  55. snowflake/ml/modeling/cluster/k_means.py +33 -61
  56. snowflake/ml/modeling/cluster/mean_shift.py +33 -61
  57. snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
  58. snowflake/ml/modeling/cluster/optics.py +33 -61
  59. snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
  60. snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
  61. snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
  62. snowflake/ml/modeling/compose/column_transformer.py +33 -61
  63. snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
  64. snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
  65. snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
  66. snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
  67. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
  68. snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
  69. snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
  70. snowflake/ml/modeling/covariance/oas.py +33 -61
  71. snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
  72. snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
  73. snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
  74. snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
  75. snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
  76. snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
  77. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
  78. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
  79. snowflake/ml/modeling/decomposition/pca.py +33 -61
  80. snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
  81. snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
  82. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
  83. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
  84. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
  85. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
  86. snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
  87. snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
  88. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
  89. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
  90. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
  91. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
  92. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
  93. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
  94. snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
  95. snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
  96. snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
  97. snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
  98. snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
  99. snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
  100. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
  101. snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
  102. snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
  103. snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
  104. snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
  105. snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
  106. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
  107. snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
  108. snowflake/ml/modeling/framework/base.py +55 -5
  109. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
  110. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
  111. snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
  112. snowflake/ml/modeling/impute/knn_imputer.py +33 -61
  113. snowflake/ml/modeling/impute/missing_indicator.py +33 -61
  114. snowflake/ml/modeling/impute/simple_imputer.py +4 -15
  115. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
  116. snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
  117. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
  118. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
  119. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
  120. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
  121. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
  122. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
  123. snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
  124. snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
  125. snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
  126. snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
  127. snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
  128. snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
  129. snowflake/ml/modeling/linear_model/lars.py +33 -61
  130. snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
  131. snowflake/ml/modeling/linear_model/lasso.py +33 -61
  132. snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
  133. snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
  134. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
  135. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
  136. snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
  137. snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
  138. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
  140. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
  141. snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
  142. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
  143. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
  144. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
  145. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
  146. snowflake/ml/modeling/linear_model/perceptron.py +33 -61
  147. snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
  148. snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
  149. snowflake/ml/modeling/linear_model/ridge.py +33 -61
  150. snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
  151. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
  152. snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
  153. snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
  154. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
  155. snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
  156. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
  157. snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
  158. snowflake/ml/modeling/manifold/isomap.py +33 -61
  159. snowflake/ml/modeling/manifold/mds.py +33 -61
  160. snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
  161. snowflake/ml/modeling/manifold/tsne.py +33 -61
  162. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
  163. snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
  164. snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
  165. snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
  166. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
  167. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
  168. snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
  169. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
  170. snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
  171. snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
  172. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
  173. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
  174. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
  175. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
  176. snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
  177. snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
  178. snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
  179. snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
  180. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
  181. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
  182. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
  183. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
  184. snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
  185. snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
  186. snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
  187. snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
  188. snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
  189. snowflake/ml/modeling/svm/linear_svc.py +33 -61
  190. snowflake/ml/modeling/svm/linear_svr.py +33 -61
  191. snowflake/ml/modeling/svm/nu_svc.py +33 -61
  192. snowflake/ml/modeling/svm/nu_svr.py +33 -61
  193. snowflake/ml/modeling/svm/svc.py +33 -61
  194. snowflake/ml/modeling/svm/svr.py +33 -61
  195. snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
  196. snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
  197. snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
  198. snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
  199. snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
  200. snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
  201. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
  202. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
  203. snowflake/ml/registry/_manager/model_manager.py +6 -2
  204. snowflake/ml/registry/model_registry.py +100 -27
  205. snowflake/ml/registry/registry.py +6 -2
  206. snowflake/ml/version.py +1 -1
  207. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
  208. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
  209. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
  210. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
  211. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
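
Note: the repeated +33 -61 deltas across the snowflake/ml/modeling wrappers below all stem from one generated-template change: each class's _get_pass_through_columns helper is removed, and the wrappers now forward the raw drop_input_cols flag to the inference handlers (presumably so the snowpark/pandas handler implementations, also touched above, derive the pass-through columns themselves). A minimal Python sketch of the retired rule, recast as a free function for illustration (the method form appears verbatim in the hunks below):

    from typing import List

    def get_pass_through_columns(dataset_columns: List[str],
                                 output_cols: List[str],
                                 drop_input_cols: bool) -> List[str]:
        # Mirrors the removed BaseTransformer._get_pass_through_columns:
        # dropping inputs means nothing passes through; otherwise every
        # dataset column that is not an output column passes through.
        if drop_input_cols:
            return []
        return list(set(dataset_columns) - set(output_cols))

    # 1.4.0 wrappers send the flag instead of the computed list:
    #   transform_kwargs = dict(..., drop_input_cols=self._drop_input_cols, ...)
    # leaving the handler to apply the rule above where it is needed.

Passing the flag likely defers the column arithmetic to the handler, which knows the final column set at execution time.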
snowflake/ml/modeling/linear_model/theil_sen_regressor.py

@@ -317,18 +317,24 @@ class TheilSenRegressor(BaseTransformer):
         self._get_model_signatures(dataset)
         return self

-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -402,7 +408,7 @@ class TheilSenRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )

@@ -462,16 +468,16 @@ class TheilSenRegressor(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"

         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
             if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "ARRAY"
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
             elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statemetns are true:
@@ -489,7 +495,7 @@ class TheilSenRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )

@@ -540,7 +546,7 @@ class TheilSenRegressor(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -558,44 +564,6 @@ class TheilSenRegressor(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_

-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-            Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -635,7 +603,7 @@ class TheilSenRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -700,7 +668,7 @@ class TheilSenRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -761,7 +729,7 @@ class TheilSenRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -826,7 +794,7 @@ class TheilSenRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -882,13 +850,17 @@ class TheilSenRegressor(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()

         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -962,9 +934,9 @@ class TheilSenRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
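
The score() hunk above pairs with the widened _batch_inference_validate_snowpark contract: validation now also returns the dependency packages resolvable in the Snowflake Anaconda channel, and score() caches that list on self._deps before splicing it into the stored-procedure dependencies (previously self._get_dependencies()). A runnable sketch of the flow, with a hypothetical stand-in for the validation call:

    from typing import List

    def batch_inference_validate(is_fitted: bool, has_session: bool,
                                 requested_packages: List[str]) -> List[str]:
        # Stand-in for _batch_inference_validate_snowpark: raise on the two
        # documented failure modes, then return the packages available in the
        # Snowflake Anaconda channel (the channel lookup is omitted here).
        if not is_fitted:
            raise RuntimeError("estimator is not fitted")  # SnowflakeMLException in the library
        if not has_session:
            raise RuntimeError("session is None")          # SnowflakeMLException in the library
        return requested_packages

    deps = batch_inference_validate(True, True, ["scikit-learn"])
    dependencies = ["snowflake-snowpark-python"] + deps    # as in the score() hunk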
snowflake/ml/modeling/linear_model/tweedie_regressor.py

@@ -343,18 +343,24 @@ class TweedieRegressor(BaseTransformer):
         self._get_model_signatures(dataset)
         return self

-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -428,7 +434,7 @@ class TweedieRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )

@@ -488,16 +494,16 @@ class TweedieRegressor(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
            # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"

         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
             if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "ARRAY"
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
             elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statemetns are true:
@@ -515,7 +521,7 @@ class TweedieRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )

@@ -566,7 +572,7 @@ class TweedieRegressor(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -584,44 +590,6 @@ class TweedieRegressor(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_

-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-            Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -661,7 +629,7 @@ class TweedieRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -726,7 +694,7 @@ class TweedieRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -787,7 +755,7 @@ class TweedieRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -852,7 +820,7 @@ class TweedieRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -908,13 +876,17 @@ class TweedieRegressor(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()

         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -988,9 +960,9 @@ class TweedieRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
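
Each wrapper likewise loses its private _get_output_column_names; given the +55 -5 change to snowflake/ml/modeling/framework/base.py in the file list, the logic appears to have been hoisted into the shared base class rather than dropped. A standalone sketch of the removed naming rule (Snowflake identifier resolution and prefix concatenation omitted):

    import numpy

    def get_output_column_names(prefix: str, classes) -> list:
        # Sketch of the naming rule in the removed per-class method.
        if classes is None:
            return [prefix]                      # non-classifier: a single column
        if isinstance(classes, numpy.ndarray):
            return [f"{prefix}{c}" for c in classes.tolist()]
        if isinstance(classes, list) and classes and isinstance(classes[0], numpy.ndarray):
            cols = []
            for i, cl in enumerate(classes):     # multioutput: one ndarray per output
                if len(cl) == 2:                 # binary: the two classes are complementary
                    cols.append(f"{prefix}{i}_{cl[0]}")
                else:
                    cols.extend(f"{prefix}{i}_{c}" for c in cl.tolist())
            return cols
        return []

    print(get_output_column_names("predict_proba_", numpy.array([0, 1, 2])))
    # ['predict_proba_0', 'predict_proba_1', 'predict_proba_2']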
snowflake/ml/modeling/manifold/isomap.py

@@ -339,18 +339,24 @@ class Isomap(BaseTransformer):
         self._get_model_signatures(dataset)
         return self

-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -422,7 +428,7 @@ class Isomap(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )

@@ -484,16 +490,16 @@ class Isomap(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"

         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
             if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "ARRAY"
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
             elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statemetns are true:
@@ -511,7 +517,7 @@ class Isomap(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )

@@ -562,7 +568,7 @@ class Isomap(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -580,44 +586,6 @@ class Isomap(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_

-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-            Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -657,7 +625,7 @@ class Isomap(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -722,7 +690,7 @@ class Isomap(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -783,7 +751,7 @@ class Isomap(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -848,7 +816,7 @@ class Isomap(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -902,13 +870,17 @@ class Isomap(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()

         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -982,9 +954,9 @@ class Isomap(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
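
One last recurring change: the sentinel for list-valued output columns is lowercased from "ARRAY" to "array" (the consuming handler side is not shown in this diff). The surrounding context lines encode a small type-inference rule, sketched here under the assumption that n_clusters/n_components are None when the attribute is absent:

    from typing import Optional

    def infer_expected_dtype(n_clusters: Optional[int],
                             n_components: Optional[int],
                             n_output_cols: int) -> str:
        # From the hunk context: when a clustering or decomposition estimator
        # emits fewer output columns than clusters/components, each row holds
        # a list of values, so the declared column type is "array" (lowercase
        # as of 1.4.0). An empty string defers to signature-based inference.
        if n_clusters is not None and n_clusters != n_output_cols:
            return "array"
        if n_components is not None and n_components != n_output_cols:
            return "array"
        return ""

    print(infer_expected_dtype(n_clusters=8, n_components=None, n_output_cols=1))  # -> array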