snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211)
  1. snowflake/ml/_internal/file_utils.py +3 -3
  2. snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
  3. snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
  4. snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
  5. snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
  6. snowflake/ml/_internal/telemetry.py +11 -2
  7. snowflake/ml/_internal/utils/formatting.py +1 -1
  8. snowflake/ml/feature_store/feature_store.py +15 -106
  9. snowflake/ml/fileset/sfcfs.py +4 -3
  10. snowflake/ml/fileset/stage_fs.py +18 -0
  11. snowflake/ml/model/_api.py +9 -9
  12. snowflake/ml/model/_client/model/model_version_impl.py +20 -15
  13. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
  14. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
  15. snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
  16. snowflake/ml/model/_model_composer/model_composer.py +10 -8
  17. snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
  18. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
  19. snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
  20. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
  21. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  22. snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
  23. snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
  24. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
  25. snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
  26. snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
  27. snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
  28. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
  29. snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
  30. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
  31. snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
  32. snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
  33. snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
  34. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  35. snowflake/ml/model/_packager/model_packager.py +8 -6
  36. snowflake/ml/model/custom_model.py +3 -1
  37. snowflake/ml/model/type_hints.py +13 -0
  38. snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
  39. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
  40. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
  41. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
  42. snowflake/ml/modeling/_internal/model_specifications.py +3 -1
  43. snowflake/ml/modeling/_internal/model_trainer.py +2 -2
  44. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
  45. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
  46. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
  47. snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
  48. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
  49. snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
  51. snowflake/ml/modeling/cluster/birch.py +33 -61
  52. snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
  53. snowflake/ml/modeling/cluster/dbscan.py +33 -61
  54. snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
  55. snowflake/ml/modeling/cluster/k_means.py +33 -61
  56. snowflake/ml/modeling/cluster/mean_shift.py +33 -61
  57. snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
  58. snowflake/ml/modeling/cluster/optics.py +33 -61
  59. snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
  60. snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
  61. snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
  62. snowflake/ml/modeling/compose/column_transformer.py +33 -61
  63. snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
  64. snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
  65. snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
  66. snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
  67. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
  68. snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
  69. snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
  70. snowflake/ml/modeling/covariance/oas.py +33 -61
  71. snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
  72. snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
  73. snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
  74. snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
  75. snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
  76. snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
  77. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
  78. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
  79. snowflake/ml/modeling/decomposition/pca.py +33 -61
  80. snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
  81. snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
  82. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
  83. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
  84. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
  85. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
  86. snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
  87. snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
  88. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
  89. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
  90. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
  91. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
  92. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
  93. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
  94. snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
  95. snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
  96. snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
  97. snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
  98. snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
  99. snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
  100. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
  101. snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
  102. snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
  103. snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
  104. snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
  105. snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
  106. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
  107. snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
  108. snowflake/ml/modeling/framework/base.py +55 -5
  109. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
  110. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
  111. snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
  112. snowflake/ml/modeling/impute/knn_imputer.py +33 -61
  113. snowflake/ml/modeling/impute/missing_indicator.py +33 -61
  114. snowflake/ml/modeling/impute/simple_imputer.py +4 -15
  115. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
  116. snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
  117. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
  118. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
  119. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
  120. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
  121. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
  122. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
  123. snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
  124. snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
  125. snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
  126. snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
  127. snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
  128. snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
  129. snowflake/ml/modeling/linear_model/lars.py +33 -61
  130. snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
  131. snowflake/ml/modeling/linear_model/lasso.py +33 -61
  132. snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
  133. snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
  134. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
  135. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
  136. snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
  137. snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
  138. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
  140. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
  141. snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
  142. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
  143. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
  144. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
  145. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
  146. snowflake/ml/modeling/linear_model/perceptron.py +33 -61
  147. snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
  148. snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
  149. snowflake/ml/modeling/linear_model/ridge.py +33 -61
  150. snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
  151. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
  152. snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
  153. snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
  154. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
  155. snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
  156. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
  157. snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
  158. snowflake/ml/modeling/manifold/isomap.py +33 -61
  159. snowflake/ml/modeling/manifold/mds.py +33 -61
  160. snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
  161. snowflake/ml/modeling/manifold/tsne.py +33 -61
  162. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
  163. snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
  164. snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
  165. snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
  166. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
  167. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
  168. snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
  169. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
  170. snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
  171. snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
  172. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
  173. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
  174. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
  175. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
  176. snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
  177. snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
  178. snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
  179. snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
  180. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
  181. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
  182. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
  183. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
  184. snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
  185. snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
  186. snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
  187. snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
  188. snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
  189. snowflake/ml/modeling/svm/linear_svc.py +33 -61
  190. snowflake/ml/modeling/svm/linear_svr.py +33 -61
  191. snowflake/ml/modeling/svm/nu_svc.py +33 -61
  192. snowflake/ml/modeling/svm/nu_svr.py +33 -61
  193. snowflake/ml/modeling/svm/svc.py +33 -61
  194. snowflake/ml/modeling/svm/svr.py +33 -61
  195. snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
  196. snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
  197. snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
  198. snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
  199. snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
  200. snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
  201. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
  202. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
  203. snowflake/ml/registry/_manager/model_manager.py +6 -2
  204. snowflake/ml/registry/model_registry.py +100 -27
  205. snowflake/ml/registry/registry.py +6 -2
  206. snowflake/ml/version.py +1 -1
  207. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
  208. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
  209. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
  210. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
  211. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
@@ -327,18 +327,24 @@ class LassoLarsIC(BaseTransformer):
327
327
  self._get_model_signatures(dataset)
328
328
  return self
329
329
 
330
- def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
331
- if self._drop_input_cols:
332
- return []
333
- else:
334
- return list(set(dataset.columns) - set(self.output_cols))
335
-
336
330
  def _batch_inference_validate_snowpark(
337
331
  self,
338
332
  dataset: DataFrame,
339
333
  inference_method: str,
340
334
  ) -> List[str]:
341
- """Util method to run validate that batch inference can be run on a snowpark dataframe.
335
+ """Util method to run validate that batch inference can be run on a snowpark dataframe and
336
+ return the available package that exists in the snowflake anaconda channel
337
+
338
+ Args:
339
+ dataset: snowpark dataframe
340
+ inference_method: the inference method such as predict, score...
341
+
342
+ Raises:
343
+ SnowflakeMLException: If the estimator is not fitted, raise error
344
+ SnowflakeMLException: If the session is None, raise error
345
+
346
+ Returns:
347
+ A list of available package that exists in the snowflake anaconda channel
342
348
  """
343
349
  if not self._is_fitted:
344
350
  raise exceptions.SnowflakeMLException(
@@ -412,7 +418,7 @@ class LassoLarsIC(BaseTransformer):
412
418
  transform_kwargs = dict(
413
419
  session = dataset._session,
414
420
  dependencies = self._deps,
415
- pass_through_cols = self._get_pass_through_columns(dataset),
421
+ drop_input_cols = self._drop_input_cols,
416
422
  expected_output_cols_type = expected_type_inferred,
417
423
  )
418
424
 
@@ -472,16 +478,16 @@ class LassoLarsIC(BaseTransformer):
472
478
  # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
473
479
  # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
474
480
  # each row containing a list of values.
475
- expected_dtype = "ARRAY"
481
+ expected_dtype = "array"
476
482
 
477
483
  # If we were unable to assign a type to this transform in the factory, infer the type here.
478
484
  if expected_dtype == "":
479
- # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
485
+ # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
480
486
  if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
481
- expected_dtype = "ARRAY"
482
- # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
487
+ expected_dtype = "array"
488
+ # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
483
489
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
484
- expected_dtype = "ARRAY"
490
+ expected_dtype = "array"
485
491
  else:
486
492
  output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
487
493
  # We can only infer the output types from the input types if the following two statements are true:
@@ -499,7 +505,7 @@ class LassoLarsIC(BaseTransformer):
499
505
  transform_kwargs = dict(
500
506
  session = dataset._session,
501
507
  dependencies = self._deps,
502
- pass_through_cols = self._get_pass_through_columns(dataset),
508
+ drop_input_cols = self._drop_input_cols,
503
509
  expected_output_cols_type = expected_dtype,
504
510
  )
505
511
 
@@ -550,7 +556,7 @@ class LassoLarsIC(BaseTransformer):
550
556
  subproject=_SUBPROJECT,
551
557
  )
552
558
  output_result, fitted_estimator = model_trainer.train_fit_predict(
553
- pass_through_columns=self._get_pass_through_columns(dataset),
559
+ drop_input_cols=self._drop_input_cols,
554
560
  expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
555
561
  )
556
562
  self._sklearn_object = fitted_estimator
@@ -568,44 +574,6 @@ class LassoLarsIC(BaseTransformer):
568
574
  assert self._sklearn_object is not None
569
575
  return self._sklearn_object.embedding_
570
576
 
571
-
572
- def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
573
- """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
574
- Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
575
- """
576
- output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
577
- if output_cols:
578
- output_cols = [
579
- identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
580
- for c in output_cols
581
- ]
582
- elif getattr(self._sklearn_object, "classes_", None) is None:
583
- output_cols = [output_cols_prefix]
584
- elif self._sklearn_object is not None:
585
- classes = self._sklearn_object.classes_
586
- if isinstance(classes, numpy.ndarray):
587
- output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
588
- elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
589
- # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
590
- output_cols = []
591
- for i, cl in enumerate(classes):
592
- # For binary classification, there is only one output column for each class
593
- # ndarray as the two classes are complementary.
594
- if len(cl) == 2:
595
- output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
596
- else:
597
- output_cols.extend([
598
- f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
599
- ])
600
- else:
601
- output_cols = []
602
-
603
- # Make sure column names are valid snowflake identifiers.
604
- assert output_cols is not None # Make MyPy happy
605
- rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
606
-
607
- return rv
608
-
609
577
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
610
578
  @telemetry.send_api_usage_telemetry(
611
579
  project=_PROJECT,
@@ -645,7 +613,7 @@ class LassoLarsIC(BaseTransformer):
645
613
  transform_kwargs = dict(
646
614
  session=dataset._session,
647
615
  dependencies=self._deps,
648
- pass_through_cols=self._get_pass_through_columns(dataset),
616
+ drop_input_cols = self._drop_input_cols,
649
617
  expected_output_cols_type="float",
650
618
  )
651
619
 
@@ -710,7 +678,7 @@ class LassoLarsIC(BaseTransformer):
710
678
  transform_kwargs = dict(
711
679
  session=dataset._session,
712
680
  dependencies=self._deps,
713
- pass_through_cols=self._get_pass_through_columns(dataset),
681
+ drop_input_cols = self._drop_input_cols,
714
682
  expected_output_cols_type="float",
715
683
  )
716
684
  elif isinstance(dataset, pd.DataFrame):
@@ -771,7 +739,7 @@ class LassoLarsIC(BaseTransformer):
771
739
  transform_kwargs = dict(
772
740
  session=dataset._session,
773
741
  dependencies=self._deps,
774
- pass_through_cols=self._get_pass_through_columns(dataset),
742
+ drop_input_cols = self._drop_input_cols,
775
743
  expected_output_cols_type="float",
776
744
  )
777
745
 
@@ -836,7 +804,7 @@ class LassoLarsIC(BaseTransformer):
836
804
  transform_kwargs = dict(
837
805
  session=dataset._session,
838
806
  dependencies=self._deps,
839
- pass_through_cols=self._get_pass_through_columns(dataset),
807
+ drop_input_cols = self._drop_input_cols,
840
808
  expected_output_cols_type="float",
841
809
  )
842
810
 
@@ -892,13 +860,17 @@ class LassoLarsIC(BaseTransformer):
892
860
  transform_kwargs: ScoreKwargsTypedDict = dict()
893
861
 
894
862
  if isinstance(dataset, DataFrame):
863
+ self._deps = self._batch_inference_validate_snowpark(
864
+ dataset=dataset,
865
+ inference_method="score",
866
+ )
895
867
  selected_cols = self._get_active_columns()
896
868
  if len(selected_cols) > 0:
897
869
  dataset = dataset.select(selected_cols)
898
870
  assert isinstance(dataset._session, Session) # keep mypy happy
899
871
  transform_kwargs = dict(
900
872
  session=dataset._session,
901
- dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
873
+ dependencies=["snowflake-snowpark-python"] + self._deps,
902
874
  score_sproc_imports=['sklearn'],
903
875
  )
904
876
  elif isinstance(dataset, pd.DataFrame):
@@ -972,9 +944,9 @@ class LassoLarsIC(BaseTransformer):
972
944
  transform_kwargs = dict(
973
945
  session = dataset._session,
974
946
  dependencies = self._deps,
975
- pass_through_cols = self._get_pass_through_columns(dataset),
976
- expected_output_cols_type = "array",
977
- n_neighbors = n_neighbors,
947
+ drop_input_cols = self._drop_input_cols,
948
+ expected_output_cols_type="array",
949
+ n_neighbors = n_neighbors,
978
950
  return_distance = return_distance
979
951
  )
980
952
  elif isinstance(dataset, pd.DataFrame):
@@ -280,18 +280,24 @@ class LinearRegression(BaseTransformer):
280
280
  self._get_model_signatures(dataset)
281
281
  return self
282
282
 
283
- def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
284
- if self._drop_input_cols:
285
- return []
286
- else:
287
- return list(set(dataset.columns) - set(self.output_cols))
288
-
289
283
  def _batch_inference_validate_snowpark(
290
284
  self,
291
285
  dataset: DataFrame,
292
286
  inference_method: str,
293
287
  ) -> List[str]:
294
- """Util method to run validate that batch inference can be run on a snowpark dataframe.
288
+ """Util method to run validate that batch inference can be run on a snowpark dataframe and
289
+ return the available package that exists in the snowflake anaconda channel
290
+
291
+ Args:
292
+ dataset: snowpark dataframe
293
+ inference_method: the inference method such as predict, score...
294
+
295
+ Raises:
296
+ SnowflakeMLException: If the estimator is not fitted, raise error
297
+ SnowflakeMLException: If the session is None, raise error
298
+
299
+ Returns:
300
+ A list of available package that exists in the snowflake anaconda channel
295
301
  """
296
302
  if not self._is_fitted:
297
303
  raise exceptions.SnowflakeMLException(
@@ -365,7 +371,7 @@ class LinearRegression(BaseTransformer):
365
371
  transform_kwargs = dict(
366
372
  session = dataset._session,
367
373
  dependencies = self._deps,
368
- pass_through_cols = self._get_pass_through_columns(dataset),
374
+ drop_input_cols = self._drop_input_cols,
369
375
  expected_output_cols_type = expected_type_inferred,
370
376
  )
371
377
 
@@ -425,16 +431,16 @@ class LinearRegression(BaseTransformer):
425
431
  # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
426
432
  # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
427
433
  # each row containing a list of values.
428
- expected_dtype = "ARRAY"
434
+ expected_dtype = "array"
429
435
 
430
436
  # If we were unable to assign a type to this transform in the factory, infer the type here.
431
437
  if expected_dtype == "":
432
- # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
438
+ # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
433
439
  if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
434
- expected_dtype = "ARRAY"
435
- # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
440
+ expected_dtype = "array"
441
+ # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
436
442
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
437
- expected_dtype = "ARRAY"
443
+ expected_dtype = "array"
438
444
  else:
439
445
  output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
440
446
  # We can only infer the output types from the input types if the following two statements are true:
@@ -452,7 +458,7 @@ class LinearRegression(BaseTransformer):
452
458
  transform_kwargs = dict(
453
459
  session = dataset._session,
454
460
  dependencies = self._deps,
455
- pass_through_cols = self._get_pass_through_columns(dataset),
461
+ drop_input_cols = self._drop_input_cols,
456
462
  expected_output_cols_type = expected_dtype,
457
463
  )
458
464
 
@@ -503,7 +509,7 @@ class LinearRegression(BaseTransformer):
503
509
  subproject=_SUBPROJECT,
504
510
  )
505
511
  output_result, fitted_estimator = model_trainer.train_fit_predict(
506
- pass_through_columns=self._get_pass_through_columns(dataset),
512
+ drop_input_cols=self._drop_input_cols,
507
513
  expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
508
514
  )
509
515
  self._sklearn_object = fitted_estimator
@@ -521,44 +527,6 @@ class LinearRegression(BaseTransformer):
521
527
  assert self._sklearn_object is not None
522
528
  return self._sklearn_object.embedding_
523
529
 
524
-
525
- def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
526
- """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
527
- Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
528
- """
529
- output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
530
- if output_cols:
531
- output_cols = [
532
- identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
533
- for c in output_cols
534
- ]
535
- elif getattr(self._sklearn_object, "classes_", None) is None:
536
- output_cols = [output_cols_prefix]
537
- elif self._sklearn_object is not None:
538
- classes = self._sklearn_object.classes_
539
- if isinstance(classes, numpy.ndarray):
540
- output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
541
- elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
542
- # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
543
- output_cols = []
544
- for i, cl in enumerate(classes):
545
- # For binary classification, there is only one output column for each class
546
- # ndarray as the two classes are complementary.
547
- if len(cl) == 2:
548
- output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
549
- else:
550
- output_cols.extend([
551
- f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
552
- ])
553
- else:
554
- output_cols = []
555
-
556
- # Make sure column names are valid snowflake identifiers.
557
- assert output_cols is not None # Make MyPy happy
558
- rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
559
-
560
- return rv
561
-
562
530
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
563
531
  @telemetry.send_api_usage_telemetry(
564
532
  project=_PROJECT,
@@ -598,7 +566,7 @@ class LinearRegression(BaseTransformer):
598
566
  transform_kwargs = dict(
599
567
  session=dataset._session,
600
568
  dependencies=self._deps,
601
- pass_through_cols=self._get_pass_through_columns(dataset),
569
+ drop_input_cols = self._drop_input_cols,
602
570
  expected_output_cols_type="float",
603
571
  )
604
572
 
@@ -663,7 +631,7 @@ class LinearRegression(BaseTransformer):
663
631
  transform_kwargs = dict(
664
632
  session=dataset._session,
665
633
  dependencies=self._deps,
666
- pass_through_cols=self._get_pass_through_columns(dataset),
634
+ drop_input_cols = self._drop_input_cols,
667
635
  expected_output_cols_type="float",
668
636
  )
669
637
  elif isinstance(dataset, pd.DataFrame):
@@ -724,7 +692,7 @@ class LinearRegression(BaseTransformer):
724
692
  transform_kwargs = dict(
725
693
  session=dataset._session,
726
694
  dependencies=self._deps,
727
- pass_through_cols=self._get_pass_through_columns(dataset),
695
+ drop_input_cols = self._drop_input_cols,
728
696
  expected_output_cols_type="float",
729
697
  )
730
698
 
@@ -789,7 +757,7 @@ class LinearRegression(BaseTransformer):
789
757
  transform_kwargs = dict(
790
758
  session=dataset._session,
791
759
  dependencies=self._deps,
792
- pass_through_cols=self._get_pass_through_columns(dataset),
760
+ drop_input_cols = self._drop_input_cols,
793
761
  expected_output_cols_type="float",
794
762
  )
795
763
 
@@ -845,13 +813,17 @@ class LinearRegression(BaseTransformer):
845
813
  transform_kwargs: ScoreKwargsTypedDict = dict()
846
814
 
847
815
  if isinstance(dataset, DataFrame):
816
+ self._deps = self._batch_inference_validate_snowpark(
817
+ dataset=dataset,
818
+ inference_method="score",
819
+ )
848
820
  selected_cols = self._get_active_columns()
849
821
  if len(selected_cols) > 0:
850
822
  dataset = dataset.select(selected_cols)
851
823
  assert isinstance(dataset._session, Session) # keep mypy happy
852
824
  transform_kwargs = dict(
853
825
  session=dataset._session,
854
- dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
826
+ dependencies=["snowflake-snowpark-python"] + self._deps,
855
827
  score_sproc_imports=['sklearn'],
856
828
  )
857
829
  elif isinstance(dataset, pd.DataFrame):
@@ -925,9 +897,9 @@ class LinearRegression(BaseTransformer):
925
897
  transform_kwargs = dict(
926
898
  session = dataset._session,
927
899
  dependencies = self._deps,
928
- pass_through_cols = self._get_pass_through_columns(dataset),
929
- expected_output_cols_type = "array",
930
- n_neighbors = n_neighbors,
900
+ drop_input_cols = self._drop_input_cols,
901
+ expected_output_cols_type="array",
902
+ n_neighbors = n_neighbors,
931
903
  return_distance = return_distance
932
904
  )
933
905
  elif isinstance(dataset, pd.DataFrame):
@@ -394,18 +394,24 @@ class LogisticRegression(BaseTransformer):
394
394
  self._get_model_signatures(dataset)
395
395
  return self
396
396
 
397
- def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
398
- if self._drop_input_cols:
399
- return []
400
- else:
401
- return list(set(dataset.columns) - set(self.output_cols))
402
-
403
397
  def _batch_inference_validate_snowpark(
404
398
  self,
405
399
  dataset: DataFrame,
406
400
  inference_method: str,
407
401
  ) -> List[str]:
408
- """Util method to run validate that batch inference can be run on a snowpark dataframe.
402
+ """Util method to run validate that batch inference can be run on a snowpark dataframe and
403
+ return the available package that exists in the snowflake anaconda channel
404
+
405
+ Args:
406
+ dataset: snowpark dataframe
407
+ inference_method: the inference method such as predict, score...
408
+
409
+ Raises:
410
+ SnowflakeMLException: If the estimator is not fitted, raise error
411
+ SnowflakeMLException: If the session is None, raise error
412
+
413
+ Returns:
414
+ A list of available package that exists in the snowflake anaconda channel
409
415
  """
410
416
  if not self._is_fitted:
411
417
  raise exceptions.SnowflakeMLException(
@@ -479,7 +485,7 @@ class LogisticRegression(BaseTransformer):
479
485
  transform_kwargs = dict(
480
486
  session = dataset._session,
481
487
  dependencies = self._deps,
482
- pass_through_cols = self._get_pass_through_columns(dataset),
488
+ drop_input_cols = self._drop_input_cols,
483
489
  expected_output_cols_type = expected_type_inferred,
484
490
  )
485
491
 
@@ -539,16 +545,16 @@ class LogisticRegression(BaseTransformer):
539
545
  # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
540
546
  # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
541
547
  # each row containing a list of values.
542
- expected_dtype = "ARRAY"
548
+ expected_dtype = "array"
543
549
 
544
550
  # If we were unable to assign a type to this transform in the factory, infer the type here.
545
551
  if expected_dtype == "":
546
- # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
552
+ # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
547
553
  if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
548
- expected_dtype = "ARRAY"
549
- # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
554
+ expected_dtype = "array"
555
+ # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
550
556
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
551
- expected_dtype = "ARRAY"
557
+ expected_dtype = "array"
552
558
  else:
553
559
  output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
554
560
  # We can only infer the output types from the input types if the following two statemetns are true:
@@ -566,7 +572,7 @@ class LogisticRegression(BaseTransformer):
566
572
  transform_kwargs = dict(
567
573
  session = dataset._session,
568
574
  dependencies = self._deps,
569
- pass_through_cols = self._get_pass_through_columns(dataset),
575
+ drop_input_cols = self._drop_input_cols,
570
576
  expected_output_cols_type = expected_dtype,
571
577
  )
572
578
 
@@ -617,7 +623,7 @@ class LogisticRegression(BaseTransformer):
617
623
  subproject=_SUBPROJECT,
618
624
  )
619
625
  output_result, fitted_estimator = model_trainer.train_fit_predict(
620
- pass_through_columns=self._get_pass_through_columns(dataset),
626
+ drop_input_cols=self._drop_input_cols,
621
627
  expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
622
628
  )
623
629
  self._sklearn_object = fitted_estimator
@@ -635,44 +641,6 @@ class LogisticRegression(BaseTransformer):
635
641
  assert self._sklearn_object is not None
636
642
  return self._sklearn_object.embedding_
637
643
 
638
-
639
- def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
640
- """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
641
- Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
642
- """
643
- output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
644
- if output_cols:
645
- output_cols = [
646
- identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
647
- for c in output_cols
648
- ]
649
- elif getattr(self._sklearn_object, "classes_", None) is None:
650
- output_cols = [output_cols_prefix]
651
- elif self._sklearn_object is not None:
652
- classes = self._sklearn_object.classes_
653
- if isinstance(classes, numpy.ndarray):
654
- output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
655
- elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
656
- # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
657
- output_cols = []
658
- for i, cl in enumerate(classes):
659
- # For binary classification, there is only one output column for each class
660
- # ndarray as the two classes are complementary.
661
- if len(cl) == 2:
662
- output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
663
- else:
664
- output_cols.extend([
665
- f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
666
- ])
667
- else:
668
- output_cols = []
669
-
670
- # Make sure column names are valid snowflake identifiers.
671
- assert output_cols is not None # Make MyPy happy
672
- rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
673
-
674
- return rv
675
-
676
644
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
677
645
  @telemetry.send_api_usage_telemetry(
678
646
  project=_PROJECT,
@@ -714,7 +682,7 @@ class LogisticRegression(BaseTransformer):
714
682
  transform_kwargs = dict(
715
683
  session=dataset._session,
716
684
  dependencies=self._deps,
717
- pass_through_cols=self._get_pass_through_columns(dataset),
685
+ drop_input_cols = self._drop_input_cols,
718
686
  expected_output_cols_type="float",
719
687
  )
720
688
 
@@ -781,7 +749,7 @@ class LogisticRegression(BaseTransformer):
781
749
  transform_kwargs = dict(
782
750
  session=dataset._session,
783
751
  dependencies=self._deps,
784
- pass_through_cols=self._get_pass_through_columns(dataset),
752
+ drop_input_cols = self._drop_input_cols,
785
753
  expected_output_cols_type="float",
786
754
  )
787
755
  elif isinstance(dataset, pd.DataFrame):
@@ -844,7 +812,7 @@ class LogisticRegression(BaseTransformer):
844
812
  transform_kwargs = dict(
845
813
  session=dataset._session,
846
814
  dependencies=self._deps,
847
- pass_through_cols=self._get_pass_through_columns(dataset),
815
+ drop_input_cols = self._drop_input_cols,
848
816
  expected_output_cols_type="float",
849
817
  )
850
818
 
@@ -909,7 +877,7 @@ class LogisticRegression(BaseTransformer):
909
877
  transform_kwargs = dict(
910
878
  session=dataset._session,
911
879
  dependencies=self._deps,
912
- pass_through_cols=self._get_pass_through_columns(dataset),
880
+ drop_input_cols = self._drop_input_cols,
913
881
  expected_output_cols_type="float",
914
882
  )
915
883
 
@@ -965,13 +933,17 @@ class LogisticRegression(BaseTransformer):
965
933
  transform_kwargs: ScoreKwargsTypedDict = dict()
966
934
 
967
935
  if isinstance(dataset, DataFrame):
936
+ self._deps = self._batch_inference_validate_snowpark(
937
+ dataset=dataset,
938
+ inference_method="score",
939
+ )
968
940
  selected_cols = self._get_active_columns()
969
941
  if len(selected_cols) > 0:
970
942
  dataset = dataset.select(selected_cols)
971
943
  assert isinstance(dataset._session, Session) # keep mypy happy
972
944
  transform_kwargs = dict(
973
945
  session=dataset._session,
974
- dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
946
+ dependencies=["snowflake-snowpark-python"] + self._deps,
975
947
  score_sproc_imports=['sklearn'],
976
948
  )
977
949
  elif isinstance(dataset, pd.DataFrame):
@@ -1045,9 +1017,9 @@ class LogisticRegression(BaseTransformer):
1045
1017
  transform_kwargs = dict(
1046
1018
  session = dataset._session,
1047
1019
  dependencies = self._deps,
1048
- pass_through_cols = self._get_pass_through_columns(dataset),
1049
- expected_output_cols_type = "array",
1050
- n_neighbors = n_neighbors,
1020
+ drop_input_cols = self._drop_input_cols,
1021
+ expected_output_cols_type="array",
1022
+ n_neighbors = n_neighbors,
1051
1023
  return_distance = return_distance
1052
1024
  )
1053
1025
  elif isinstance(dataset, pd.DataFrame):