snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211)
  1. snowflake/ml/_internal/file_utils.py +3 -3
  2. snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
  3. snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
  4. snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
  5. snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
  6. snowflake/ml/_internal/telemetry.py +11 -2
  7. snowflake/ml/_internal/utils/formatting.py +1 -1
  8. snowflake/ml/feature_store/feature_store.py +15 -106
  9. snowflake/ml/fileset/sfcfs.py +4 -3
  10. snowflake/ml/fileset/stage_fs.py +18 -0
  11. snowflake/ml/model/_api.py +9 -9
  12. snowflake/ml/model/_client/model/model_version_impl.py +20 -15
  13. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
  14. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
  15. snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
  16. snowflake/ml/model/_model_composer/model_composer.py +10 -8
  17. snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
  18. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
  19. snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
  20. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
  21. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  22. snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
  23. snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
  24. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
  25. snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
  26. snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
  27. snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
  28. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
  29. snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
  30. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
  31. snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
  32. snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
  33. snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
  34. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  35. snowflake/ml/model/_packager/model_packager.py +8 -6
  36. snowflake/ml/model/custom_model.py +3 -1
  37. snowflake/ml/model/type_hints.py +13 -0
  38. snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
  39. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
  40. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
  41. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
  42. snowflake/ml/modeling/_internal/model_specifications.py +3 -1
  43. snowflake/ml/modeling/_internal/model_trainer.py +2 -2
  44. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
  45. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
  46. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
  47. snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
  48. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
  49. snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
  51. snowflake/ml/modeling/cluster/birch.py +33 -61
  52. snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
  53. snowflake/ml/modeling/cluster/dbscan.py +33 -61
  54. snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
  55. snowflake/ml/modeling/cluster/k_means.py +33 -61
  56. snowflake/ml/modeling/cluster/mean_shift.py +33 -61
  57. snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
  58. snowflake/ml/modeling/cluster/optics.py +33 -61
  59. snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
  60. snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
  61. snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
  62. snowflake/ml/modeling/compose/column_transformer.py +33 -61
  63. snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
  64. snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
  65. snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
  66. snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
  67. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
  68. snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
  69. snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
  70. snowflake/ml/modeling/covariance/oas.py +33 -61
  71. snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
  72. snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
  73. snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
  74. snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
  75. snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
  76. snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
  77. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
  78. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
  79. snowflake/ml/modeling/decomposition/pca.py +33 -61
  80. snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
  81. snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
  82. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
  83. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
  84. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
  85. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
  86. snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
  87. snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
  88. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
  89. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
  90. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
  91. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
  92. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
  93. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
  94. snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
  95. snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
  96. snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
  97. snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
  98. snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
  99. snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
  100. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
  101. snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
  102. snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
  103. snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
  104. snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
  105. snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
  106. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
  107. snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
  108. snowflake/ml/modeling/framework/base.py +55 -5
  109. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
  110. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
  111. snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
  112. snowflake/ml/modeling/impute/knn_imputer.py +33 -61
  113. snowflake/ml/modeling/impute/missing_indicator.py +33 -61
  114. snowflake/ml/modeling/impute/simple_imputer.py +4 -15
  115. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
  116. snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
  117. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
  118. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
  119. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
  120. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
  121. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
  122. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
  123. snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
  124. snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
  125. snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
  126. snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
  127. snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
  128. snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
  129. snowflake/ml/modeling/linear_model/lars.py +33 -61
  130. snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
  131. snowflake/ml/modeling/linear_model/lasso.py +33 -61
  132. snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
  133. snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
  134. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
  135. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
  136. snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
  137. snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
  138. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
  140. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
  141. snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
  142. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
  143. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
  144. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
  145. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
  146. snowflake/ml/modeling/linear_model/perceptron.py +33 -61
  147. snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
  148. snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
  149. snowflake/ml/modeling/linear_model/ridge.py +33 -61
  150. snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
  151. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
  152. snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
  153. snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
  154. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
  155. snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
  156. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
  157. snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
  158. snowflake/ml/modeling/manifold/isomap.py +33 -61
  159. snowflake/ml/modeling/manifold/mds.py +33 -61
  160. snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
  161. snowflake/ml/modeling/manifold/tsne.py +33 -61
  162. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
  163. snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
  164. snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
  165. snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
  166. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
  167. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
  168. snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
  169. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
  170. snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
  171. snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
  172. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
  173. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
  174. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
  175. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
  176. snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
  177. snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
  178. snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
  179. snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
  180. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
  181. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
  182. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
  183. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
  184. snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
  185. snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
  186. snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
  187. snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
  188. snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
  189. snowflake/ml/modeling/svm/linear_svc.py +33 -61
  190. snowflake/ml/modeling/svm/linear_svr.py +33 -61
  191. snowflake/ml/modeling/svm/nu_svc.py +33 -61
  192. snowflake/ml/modeling/svm/nu_svr.py +33 -61
  193. snowflake/ml/modeling/svm/svc.py +33 -61
  194. snowflake/ml/modeling/svm/svr.py +33 -61
  195. snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
  196. snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
  197. snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
  198. snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
  199. snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
  200. snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
  201. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
  202. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
  203. snowflake/ml/registry/_manager/model_manager.py +6 -2
  204. snowflake/ml/registry/model_registry.py +100 -27
  205. snowflake/ml/registry/registry.py +6 -2
  206. snowflake/ml/version.py +1 -1
  207. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
  208. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
  209. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
  210. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
  211. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
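
Most of the churn across the ~200 generated modeling classes is one mechanical refactor, shown three times in the excerpts that follow (Lars, LarsCV, Lasso): the per-estimator `_get_pass_through_columns` helper is deleted, and call sites forward the boolean `drop_input_cols` flag to the inference handlers instead of a precomputed column list. As a minimal sketch, here is the retention rule the deleted helper implemented, rendered as a hypothetical standalone function (the 1.4.0 handler that now applies this rule is not part of this diff):

    from typing import List

    # Toy reproduction of the deleted 1.3.0 helper: the "pass-through" columns
    # are every dataset column that is not an output column, or nothing at all
    # when drop_input_cols is set. (Set difference, so order is unspecified.)
    def pass_through_columns(dataset_columns: List[str], output_cols: List[str],
                             drop_input_cols: bool) -> List[str]:
        if drop_input_cols:
            return []
        return list(set(dataset_columns) - set(output_cols))

    # 1.3.0 call sites shipped the computed list:
    #     pass_through_cols=self._get_pass_through_columns(dataset)
    # 1.4.0 call sites forward only the flag and let the handler derive the rest:
    #     drop_input_cols=self._drop_input_cols
    print(pass_through_columns(["A", "B", "OUTPUT"], ["OUTPUT"], False))  # ['A', 'B'] in some order
    print(pass_through_columns(["A", "B", "OUTPUT"], ["OUTPUT"], True))   # []
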
@@ -322,18 +322,24 @@ class Lars(BaseTransformer):
  self._get_model_signatures(dataset)
  return self

- def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
- if self._drop_input_cols:
- return []
- else:
- return list(set(dataset.columns) - set(self.output_cols))
-
  def _batch_inference_validate_snowpark(
  self,
  dataset: DataFrame,
  inference_method: str,
  ) -> List[str]:
- """Util method to run validate that batch inference can be run on a snowpark dataframe.
+ """Util method to run validate that batch inference can be run on a snowpark dataframe and
+ return the available package that exists in the snowflake anaconda channel
+
+ Args:
+ dataset: snowpark dataframe
+ inference_method: the inference method such as predict, score...
+
+ Raises:
+ SnowflakeMLException: If the estimator is not fitted, raise error
+ SnowflakeMLException: If the session is None, raise error
+
+ Returns:
+ A list of available package that exists in the snowflake anaconda channel
  """
  if not self._is_fitted:
  raise exceptions.SnowflakeMLException(
@@ -407,7 +413,7 @@ class Lars(BaseTransformer):
  transform_kwargs = dict(
  session = dataset._session,
  dependencies = self._deps,
- pass_through_cols = self._get_pass_through_columns(dataset),
+ drop_input_cols = self._drop_input_cols,
  expected_output_cols_type = expected_type_inferred,
  )

@@ -467,16 +473,16 @@ class Lars(BaseTransformer):
  # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
  # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
  # each row containing a list of values.
- expected_dtype = "ARRAY"
+ expected_dtype = "array"

  # If we were unable to assign a type to this transform in the factory, infer the type here.
  if expected_dtype == "":
- # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+ # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
  if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
- expected_dtype = "ARRAY"
- # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+ expected_dtype = "array"
+ # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
- expected_dtype = "ARRAY"
+ expected_dtype = "array"
  else:
  output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
  # We can only infer the output types from the input types if the following two statemetns are true:
@@ -494,7 +500,7 @@ class Lars(BaseTransformer):
  transform_kwargs = dict(
  session = dataset._session,
  dependencies = self._deps,
- pass_through_cols = self._get_pass_through_columns(dataset),
+ drop_input_cols = self._drop_input_cols,
  expected_output_cols_type = expected_dtype,
  )

@@ -545,7 +551,7 @@ class Lars(BaseTransformer):
  subproject=_SUBPROJECT,
  )
  output_result, fitted_estimator = model_trainer.train_fit_predict(
- pass_through_columns=self._get_pass_through_columns(dataset),
+ drop_input_cols=self._drop_input_cols,
  expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
  )
  self._sklearn_object = fitted_estimator
@@ -563,44 +569,6 @@ class Lars(BaseTransformer):
  assert self._sklearn_object is not None
  return self._sklearn_object.embedding_

-
- def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
- """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
- Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
- """
- output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
- if output_cols:
- output_cols = [
- identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
- for c in output_cols
- ]
- elif getattr(self._sklearn_object, "classes_", None) is None:
- output_cols = [output_cols_prefix]
- elif self._sklearn_object is not None:
- classes = self._sklearn_object.classes_
- if isinstance(classes, numpy.ndarray):
- output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
- elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
- # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
- output_cols = []
- for i, cl in enumerate(classes):
- # For binary classification, there is only one output column for each class
- # ndarray as the two classes are complementary.
- if len(cl) == 2:
- output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
- else:
- output_cols.extend([
- f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
- ])
- else:
- output_cols = []
-
- # Make sure column names are valid snowflake identifiers.
- assert output_cols is not None # Make MyPy happy
- rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
- return rv
-
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
  @telemetry.send_api_usage_telemetry(
  project=_PROJECT,
@@ -640,7 +608,7 @@ class Lars(BaseTransformer):
  transform_kwargs = dict(
  session=dataset._session,
  dependencies=self._deps,
- pass_through_cols=self._get_pass_through_columns(dataset),
+ drop_input_cols = self._drop_input_cols,
  expected_output_cols_type="float",
  )

@@ -705,7 +673,7 @@ class Lars(BaseTransformer):
  transform_kwargs = dict(
  session=dataset._session,
  dependencies=self._deps,
- pass_through_cols=self._get_pass_through_columns(dataset),
+ drop_input_cols = self._drop_input_cols,
  expected_output_cols_type="float",
  )
  elif isinstance(dataset, pd.DataFrame):
@@ -766,7 +734,7 @@ class Lars(BaseTransformer):
  transform_kwargs = dict(
  session=dataset._session,
  dependencies=self._deps,
- pass_through_cols=self._get_pass_through_columns(dataset),
+ drop_input_cols = self._drop_input_cols,
  expected_output_cols_type="float",
  )

@@ -831,7 +799,7 @@ class Lars(BaseTransformer):
  transform_kwargs = dict(
  session=dataset._session,
  dependencies=self._deps,
- pass_through_cols=self._get_pass_through_columns(dataset),
+ drop_input_cols = self._drop_input_cols,
  expected_output_cols_type="float",
  )

@@ -887,13 +855,17 @@ class Lars(BaseTransformer):
  transform_kwargs: ScoreKwargsTypedDict = dict()

  if isinstance(dataset, DataFrame):
+ self._deps = self._batch_inference_validate_snowpark(
+ dataset=dataset,
+ inference_method="score",
+ )
  selected_cols = self._get_active_columns()
  if len(selected_cols) > 0:
  dataset = dataset.select(selected_cols)
  assert isinstance(dataset._session, Session) # keep mypy happy
  transform_kwargs = dict(
  session=dataset._session,
- dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+ dependencies=["snowflake-snowpark-python"] + self._deps,
  score_sproc_imports=['sklearn'],
  )
  elif isinstance(dataset, pd.DataFrame):
@@ -967,9 +939,9 @@ class Lars(BaseTransformer):
  transform_kwargs = dict(
  session = dataset._session,
  dependencies = self._deps,
- pass_through_cols = self._get_pass_through_columns(dataset),
- expected_output_cols_type = "array",
- n_neighbors = n_neighbors,
+ drop_input_cols = self._drop_input_cols,
+ expected_output_cols_type="array",
+ n_neighbors = n_neighbors,
  return_distance = return_distance
  )
  elif isinstance(dataset, pd.DataFrame):
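
The other large deletion repeated in each class (seen above for Lars and again below for LarsCV and Lasso) is `_get_output_column_names`, which derives output column names from a prefix and the fitted estimator's `classes_`. Nothing in this section shows where it went; the +55 lines landing in `snowflake/ml/modeling/framework/base.py` in the file list above suggest the logic was hoisted into the shared base class. For reference, a hypothetical standalone rendering of the naming rule the deleted code implemented, with the Snowflake identifier plumbing stripped out:

    from typing import List, Optional, Sequence

    import numpy

    def output_column_names(prefix: str, classes: Optional[Sequence]) -> List[str]:
        # Non-classifiers have no classes_: a single column named by the prefix.
        if classes is None:
            return [prefix]
        # Single-output classifier: one column per class.
        if isinstance(classes, numpy.ndarray):
            return [f"{prefix}{c}" for c in classes.tolist()]
        # Multioutput classifier: classes_ is a list of ndarrays, one per output.
        cols: List[str] = []
        for i, cl in enumerate(classes):
            if len(cl) == 2:
                # Binary outputs are complementary, so one column suffices.
                cols.append(f"{prefix}{i}_{cl[0]}")
            else:
                cols.extend(f"{prefix}{i}_{c}" for c in cl.tolist())
        return cols

    print(output_column_names("PREDICT_PROBA_", numpy.array([0, 1, 2])))
    # ['PREDICT_PROBA_0', 'PREDICT_PROBA_1', 'PREDICT_PROBA_2']
    print(output_column_names("PREDICT_PROBA_", [numpy.array([0, 1]), numpy.array([0, 1, 2])]))
    # ['PREDICT_PROBA_0_0', 'PREDICT_PROBA_1_0', 'PREDICT_PROBA_1_1', 'PREDICT_PROBA_1_2']
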
@@ -330,18 +330,24 @@ class LarsCV(BaseTransformer):
  self._get_model_signatures(dataset)
  return self

- def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
- if self._drop_input_cols:
- return []
- else:
- return list(set(dataset.columns) - set(self.output_cols))
-
  def _batch_inference_validate_snowpark(
  self,
  dataset: DataFrame,
  inference_method: str,
  ) -> List[str]:
- """Util method to run validate that batch inference can be run on a snowpark dataframe.
+ """Util method to run validate that batch inference can be run on a snowpark dataframe and
+ return the available package that exists in the snowflake anaconda channel
+
+ Args:
+ dataset: snowpark dataframe
+ inference_method: the inference method such as predict, score...
+
+ Raises:
+ SnowflakeMLException: If the estimator is not fitted, raise error
+ SnowflakeMLException: If the session is None, raise error
+
+ Returns:
+ A list of available package that exists in the snowflake anaconda channel
  """
  if not self._is_fitted:
  raise exceptions.SnowflakeMLException(
@@ -415,7 +421,7 @@ class LarsCV(BaseTransformer):
  transform_kwargs = dict(
  session = dataset._session,
  dependencies = self._deps,
- pass_through_cols = self._get_pass_through_columns(dataset),
+ drop_input_cols = self._drop_input_cols,
  expected_output_cols_type = expected_type_inferred,
  )

@@ -475,16 +481,16 @@ class LarsCV(BaseTransformer):
  # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
  # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
  # each row containing a list of values.
- expected_dtype = "ARRAY"
+ expected_dtype = "array"

  # If we were unable to assign a type to this transform in the factory, infer the type here.
  if expected_dtype == "":
- # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+ # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
  if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
- expected_dtype = "ARRAY"
- # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+ expected_dtype = "array"
+ # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
- expected_dtype = "ARRAY"
+ expected_dtype = "array"
  else:
  output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
  # We can only infer the output types from the input types if the following two statemetns are true:
@@ -502,7 +508,7 @@ class LarsCV(BaseTransformer):
  transform_kwargs = dict(
  session = dataset._session,
  dependencies = self._deps,
- pass_through_cols = self._get_pass_through_columns(dataset),
+ drop_input_cols = self._drop_input_cols,
  expected_output_cols_type = expected_dtype,
  )

@@ -553,7 +559,7 @@ class LarsCV(BaseTransformer):
  subproject=_SUBPROJECT,
  )
  output_result, fitted_estimator = model_trainer.train_fit_predict(
- pass_through_columns=self._get_pass_through_columns(dataset),
+ drop_input_cols=self._drop_input_cols,
  expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
  )
  self._sklearn_object = fitted_estimator
@@ -571,44 +577,6 @@ class LarsCV(BaseTransformer):
  assert self._sklearn_object is not None
  return self._sklearn_object.embedding_

-
- def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
- """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
- Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
- """
- output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
- if output_cols:
- output_cols = [
- identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
- for c in output_cols
- ]
- elif getattr(self._sklearn_object, "classes_", None) is None:
- output_cols = [output_cols_prefix]
- elif self._sklearn_object is not None:
- classes = self._sklearn_object.classes_
- if isinstance(classes, numpy.ndarray):
- output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
- elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
- # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
- output_cols = []
- for i, cl in enumerate(classes):
- # For binary classification, there is only one output column for each class
- # ndarray as the two classes are complementary.
- if len(cl) == 2:
- output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
- else:
- output_cols.extend([
- f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
- ])
- else:
- output_cols = []
-
- # Make sure column names are valid snowflake identifiers.
- assert output_cols is not None # Make MyPy happy
- rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
- return rv
-
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
  @telemetry.send_api_usage_telemetry(
  project=_PROJECT,
@@ -648,7 +616,7 @@ class LarsCV(BaseTransformer):
  transform_kwargs = dict(
  session=dataset._session,
  dependencies=self._deps,
- pass_through_cols=self._get_pass_through_columns(dataset),
+ drop_input_cols = self._drop_input_cols,
  expected_output_cols_type="float",
  )

@@ -713,7 +681,7 @@ class LarsCV(BaseTransformer):
  transform_kwargs = dict(
  session=dataset._session,
  dependencies=self._deps,
- pass_through_cols=self._get_pass_through_columns(dataset),
+ drop_input_cols = self._drop_input_cols,
  expected_output_cols_type="float",
  )
  elif isinstance(dataset, pd.DataFrame):
@@ -774,7 +742,7 @@ class LarsCV(BaseTransformer):
  transform_kwargs = dict(
  session=dataset._session,
  dependencies=self._deps,
- pass_through_cols=self._get_pass_through_columns(dataset),
+ drop_input_cols = self._drop_input_cols,
  expected_output_cols_type="float",
  )

@@ -839,7 +807,7 @@ class LarsCV(BaseTransformer):
  transform_kwargs = dict(
  session=dataset._session,
  dependencies=self._deps,
- pass_through_cols=self._get_pass_through_columns(dataset),
+ drop_input_cols = self._drop_input_cols,
  expected_output_cols_type="float",
  )

@@ -895,13 +863,17 @@ class LarsCV(BaseTransformer):
  transform_kwargs: ScoreKwargsTypedDict = dict()

  if isinstance(dataset, DataFrame):
+ self._deps = self._batch_inference_validate_snowpark(
+ dataset=dataset,
+ inference_method="score",
+ )
  selected_cols = self._get_active_columns()
  if len(selected_cols) > 0:
  dataset = dataset.select(selected_cols)
  assert isinstance(dataset._session, Session) # keep mypy happy
  transform_kwargs = dict(
  session=dataset._session,
- dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+ dependencies=["snowflake-snowpark-python"] + self._deps,
  score_sproc_imports=['sklearn'],
  )
  elif isinstance(dataset, pd.DataFrame):
@@ -975,9 +947,9 @@ class LarsCV(BaseTransformer):
  transform_kwargs = dict(
  session = dataset._session,
  dependencies = self._deps,
- pass_through_cols = self._get_pass_through_columns(dataset),
- expected_output_cols_type = "array",
- n_neighbors = n_neighbors,
+ drop_input_cols = self._drop_input_cols,
+ expected_output_cols_type="array",
+ n_neighbors = n_neighbors,
  return_distance = return_distance
  )
  elif isinstance(dataset, pd.DataFrame):
@@ -323,18 +323,24 @@ class Lasso(BaseTransformer):
  self._get_model_signatures(dataset)
  return self

- def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
- if self._drop_input_cols:
- return []
- else:
- return list(set(dataset.columns) - set(self.output_cols))
-
  def _batch_inference_validate_snowpark(
  self,
  dataset: DataFrame,
  inference_method: str,
  ) -> List[str]:
- """Util method to run validate that batch inference can be run on a snowpark dataframe.
+ """Util method to run validate that batch inference can be run on a snowpark dataframe and
+ return the available package that exists in the snowflake anaconda channel
+
+ Args:
+ dataset: snowpark dataframe
+ inference_method: the inference method such as predict, score...
+
+ Raises:
+ SnowflakeMLException: If the estimator is not fitted, raise error
+ SnowflakeMLException: If the session is None, raise error
+
+ Returns:
+ A list of available package that exists in the snowflake anaconda channel
  """
  if not self._is_fitted:
  raise exceptions.SnowflakeMLException(
@@ -408,7 +414,7 @@ class Lasso(BaseTransformer):
  transform_kwargs = dict(
  session = dataset._session,
  dependencies = self._deps,
- pass_through_cols = self._get_pass_through_columns(dataset),
+ drop_input_cols = self._drop_input_cols,
  expected_output_cols_type = expected_type_inferred,
  )

@@ -468,16 +474,16 @@ class Lasso(BaseTransformer):
  # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
  # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
  # each row containing a list of values.
- expected_dtype = "ARRAY"
+ expected_dtype = "array"

  # If we were unable to assign a type to this transform in the factory, infer the type here.
  if expected_dtype == "":
- # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+ # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
  if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
- expected_dtype = "ARRAY"
- # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+ expected_dtype = "array"
+ # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
- expected_dtype = "ARRAY"
+ expected_dtype = "array"
  else:
  output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
  # We can only infer the output types from the input types if the following two statemetns are true:
@@ -495,7 +501,7 @@ class Lasso(BaseTransformer):
  transform_kwargs = dict(
  session = dataset._session,
  dependencies = self._deps,
- pass_through_cols = self._get_pass_through_columns(dataset),
+ drop_input_cols = self._drop_input_cols,
  expected_output_cols_type = expected_dtype,
  )

@@ -546,7 +552,7 @@ class Lasso(BaseTransformer):
  subproject=_SUBPROJECT,
  )
  output_result, fitted_estimator = model_trainer.train_fit_predict(
- pass_through_columns=self._get_pass_through_columns(dataset),
+ drop_input_cols=self._drop_input_cols,
  expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
  )
  self._sklearn_object = fitted_estimator
@@ -564,44 +570,6 @@ class Lasso(BaseTransformer):
  assert self._sklearn_object is not None
  return self._sklearn_object.embedding_

-
- def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
- """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
- Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
- """
- output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
- if output_cols:
- output_cols = [
- identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
- for c in output_cols
- ]
- elif getattr(self._sklearn_object, "classes_", None) is None:
- output_cols = [output_cols_prefix]
- elif self._sklearn_object is not None:
- classes = self._sklearn_object.classes_
- if isinstance(classes, numpy.ndarray):
- output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
- elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
- # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
- output_cols = []
- for i, cl in enumerate(classes):
- # For binary classification, there is only one output column for each class
- # ndarray as the two classes are complementary.
- if len(cl) == 2:
- output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
- else:
- output_cols.extend([
- f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
- ])
- else:
- output_cols = []
-
- # Make sure column names are valid snowflake identifiers.
- assert output_cols is not None # Make MyPy happy
- rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
- return rv
-
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
  @telemetry.send_api_usage_telemetry(
  project=_PROJECT,
@@ -641,7 +609,7 @@ class Lasso(BaseTransformer):
  transform_kwargs = dict(
  session=dataset._session,
  dependencies=self._deps,
- pass_through_cols=self._get_pass_through_columns(dataset),
+ drop_input_cols = self._drop_input_cols,
  expected_output_cols_type="float",
  )

@@ -706,7 +674,7 @@ class Lasso(BaseTransformer):
  transform_kwargs = dict(
  session=dataset._session,
  dependencies=self._deps,
- pass_through_cols=self._get_pass_through_columns(dataset),
+ drop_input_cols = self._drop_input_cols,
  expected_output_cols_type="float",
  )
  elif isinstance(dataset, pd.DataFrame):
@@ -767,7 +735,7 @@ class Lasso(BaseTransformer):
  transform_kwargs = dict(
  session=dataset._session,
  dependencies=self._deps,
- pass_through_cols=self._get_pass_through_columns(dataset),
+ drop_input_cols = self._drop_input_cols,
  expected_output_cols_type="float",
  )

@@ -832,7 +800,7 @@ class Lasso(BaseTransformer):
  transform_kwargs = dict(
  session=dataset._session,
  dependencies=self._deps,
- pass_through_cols=self._get_pass_through_columns(dataset),
+ drop_input_cols = self._drop_input_cols,
  expected_output_cols_type="float",
  )

@@ -888,13 +856,17 @@ class Lasso(BaseTransformer):
  transform_kwargs: ScoreKwargsTypedDict = dict()

  if isinstance(dataset, DataFrame):
+ self._deps = self._batch_inference_validate_snowpark(
+ dataset=dataset,
+ inference_method="score",
+ )
  selected_cols = self._get_active_columns()
  if len(selected_cols) > 0:
  dataset = dataset.select(selected_cols)
  assert isinstance(dataset._session, Session) # keep mypy happy
  transform_kwargs = dict(
  session=dataset._session,
- dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+ dependencies=["snowflake-snowpark-python"] + self._deps,
  score_sproc_imports=['sklearn'],
  )
  elif isinstance(dataset, pd.DataFrame):
@@ -968,9 +940,9 @@ class Lasso(BaseTransformer):
  transform_kwargs = dict(
  session = dataset._session,
  dependencies = self._deps,
- pass_through_cols = self._get_pass_through_columns(dataset),
- expected_output_cols_type = "array",
- n_neighbors = n_neighbors,
+ drop_input_cols = self._drop_input_cols,
+ expected_output_cols_type="array",
+ n_neighbors = n_neighbors,
  return_distance = return_distance
  )
  elif isinstance(dataset, pd.DataFrame):
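
Finally, the score() hunks in all three classes share the same behavioral change: for Snowpark inputs, scoring now runs the same validation as the predict paths, and the scoring sproc's dependency list becomes the channel-validated self._deps rather than the raw self._get_dependencies() output. A condensed sketch of the new branch, assuming an estimator object exposing the methods shown in the hunks above (the surrounding method body is not part of this diff):

    from typing import Any, Dict

    from snowflake.snowpark import DataFrame, Session

    def score_snowpark_kwargs(estimator: Any, dataset: DataFrame) -> Dict[str, Any]:
        """Hypothetical condensation of the 1.4.0 score() Snowpark branch."""
        # New in 1.4.0: fail fast on an unfitted estimator or a missing session,
        # and capture the packages available in the Snowflake Anaconda channel.
        estimator._deps = estimator._batch_inference_validate_snowpark(
            dataset=dataset,
            inference_method="score",
        )
        selected_cols = estimator._get_active_columns()
        if len(selected_cols) > 0:
            dataset = dataset.select(selected_cols)
        assert isinstance(dataset._session, Session)  # keep mypy happy
        return dict(
            session=dataset._session,
            # 1.3.0 built this from estimator._get_dependencies(), skipping validation.
            dependencies=["snowflake-snowpark-python"] + estimator._deps,
            score_sproc_imports=["sklearn"],
        )
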