snowflake-ml-python 1.6.1__py3-none-any.whl → 1.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212)
  1. snowflake/ml/_internal/telemetry.py +142 -20
  2. snowflake/ml/_internal/utils/identifier.py +48 -11
  3. snowflake/ml/_internal/utils/snowflake_env.py +23 -13
  4. snowflake/ml/_internal/utils/sql_identifier.py +1 -1
  5. snowflake/ml/_internal/utils/table_manager.py +19 -1
  6. snowflake/ml/_internal/utils/uri.py +2 -2
  7. snowflake/ml/data/data_connector.py +33 -7
  8. snowflake/ml/data/torch_utils.py +68 -0
  9. snowflake/ml/dataset/dataset.py +1 -3
  10. snowflake/ml/feature_store/feature_store.py +41 -17
  11. snowflake/ml/feature_store/feature_view.py +2 -2
  12. snowflake/ml/fileset/embedded_stage_fs.py +1 -1
  13. snowflake/ml/fileset/fileset.py +1 -1
  14. snowflake/ml/fileset/sfcfs.py +9 -3
  15. snowflake/ml/model/_client/model/model_version_impl.py +22 -7
  16. snowflake/ml/model/_client/ops/model_ops.py +39 -3
  17. snowflake/ml/model/_client/ops/service_ops.py +198 -7
  18. snowflake/ml/model/_client/service/model_deployment_spec.py +4 -5
  19. snowflake/ml/model/_client/service/model_deployment_spec_schema.py +1 -2
  20. snowflake/ml/model/_client/sql/service.py +85 -18
  21. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +1 -1
  22. snowflake/ml/model/_deploy_client/snowservice/deploy.py +3 -3
  23. snowflake/ml/model/_model_composer/model_composer.py +2 -0
  24. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +3 -8
  25. snowflake/ml/model/_packager/model_handlers/_utils.py +46 -14
  26. snowflake/ml/model/_packager/model_handlers/catboost.py +17 -15
  27. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +23 -15
  28. snowflake/ml/model/_packager/model_handlers/lightgbm.py +15 -57
  29. snowflake/ml/model/_packager/model_handlers/llm.py +4 -2
  30. snowflake/ml/model/_packager/model_handlers/model_objective_utils.py +116 -0
  31. snowflake/ml/model/_packager/model_handlers/sklearn.py +36 -24
  32. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +119 -6
  33. snowflake/ml/model/_packager/model_handlers/torchscript.py +2 -2
  34. snowflake/ml/model/_packager/model_handlers/xgboost.py +48 -48
  35. snowflake/ml/model/_packager/model_meta/model_meta.py +10 -7
  36. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +0 -8
  37. snowflake/ml/model/_packager/model_packager.py +2 -0
  38. snowflake/ml/model/_signatures/pytorch_handler.py +1 -1
  39. snowflake/ml/model/_signatures/utils.py +9 -0
  40. snowflake/ml/model/models/llm.py +3 -1
  41. snowflake/ml/model/type_hints.py +9 -1
  42. snowflake/ml/modeling/_internal/constants.py +1 -0
  43. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +5 -5
  44. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +9 -6
  45. snowflake/ml/modeling/_internal/model_specifications.py +2 -0
  46. snowflake/ml/modeling/_internal/model_trainer.py +1 -0
  47. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +2 -2
  48. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +5 -5
  49. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +113 -160
  50. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +60 -21
  51. snowflake/ml/modeling/cluster/affinity_propagation.py +60 -21
  52. snowflake/ml/modeling/cluster/agglomerative_clustering.py +60 -21
  53. snowflake/ml/modeling/cluster/birch.py +60 -21
  54. snowflake/ml/modeling/cluster/bisecting_k_means.py +60 -21
  55. snowflake/ml/modeling/cluster/dbscan.py +60 -21
  56. snowflake/ml/modeling/cluster/feature_agglomeration.py +60 -21
  57. snowflake/ml/modeling/cluster/k_means.py +60 -21
  58. snowflake/ml/modeling/cluster/mean_shift.py +60 -21
  59. snowflake/ml/modeling/cluster/mini_batch_k_means.py +60 -21
  60. snowflake/ml/modeling/cluster/optics.py +60 -21
  61. snowflake/ml/modeling/cluster/spectral_biclustering.py +60 -21
  62. snowflake/ml/modeling/cluster/spectral_clustering.py +60 -21
  63. snowflake/ml/modeling/cluster/spectral_coclustering.py +60 -21
  64. snowflake/ml/modeling/compose/column_transformer.py +60 -21
  65. snowflake/ml/modeling/compose/transformed_target_regressor.py +60 -21
  66. snowflake/ml/modeling/covariance/elliptic_envelope.py +60 -21
  67. snowflake/ml/modeling/covariance/empirical_covariance.py +60 -21
  68. snowflake/ml/modeling/covariance/graphical_lasso.py +60 -21
  69. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +60 -21
  70. snowflake/ml/modeling/covariance/ledoit_wolf.py +60 -21
  71. snowflake/ml/modeling/covariance/min_cov_det.py +60 -21
  72. snowflake/ml/modeling/covariance/oas.py +60 -21
  73. snowflake/ml/modeling/covariance/shrunk_covariance.py +60 -21
  74. snowflake/ml/modeling/decomposition/dictionary_learning.py +60 -21
  75. snowflake/ml/modeling/decomposition/factor_analysis.py +60 -21
  76. snowflake/ml/modeling/decomposition/fast_ica.py +60 -21
  77. snowflake/ml/modeling/decomposition/incremental_pca.py +60 -21
  78. snowflake/ml/modeling/decomposition/kernel_pca.py +60 -21
  79. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +60 -21
  80. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +60 -21
  81. snowflake/ml/modeling/decomposition/pca.py +60 -21
  82. snowflake/ml/modeling/decomposition/sparse_pca.py +60 -21
  83. snowflake/ml/modeling/decomposition/truncated_svd.py +60 -21
  84. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +60 -21
  85. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +60 -21
  86. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +60 -21
  87. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +60 -21
  88. snowflake/ml/modeling/ensemble/bagging_classifier.py +60 -21
  89. snowflake/ml/modeling/ensemble/bagging_regressor.py +60 -21
  90. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +60 -21
  91. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +60 -21
  92. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +60 -21
  93. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +60 -21
  94. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +60 -21
  95. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +60 -21
  96. snowflake/ml/modeling/ensemble/isolation_forest.py +60 -21
  97. snowflake/ml/modeling/ensemble/random_forest_classifier.py +60 -21
  98. snowflake/ml/modeling/ensemble/random_forest_regressor.py +60 -21
  99. snowflake/ml/modeling/ensemble/stacking_regressor.py +60 -21
  100. snowflake/ml/modeling/ensemble/voting_classifier.py +60 -21
  101. snowflake/ml/modeling/ensemble/voting_regressor.py +60 -21
  102. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +60 -21
  103. snowflake/ml/modeling/feature_selection/select_fdr.py +60 -21
  104. snowflake/ml/modeling/feature_selection/select_fpr.py +60 -21
  105. snowflake/ml/modeling/feature_selection/select_fwe.py +60 -21
  106. snowflake/ml/modeling/feature_selection/select_k_best.py +60 -21
  107. snowflake/ml/modeling/feature_selection/select_percentile.py +60 -21
  108. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +60 -21
  109. snowflake/ml/modeling/feature_selection/variance_threshold.py +60 -21
  110. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +60 -21
  111. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +60 -21
  112. snowflake/ml/modeling/impute/iterative_imputer.py +60 -21
  113. snowflake/ml/modeling/impute/knn_imputer.py +60 -21
  114. snowflake/ml/modeling/impute/missing_indicator.py +60 -21
  115. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +60 -21
  116. snowflake/ml/modeling/kernel_approximation/nystroem.py +60 -21
  117. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +60 -21
  118. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +60 -21
  119. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +60 -21
  120. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +60 -21
  121. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +60 -21
  122. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +60 -21
  123. snowflake/ml/modeling/linear_model/ard_regression.py +60 -21
  124. snowflake/ml/modeling/linear_model/bayesian_ridge.py +60 -21
  125. snowflake/ml/modeling/linear_model/elastic_net.py +60 -21
  126. snowflake/ml/modeling/linear_model/elastic_net_cv.py +60 -21
  127. snowflake/ml/modeling/linear_model/gamma_regressor.py +60 -21
  128. snowflake/ml/modeling/linear_model/huber_regressor.py +60 -21
  129. snowflake/ml/modeling/linear_model/lars.py +60 -21
  130. snowflake/ml/modeling/linear_model/lars_cv.py +60 -21
  131. snowflake/ml/modeling/linear_model/lasso.py +60 -21
  132. snowflake/ml/modeling/linear_model/lasso_cv.py +60 -21
  133. snowflake/ml/modeling/linear_model/lasso_lars.py +60 -21
  134. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +60 -21
  135. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +60 -21
  136. snowflake/ml/modeling/linear_model/linear_regression.py +60 -21
  137. snowflake/ml/modeling/linear_model/logistic_regression.py +60 -21
  138. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +60 -21
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +60 -21
  140. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +60 -21
  141. snowflake/ml/modeling/linear_model/multi_task_lasso.py +60 -21
  142. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +60 -21
  143. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +60 -21
  144. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +60 -21
  145. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +60 -21
  146. snowflake/ml/modeling/linear_model/perceptron.py +60 -21
  147. snowflake/ml/modeling/linear_model/poisson_regressor.py +60 -21
  148. snowflake/ml/modeling/linear_model/ransac_regressor.py +60 -21
  149. snowflake/ml/modeling/linear_model/ridge.py +60 -21
  150. snowflake/ml/modeling/linear_model/ridge_classifier.py +60 -21
  151. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +60 -21
  152. snowflake/ml/modeling/linear_model/ridge_cv.py +60 -21
  153. snowflake/ml/modeling/linear_model/sgd_classifier.py +60 -21
  154. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +60 -21
  155. snowflake/ml/modeling/linear_model/sgd_regressor.py +60 -21
  156. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +60 -21
  157. snowflake/ml/modeling/linear_model/tweedie_regressor.py +60 -21
  158. snowflake/ml/modeling/manifold/isomap.py +60 -21
  159. snowflake/ml/modeling/manifold/mds.py +60 -21
  160. snowflake/ml/modeling/manifold/spectral_embedding.py +60 -21
  161. snowflake/ml/modeling/manifold/tsne.py +60 -21
  162. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +60 -21
  163. snowflake/ml/modeling/mixture/gaussian_mixture.py +60 -21
  164. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +60 -21
  165. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +60 -21
  166. snowflake/ml/modeling/multiclass/output_code_classifier.py +60 -21
  167. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +60 -21
  168. snowflake/ml/modeling/naive_bayes/categorical_nb.py +60 -21
  169. snowflake/ml/modeling/naive_bayes/complement_nb.py +60 -21
  170. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +60 -21
  171. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +60 -21
  172. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +60 -21
  173. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +60 -21
  174. snowflake/ml/modeling/neighbors/kernel_density.py +60 -21
  175. snowflake/ml/modeling/neighbors/local_outlier_factor.py +60 -21
  176. snowflake/ml/modeling/neighbors/nearest_centroid.py +60 -21
  177. snowflake/ml/modeling/neighbors/nearest_neighbors.py +60 -21
  178. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +60 -21
  179. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +60 -21
  180. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +60 -21
  181. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +60 -21
  182. snowflake/ml/modeling/neural_network/mlp_classifier.py +60 -21
  183. snowflake/ml/modeling/neural_network/mlp_regressor.py +60 -21
  184. snowflake/ml/modeling/parameters/disable_model_tracer.py +5 -0
  185. snowflake/ml/modeling/pipeline/pipeline.py +1 -12
  186. snowflake/ml/modeling/preprocessing/polynomial_features.py +60 -21
  187. snowflake/ml/modeling/semi_supervised/label_propagation.py +60 -21
  188. snowflake/ml/modeling/semi_supervised/label_spreading.py +60 -21
  189. snowflake/ml/modeling/svm/linear_svc.py +60 -21
  190. snowflake/ml/modeling/svm/linear_svr.py +60 -21
  191. snowflake/ml/modeling/svm/nu_svc.py +60 -21
  192. snowflake/ml/modeling/svm/nu_svr.py +60 -21
  193. snowflake/ml/modeling/svm/svc.py +60 -21
  194. snowflake/ml/modeling/svm/svr.py +60 -21
  195. snowflake/ml/modeling/tree/decision_tree_classifier.py +60 -21
  196. snowflake/ml/modeling/tree/decision_tree_regressor.py +60 -21
  197. snowflake/ml/modeling/tree/extra_tree_classifier.py +60 -21
  198. snowflake/ml/modeling/tree/extra_tree_regressor.py +60 -21
  199. snowflake/ml/modeling/xgboost/xgb_classifier.py +63 -23
  200. snowflake/ml/modeling/xgboost/xgb_regressor.py +63 -23
  201. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +63 -23
  202. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +63 -23
  203. snowflake/ml/registry/_manager/model_manager.py +4 -0
  204. snowflake/ml/registry/model_registry.py +1 -1
  205. snowflake/ml/registry/registry.py +1 -2
  206. snowflake/ml/version.py +1 -1
  207. {snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.2.dist-info}/METADATA +23 -4
  208. {snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.2.dist-info}/RECORD +211 -209
  209. {snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.2.dist-info}/WHEEL +1 -1
  210. snowflake/ml/data/torch_dataset.py +0 -33
  211. {snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.2.dist-info}/LICENSE.txt +0 -0
  212. {snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.2.dist-info}/top_level.txt +0 -0
@@ -4,18 +4,17 @@
4
4
  #
5
5
  import inspect
6
6
  import os
7
- import posixpath
8
- from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
9
- from typing_extensions import TypeGuard
7
+ from typing import Iterable, Optional, Union, List, Any, Dict, Set, Tuple
10
8
  from uuid import uuid4
11
9
 
12
10
  import cloudpickle as cp
13
- import pandas as pd
14
11
  import numpy as np
12
+ import pandas as pd
15
13
  from numpy import typing as npt
16
14
 
17
15
 
18
16
  import numpy
17
+ import sklearn
19
18
  import xgboost
20
19
  from sklearn.utils.metaestimators import available_if
21
20
 
@@ -23,12 +22,11 @@ from snowflake.ml.modeling.framework.base import BaseTransformer, _process_cols
23
22
  from snowflake.ml._internal import telemetry
24
23
  from snowflake.ml._internal.exceptions import error_codes, exceptions, modeling_error_messages
25
24
  from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
26
- from snowflake.ml._internal.utils import pkg_version_utils, identifier
25
+ from snowflake.ml._internal.utils import identifier
27
26
  from snowflake.snowpark import DataFrame, Session
28
27
  from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
29
28
  from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
30
29
  from snowflake.ml.modeling._internal.transformer_protocols import (
31
- ModelTransformHandlers,
32
30
  BatchInferenceKwargsTypedDict,
33
31
  ScoreKwargsTypedDict
34
32
  )
@@ -361,7 +359,7 @@ class XGBRegressor(BaseTransformer):
361
359
  self.set_sample_weight_col(sample_weight_col)
362
360
  self._use_external_memory_version = use_external_memory_version
363
361
  self._batch_size = batch_size
364
- deps: Set[str] = set([f'numpy=={np.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
362
+ deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
365
363
 
366
364
  self._deps = list(deps)
367
365
 
@@ -694,12 +692,23 @@ class XGBRegressor(BaseTransformer):
694
692
  autogenerated=self._autogenerated,
695
693
  subproject=_SUBPROJECT,
696
694
  )
697
- output_result, fitted_estimator = model_trainer.train_fit_predict(
698
- drop_input_cols=self._drop_input_cols,
699
- expected_output_cols_list=(
700
- self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
701
- ),
695
+ expected_output_cols = (
696
+ self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
702
697
  )
698
+ if isinstance(dataset, DataFrame):
699
+ expected_output_cols, example_output_pd_df = self._align_expected_output(
700
+ "fit_predict", dataset, expected_output_cols, output_cols_prefix
701
+ )
702
+ output_result, fitted_estimator = model_trainer.train_fit_predict(
703
+ drop_input_cols=self._drop_input_cols,
704
+ expected_output_cols_list=expected_output_cols,
705
+ example_output_pd_df=example_output_pd_df,
706
+ )
707
+ else:
708
+ output_result, fitted_estimator = model_trainer.train_fit_predict(
709
+ drop_input_cols=self._drop_input_cols,
710
+ expected_output_cols_list=expected_output_cols,
711
+ )
703
712
  self._sklearn_object = fitted_estimator
704
713
  self._is_fitted = True
705
714
  return output_result
@@ -778,12 +787,41 @@ class XGBRegressor(BaseTransformer):
778
787
 
779
788
  return rv
780
789
 
781
- def _align_expected_output_names(
782
- self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
783
- ) -> List[str]:
790
+ def _align_expected_output(
791
+ self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str,
792
+ ) -> Tuple[List[str], pd.DataFrame]:
793
+ """ Run 1 line of data with the desired method, and return one tuple that consists of the output column names
794
+ and output dataframe with 1 line.
795
+ If the method is fit_predict, run 2 lines of data.
796
+ """
784
797
  # in case the inferred output column names dimension is different
785
798
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
786
- sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
799
+
800
+ # For fit_predict method, a minimum of 2 is required by MinCovDet, BayesianGaussianMixture
801
+ # so change the minimum of number of rows to 2
802
+ num_examples = 2
803
+ statement_params = telemetry.get_function_usage_statement_params(
804
+ project=_PROJECT,
805
+ subproject=_SUBPROJECT,
806
+ function_name=telemetry.get_statement_params_full_func_name(
807
+ inspect.currentframe(), XGBRegressor.__class__.__name__
808
+ ),
809
+ api_calls=[Session.call],
810
+ custom_tags={"autogen": True} if self._autogenerated else None,
811
+ )
812
+ if output_cols_prefix == "fit_predict_":
813
+ if hasattr(self._sklearn_object, "n_clusters"):
814
+ # cluster classes such as BisectingKMeansTest requires # of examples >= n_clusters
815
+ num_examples = self._sklearn_object.n_clusters
816
+ elif hasattr(self._sklearn_object, "min_samples"):
817
+ # OPTICS default min_samples 5, which requires at least 5 lines of data
818
+ num_examples = self._sklearn_object.min_samples
819
+ elif hasattr(self._sklearn_object, "n_neighbors") and hasattr(self._sklearn_object, "n_samples"):
820
+ # LocalOutlierFactor expects n_neighbors <= n_samples
821
+ num_examples = self._sklearn_object.n_neighbors
822
+ sample_pd_df = dataset.select(self.input_cols).limit(num_examples).to_pandas(statement_params=statement_params)
823
+ else:
824
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas(statement_params=statement_params)
787
825
 
788
826
  # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
789
827
  # seen during the fit.
@@ -795,12 +833,14 @@ class XGBRegressor(BaseTransformer):
795
833
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
796
834
  if self.sample_weight_col:
797
835
  output_df_columns_set -= set(self.sample_weight_col)
836
+
798
837
  # if the dimension of inferred output column names is correct; use it
799
838
  if len(expected_output_cols_list) == len(output_df_columns_set):
800
- return expected_output_cols_list
839
+ return expected_output_cols_list, output_df_pd
801
840
  # otherwise, use the sklearn estimator's output
802
841
  else:
803
- return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
842
+ expected_output_cols_list = sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
843
+ return expected_output_cols_list, output_df_pd[expected_output_cols_list]
804
844
 
805
845
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
806
846
  @telemetry.send_api_usage_telemetry(
@@ -846,7 +886,7 @@ class XGBRegressor(BaseTransformer):
846
886
  drop_input_cols=self._drop_input_cols,
847
887
  expected_output_cols_type="float",
848
888
  )
849
- expected_output_cols = self._align_expected_output_names(
889
+ expected_output_cols, _ = self._align_expected_output(
850
890
  inference_method, dataset, expected_output_cols, output_cols_prefix
851
891
  )
852
892
 
@@ -912,7 +952,7 @@ class XGBRegressor(BaseTransformer):
912
952
  drop_input_cols=self._drop_input_cols,
913
953
  expected_output_cols_type="float",
914
954
  )
915
- expected_output_cols = self._align_expected_output_names(
955
+ expected_output_cols, _ = self._align_expected_output(
916
956
  inference_method, dataset, expected_output_cols, output_cols_prefix
917
957
  )
918
958
  elif isinstance(dataset, pd.DataFrame):
@@ -975,7 +1015,7 @@ class XGBRegressor(BaseTransformer):
975
1015
  drop_input_cols=self._drop_input_cols,
976
1016
  expected_output_cols_type="float",
977
1017
  )
978
- expected_output_cols = self._align_expected_output_names(
1018
+ expected_output_cols, _ = self._align_expected_output(
979
1019
  inference_method, dataset, expected_output_cols, output_cols_prefix
980
1020
  )
981
1021
 
@@ -1040,7 +1080,7 @@ class XGBRegressor(BaseTransformer):
1040
1080
  drop_input_cols = self._drop_input_cols,
1041
1081
  expected_output_cols_type="float",
1042
1082
  )
1043
- expected_output_cols = self._align_expected_output_names(
1083
+ expected_output_cols, _ = self._align_expected_output(
1044
1084
  inference_method, dataset, expected_output_cols, output_cols_prefix
1045
1085
  )
1046
1086
 
@@ -1105,7 +1145,7 @@ class XGBRegressor(BaseTransformer):
1105
1145
  transform_kwargs = dict(
1106
1146
  session=dataset._session,
1107
1147
  dependencies=self._deps,
1108
- score_sproc_imports=['xgboost'],
1148
+ score_sproc_imports=['xgboost', 'sklearn'],
1109
1149
  )
1110
1150
  elif isinstance(dataset, pd.DataFrame):
1111
1151
  # pandas_handler.score() does not require any extra kwargs.
@@ -4,18 +4,17 @@
4
4
  #
5
5
  import inspect
6
6
  import os
7
- import posixpath
8
- from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
9
- from typing_extensions import TypeGuard
7
+ from typing import Iterable, Optional, Union, List, Any, Dict, Set, Tuple
10
8
  from uuid import uuid4
11
9
 
12
10
  import cloudpickle as cp
13
- import pandas as pd
14
11
  import numpy as np
12
+ import pandas as pd
15
13
  from numpy import typing as npt
16
14
 
17
15
 
18
16
  import numpy
17
+ import sklearn
19
18
  import xgboost
20
19
  from sklearn.utils.metaestimators import available_if
21
20
 
@@ -23,12 +22,11 @@ from snowflake.ml.modeling.framework.base import BaseTransformer, _process_cols
23
22
  from snowflake.ml._internal import telemetry
24
23
  from snowflake.ml._internal.exceptions import error_codes, exceptions, modeling_error_messages
25
24
  from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
26
- from snowflake.ml._internal.utils import pkg_version_utils, identifier
25
+ from snowflake.ml._internal.utils import identifier
27
26
  from snowflake.snowpark import DataFrame, Session
28
27
  from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
29
28
  from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
30
29
  from snowflake.ml.modeling._internal.transformer_protocols import (
31
- ModelTransformHandlers,
32
30
  BatchInferenceKwargsTypedDict,
33
31
  ScoreKwargsTypedDict
34
32
  )
@@ -363,7 +361,7 @@ class XGBRFClassifier(BaseTransformer):
363
361
  self.set_sample_weight_col(sample_weight_col)
364
362
  self._use_external_memory_version = use_external_memory_version
365
363
  self._batch_size = batch_size
366
- deps: Set[str] = set([f'numpy=={np.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
364
+ deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
367
365
 
368
366
  self._deps = list(deps)
369
367
 
@@ -699,12 +697,23 @@ class XGBRFClassifier(BaseTransformer):
699
697
  autogenerated=self._autogenerated,
700
698
  subproject=_SUBPROJECT,
701
699
  )
702
- output_result, fitted_estimator = model_trainer.train_fit_predict(
703
- drop_input_cols=self._drop_input_cols,
704
- expected_output_cols_list=(
705
- self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
706
- ),
700
+ expected_output_cols = (
701
+ self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
707
702
  )
703
+ if isinstance(dataset, DataFrame):
704
+ expected_output_cols, example_output_pd_df = self._align_expected_output(
705
+ "fit_predict", dataset, expected_output_cols, output_cols_prefix
706
+ )
707
+ output_result, fitted_estimator = model_trainer.train_fit_predict(
708
+ drop_input_cols=self._drop_input_cols,
709
+ expected_output_cols_list=expected_output_cols,
710
+ example_output_pd_df=example_output_pd_df,
711
+ )
712
+ else:
713
+ output_result, fitted_estimator = model_trainer.train_fit_predict(
714
+ drop_input_cols=self._drop_input_cols,
715
+ expected_output_cols_list=expected_output_cols,
716
+ )
708
717
  self._sklearn_object = fitted_estimator
709
718
  self._is_fitted = True
710
719
  return output_result
@@ -783,12 +792,41 @@ class XGBRFClassifier(BaseTransformer):
783
792
 
784
793
  return rv
785
794
 
786
- def _align_expected_output_names(
787
- self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
788
- ) -> List[str]:
795
+ def _align_expected_output(
796
+ self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str,
797
+ ) -> Tuple[List[str], pd.DataFrame]:
798
+ """ Run 1 line of data with the desired method, and return one tuple that consists of the output column names
799
+ and output dataframe with 1 line.
800
+ If the method is fit_predict, run 2 lines of data.
801
+ """
789
802
  # in case the inferred output column names dimension is different
790
803
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
791
- sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
804
+
805
+ # For fit_predict method, a minimum of 2 is required by MinCovDet, BayesianGaussianMixture
806
+ # so change the minimum of number of rows to 2
807
+ num_examples = 2
808
+ statement_params = telemetry.get_function_usage_statement_params(
809
+ project=_PROJECT,
810
+ subproject=_SUBPROJECT,
811
+ function_name=telemetry.get_statement_params_full_func_name(
812
+ inspect.currentframe(), XGBRFClassifier.__class__.__name__
813
+ ),
814
+ api_calls=[Session.call],
815
+ custom_tags={"autogen": True} if self._autogenerated else None,
816
+ )
817
+ if output_cols_prefix == "fit_predict_":
818
+ if hasattr(self._sklearn_object, "n_clusters"):
819
+ # cluster classes such as BisectingKMeansTest requires # of examples >= n_clusters
820
+ num_examples = self._sklearn_object.n_clusters
821
+ elif hasattr(self._sklearn_object, "min_samples"):
822
+ # OPTICS default min_samples 5, which requires at least 5 lines of data
823
+ num_examples = self._sklearn_object.min_samples
824
+ elif hasattr(self._sklearn_object, "n_neighbors") and hasattr(self._sklearn_object, "n_samples"):
825
+ # LocalOutlierFactor expects n_neighbors <= n_samples
826
+ num_examples = self._sklearn_object.n_neighbors
827
+ sample_pd_df = dataset.select(self.input_cols).limit(num_examples).to_pandas(statement_params=statement_params)
828
+ else:
829
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas(statement_params=statement_params)
792
830
 
793
831
  # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
794
832
  # seen during the fit.
@@ -800,12 +838,14 @@ class XGBRFClassifier(BaseTransformer):
800
838
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
801
839
  if self.sample_weight_col:
802
840
  output_df_columns_set -= set(self.sample_weight_col)
841
+
803
842
  # if the dimension of inferred output column names is correct; use it
804
843
  if len(expected_output_cols_list) == len(output_df_columns_set):
805
- return expected_output_cols_list
844
+ return expected_output_cols_list, output_df_pd
806
845
  # otherwise, use the sklearn estimator's output
807
846
  else:
808
- return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
847
+ expected_output_cols_list = sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
848
+ return expected_output_cols_list, output_df_pd[expected_output_cols_list]
809
849
 
810
850
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
811
851
  @telemetry.send_api_usage_telemetry(
@@ -853,7 +893,7 @@ class XGBRFClassifier(BaseTransformer):
853
893
  drop_input_cols=self._drop_input_cols,
854
894
  expected_output_cols_type="float",
855
895
  )
856
- expected_output_cols = self._align_expected_output_names(
896
+ expected_output_cols, _ = self._align_expected_output(
857
897
  inference_method, dataset, expected_output_cols, output_cols_prefix
858
898
  )
859
899
 
@@ -921,7 +961,7 @@ class XGBRFClassifier(BaseTransformer):
921
961
  drop_input_cols=self._drop_input_cols,
922
962
  expected_output_cols_type="float",
923
963
  )
924
- expected_output_cols = self._align_expected_output_names(
964
+ expected_output_cols, _ = self._align_expected_output(
925
965
  inference_method, dataset, expected_output_cols, output_cols_prefix
926
966
  )
927
967
  elif isinstance(dataset, pd.DataFrame):
@@ -984,7 +1024,7 @@ class XGBRFClassifier(BaseTransformer):
984
1024
  drop_input_cols=self._drop_input_cols,
985
1025
  expected_output_cols_type="float",
986
1026
  )
987
- expected_output_cols = self._align_expected_output_names(
1027
+ expected_output_cols, _ = self._align_expected_output(
988
1028
  inference_method, dataset, expected_output_cols, output_cols_prefix
989
1029
  )
990
1030
 
@@ -1049,7 +1089,7 @@ class XGBRFClassifier(BaseTransformer):
1049
1089
  drop_input_cols = self._drop_input_cols,
1050
1090
  expected_output_cols_type="float",
1051
1091
  )
1052
- expected_output_cols = self._align_expected_output_names(
1092
+ expected_output_cols, _ = self._align_expected_output(
1053
1093
  inference_method, dataset, expected_output_cols, output_cols_prefix
1054
1094
  )
1055
1095
 
@@ -1114,7 +1154,7 @@ class XGBRFClassifier(BaseTransformer):
1114
1154
  transform_kwargs = dict(
1115
1155
  session=dataset._session,
1116
1156
  dependencies=self._deps,
1117
- score_sproc_imports=['xgboost'],
1157
+ score_sproc_imports=['xgboost', 'sklearn'],
1118
1158
  )
1119
1159
  elif isinstance(dataset, pd.DataFrame):
1120
1160
  # pandas_handler.score() does not require any extra kwargs.
@@ -4,18 +4,17 @@
4
4
  #
5
5
  import inspect
6
6
  import os
7
- import posixpath
8
- from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
9
- from typing_extensions import TypeGuard
7
+ from typing import Iterable, Optional, Union, List, Any, Dict, Set, Tuple
10
8
  from uuid import uuid4
11
9
 
12
10
  import cloudpickle as cp
13
- import pandas as pd
14
11
  import numpy as np
12
+ import pandas as pd
15
13
  from numpy import typing as npt
16
14
 
17
15
 
18
16
  import numpy
17
+ import sklearn
19
18
  import xgboost
20
19
  from sklearn.utils.metaestimators import available_if
21
20
 
@@ -23,12 +22,11 @@ from snowflake.ml.modeling.framework.base import BaseTransformer, _process_cols
23
22
  from snowflake.ml._internal import telemetry
24
23
  from snowflake.ml._internal.exceptions import error_codes, exceptions, modeling_error_messages
25
24
  from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
26
- from snowflake.ml._internal.utils import pkg_version_utils, identifier
25
+ from snowflake.ml._internal.utils import identifier
27
26
  from snowflake.snowpark import DataFrame, Session
28
27
  from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
29
28
  from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
30
29
  from snowflake.ml.modeling._internal.transformer_protocols import (
31
- ModelTransformHandlers,
32
30
  BatchInferenceKwargsTypedDict,
33
31
  ScoreKwargsTypedDict
34
32
  )
@@ -363,7 +361,7 @@ class XGBRFRegressor(BaseTransformer):
363
361
  self.set_sample_weight_col(sample_weight_col)
364
362
  self._use_external_memory_version = use_external_memory_version
365
363
  self._batch_size = batch_size
366
- deps: Set[str] = set([f'numpy=={np.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
364
+ deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
367
365
 
368
366
  self._deps = list(deps)
369
367
 
@@ -699,12 +697,23 @@ class XGBRFRegressor(BaseTransformer):
699
697
  autogenerated=self._autogenerated,
700
698
  subproject=_SUBPROJECT,
701
699
  )
702
- output_result, fitted_estimator = model_trainer.train_fit_predict(
703
- drop_input_cols=self._drop_input_cols,
704
- expected_output_cols_list=(
705
- self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
706
- ),
700
+ expected_output_cols = (
701
+ self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
707
702
  )
703
+ if isinstance(dataset, DataFrame):
704
+ expected_output_cols, example_output_pd_df = self._align_expected_output(
705
+ "fit_predict", dataset, expected_output_cols, output_cols_prefix
706
+ )
707
+ output_result, fitted_estimator = model_trainer.train_fit_predict(
708
+ drop_input_cols=self._drop_input_cols,
709
+ expected_output_cols_list=expected_output_cols,
710
+ example_output_pd_df=example_output_pd_df,
711
+ )
712
+ else:
713
+ output_result, fitted_estimator = model_trainer.train_fit_predict(
714
+ drop_input_cols=self._drop_input_cols,
715
+ expected_output_cols_list=expected_output_cols,
716
+ )
708
717
  self._sklearn_object = fitted_estimator
709
718
  self._is_fitted = True
710
719
  return output_result
@@ -783,12 +792,41 @@ class XGBRFRegressor(BaseTransformer):
783
792
 
784
793
  return rv
785
794
 
786
- def _align_expected_output_names(
787
- self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
788
- ) -> List[str]:
795
+ def _align_expected_output(
796
+ self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str,
797
+ ) -> Tuple[List[str], pd.DataFrame]:
798
+ """ Run 1 line of data with the desired method, and return one tuple that consists of the output column names
799
+ and output dataframe with 1 line.
800
+ If the method is fit_predict, run 2 lines of data.
801
+ """
789
802
  # in case the inferred output column names dimension is different
790
803
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
791
- sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
804
+
805
+ # For fit_predict method, a minimum of 2 is required by MinCovDet, BayesianGaussianMixture
806
+ # so change the minimum of number of rows to 2
807
+ num_examples = 2
808
+ statement_params = telemetry.get_function_usage_statement_params(
809
+ project=_PROJECT,
810
+ subproject=_SUBPROJECT,
811
+ function_name=telemetry.get_statement_params_full_func_name(
812
+ inspect.currentframe(), XGBRFRegressor.__class__.__name__
813
+ ),
814
+ api_calls=[Session.call],
815
+ custom_tags={"autogen": True} if self._autogenerated else None,
816
+ )
817
+ if output_cols_prefix == "fit_predict_":
818
+ if hasattr(self._sklearn_object, "n_clusters"):
819
+ # cluster classes such as BisectingKMeansTest requires # of examples >= n_clusters
820
+ num_examples = self._sklearn_object.n_clusters
821
+ elif hasattr(self._sklearn_object, "min_samples"):
822
+ # OPTICS default min_samples 5, which requires at least 5 lines of data
823
+ num_examples = self._sklearn_object.min_samples
824
+ elif hasattr(self._sklearn_object, "n_neighbors") and hasattr(self._sklearn_object, "n_samples"):
825
+ # LocalOutlierFactor expects n_neighbors <= n_samples
826
+ num_examples = self._sklearn_object.n_neighbors
827
+ sample_pd_df = dataset.select(self.input_cols).limit(num_examples).to_pandas(statement_params=statement_params)
828
+ else:
829
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas(statement_params=statement_params)
792
830
 
793
831
  # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
794
832
  # seen during the fit.
@@ -800,12 +838,14 @@ class XGBRFRegressor(BaseTransformer):
800
838
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
801
839
  if self.sample_weight_col:
802
840
  output_df_columns_set -= set(self.sample_weight_col)
841
+
803
842
  # if the dimension of inferred output column names is correct; use it
804
843
  if len(expected_output_cols_list) == len(output_df_columns_set):
805
- return expected_output_cols_list
844
+ return expected_output_cols_list, output_df_pd
806
845
  # otherwise, use the sklearn estimator's output
807
846
  else:
808
- return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
847
+ expected_output_cols_list = sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
848
+ return expected_output_cols_list, output_df_pd[expected_output_cols_list]
809
849
 
810
850
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
811
851
  @telemetry.send_api_usage_telemetry(
@@ -851,7 +891,7 @@ class XGBRFRegressor(BaseTransformer):
851
891
  drop_input_cols=self._drop_input_cols,
852
892
  expected_output_cols_type="float",
853
893
  )
854
- expected_output_cols = self._align_expected_output_names(
894
+ expected_output_cols, _ = self._align_expected_output(
855
895
  inference_method, dataset, expected_output_cols, output_cols_prefix
856
896
  )
857
897
 
@@ -917,7 +957,7 @@ class XGBRFRegressor(BaseTransformer):
917
957
  drop_input_cols=self._drop_input_cols,
918
958
  expected_output_cols_type="float",
919
959
  )
920
- expected_output_cols = self._align_expected_output_names(
960
+ expected_output_cols, _ = self._align_expected_output(
921
961
  inference_method, dataset, expected_output_cols, output_cols_prefix
922
962
  )
923
963
  elif isinstance(dataset, pd.DataFrame):
@@ -980,7 +1020,7 @@ class XGBRFRegressor(BaseTransformer):
980
1020
  drop_input_cols=self._drop_input_cols,
981
1021
  expected_output_cols_type="float",
982
1022
  )
983
- expected_output_cols = self._align_expected_output_names(
1023
+ expected_output_cols, _ = self._align_expected_output(
984
1024
  inference_method, dataset, expected_output_cols, output_cols_prefix
985
1025
  )
986
1026
 
@@ -1045,7 +1085,7 @@ class XGBRFRegressor(BaseTransformer):
1045
1085
  drop_input_cols = self._drop_input_cols,
1046
1086
  expected_output_cols_type="float",
1047
1087
  )
1048
- expected_output_cols = self._align_expected_output_names(
1088
+ expected_output_cols, _ = self._align_expected_output(
1049
1089
  inference_method, dataset, expected_output_cols, output_cols_prefix
1050
1090
  )
1051
1091
 
@@ -1110,7 +1150,7 @@ class XGBRFRegressor(BaseTransformer):
1110
1150
  transform_kwargs = dict(
1111
1151
  session=dataset._session,
1112
1152
  dependencies=self._deps,
1113
- score_sproc_imports=['xgboost'],
1153
+ score_sproc_imports=['xgboost', 'sklearn'],
1114
1154
  )
1115
1155
  elif isinstance(dataset, pd.DataFrame):
1116
1156
  # pandas_handler.score() does not require any extra kwargs.
@@ -50,6 +50,7 @@ class ModelManager:
50
50
  sample_input_data: Optional[model_types.SupportedDataType] = None,
51
51
  code_paths: Optional[List[str]] = None,
52
52
  ext_modules: Optional[List[ModuleType]] = None,
53
+ model_objective: model_types.ModelObjective = model_types.ModelObjective.UNKNOWN,
53
54
  options: Optional[model_types.ModelSaveOption] = None,
54
55
  statement_params: Optional[Dict[str, Any]] = None,
55
56
  ) -> model_version_impl.ModelVersion:
@@ -89,6 +90,7 @@ class ModelManager:
89
90
  sample_input_data=sample_input_data,
90
91
  code_paths=code_paths,
91
92
  ext_modules=ext_modules,
93
+ model_objective=model_objective,
92
94
  options=options,
93
95
  statement_params=statement_params,
94
96
  )
@@ -108,6 +110,7 @@ class ModelManager:
108
110
  sample_input_data: Optional[model_types.SupportedDataType] = None,
109
111
  code_paths: Optional[List[str]] = None,
110
112
  ext_modules: Optional[List[ModuleType]] = None,
113
+ model_objective: model_types.ModelObjective = model_types.ModelObjective.UNKNOWN,
111
114
  options: Optional[model_types.ModelSaveOption] = None,
112
115
  statement_params: Optional[Dict[str, Any]] = None,
113
116
  ) -> model_version_impl.ModelVersion:
@@ -156,6 +159,7 @@ class ModelManager:
156
159
  code_paths=code_paths,
157
160
  ext_modules=ext_modules,
158
161
  options=options,
162
+ model_objective=model_objective,
159
163
  )
160
164
  statement_params = telemetry.add_statement_params_custom_tags(
161
165
  statement_params, model_metadata.telemetry_metadata()
@@ -576,7 +576,7 @@ fully integrated into the new registry.
576
576
  raw_stage_path = uri.get_snowflake_stage_path_from_uri(model_uri)
577
577
  if not raw_stage_path:
578
578
  return None
579
- (db, schema, stage, _) = identifier.parse_schema_level_object_identifier(raw_stage_path)
579
+ (db, schema, stage, _) = identifier.parse_snowflake_stage_path(raw_stage_path)
580
580
  return identifier.get_schema_level_object_identifier(db, schema, stage)
581
581
 
582
582
  def _list_selected_models(
@@ -244,8 +244,7 @@ class Registry:
244
244
  warnings.warn(
245
245
  "Models logged specifying `pip_requirements` can not be executed "
246
246
  "in Snowflake Warehouse where all dependencies are required to be retrieved "
247
- "from Snowflake Anaconda Channel. Specify model save option `include_pip_dependencies`"
248
- "to log model with pip dependencies.",
247
+ "from Snowflake Anaconda Channel.",
249
248
  category=UserWarning,
250
249
  stacklevel=1,
251
250
  )
snowflake/ml/version.py CHANGED
@@ -1 +1 @@
1
- VERSION="1.6.1"
1
+ VERSION="1.6.2"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: snowflake-ml-python
3
- Version: 1.6.1
3
+ Version: 1.6.2
4
4
  Summary: The machine learning client library that is used for interacting with Snowflake to build machine learning solutions.
5
5
  Author-email: "Snowflake, Inc" <support@snowflake.com>
6
6
  License:
@@ -253,7 +253,7 @@ Requires-Dist: snowflake-connector-python[pandas] <4,>=3.5.0
253
253
  Requires-Dist: snowflake-snowpark-python <2,>=1.17.0
254
254
  Requires-Dist: sqlparse <1,>=0.4
255
255
  Requires-Dist: typing-extensions <5,>=4.1.0
256
- Requires-Dist: xgboost <2,>=1.7.3
256
+ Requires-Dist: xgboost <2.1,>=1.7.3
257
257
  Provides-Extra: all
258
258
  Requires-Dist: catboost <2,>=1.2.0 ; extra == 'all'
259
259
  Requires-Dist: lightgbm <5,>=3.3.5 ; extra == 'all'
@@ -373,7 +373,27 @@ be compatibility issues. Server-side functionality that `snowflake-ml-python` de
373
373
 
374
374
  # Release History
375
375
 
376
- ## 1.6.1 (TBD)
376
+ ## 1.6.2 (TBD)
377
+
378
+ ### Bug Fixes
379
+
380
+ - Modeling: Support XGBoost 2.0.x (the `xgboost` requirement is relaxed to `<2.1,>=1.7.3`).
381
+
382
+ - Data: Fix multiple epoch iteration over `DataConnector.to_torch_datapipe()` DataPipes.
383
+ - Generic: Fix a bug where an invalid name passed to an argument that expects a fully qualified name was
384
+ parsed incorrectly. An exception is now raised instead.
385
+ - Model Explainability: Handle explanations for multiclass XGBoost classification models
386
+ - Model Explainability: Workarounds and better error handling for XGB>2.1.0 not working with SHAP==0.42.1
387
+
388
+ ### New Features
389
+
390
+ - Data: Add top-level exports for `DataConnector` and `DataSource` to `snowflake.ml.data`.
391
+ - Data: Add native batching support via `batch_size` and `drop_last_batch` arguments to `DataConnector.to_torch_dataset()`
392
+ - Feature Store: `update_feature_view()` supports taking a feature view object as an argument.
393
+
394
+ ### Behavior Changes
395
+
396
+ ## 1.6.1 (2024-08-12)
377
397
 
378
398
  ### Bug Fixes
379
399
 
@@ -390,7 +410,6 @@ be compatibility issues. Server-side functionality that `snowflake-ml-python` de
390
410
  ### New Features
391
411
 
392
412
  - Enable `set_params` to set the parameters of the underlying sklearn estimator, if the snowflake-ml model has been fit.
393
- - Data: Add top-level exports for `DataConnector` and `DataSource` to `snowflake.ml.data`.
394
413
  - Data: Add `snowflake.ml.data.ingestor_utils` module with utility functions helpful for `DataIngestor` implementations.
395
414
  - Data: Add new `to_torch_dataset()` connector to `DataConnector` to replace deprecated DataPipe.
396
415
  - Registry: Option to `enable_explainability` set to True by default for XGBoost, LightGBM and CatBoost as PuPr feature.