snowflake-ml-python 1.6.0__py3-none-any.whl → 1.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (252)
  1. snowflake/cortex/_complete.py +7 -33
  2. snowflake/ml/_internal/env_utils.py +11 -5
  3. snowflake/ml/_internal/exceptions/modeling_error_messages.py +4 -1
  4. snowflake/ml/_internal/telemetry.py +156 -20
  5. snowflake/ml/_internal/utils/identifier.py +48 -11
  6. snowflake/ml/_internal/utils/pkg_version_utils.py +8 -22
  7. snowflake/ml/_internal/utils/snowflake_env.py +23 -13
  8. snowflake/ml/_internal/utils/sql_identifier.py +1 -1
  9. snowflake/ml/_internal/utils/table_manager.py +19 -1
  10. snowflake/ml/_internal/utils/uri.py +2 -2
  11. snowflake/ml/data/_internal/arrow_ingestor.py +66 -10
  12. snowflake/ml/data/data_connector.py +88 -9
  13. snowflake/ml/data/data_ingestor.py +18 -1
  14. snowflake/ml/data/{_internal/ingestor_utils.py → ingestor_utils.py} +5 -1
  15. snowflake/ml/data/torch_utils.py +68 -0
  16. snowflake/ml/dataset/dataset.py +1 -3
  17. snowflake/ml/dataset/dataset_metadata.py +3 -1
  18. snowflake/ml/dataset/dataset_reader.py +9 -3
  19. snowflake/ml/feature_store/examples/airline_features/entities.py +16 -0
  20. snowflake/ml/feature_store/examples/airline_features/features/plane_features.py +31 -0
  21. snowflake/ml/feature_store/examples/airline_features/features/weather_features.py +42 -0
  22. snowflake/ml/feature_store/examples/airline_features/source.yaml +7 -0
  23. snowflake/ml/feature_store/examples/citibike_trip_features/features/station_feature.py +10 -4
  24. snowflake/ml/feature_store/examples/citibike_trip_features/features/trip_feature.py +6 -0
  25. snowflake/ml/feature_store/examples/citibike_trip_features/source.yaml +3 -0
  26. snowflake/ml/feature_store/examples/example_helper.py +69 -31
  27. snowflake/ml/feature_store/examples/new_york_taxi_features/entities.py +3 -3
  28. snowflake/ml/feature_store/examples/new_york_taxi_features/features/{dropoff_features.py → location_features.py} +14 -9
  29. snowflake/ml/feature_store/examples/new_york_taxi_features/features/trip_features.py +36 -0
  30. snowflake/ml/feature_store/examples/new_york_taxi_features/source.yaml +5 -1
  31. snowflake/ml/feature_store/examples/source_data/airline.yaml +4 -0
  32. snowflake/ml/feature_store/examples/source_data/citibike_trips.yaml +1 -1
  33. snowflake/ml/feature_store/examples/wine_quality_features/entities.py +3 -3
  34. snowflake/ml/feature_store/examples/wine_quality_features/features/managed_wine_features.py +13 -6
  35. snowflake/ml/feature_store/examples/wine_quality_features/features/static_wine_features.py +8 -5
  36. snowflake/ml/feature_store/examples/wine_quality_features/source.yaml +3 -0
  37. snowflake/ml/feature_store/feature_store.py +100 -41
  38. snowflake/ml/feature_store/feature_view.py +149 -5
  39. snowflake/ml/fileset/embedded_stage_fs.py +1 -1
  40. snowflake/ml/fileset/fileset.py +1 -1
  41. snowflake/ml/fileset/sfcfs.py +9 -3
  42. snowflake/ml/model/_client/model/model_impl.py +11 -2
  43. snowflake/ml/model/_client/model/model_version_impl.py +186 -20
  44. snowflake/ml/model/_client/ops/model_ops.py +144 -30
  45. snowflake/ml/model/_client/ops/service_ops.py +312 -0
  46. snowflake/ml/model/_client/service/model_deployment_spec.py +94 -0
  47. snowflake/ml/model/_client/service/model_deployment_spec_schema.py +30 -0
  48. snowflake/ml/model/_client/sql/model_version.py +13 -4
  49. snowflake/ml/model/_client/sql/service.py +196 -0
  50. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +1 -1
  51. snowflake/ml/model/_deploy_client/snowservice/deploy.py +3 -3
  52. snowflake/ml/model/_model_composer/model_composer.py +5 -0
  53. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +13 -10
  54. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +3 -0
  55. snowflake/ml/model/_packager/model_env/model_env.py +7 -2
  56. snowflake/ml/model/_packager/model_handlers/_base.py +29 -12
  57. snowflake/ml/model/_packager/model_handlers/_utils.py +46 -14
  58. snowflake/ml/model/_packager/model_handlers/catboost.py +25 -16
  59. snowflake/ml/model/_packager/model_handlers/custom.py +6 -2
  60. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +32 -20
  61. snowflake/ml/model/_packager/model_handlers/lightgbm.py +23 -56
  62. snowflake/ml/model/_packager/model_handlers/llm.py +11 -5
  63. snowflake/ml/model/_packager/model_handlers/mlflow.py +8 -3
  64. snowflake/ml/model/_packager/model_handlers/model_objective_utils.py +116 -0
  65. snowflake/ml/model/_packager/model_handlers/pytorch.py +8 -3
  66. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +8 -3
  67. snowflake/ml/model/_packager/model_handlers/sklearn.py +99 -4
  68. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +123 -5
  69. snowflake/ml/model/_packager/model_handlers/tensorflow.py +9 -4
  70. snowflake/ml/model/_packager/model_handlers/torchscript.py +10 -5
  71. snowflake/ml/model/_packager/model_handlers/xgboost.py +56 -47
  72. snowflake/ml/model/_packager/model_meta/model_meta.py +35 -2
  73. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +11 -0
  74. snowflake/ml/model/_packager/model_packager.py +4 -1
  75. snowflake/ml/model/_packager/model_runtime/model_runtime.py +4 -2
  76. snowflake/ml/model/_signatures/pytorch_handler.py +1 -1
  77. snowflake/ml/model/_signatures/utils.py +9 -0
  78. snowflake/ml/model/models/llm.py +3 -1
  79. snowflake/ml/model/type_hints.py +10 -4
  80. snowflake/ml/modeling/_internal/constants.py +1 -0
  81. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +5 -5
  82. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +9 -6
  83. snowflake/ml/modeling/_internal/model_specifications.py +2 -0
  84. snowflake/ml/modeling/_internal/model_trainer.py +1 -0
  85. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +2 -2
  86. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +5 -5
  87. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +113 -160
  88. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +60 -21
  89. snowflake/ml/modeling/cluster/affinity_propagation.py +60 -21
  90. snowflake/ml/modeling/cluster/agglomerative_clustering.py +60 -21
  91. snowflake/ml/modeling/cluster/birch.py +60 -21
  92. snowflake/ml/modeling/cluster/bisecting_k_means.py +60 -21
  93. snowflake/ml/modeling/cluster/dbscan.py +60 -21
  94. snowflake/ml/modeling/cluster/feature_agglomeration.py +60 -21
  95. snowflake/ml/modeling/cluster/k_means.py +60 -21
  96. snowflake/ml/modeling/cluster/mean_shift.py +60 -21
  97. snowflake/ml/modeling/cluster/mini_batch_k_means.py +60 -21
  98. snowflake/ml/modeling/cluster/optics.py +60 -21
  99. snowflake/ml/modeling/cluster/spectral_biclustering.py +60 -21
  100. snowflake/ml/modeling/cluster/spectral_clustering.py +60 -21
  101. snowflake/ml/modeling/cluster/spectral_coclustering.py +60 -21
  102. snowflake/ml/modeling/compose/column_transformer.py +60 -21
  103. snowflake/ml/modeling/compose/transformed_target_regressor.py +60 -21
  104. snowflake/ml/modeling/covariance/elliptic_envelope.py +60 -21
  105. snowflake/ml/modeling/covariance/empirical_covariance.py +60 -21
  106. snowflake/ml/modeling/covariance/graphical_lasso.py +60 -21
  107. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +60 -21
  108. snowflake/ml/modeling/covariance/ledoit_wolf.py +60 -21
  109. snowflake/ml/modeling/covariance/min_cov_det.py +60 -21
  110. snowflake/ml/modeling/covariance/oas.py +60 -21
  111. snowflake/ml/modeling/covariance/shrunk_covariance.py +60 -21
  112. snowflake/ml/modeling/decomposition/dictionary_learning.py +60 -21
  113. snowflake/ml/modeling/decomposition/factor_analysis.py +60 -21
  114. snowflake/ml/modeling/decomposition/fast_ica.py +60 -21
  115. snowflake/ml/modeling/decomposition/incremental_pca.py +60 -21
  116. snowflake/ml/modeling/decomposition/kernel_pca.py +60 -21
  117. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +60 -21
  118. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +60 -21
  119. snowflake/ml/modeling/decomposition/pca.py +60 -21
  120. snowflake/ml/modeling/decomposition/sparse_pca.py +60 -21
  121. snowflake/ml/modeling/decomposition/truncated_svd.py +60 -21
  122. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +60 -21
  123. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +60 -21
  124. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +60 -21
  125. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +60 -21
  126. snowflake/ml/modeling/ensemble/bagging_classifier.py +60 -21
  127. snowflake/ml/modeling/ensemble/bagging_regressor.py +60 -21
  128. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +60 -21
  129. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +60 -21
  130. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +60 -21
  131. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +60 -21
  132. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +60 -21
  133. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +60 -21
  134. snowflake/ml/modeling/ensemble/isolation_forest.py +60 -21
  135. snowflake/ml/modeling/ensemble/random_forest_classifier.py +60 -21
  136. snowflake/ml/modeling/ensemble/random_forest_regressor.py +60 -21
  137. snowflake/ml/modeling/ensemble/stacking_regressor.py +60 -21
  138. snowflake/ml/modeling/ensemble/voting_classifier.py +60 -21
  139. snowflake/ml/modeling/ensemble/voting_regressor.py +60 -21
  140. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +60 -21
  141. snowflake/ml/modeling/feature_selection/select_fdr.py +60 -21
  142. snowflake/ml/modeling/feature_selection/select_fpr.py +60 -21
  143. snowflake/ml/modeling/feature_selection/select_fwe.py +60 -21
  144. snowflake/ml/modeling/feature_selection/select_k_best.py +60 -21
  145. snowflake/ml/modeling/feature_selection/select_percentile.py +60 -21
  146. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +60 -21
  147. snowflake/ml/modeling/feature_selection/variance_threshold.py +60 -21
  148. snowflake/ml/modeling/framework/base.py +28 -19
  149. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +60 -21
  150. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +60 -21
  151. snowflake/ml/modeling/impute/iterative_imputer.py +60 -21
  152. snowflake/ml/modeling/impute/knn_imputer.py +60 -21
  153. snowflake/ml/modeling/impute/missing_indicator.py +60 -21
  154. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +60 -21
  155. snowflake/ml/modeling/kernel_approximation/nystroem.py +60 -21
  156. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +60 -21
  157. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +60 -21
  158. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +60 -21
  159. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +60 -21
  160. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +60 -21
  161. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +60 -21
  162. snowflake/ml/modeling/linear_model/ard_regression.py +60 -21
  163. snowflake/ml/modeling/linear_model/bayesian_ridge.py +60 -21
  164. snowflake/ml/modeling/linear_model/elastic_net.py +60 -21
  165. snowflake/ml/modeling/linear_model/elastic_net_cv.py +60 -21
  166. snowflake/ml/modeling/linear_model/gamma_regressor.py +60 -21
  167. snowflake/ml/modeling/linear_model/huber_regressor.py +60 -21
  168. snowflake/ml/modeling/linear_model/lars.py +60 -21
  169. snowflake/ml/modeling/linear_model/lars_cv.py +60 -21
  170. snowflake/ml/modeling/linear_model/lasso.py +60 -21
  171. snowflake/ml/modeling/linear_model/lasso_cv.py +60 -21
  172. snowflake/ml/modeling/linear_model/lasso_lars.py +60 -21
  173. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +60 -21
  174. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +60 -21
  175. snowflake/ml/modeling/linear_model/linear_regression.py +60 -21
  176. snowflake/ml/modeling/linear_model/logistic_regression.py +60 -21
  177. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +60 -21
  178. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +60 -21
  179. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +60 -21
  180. snowflake/ml/modeling/linear_model/multi_task_lasso.py +60 -21
  181. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +60 -21
  182. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +60 -21
  183. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +60 -21
  184. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +60 -21
  185. snowflake/ml/modeling/linear_model/perceptron.py +60 -21
  186. snowflake/ml/modeling/linear_model/poisson_regressor.py +60 -21
  187. snowflake/ml/modeling/linear_model/ransac_regressor.py +60 -21
  188. snowflake/ml/modeling/linear_model/ridge.py +60 -21
  189. snowflake/ml/modeling/linear_model/ridge_classifier.py +60 -21
  190. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +60 -21
  191. snowflake/ml/modeling/linear_model/ridge_cv.py +60 -21
  192. snowflake/ml/modeling/linear_model/sgd_classifier.py +60 -21
  193. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +60 -21
  194. snowflake/ml/modeling/linear_model/sgd_regressor.py +60 -21
  195. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +60 -21
  196. snowflake/ml/modeling/linear_model/tweedie_regressor.py +60 -21
  197. snowflake/ml/modeling/manifold/isomap.py +60 -21
  198. snowflake/ml/modeling/manifold/mds.py +60 -21
  199. snowflake/ml/modeling/manifold/spectral_embedding.py +60 -21
  200. snowflake/ml/modeling/manifold/tsne.py +60 -21
  201. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +60 -21
  202. snowflake/ml/modeling/mixture/gaussian_mixture.py +60 -21
  203. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +60 -21
  204. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +60 -21
  205. snowflake/ml/modeling/multiclass/output_code_classifier.py +60 -21
  206. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +60 -21
  207. snowflake/ml/modeling/naive_bayes/categorical_nb.py +60 -21
  208. snowflake/ml/modeling/naive_bayes/complement_nb.py +60 -21
  209. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +60 -21
  210. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +60 -21
  211. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +60 -21
  212. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +60 -21
  213. snowflake/ml/modeling/neighbors/kernel_density.py +60 -21
  214. snowflake/ml/modeling/neighbors/local_outlier_factor.py +60 -21
  215. snowflake/ml/modeling/neighbors/nearest_centroid.py +60 -21
  216. snowflake/ml/modeling/neighbors/nearest_neighbors.py +60 -21
  217. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +60 -21
  218. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +60 -21
  219. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +60 -21
  220. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +60 -21
  221. snowflake/ml/modeling/neural_network/mlp_classifier.py +60 -21
  222. snowflake/ml/modeling/neural_network/mlp_regressor.py +60 -21
  223. snowflake/ml/modeling/parameters/disable_model_tracer.py +5 -0
  224. snowflake/ml/modeling/pipeline/pipeline.py +4 -12
  225. snowflake/ml/modeling/preprocessing/polynomial_features.py +60 -21
  226. snowflake/ml/modeling/semi_supervised/label_propagation.py +60 -21
  227. snowflake/ml/modeling/semi_supervised/label_spreading.py +60 -21
  228. snowflake/ml/modeling/svm/linear_svc.py +60 -21
  229. snowflake/ml/modeling/svm/linear_svr.py +60 -21
  230. snowflake/ml/modeling/svm/nu_svc.py +60 -21
  231. snowflake/ml/modeling/svm/nu_svr.py +60 -21
  232. snowflake/ml/modeling/svm/svc.py +60 -21
  233. snowflake/ml/modeling/svm/svr.py +60 -21
  234. snowflake/ml/modeling/tree/decision_tree_classifier.py +60 -21
  235. snowflake/ml/modeling/tree/decision_tree_regressor.py +60 -21
  236. snowflake/ml/modeling/tree/extra_tree_classifier.py +60 -21
  237. snowflake/ml/modeling/tree/extra_tree_regressor.py +60 -21
  238. snowflake/ml/modeling/xgboost/xgb_classifier.py +63 -23
  239. snowflake/ml/modeling/xgboost/xgb_regressor.py +63 -23
  240. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +63 -23
  241. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +63 -23
  242. snowflake/ml/registry/_manager/model_manager.py +20 -2
  243. snowflake/ml/registry/model_registry.py +1 -1
  244. snowflake/ml/registry/registry.py +1 -2
  245. snowflake/ml/utils/sql_client.py +22 -0
  246. snowflake/ml/version.py +1 -1
  247. {snowflake_ml_python-1.6.0.dist-info → snowflake_ml_python-1.6.2.dist-info}/METADATA +55 -3
  248. {snowflake_ml_python-1.6.0.dist-info → snowflake_ml_python-1.6.2.dist-info}/RECORD +251 -238
  249. {snowflake_ml_python-1.6.0.dist-info → snowflake_ml_python-1.6.2.dist-info}/WHEEL +1 -1
  250. snowflake/ml/feature_store/examples/new_york_taxi_features/features/pickup_features.py +0 -58
  251. {snowflake_ml_python-1.6.0.dist-info → snowflake_ml_python-1.6.2.dist-info}/LICENSE.txt +0 -0
  252. {snowflake_ml_python-1.6.0.dist-info → snowflake_ml_python-1.6.2.dist-info}/top_level.txt +0 -0
@@ -4,18 +4,17 @@
4
4
  #
5
5
  import inspect
6
6
  import os
7
- import posixpath
8
- from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
9
- from typing_extensions import TypeGuard
7
+ from typing import Iterable, Optional, Union, List, Any, Dict, Set, Tuple
10
8
  from uuid import uuid4
11
9
 
12
10
  import cloudpickle as cp
13
- import pandas as pd
14
11
  import numpy as np
12
+ import pandas as pd
15
13
  from numpy import typing as npt
16
14
 
17
15
 
18
16
  import numpy
17
+ import sklearn
19
18
  import xgboost
20
19
  from sklearn.utils.metaestimators import available_if
21
20
 
@@ -23,12 +22,11 @@ from snowflake.ml.modeling.framework.base import BaseTransformer, _process_cols
23
22
  from snowflake.ml._internal import telemetry
24
23
  from snowflake.ml._internal.exceptions import error_codes, exceptions, modeling_error_messages
25
24
  from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
26
- from snowflake.ml._internal.utils import pkg_version_utils, identifier
25
+ from snowflake.ml._internal.utils import identifier
27
26
  from snowflake.snowpark import DataFrame, Session
28
27
  from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
29
28
  from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
30
29
  from snowflake.ml.modeling._internal.transformer_protocols import (
31
- ModelTransformHandlers,
32
30
  BatchInferenceKwargsTypedDict,
33
31
  ScoreKwargsTypedDict
34
32
  )
@@ -363,7 +361,7 @@ class XGBRFClassifier(BaseTransformer):
363
361
  self.set_sample_weight_col(sample_weight_col)
364
362
  self._use_external_memory_version = use_external_memory_version
365
363
  self._batch_size = batch_size
366
- deps: Set[str] = set([f'numpy=={np.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
364
+ deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
367
365
 
368
366
  self._deps = list(deps)
369
367
 
@@ -699,12 +697,23 @@ class XGBRFClassifier(BaseTransformer):
699
697
  autogenerated=self._autogenerated,
700
698
  subproject=_SUBPROJECT,
701
699
  )
702
- output_result, fitted_estimator = model_trainer.train_fit_predict(
703
- drop_input_cols=self._drop_input_cols,
704
- expected_output_cols_list=(
705
- self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
706
- ),
700
+ expected_output_cols = (
701
+ self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
707
702
  )
703
+ if isinstance(dataset, DataFrame):
704
+ expected_output_cols, example_output_pd_df = self._align_expected_output(
705
+ "fit_predict", dataset, expected_output_cols, output_cols_prefix
706
+ )
707
+ output_result, fitted_estimator = model_trainer.train_fit_predict(
708
+ drop_input_cols=self._drop_input_cols,
709
+ expected_output_cols_list=expected_output_cols,
710
+ example_output_pd_df=example_output_pd_df,
711
+ )
712
+ else:
713
+ output_result, fitted_estimator = model_trainer.train_fit_predict(
714
+ drop_input_cols=self._drop_input_cols,
715
+ expected_output_cols_list=expected_output_cols,
716
+ )
708
717
  self._sklearn_object = fitted_estimator
709
718
  self._is_fitted = True
710
719
  return output_result
@@ -783,12 +792,41 @@ class XGBRFClassifier(BaseTransformer):
783
792
 
784
793
  return rv
785
794
 
786
- def _align_expected_output_names(
787
- self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
788
- ) -> List[str]:
795
+ def _align_expected_output(
796
+ self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str,
797
+ ) -> Tuple[List[str], pd.DataFrame]:
798
+ """ Run 1 line of data with the desired method, and return one tuple that consists of the output column names
799
+ and output dataframe with 1 line.
800
+ If the method is fit_predict, run 2 lines of data.
801
+ """
789
802
  # in case the inferred output column names dimension is different
790
803
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
791
- sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
804
+
805
+ # For fit_predict method, a minimum of 2 is required by MinCovDet, BayesianGaussianMixture
806
+ # so change the minimum of number of rows to 2
807
+ num_examples = 2
808
+ statement_params = telemetry.get_function_usage_statement_params(
809
+ project=_PROJECT,
810
+ subproject=_SUBPROJECT,
811
+ function_name=telemetry.get_statement_params_full_func_name(
812
+ inspect.currentframe(), XGBRFClassifier.__class__.__name__
813
+ ),
814
+ api_calls=[Session.call],
815
+ custom_tags={"autogen": True} if self._autogenerated else None,
816
+ )
817
+ if output_cols_prefix == "fit_predict_":
818
+ if hasattr(self._sklearn_object, "n_clusters"):
819
+ # cluster classes such as BisectingKMeansTest requires # of examples >= n_clusters
820
+ num_examples = self._sklearn_object.n_clusters
821
+ elif hasattr(self._sklearn_object, "min_samples"):
822
+ # OPTICS default min_samples 5, which requires at least 5 lines of data
823
+ num_examples = self._sklearn_object.min_samples
824
+ elif hasattr(self._sklearn_object, "n_neighbors") and hasattr(self._sklearn_object, "n_samples"):
825
+ # LocalOutlierFactor expects n_neighbors <= n_samples
826
+ num_examples = self._sklearn_object.n_neighbors
827
+ sample_pd_df = dataset.select(self.input_cols).limit(num_examples).to_pandas(statement_params=statement_params)
828
+ else:
829
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas(statement_params=statement_params)
792
830
 
793
831
  # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
794
832
  # seen during the fit.
@@ -800,12 +838,14 @@ class XGBRFClassifier(BaseTransformer):
800
838
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
801
839
  if self.sample_weight_col:
802
840
  output_df_columns_set -= set(self.sample_weight_col)
841
+
803
842
  # if the dimension of inferred output column names is correct; use it
804
843
  if len(expected_output_cols_list) == len(output_df_columns_set):
805
- return expected_output_cols_list
844
+ return expected_output_cols_list, output_df_pd
806
845
  # otherwise, use the sklearn estimator's output
807
846
  else:
808
- return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
847
+ expected_output_cols_list = sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
848
+ return expected_output_cols_list, output_df_pd[expected_output_cols_list]
809
849
 
810
850
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
811
851
  @telemetry.send_api_usage_telemetry(
@@ -853,7 +893,7 @@ class XGBRFClassifier(BaseTransformer):
853
893
  drop_input_cols=self._drop_input_cols,
854
894
  expected_output_cols_type="float",
855
895
  )
856
- expected_output_cols = self._align_expected_output_names(
896
+ expected_output_cols, _ = self._align_expected_output(
857
897
  inference_method, dataset, expected_output_cols, output_cols_prefix
858
898
  )
859
899
 
@@ -921,7 +961,7 @@ class XGBRFClassifier(BaseTransformer):
921
961
  drop_input_cols=self._drop_input_cols,
922
962
  expected_output_cols_type="float",
923
963
  )
924
- expected_output_cols = self._align_expected_output_names(
964
+ expected_output_cols, _ = self._align_expected_output(
925
965
  inference_method, dataset, expected_output_cols, output_cols_prefix
926
966
  )
927
967
  elif isinstance(dataset, pd.DataFrame):
@@ -984,7 +1024,7 @@ class XGBRFClassifier(BaseTransformer):
984
1024
  drop_input_cols=self._drop_input_cols,
985
1025
  expected_output_cols_type="float",
986
1026
  )
987
- expected_output_cols = self._align_expected_output_names(
1027
+ expected_output_cols, _ = self._align_expected_output(
988
1028
  inference_method, dataset, expected_output_cols, output_cols_prefix
989
1029
  )
990
1030
 
@@ -1049,7 +1089,7 @@ class XGBRFClassifier(BaseTransformer):
1049
1089
  drop_input_cols = self._drop_input_cols,
1050
1090
  expected_output_cols_type="float",
1051
1091
  )
1052
- expected_output_cols = self._align_expected_output_names(
1092
+ expected_output_cols, _ = self._align_expected_output(
1053
1093
  inference_method, dataset, expected_output_cols, output_cols_prefix
1054
1094
  )
1055
1095
 
@@ -1114,7 +1154,7 @@ class XGBRFClassifier(BaseTransformer):
1114
1154
  transform_kwargs = dict(
1115
1155
  session=dataset._session,
1116
1156
  dependencies=self._deps,
1117
- score_sproc_imports=['xgboost'],
1157
+ score_sproc_imports=['xgboost', 'sklearn'],
1118
1158
  )
1119
1159
  elif isinstance(dataset, pd.DataFrame):
1120
1160
  # pandas_handler.score() does not require any extra kwargs.
@@ -4,18 +4,17 @@
4
4
  #
5
5
  import inspect
6
6
  import os
7
- import posixpath
8
- from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
9
- from typing_extensions import TypeGuard
7
+ from typing import Iterable, Optional, Union, List, Any, Dict, Set, Tuple
10
8
  from uuid import uuid4
11
9
 
12
10
  import cloudpickle as cp
13
- import pandas as pd
14
11
  import numpy as np
12
+ import pandas as pd
15
13
  from numpy import typing as npt
16
14
 
17
15
 
18
16
  import numpy
17
+ import sklearn
19
18
  import xgboost
20
19
  from sklearn.utils.metaestimators import available_if
21
20
 
@@ -23,12 +22,11 @@ from snowflake.ml.modeling.framework.base import BaseTransformer, _process_cols
23
22
  from snowflake.ml._internal import telemetry
24
23
  from snowflake.ml._internal.exceptions import error_codes, exceptions, modeling_error_messages
25
24
  from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
26
- from snowflake.ml._internal.utils import pkg_version_utils, identifier
25
+ from snowflake.ml._internal.utils import identifier
27
26
  from snowflake.snowpark import DataFrame, Session
28
27
  from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
29
28
  from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
30
29
  from snowflake.ml.modeling._internal.transformer_protocols import (
31
- ModelTransformHandlers,
32
30
  BatchInferenceKwargsTypedDict,
33
31
  ScoreKwargsTypedDict
34
32
  )
@@ -363,7 +361,7 @@ class XGBRFRegressor(BaseTransformer):
363
361
  self.set_sample_weight_col(sample_weight_col)
364
362
  self._use_external_memory_version = use_external_memory_version
365
363
  self._batch_size = batch_size
366
- deps: Set[str] = set([f'numpy=={np.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
364
+ deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
367
365
 
368
366
  self._deps = list(deps)
369
367
 
@@ -699,12 +697,23 @@ class XGBRFRegressor(BaseTransformer):
699
697
  autogenerated=self._autogenerated,
700
698
  subproject=_SUBPROJECT,
701
699
  )
702
- output_result, fitted_estimator = model_trainer.train_fit_predict(
703
- drop_input_cols=self._drop_input_cols,
704
- expected_output_cols_list=(
705
- self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
706
- ),
700
+ expected_output_cols = (
701
+ self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
707
702
  )
703
+ if isinstance(dataset, DataFrame):
704
+ expected_output_cols, example_output_pd_df = self._align_expected_output(
705
+ "fit_predict", dataset, expected_output_cols, output_cols_prefix
706
+ )
707
+ output_result, fitted_estimator = model_trainer.train_fit_predict(
708
+ drop_input_cols=self._drop_input_cols,
709
+ expected_output_cols_list=expected_output_cols,
710
+ example_output_pd_df=example_output_pd_df,
711
+ )
712
+ else:
713
+ output_result, fitted_estimator = model_trainer.train_fit_predict(
714
+ drop_input_cols=self._drop_input_cols,
715
+ expected_output_cols_list=expected_output_cols,
716
+ )
708
717
  self._sklearn_object = fitted_estimator
709
718
  self._is_fitted = True
710
719
  return output_result
@@ -783,12 +792,41 @@ class XGBRFRegressor(BaseTransformer):
783
792
 
784
793
  return rv
785
794
 
786
- def _align_expected_output_names(
787
- self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
788
- ) -> List[str]:
795
+ def _align_expected_output(
796
+ self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str,
797
+ ) -> Tuple[List[str], pd.DataFrame]:
798
+ """ Run 1 line of data with the desired method, and return one tuple that consists of the output column names
799
+ and output dataframe with 1 line.
800
+ If the method is fit_predict, run 2 lines of data.
801
+ """
789
802
  # in case the inferred output column names dimension is different
790
803
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
791
- sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
804
+
805
+ # For fit_predict method, a minimum of 2 is required by MinCovDet, BayesianGaussianMixture
806
+ # so change the minimum of number of rows to 2
807
+ num_examples = 2
808
+ statement_params = telemetry.get_function_usage_statement_params(
809
+ project=_PROJECT,
810
+ subproject=_SUBPROJECT,
811
+ function_name=telemetry.get_statement_params_full_func_name(
812
+ inspect.currentframe(), XGBRFRegressor.__class__.__name__
813
+ ),
814
+ api_calls=[Session.call],
815
+ custom_tags={"autogen": True} if self._autogenerated else None,
816
+ )
817
+ if output_cols_prefix == "fit_predict_":
818
+ if hasattr(self._sklearn_object, "n_clusters"):
819
+ # cluster classes such as BisectingKMeansTest requires # of examples >= n_clusters
820
+ num_examples = self._sklearn_object.n_clusters
821
+ elif hasattr(self._sklearn_object, "min_samples"):
822
+ # OPTICS default min_samples 5, which requires at least 5 lines of data
823
+ num_examples = self._sklearn_object.min_samples
824
+ elif hasattr(self._sklearn_object, "n_neighbors") and hasattr(self._sklearn_object, "n_samples"):
825
+ # LocalOutlierFactor expects n_neighbors <= n_samples
826
+ num_examples = self._sklearn_object.n_neighbors
827
+ sample_pd_df = dataset.select(self.input_cols).limit(num_examples).to_pandas(statement_params=statement_params)
828
+ else:
829
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas(statement_params=statement_params)
792
830
 
793
831
  # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
794
832
  # seen during the fit.
@@ -800,12 +838,14 @@ class XGBRFRegressor(BaseTransformer):
800
838
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
801
839
  if self.sample_weight_col:
802
840
  output_df_columns_set -= set(self.sample_weight_col)
841
+
803
842
  # if the dimension of inferred output column names is correct; use it
804
843
  if len(expected_output_cols_list) == len(output_df_columns_set):
805
- return expected_output_cols_list
844
+ return expected_output_cols_list, output_df_pd
806
845
  # otherwise, use the sklearn estimator's output
807
846
  else:
808
- return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
847
+ expected_output_cols_list = sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
848
+ return expected_output_cols_list, output_df_pd[expected_output_cols_list]
809
849
 
810
850
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
811
851
  @telemetry.send_api_usage_telemetry(
@@ -851,7 +891,7 @@ class XGBRFRegressor(BaseTransformer):
851
891
  drop_input_cols=self._drop_input_cols,
852
892
  expected_output_cols_type="float",
853
893
  )
854
- expected_output_cols = self._align_expected_output_names(
894
+ expected_output_cols, _ = self._align_expected_output(
855
895
  inference_method, dataset, expected_output_cols, output_cols_prefix
856
896
  )
857
897
 
@@ -917,7 +957,7 @@ class XGBRFRegressor(BaseTransformer):
917
957
  drop_input_cols=self._drop_input_cols,
918
958
  expected_output_cols_type="float",
919
959
  )
920
- expected_output_cols = self._align_expected_output_names(
960
+ expected_output_cols, _ = self._align_expected_output(
921
961
  inference_method, dataset, expected_output_cols, output_cols_prefix
922
962
  )
923
963
  elif isinstance(dataset, pd.DataFrame):
@@ -980,7 +1020,7 @@ class XGBRFRegressor(BaseTransformer):
980
1020
  drop_input_cols=self._drop_input_cols,
981
1021
  expected_output_cols_type="float",
982
1022
  )
983
- expected_output_cols = self._align_expected_output_names(
1023
+ expected_output_cols, _ = self._align_expected_output(
984
1024
  inference_method, dataset, expected_output_cols, output_cols_prefix
985
1025
  )
986
1026
 
@@ -1045,7 +1085,7 @@ class XGBRFRegressor(BaseTransformer):
1045
1085
  drop_input_cols = self._drop_input_cols,
1046
1086
  expected_output_cols_type="float",
1047
1087
  )
1048
- expected_output_cols = self._align_expected_output_names(
1088
+ expected_output_cols, _ = self._align_expected_output(
1049
1089
  inference_method, dataset, expected_output_cols, output_cols_prefix
1050
1090
  )
1051
1091
 
@@ -1110,7 +1150,7 @@ class XGBRFRegressor(BaseTransformer):
1110
1150
  transform_kwargs = dict(
1111
1151
  session=dataset._session,
1112
1152
  dependencies=self._deps,
1113
- score_sproc_imports=['xgboost'],
1153
+ score_sproc_imports=['xgboost', 'sklearn'],
1114
1154
  )
1115
1155
  elif isinstance(dataset, pd.DataFrame):
1116
1156
  # pandas_handler.score() does not require any extra kwargs.
@@ -9,7 +9,7 @@ from snowflake.ml._internal.human_readable_id import hrid_generator
9
9
  from snowflake.ml._internal.utils import sql_identifier
10
10
  from snowflake.ml.model import model_signature, type_hints as model_types
11
11
  from snowflake.ml.model._client.model import model_impl, model_version_impl
12
- from snowflake.ml.model._client.ops import metadata_ops, model_ops
12
+ from snowflake.ml.model._client.ops import metadata_ops, model_ops, service_ops
13
13
  from snowflake.ml.model._model_composer import model_composer
14
14
  from snowflake.ml.model._packager.model_meta import model_meta
15
15
  from snowflake.snowpark import session
@@ -30,6 +30,9 @@ class ModelManager:
30
30
  self._model_ops = model_ops.ModelOperator(
31
31
  session, database_name=self._database_name, schema_name=self._schema_name
32
32
  )
33
+ self._service_ops = service_ops.ServiceOperator(
34
+ session, database_name=self._database_name, schema_name=self._schema_name
35
+ )
33
36
  self._hrid_generator = hrid_generator.HRID16()
34
37
 
35
38
  def log_model(
@@ -47,6 +50,7 @@ class ModelManager:
47
50
  sample_input_data: Optional[model_types.SupportedDataType] = None,
48
51
  code_paths: Optional[List[str]] = None,
49
52
  ext_modules: Optional[List[ModuleType]] = None,
53
+ model_objective: model_types.ModelObjective = model_types.ModelObjective.UNKNOWN,
50
54
  options: Optional[model_types.ModelSaveOption] = None,
51
55
  statement_params: Optional[Dict[str, Any]] = None,
52
56
  ) -> model_version_impl.ModelVersion:
@@ -86,6 +90,7 @@ class ModelManager:
86
90
  sample_input_data=sample_input_data,
87
91
  code_paths=code_paths,
88
92
  ext_modules=ext_modules,
93
+ model_objective=model_objective,
89
94
  options=options,
90
95
  statement_params=statement_params,
91
96
  )
@@ -105,6 +110,7 @@ class ModelManager:
105
110
  sample_input_data: Optional[model_types.SupportedDataType] = None,
106
111
  code_paths: Optional[List[str]] = None,
107
112
  ext_modules: Optional[List[ModuleType]] = None,
113
+ model_objective: model_types.ModelObjective = model_types.ModelObjective.UNKNOWN,
108
114
  options: Optional[model_types.ModelSaveOption] = None,
109
115
  statement_params: Optional[Dict[str, Any]] = None,
110
116
  ) -> model_version_impl.ModelVersion:
@@ -153,6 +159,7 @@ class ModelManager:
153
159
  code_paths=code_paths,
154
160
  ext_modules=ext_modules,
155
161
  options=options,
162
+ model_objective=model_objective,
156
163
  )
157
164
  statement_params = telemetry.add_statement_params_custom_tags(
158
165
  statement_params, model_metadata.telemetry_metadata()
@@ -173,11 +180,16 @@ class ModelManager:
173
180
  )
174
181
 
175
182
  mv = model_version_impl.ModelVersion._ref(
176
- model_ops.ModelOperator(
183
+ model_ops=model_ops.ModelOperator(
177
184
  self._model_ops._session,
178
185
  database_name=database_name_id or self._database_name,
179
186
  schema_name=schema_name_id or self._schema_name,
180
187
  ),
188
+ service_ops=service_ops.ServiceOperator(
189
+ self._service_ops._session,
190
+ database_name=database_name_id or self._database_name,
191
+ schema_name=schema_name_id or self._schema_name,
192
+ ),
181
193
  model_name=model_name_id,
182
194
  version_name=version_name_id,
183
195
  )
@@ -216,6 +228,11 @@ class ModelManager:
216
228
  database_name=database_name_id or self._database_name,
217
229
  schema_name=schema_name_id or self._schema_name,
218
230
  ),
231
+ service_ops=service_ops.ServiceOperator(
232
+ self._service_ops._session,
233
+ database_name=database_name_id or self._database_name,
234
+ schema_name=schema_name_id or self._schema_name,
235
+ ),
219
236
  model_name=model_name_id,
220
237
  )
221
238
  else:
@@ -234,6 +251,7 @@ class ModelManager:
234
251
  return [
235
252
  model_impl.Model._ref(
236
253
  self._model_ops,
254
+ service_ops=self._service_ops,
237
255
  model_name=model_name,
238
256
  )
239
257
  for model_name in model_names
@@ -576,7 +576,7 @@ fully integrated into the new registry.
576
576
  raw_stage_path = uri.get_snowflake_stage_path_from_uri(model_uri)
577
577
  if not raw_stage_path:
578
578
  return None
579
- (db, schema, stage, _) = identifier.parse_schema_level_object_identifier(raw_stage_path)
579
+ (db, schema, stage, _) = identifier.parse_snowflake_stage_path(raw_stage_path)
580
580
  return identifier.get_schema_level_object_identifier(db, schema, stage)
581
581
 
582
582
  def _list_selected_models(
@@ -244,8 +244,7 @@ class Registry:
244
244
  warnings.warn(
245
245
  "Models logged specifying `pip_requirements` can not be executed "
246
246
  "in Snowflake Warehouse where all dependencies are required to be retrieved "
247
- "from Snowflake Anaconda Channel. Specify model save option `include_pip_dependencies`"
248
- "to log model with pip dependencies.",
247
+ "from Snowflake Anaconda Channel.",
249
248
  category=UserWarning,
250
249
  stacklevel=1,
251
250
  )
@@ -0,0 +1,22 @@
1
+ from enum import Enum
2
+ from typing import Dict
3
+
4
+
5
+ class CreationOption(Enum):
6
+ FAIL_IF_NOT_EXIST = 1
7
+ CREATE_IF_NOT_EXIST = 2
8
+ OR_REPLACE = 3
9
+
10
+
11
+ class CreationMode:
12
+ def __init__(self, *, if_not_exists: bool = False, or_replace: bool = False) -> None:
13
+ self.if_not_exists = if_not_exists
14
+ self.or_replace = or_replace
15
+
16
+ def get_ddl_phrases(self) -> Dict[CreationOption, str]:
17
+ if_not_exists_sql = " IF NOT EXISTS" if self.if_not_exists else ""
18
+ or_replace_sql = " OR REPLACE" if self.or_replace else ""
19
+ return {
20
+ CreationOption.CREATE_IF_NOT_EXIST: if_not_exists_sql,
21
+ CreationOption.OR_REPLACE: or_replace_sql,
22
+ }
snowflake/ml/version.py CHANGED
@@ -1 +1 @@
1
- VERSION="1.6.0"
1
+ VERSION="1.6.2"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: snowflake-ml-python
3
- Version: 1.6.0
3
+ Version: 1.6.2
4
4
  Summary: The machine learning client library that is used for interacting with Snowflake to build machine learning solutions.
5
5
  Author-email: "Snowflake, Inc" <support@snowflake.com>
6
6
  License:
@@ -253,7 +253,7 @@ Requires-Dist: snowflake-connector-python[pandas] <4,>=3.5.0
253
253
  Requires-Dist: snowflake-snowpark-python <2,>=1.17.0
254
254
  Requires-Dist: sqlparse <1,>=0.4
255
255
  Requires-Dist: typing-extensions <5,>=4.1.0
256
- Requires-Dist: xgboost <2,>=1.7.3
256
+ Requires-Dist: xgboost <2.1,>=1.7.3
257
257
  Provides-Extra: all
258
258
  Requires-Dist: catboost <2,>=1.2.0 ; extra == 'all'
259
259
  Requires-Dist: lightgbm <5,>=3.3.5 ; extra == 'all'
@@ -373,7 +373,51 @@ be compatibility issues. Server-side functionality that `snowflake-ml-python` de
373
373
 
374
374
  # Release History
375
375
 
376
- ## 1.6.0
376
+ ## 1.6.2 (TBD)
377
+
378
+ ### Bug Fixes
379
+
380
+ - Modeling: Support XGBoost 2.0.x (the dependency range is now `xgboost <2.1,>=1.7.3`).
381
+
382
+ - Data: Fix multiple epoch iteration over `DataConnector.to_torch_datapipe()` DataPipes.
383
+ - Generic: Fix a bug where an invalid name passed to an argument that expects a fully qualified name was
384
+ parsed incorrectly. An exception is now raised instead.
385
+ - Model Explainability: Handle explanations for multiclass XGBoost classification models
386
+ - Model Explainability: Workarounds and better error handling for XGB>2.1.0 not working with SHAP==0.42.1
387
+
388
+ ### New Features
389
+
390
+ - Data: Add top-level exports for `DataConnector` and `DataSource` to `snowflake.ml.data`.
391
+ - Data: Add native batching support via `batch_size` and `drop_last_batch` arguments to `DataConnector.to_torch_dataset()`
392
+ - Feature Store: update_feature_view() supports taking feature view object as argument.
393
+
394
+ ### Behavior Changes
395
+
396
+ ## 1.6.1 (2024-08-12)
397
+
398
+ ### Bug Fixes
399
+
400
+ - Feature Store: Support large metadata blob when generating dataset
401
+ - Feature Store: Added a hidden knob in FeatureView as kwargs for setting customized
402
+ refresh_mode
403
+ - Registry: Fix an error message in Model Version `run` when `function_name` is not specified and the model has multiple
404
+ target methods.
405
+ - Cortex inference: snowflake.cortex.Complete now only uses the REST API for streaming and the use_rest_api_experimental
406
+ is no longer needed.
407
+ - Feature Store: Add a new API: FeatureView.list_columns() which lists all column information.
408
+ - Data: Fix `DataFrame` ingestion with `ArrowIngestor`.
409
+
410
+ ### New Features
411
+
412
+ - Enable `set_params` to set the parameters of the underlying sklearn estimator, if the snowflake-ml model has been fit.
413
+ - Data: Add `snowflake.ml.data.ingestor_utils` module with utility functions helpful for `DataIngestor` implementations.
414
+ - Data: Add new `to_torch_dataset()` connector to `DataConnector` to replace deprecated DataPipe.
415
+ - Registry: The `enable_explainability` option is now set to True by default for XGBoost, LightGBM and CatBoost as a PuPr feature.
416
+ - Registry: Option to `enable_explainability` when registering SHAP supported sklearn models.
417
+
418
+ ### Behavior Changes
419
+
420
+ ## 1.6.0 (2024-07-29)
377
421
 
378
422
  ### Bug Fixes
379
423
 
@@ -402,6 +446,14 @@ be compatibility issues. Server-side functionality that `snowflake-ml-python` de
402
446
  distributed_hpo_trainer.ENABLE_EFFICIENT_MEMORY_USAGE = False
403
447
  `
404
448
  - Registry: Option to `enable_explainability` when registering LightGBM models as a pre-PuPr feature.
449
+ - Data: Add new `snowflake.ml.data` preview module which contains data reading utilities like `DataConnector`
450
+ - `DataConnector` provides efficient connectors from Snowpark `DataFrame`
451
+ and Snowpark ML `Dataset` to external frameworks like PyTorch, TensorFlow, and Pandas. Create `DataConnector`
452
+ instances using the classmethod constructors `DataConnector.from_dataset()` and `DataConnector.from_dataframe()`.
453
+ - Data: Add new `DataConnector.from_sources()` classmethod constructor for constructing from `DataSource` objects.
454
+ - Data: Add new `ingestor_class` arg to `DataConnector` classmethod constructors for easier `DataIngestor` injection.
455
+ - Dataset: `DatasetReader` now subclasses new `DataConnector` class.
456
+ - Add optional `limit` arg to `DatasetReader.to_pandas()`
405
457
 
406
458
  ### Behavior Changes
407
459