snowflake-ml-python 1.1.1__py3-none-any.whl → 1.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224)
  1. snowflake/cortex/_complete.py +1 -1
  2. snowflake/cortex/_extract_answer.py +1 -1
  3. snowflake/cortex/_sentiment.py +1 -1
  4. snowflake/cortex/_summarize.py +1 -1
  5. snowflake/cortex/_translate.py +1 -1
  6. snowflake/ml/_internal/env_utils.py +68 -6
  7. snowflake/ml/_internal/file_utils.py +34 -4
  8. snowflake/ml/_internal/telemetry.py +79 -91
  9. snowflake/ml/_internal/utils/retryable_http.py +16 -4
  10. snowflake/ml/_internal/utils/spcs_attribution_utils.py +122 -0
  11. snowflake/ml/dataset/dataset.py +1 -1
  12. snowflake/ml/model/_api.py +21 -14
  13. snowflake/ml/model/_client/model/model_impl.py +176 -0
  14. snowflake/ml/model/_client/model/model_method_info.py +19 -0
  15. snowflake/ml/model/_client/model/model_version_impl.py +291 -0
  16. snowflake/ml/model/_client/ops/metadata_ops.py +107 -0
  17. snowflake/ml/model/_client/ops/model_ops.py +308 -0
  18. snowflake/ml/model/_client/sql/model.py +75 -0
  19. snowflake/ml/model/_client/sql/model_version.py +213 -0
  20. snowflake/ml/model/_client/sql/stage.py +40 -0
  21. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -4
  22. snowflake/ml/model/_deploy_client/image_builds/templates/image_build_job_spec_template +24 -8
  23. snowflake/ml/model/_deploy_client/image_builds/templates/kaniko_shell_script_template +23 -0
  24. snowflake/ml/model/_deploy_client/snowservice/deploy.py +14 -2
  25. snowflake/ml/model/_deploy_client/utils/constants.py +1 -0
  26. snowflake/ml/model/_deploy_client/warehouse/deploy.py +2 -2
  27. snowflake/ml/model/_model_composer/model_composer.py +31 -9
  28. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +25 -10
  29. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +2 -2
  30. snowflake/ml/model/_model_composer/model_method/infer_function.py_template +2 -1
  31. snowflake/ml/model/_model_composer/model_method/model_method.py +34 -3
  32. snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +1 -1
  33. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +3 -1
  34. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +10 -28
  35. snowflake/ml/model/_packager/model_meta/model_meta.py +18 -16
  36. snowflake/ml/model/_signatures/snowpark_handler.py +1 -1
  37. snowflake/ml/model/model_signature.py +108 -53
  38. snowflake/ml/model/type_hints.py +1 -0
  39. snowflake/ml/modeling/_internal/distributed_hpo_trainer.py +554 -0
  40. snowflake/ml/modeling/_internal/estimator_protocols.py +1 -60
  41. snowflake/ml/modeling/_internal/model_specifications.py +146 -0
  42. snowflake/ml/modeling/_internal/model_trainer.py +13 -0
  43. snowflake/ml/modeling/_internal/model_trainer_builder.py +78 -0
  44. snowflake/ml/modeling/_internal/pandas_trainer.py +54 -0
  45. snowflake/ml/modeling/_internal/snowpark_handlers.py +6 -760
  46. snowflake/ml/modeling/_internal/snowpark_trainer.py +331 -0
  47. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +96 -124
  48. snowflake/ml/modeling/cluster/affinity_propagation.py +94 -124
  49. snowflake/ml/modeling/cluster/agglomerative_clustering.py +94 -124
  50. snowflake/ml/modeling/cluster/birch.py +94 -124
  51. snowflake/ml/modeling/cluster/bisecting_k_means.py +94 -124
  52. snowflake/ml/modeling/cluster/dbscan.py +94 -124
  53. snowflake/ml/modeling/cluster/feature_agglomeration.py +94 -124
  54. snowflake/ml/modeling/cluster/k_means.py +93 -124
  55. snowflake/ml/modeling/cluster/mean_shift.py +94 -124
  56. snowflake/ml/modeling/cluster/mini_batch_k_means.py +93 -124
  57. snowflake/ml/modeling/cluster/optics.py +94 -124
  58. snowflake/ml/modeling/cluster/spectral_biclustering.py +94 -124
  59. snowflake/ml/modeling/cluster/spectral_clustering.py +94 -124
  60. snowflake/ml/modeling/cluster/spectral_coclustering.py +94 -124
  61. snowflake/ml/modeling/compose/column_transformer.py +94 -124
  62. snowflake/ml/modeling/compose/transformed_target_regressor.py +96 -124
  63. snowflake/ml/modeling/covariance/elliptic_envelope.py +94 -124
  64. snowflake/ml/modeling/covariance/empirical_covariance.py +80 -110
  65. snowflake/ml/modeling/covariance/graphical_lasso.py +94 -124
  66. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +94 -124
  67. snowflake/ml/modeling/covariance/ledoit_wolf.py +85 -115
  68. snowflake/ml/modeling/covariance/min_cov_det.py +94 -124
  69. snowflake/ml/modeling/covariance/oas.py +80 -110
  70. snowflake/ml/modeling/covariance/shrunk_covariance.py +84 -114
  71. snowflake/ml/modeling/decomposition/dictionary_learning.py +94 -124
  72. snowflake/ml/modeling/decomposition/factor_analysis.py +94 -124
  73. snowflake/ml/modeling/decomposition/fast_ica.py +94 -124
  74. snowflake/ml/modeling/decomposition/incremental_pca.py +94 -124
  75. snowflake/ml/modeling/decomposition/kernel_pca.py +94 -124
  76. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +94 -124
  77. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +94 -124
  78. snowflake/ml/modeling/decomposition/pca.py +94 -124
  79. snowflake/ml/modeling/decomposition/sparse_pca.py +94 -124
  80. snowflake/ml/modeling/decomposition/truncated_svd.py +94 -124
  81. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +96 -124
  82. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +91 -119
  83. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +96 -124
  84. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +96 -124
  85. snowflake/ml/modeling/ensemble/bagging_classifier.py +96 -124
  86. snowflake/ml/modeling/ensemble/bagging_regressor.py +96 -124
  87. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +96 -124
  88. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +96 -124
  89. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +96 -124
  90. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +96 -124
  91. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +96 -124
  92. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +96 -124
  93. snowflake/ml/modeling/ensemble/isolation_forest.py +94 -124
  94. snowflake/ml/modeling/ensemble/random_forest_classifier.py +96 -124
  95. snowflake/ml/modeling/ensemble/random_forest_regressor.py +96 -124
  96. snowflake/ml/modeling/ensemble/stacking_regressor.py +96 -124
  97. snowflake/ml/modeling/ensemble/voting_classifier.py +96 -124
  98. snowflake/ml/modeling/ensemble/voting_regressor.py +91 -119
  99. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +82 -110
  100. snowflake/ml/modeling/feature_selection/select_fdr.py +80 -108
  101. snowflake/ml/modeling/feature_selection/select_fpr.py +80 -108
  102. snowflake/ml/modeling/feature_selection/select_fwe.py +80 -108
  103. snowflake/ml/modeling/feature_selection/select_k_best.py +81 -109
  104. snowflake/ml/modeling/feature_selection/select_percentile.py +80 -108
  105. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +94 -124
  106. snowflake/ml/modeling/feature_selection/variance_threshold.py +76 -106
  107. snowflake/ml/modeling/framework/base.py +2 -2
  108. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +96 -124
  109. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +96 -124
  110. snowflake/ml/modeling/impute/iterative_imputer.py +94 -124
  111. snowflake/ml/modeling/impute/knn_imputer.py +94 -124
  112. snowflake/ml/modeling/impute/missing_indicator.py +94 -124
  113. snowflake/ml/modeling/impute/simple_imputer.py +1 -1
  114. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +77 -107
  115. snowflake/ml/modeling/kernel_approximation/nystroem.py +94 -124
  116. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +94 -124
  117. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +86 -116
  118. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +84 -114
  119. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +96 -124
  120. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +71 -100
  121. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +71 -100
  122. snowflake/ml/modeling/linear_model/ard_regression.py +96 -124
  123. snowflake/ml/modeling/linear_model/bayesian_ridge.py +96 -124
  124. snowflake/ml/modeling/linear_model/elastic_net.py +96 -124
  125. snowflake/ml/modeling/linear_model/elastic_net_cv.py +96 -124
  126. snowflake/ml/modeling/linear_model/gamma_regressor.py +96 -124
  127. snowflake/ml/modeling/linear_model/huber_regressor.py +96 -124
  128. snowflake/ml/modeling/linear_model/lars.py +96 -124
  129. snowflake/ml/modeling/linear_model/lars_cv.py +96 -124
  130. snowflake/ml/modeling/linear_model/lasso.py +96 -124
  131. snowflake/ml/modeling/linear_model/lasso_cv.py +96 -124
  132. snowflake/ml/modeling/linear_model/lasso_lars.py +96 -124
  133. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +96 -124
  134. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +96 -124
  135. snowflake/ml/modeling/linear_model/linear_regression.py +91 -119
  136. snowflake/ml/modeling/linear_model/logistic_regression.py +96 -124
  137. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +96 -124
  138. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +96 -124
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +96 -124
  140. snowflake/ml/modeling/linear_model/multi_task_lasso.py +96 -124
  141. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +96 -124
  142. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +96 -124
  143. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +96 -124
  144. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +95 -124
  145. snowflake/ml/modeling/linear_model/perceptron.py +95 -124
  146. snowflake/ml/modeling/linear_model/poisson_regressor.py +96 -124
  147. snowflake/ml/modeling/linear_model/ransac_regressor.py +96 -124
  148. snowflake/ml/modeling/linear_model/ridge.py +96 -124
  149. snowflake/ml/modeling/linear_model/ridge_classifier.py +96 -124
  150. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +96 -124
  151. snowflake/ml/modeling/linear_model/ridge_cv.py +96 -124
  152. snowflake/ml/modeling/linear_model/sgd_classifier.py +96 -124
  153. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +94 -124
  154. snowflake/ml/modeling/linear_model/sgd_regressor.py +96 -124
  155. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +96 -124
  156. snowflake/ml/modeling/linear_model/tweedie_regressor.py +96 -124
  157. snowflake/ml/modeling/manifold/isomap.py +94 -124
  158. snowflake/ml/modeling/manifold/mds.py +94 -124
  159. snowflake/ml/modeling/manifold/spectral_embedding.py +94 -124
  160. snowflake/ml/modeling/manifold/tsne.py +94 -124
  161. snowflake/ml/modeling/metrics/classification.py +187 -52
  162. snowflake/ml/modeling/metrics/correlation.py +4 -2
  163. snowflake/ml/modeling/metrics/covariance.py +7 -4
  164. snowflake/ml/modeling/metrics/ranking.py +32 -16
  165. snowflake/ml/modeling/metrics/regression.py +60 -32
  166. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +94 -124
  167. snowflake/ml/modeling/mixture/gaussian_mixture.py +94 -124
  168. snowflake/ml/modeling/model_selection/grid_search_cv.py +88 -138
  169. snowflake/ml/modeling/model_selection/randomized_search_cv.py +90 -144
  170. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +86 -114
  171. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +93 -121
  172. snowflake/ml/modeling/multiclass/output_code_classifier.py +94 -122
  173. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +92 -120
  174. snowflake/ml/modeling/naive_bayes/categorical_nb.py +96 -124
  175. snowflake/ml/modeling/naive_bayes/complement_nb.py +92 -120
  176. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -107
  177. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +88 -116
  178. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +96 -124
  179. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +96 -124
  180. snowflake/ml/modeling/neighbors/kernel_density.py +94 -124
  181. snowflake/ml/modeling/neighbors/local_outlier_factor.py +94 -124
  182. snowflake/ml/modeling/neighbors/nearest_centroid.py +89 -117
  183. snowflake/ml/modeling/neighbors/nearest_neighbors.py +94 -124
  184. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +96 -124
  185. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +96 -124
  186. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +96 -124
  187. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +94 -124
  188. snowflake/ml/modeling/neural_network/mlp_classifier.py +96 -124
  189. snowflake/ml/modeling/neural_network/mlp_regressor.py +96 -124
  190. snowflake/ml/modeling/parameters/disable_distributed_hpo.py +2 -6
  191. snowflake/ml/modeling/preprocessing/binarizer.py +14 -9
  192. snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +0 -4
  193. snowflake/ml/modeling/preprocessing/label_encoder.py +21 -13
  194. snowflake/ml/modeling/preprocessing/max_abs_scaler.py +20 -14
  195. snowflake/ml/modeling/preprocessing/min_max_scaler.py +35 -19
  196. snowflake/ml/modeling/preprocessing/normalizer.py +6 -9
  197. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +20 -13
  198. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +25 -13
  199. snowflake/ml/modeling/preprocessing/polynomial_features.py +94 -124
  200. snowflake/ml/modeling/preprocessing/robust_scaler.py +28 -14
  201. snowflake/ml/modeling/preprocessing/standard_scaler.py +25 -13
  202. snowflake/ml/modeling/semi_supervised/label_propagation.py +96 -124
  203. snowflake/ml/modeling/semi_supervised/label_spreading.py +96 -124
  204. snowflake/ml/modeling/svm/linear_svc.py +96 -124
  205. snowflake/ml/modeling/svm/linear_svr.py +96 -124
  206. snowflake/ml/modeling/svm/nu_svc.py +96 -124
  207. snowflake/ml/modeling/svm/nu_svr.py +96 -124
  208. snowflake/ml/modeling/svm/svc.py +96 -124
  209. snowflake/ml/modeling/svm/svr.py +96 -124
  210. snowflake/ml/modeling/tree/decision_tree_classifier.py +96 -124
  211. snowflake/ml/modeling/tree/decision_tree_regressor.py +96 -124
  212. snowflake/ml/modeling/tree/extra_tree_classifier.py +96 -124
  213. snowflake/ml/modeling/tree/extra_tree_regressor.py +96 -124
  214. snowflake/ml/modeling/xgboost/xgb_classifier.py +96 -125
  215. snowflake/ml/modeling/xgboost/xgb_regressor.py +96 -125
  216. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +96 -125
  217. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +96 -125
  218. snowflake/ml/registry/model_registry.py +2 -0
  219. snowflake/ml/registry/registry.py +215 -0
  220. snowflake/ml/version.py +1 -1
  221. {snowflake_ml_python-1.1.1.dist-info → snowflake_ml_python-1.1.2.dist-info}/METADATA +21 -3
  222. snowflake_ml_python-1.1.2.dist-info/RECORD +347 -0
  223. snowflake_ml_python-1.1.1.dist-info/RECORD +0 -331
  224. {snowflake_ml_python-1.1.1.dist-info → snowflake_ml_python-1.1.2.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/xgboost/xgbrf_regressor.py CHANGED
@@ -21,17 +21,19 @@ from sklearn.utils.metaestimators import available_if
  from snowflake.ml.modeling.framework.base import BaseTransformer, _process_cols
  from snowflake.ml._internal import telemetry
  from snowflake.ml._internal.exceptions import error_codes, exceptions, modeling_error_messages
+ from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
  from snowflake.ml._internal.utils import pkg_version_utils, identifier
- from snowflake.snowpark import DataFrame
+ from snowflake.snowpark import DataFrame, Session
  from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
  from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
+ from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
+ from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
  from snowflake.ml.modeling._internal.estimator_utils import (
      gather_dependencies,
      original_estimator_has_callable,
      transform_snowml_obj_to_sklearn_obj,
      validate_sklearn_args,
  )
- from snowflake.ml.modeling._internal.snowpark_handlers import XGBoostWrapperProvider
  from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers

  from snowflake.ml.model.model_signature import (
@@ -51,7 +53,6 @@ _PROJECT = "ModelDevelopment"
  _SUBPROJECT = "".join([s.capitalize() for s in "xgboost".replace("sklearn.", "").split("_")])


-
  class XGBRFRegressor(BaseTransformer):
      r"""scikit-learn API for XGBoost random forest regression
      For more details on this class, see [xgboost.XGBRFRegressor]
@@ -60,7 +61,51 @@ class XGBRFRegressor(BaseTransformer):
      Parameters
      ----------

-     n_estimators: int
+     input_cols: Optional[Union[str, List[str]]]
+         A string or list of strings representing column names that contain features.
+         If this parameter is not specified, all columns in the input DataFrame except
+         the columns specified by label_cols, sample_weight_col, and passthrough_cols
+         parameters are considered input columns. Input columns can also be set after
+         initialization with the `set_input_cols` method.
+
+     label_cols: Optional[Union[str, List[str]]]
+         A string or list of strings representing column names that contain labels.
+         Label columns must be specified with this parameter during initialization
+         or with the `set_label_cols` method before fitting.
+
+     output_cols: Optional[Union[str, List[str]]]
+         A string or list of strings representing column names that will store the
+         output of predict and transform operations. The length of output_cols must
+         match the expected number of output columns from the specific predictor or
+         transformer class used.
+         If you omit this parameter, output column names are derived by adding an
+         OUTPUT_ prefix to the label column names for supervised estimators, or
+         OUTPUT_<IDX> for unsupervised estimators. These inferred output column names
+         work for predictors, but output_cols must be set explicitly for transformers.
+         In general, explicitly specifying output column names is clearer, especially
+         if you don't specify the input column names.
+         To transform in place, pass the same names for input_cols and output_cols.
+         Output columns can also be set after initialization with the
+         `set_output_cols` method.
+
+     sample_weight_col: Optional[str]
+         A string representing the column name containing the sample weights.
+         This argument is only required when working with weighted datasets. Sample
+         weight column can also be set after initialization with the
+         `set_sample_weight_col` method.
+
+     passthrough_cols: Optional[Union[str, List[str]]]
+         A string or a list of strings indicating column names to be excluded from any
+         operations (such as train, transform, or inference). These specified column(s)
+         will remain untouched throughout the process. This option is helpful in scenarios
+         requiring automatic input_cols inference, but need to avoid using specific
+         columns, like index columns, during training or inference. Passthrough columns
+         can also be set after initialization with the `set_passthrough_cols` method.
+
+     drop_input_cols: Optional[bool], default=False
+         If set, the response of predict(), transform() methods will not contain input columns.
+
+     n_estimators: int
          Number of trees in random forest to fit.

      max_depth: Optional[int]
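Editor's note: the column-handling parameters documented in the hunk above are shared by all autogenerated Snowpark ML estimators. A minimal, illustrative sketch of how they are typically passed (the column names below are hypothetical and not taken from the diff):

```python
from snowflake.ml.modeling.xgboost import XGBRFRegressor

# Hypothetical column names; input_cols may be omitted and inferred instead.
regressor = XGBRFRegressor(
    input_cols=["FEATURE_1", "FEATURE_2"],   # feature columns
    label_cols=["TARGET"],                   # required for supervised estimators
    output_cols=["PREDICTED_TARGET"],        # defaults to OUTPUT_ + label name if omitted
    passthrough_cols=["ROW_ID"],             # excluded from training and inference
    drop_input_cols=False,
    n_estimators=100,
)
```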
@@ -267,42 +312,6 @@ class XGBRFRegressor(BaseTransformer):
          The value of the gradient for each sample point.
      hess: array_like of shape [n_samples]
          The value of the second derivative for each sample point
-
-     input_cols: Optional[Union[str, List[str]]]
-         A string or list of strings representing column names that contain features.
-         If this parameter is not specified, all columns in the input DataFrame except
-         the columns specified by label_cols, sample_weight_col, and passthrough_cols
-         parameters are considered input columns.
-
-     label_cols: Optional[Union[str, List[str]]]
-         A string or list of strings representing column names that contain labels.
-         This is a required param for estimators, as there is no way to infer these
-         columns. If this parameter is not specified, then object is fitted without
-         labels (like a transformer).
-
-     output_cols: Optional[Union[str, List[str]]]
-         A string or list of strings representing column names that will store the
-         output of predict and transform operations. The length of output_cols must
-         match the expected number of output columns from the specific estimator or
-         transformer class used.
-         If this parameter is not specified, output column names are derived by
-         adding an OUTPUT_ prefix to the label column names. These inferred output
-         column names work for estimator's predict() method, but output_cols must
-         be set explicitly for transformers.
-
-     sample_weight_col: Optional[str]
-         A string representing the column name containing the sample weights.
-         This argument is only required when working with weighted datasets.
-
-     passthrough_cols: Optional[Union[str, List[str]]]
-         A string or a list of strings indicating column names to be excluded from any
-         operations (such as train, transform, or inference). These specified column(s)
-         will remain untouched throughout the process. This option is helpful in scenarios
-         requiring automatic input_cols inference, but need to avoid using specific
-         columns, like index columns, during training or inference.
-
-     drop_input_cols: Optional[bool], default=False
-         If set, the response of predict(), transform() methods will not contain input columns.
      """

      def __init__( # type: ignore[no-untyped-def]
@@ -328,7 +337,7 @@ class XGBRFRegressor(BaseTransformer):
          self.set_passthrough_cols(passthrough_cols)
          self.set_drop_input_cols(drop_input_cols)
          self.set_sample_weight_col(sample_weight_col)
-         deps = set(XGBoostWrapperProvider().dependencies)
+         deps: Set[str] = set([f'numpy=={np.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])

          self._deps = list(deps)

@@ -340,14 +349,15 @@ class XGBRFRegressor(BaseTransformer):
              args=init_args,
              klass=xgboost.XGBRFRegressor
          )
-         self._sklearn_object = xgboost.XGBRFRegressor(
+         self._sklearn_object: Any = xgboost.XGBRFRegressor(
              **cleaned_up_init_args,
              **kwargs,
          )
          self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
          # If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
          self._snowpark_cols: Optional[List[str]] = self.input_cols
-         self._handlers: FitPredictHandlers = HandlersImpl(class_name=XGBRFRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True, wrapper_provider=XGBoostWrapperProvider())
+         self._handlers: FitPredictHandlers = HandlersImpl(class_name=XGBRFRegressor.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
+         self._autogenerated = True

      def _get_rand_id(self) -> str:
          """
@@ -403,54 +413,48 @@ class XGBRFRegressor(BaseTransformer):
              self
          """
          self._infer_input_output_cols(dataset)
-         if isinstance(dataset, pd.DataFrame):
-             assert self._sklearn_object is not None # keep mypy happy
-             self._sklearn_object = self._handlers.fit_pandas(
-                 dataset,
-                 self._sklearn_object,
-                 self.input_cols,
-                 self.label_cols,
-                 self.sample_weight_col
-             )
-         elif isinstance(dataset, DataFrame):
-             self._fit_snowpark(dataset)
-         else:
-             raise TypeError(
-                 f"Unexpected dataset type: {type(dataset)}."
-                 "Supported dataset types: snowpark.DataFrame, pandas.DataFrame."
-             )
+         if isinstance(dataset, DataFrame):
+             session = dataset._session
+             assert session is not None # keep mypy happy
+             # Validate that key package version in user workspace are supported in snowflake conda channel
+             # If customer doesn't have package in conda channel, replace the ones have the closest versions
+             self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
+                 pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
+
+             # Specify input columns so column pruning will be enforced
+             selected_cols = self._get_active_columns()
+             if len(selected_cols) > 0:
+                 dataset = dataset.select(selected_cols)
+
+             self._snowpark_cols = dataset.select(self.input_cols).columns
+
+             # If we are already in a stored procedure, no need to kick off another one.
+             if SNOWML_SPROC_ENV in os.environ:
+                 statement_params = telemetry.get_function_usage_statement_params(
+                     project=_PROJECT,
+                     subproject=_SUBPROJECT,
+                     function_name=telemetry.get_statement_params_full_func_name(inspect.currentframe(), XGBRFRegressor.__class__.__name__),
+                     api_calls=[Session.call],
+                     custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                 )
+                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
+                 pd_df.columns = dataset.columns
+                 dataset = pd_df
+
+         model_trainer = ModelTrainerBuilder.build(
+             estimator=self._sklearn_object,
+             dataset=dataset,
+             input_cols=self.input_cols,
+             label_cols=self.label_cols,
+             sample_weight_col=self.sample_weight_col,
+             autogenerated=self._autogenerated,
+             subproject=_SUBPROJECT
+         )
+         self._sklearn_object = model_trainer.train()
          self._is_fitted = True
          self._get_model_signatures(dataset)
          return self

-     def _fit_snowpark(self, dataset: DataFrame) -> None:
-         session = dataset._session
-         assert session is not None # keep mypy happy
-         # Validate that key package version in user workspace are supported in snowflake conda channel
-         # If customer doesn't have package in conda channel, replace the ones have the closest versions
-         self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-             pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
-
-         # Specify input columns so column pruning will be enforced
-         selected_cols = self._get_active_columns()
-         if len(selected_cols) > 0:
-             dataset = dataset.select(selected_cols)
-
-         estimator = self._sklearn_object
-         assert estimator is not None # Keep mypy happy
-
-         self._snowpark_cols = dataset.select(self.input_cols).columns
-
-         self._sklearn_object = self._handlers.fit_snowpark(
-             dataset,
-             session,
-             estimator,
-             ["snowflake-snowpark-python"] + self._get_dependencies(),
-             self.input_cols,
-             self.label_cols,
-             self.sample_weight_col,
-         )
-
      def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
          if self._drop_input_cols:
              return []
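Editor's note: with the refactor above, `fit` no longer branches into `_fit_snowpark`; both pandas and Snowpark inputs are handed to `ModelTrainerBuilder`, which selects the appropriate trainer. A hedged usage sketch (data and names are illustrative; `session` is assumed to be an existing `snowflake.snowpark.Session`):

```python
import pandas as pd
from snowflake.ml.modeling.xgboost import XGBRFRegressor

regressor = XGBRFRegressor(input_cols=["FEATURE_1", "FEATURE_2"], label_cols=["TARGET"])

# Local path: a pandas DataFrame is routed to the pandas trainer.
local_df = pd.DataFrame(
    {"FEATURE_1": [1.0, 2.0, 3.0], "FEATURE_2": [4.0, 5.0, 6.0], "TARGET": [0.0, 1.0, 0.0]}
)
regressor.fit(local_df)

# Pushdown path: a Snowpark DataFrame is trained inside Snowflake via a stored procedure.
# snowpark_df = session.table("TRAINING_DATA")   # hypothetical table name
# regressor.fit(snowpark_df)
```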
@@ -638,11 +642,6 @@ class XGBRFRegressor(BaseTransformer):
          subproject=_SUBPROJECT,
          custom_tags=dict([("autogen", True)]),
      )
-     @telemetry.add_stmt_params_to_df(
-         project=_PROJECT,
-         subproject=_SUBPROJECT,
-         custom_tags=dict([("autogen", True)]),
-     )
      def predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[DataFrame, pd.DataFrame]:
          """Predict with `X`
          For more details on this function, see [xgboost.XGBRFRegressor.predict]
@@ -696,11 +695,6 @@ class XGBRFRegressor(BaseTransformer):
          subproject=_SUBPROJECT,
          custom_tags=dict([("autogen", True)]),
      )
-     @telemetry.add_stmt_params_to_df(
-         project=_PROJECT,
-         subproject=_SUBPROJECT,
-         custom_tags=dict([("autogen", True)]),
-     )
      def transform(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[DataFrame, pd.DataFrame]:
          """Method not supported for this class.

@@ -757,7 +751,8 @@ class XGBRFRegressor(BaseTransformer):
          if False:
              self.fit(dataset)
              assert self._sklearn_object is not None
-             return self._sklearn_object.labels_
+             labels : npt.NDArray[Any] = self._sklearn_object.labels_
+             return labels
          else:
              raise NotImplementedError

@@ -793,6 +788,7 @@ class XGBRFRegressor(BaseTransformer):
              output_cols = []

          # Make sure column names are valid snowflake identifiers.
+         assert output_cols is not None # Make MyPy happy
          rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]

          return rv
@@ -803,11 +799,6 @@ class XGBRFRegressor(BaseTransformer):
          subproject=_SUBPROJECT,
          custom_tags=dict([("autogen", True)]),
      )
-     @telemetry.add_stmt_params_to_df(
-         project=_PROJECT,
-         subproject=_SUBPROJECT,
-         custom_tags=dict([("autogen", True)]),
-     )
      def predict_proba(
          self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "predict_proba_"
      ) -> Union[DataFrame, pd.DataFrame]:
@@ -848,11 +839,6 @@ class XGBRFRegressor(BaseTransformer):
          subproject=_SUBPROJECT,
          custom_tags=dict([("autogen", True)]),
      )
-     @telemetry.add_stmt_params_to_df(
-         project=_PROJECT,
-         subproject=_SUBPROJECT,
-         custom_tags=dict([("autogen", True)]),
-     )
      def predict_log_proba(
          self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "predict_log_proba_"
      ) -> Union[DataFrame, pd.DataFrame]:
@@ -889,16 +875,6 @@ class XGBRFRegressor(BaseTransformer):
          return output_df

      @available_if(original_estimator_has_callable("decision_function")) # type: ignore[misc]
-     @telemetry.send_api_usage_telemetry(
-         project=_PROJECT,
-         subproject=_SUBPROJECT,
-         custom_tags=dict([("autogen", True)]),
-     )
-     @telemetry.add_stmt_params_to_df(
-         project=_PROJECT,
-         subproject=_SUBPROJECT,
-         custom_tags=dict([("autogen", True)]),
-     )
      def decision_function(
          self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "decision_function_"
      ) -> Union[DataFrame, pd.DataFrame]:
@@ -999,11 +975,6 @@ class XGBRFRegressor(BaseTransformer):
          subproject=_SUBPROJECT,
          custom_tags=dict([("autogen", True)]),
      )
-     @telemetry.add_stmt_params_to_df(
-         project=_PROJECT,
-         subproject=_SUBPROJECT,
-         custom_tags=dict([("autogen", True)]),
-     )
      def kneighbors(
          self,
          dataset: Union[DataFrame, pd.DataFrame],
@@ -1063,9 +1034,9 @@ class XGBRFRegressor(BaseTransformer):
          # For classifier, the type of predict is the same as the type of label
          if self._sklearn_object._estimator_type == 'classifier':
              # label columns is the desired type for output
-             outputs = _infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True)
+             outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
              # rename the output columns
-             outputs = model_signature_utils.rename_features(outputs, self.output_cols)
+             outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
              self._model_signature_dict["predict"] = ModelSignature(inputs,
                                                                     ([] if self._drop_input_cols else inputs)
                                                                     + outputs)
snowflake/ml/registry/model_registry.py CHANGED
@@ -24,6 +24,7 @@ from snowflake.ml._internal.utils import (
      formatting,
      identifier,
      query_result_checker,
+     spcs_attribution_utils,
      table_manager,
      uri,
  )
@@ -1767,6 +1768,7 @@ class ModelRegistry:
          service_name = identifier.get_schema_level_object_identifier(
              self._name, self._schema, f"service_{deployment['MODEL_ID']}"
          )
+         spcs_attribution_utils.record_service_end(self._session, service_name)
          query_result_checker.SqlResultValidator(
              self._session,
              f"DROP SERVICE IF EXISTS {service_name}",
snowflake/ml/registry/registry.py ADDED
@@ -0,0 +1,215 @@
+ from types import ModuleType
+ from typing import Dict, List, Optional
+
+ from snowflake.ml._internal import telemetry
+ from snowflake.ml._internal.utils import sql_identifier
+ from snowflake.ml.model import model_signature, type_hints as model_types
+ from snowflake.ml.model._client.model import model_impl, model_version_impl
+ from snowflake.ml.model._client.ops import model_ops
+ from snowflake.ml.model._model_composer import model_composer
+ from snowflake.snowpark import session
+
+ _TELEMETRY_PROJECT = "MLOps"
+ _MODEL_TELEMETRY_SUBPROJECT = "ModelManagement"
+
+
+ class Registry:
+     def __init__(
+         self,
+         session: session.Session,
+         *,
+         database_name: Optional[str] = None,
+         schema_name: Optional[str] = None,
+     ) -> None:
+         if database_name:
+             self._database_name = sql_identifier.SqlIdentifier(database_name)
+         else:
+             session_db = session.get_current_database()
+             if session_db:
+                 self._database_name = sql_identifier.SqlIdentifier(session_db)
+             else:
+                 raise ValueError("You need to provide a database to use registry.")
+
+         if schema_name:
+             self._schema_name = sql_identifier.SqlIdentifier(schema_name)
+         elif database_name:
+             self._schema_name = sql_identifier.SqlIdentifier("PUBLIC")
+         else:
+             session_schema = session.get_current_schema()
+             self._schema_name = (
+                 sql_identifier.SqlIdentifier(session_schema)
+                 if session_schema
+                 else sql_identifier.SqlIdentifier("PUBLIC")
+             )
+
+         self._model_ops = model_ops.ModelOperator(
+             session, database_name=self._database_name, schema_name=self._schema_name
+         )
+
+     @property
+     def location(self) -> str:
+         return ".".join([self._database_name.identifier(), self._schema_name.identifier()])
+
+     @telemetry.send_api_usage_telemetry(
+         project=_TELEMETRY_PROJECT,
+         subproject=_MODEL_TELEMETRY_SUBPROJECT,
+     )
+     def log_model(
+         self,
+         model: model_types.SupportedModelType,
+         *,
+         model_name: str,
+         version_name: str,
+         conda_dependencies: Optional[List[str]] = None,
+         pip_requirements: Optional[List[str]] = None,
+         python_version: Optional[str] = None,
+         signatures: Optional[Dict[str, model_signature.ModelSignature]] = None,
+         sample_input_data: Optional[model_types.SupportedDataType] = None,
+         code_paths: Optional[List[str]] = None,
+         ext_modules: Optional[List[ModuleType]] = None,
+         options: Optional[model_types.ModelSaveOption] = None,
+     ) -> model_version_impl.ModelVersion:
+         """Log a model.
+
+         Args:
+             model: Model Python object
+             model_name: A string as name.
+             version_name: A string as version. model_name and version_name combination must be unique.
+             signatures: Model data signatures for inputs and output for every target methods. If it is None,
+                 sample_input_data would be used to infer the signatures for those models that cannot automatically
+                 infer the signature. If not None, sample_input should not be specified. Defaults to None.
+             sample_input_data: Sample input data to infer the model signatures from. If it is None, signatures must be
+                 specified if the model cannot automatically infer the signature. If not None, signatures should not be
+                 specified. Defaults to None.
+             conda_dependencies: List of Conda package specs. Use "[channel::]package [operator version]" syntax to
+                 specify a dependency. It is a recommended way to specify your dependencies using conda. When channel is
+                 not specified, Snowflake Anaconda Channel will be used.
+             pip_requirements: List of Pip package specs.
+             python_version: A string of python version where model is run. Used for user override. If specified as None,
+                 current version would be captured. Defaults to None.
+             code_paths: Directory of code to import.
+             ext_modules: External modules that user might want to get pickled with model object. Defaults to None.
+             options: Model specific kwargs.
+
+         Returns:
+             A ModelVersion object corresponding to the model just logged.
+         """
+
+         statement_params = telemetry.get_statement_params(
+             project=_TELEMETRY_PROJECT,
+             subproject=_MODEL_TELEMETRY_SUBPROJECT,
+         )
+         model_name_id = sql_identifier.SqlIdentifier(model_name)
+
+         version_name_id = sql_identifier.SqlIdentifier(version_name)
+
+         stage_path = self._model_ops.prepare_model_stage_path(
+             statement_params=statement_params,
+         )
+
+         mc = model_composer.ModelComposer(self._model_ops._session, stage_path=stage_path)
+         mc.save(
+             name=model_name_id.resolved(),
+             model=model,
+             signatures=signatures,
+             sample_input=sample_input_data,
+             conda_dependencies=conda_dependencies,
+             pip_requirements=pip_requirements,
+             python_version=python_version,
+             code_paths=code_paths,
+             ext_modules=ext_modules,
+             options=options,
+         )
+         self._model_ops.create_from_stage(
+             composed_model=mc,
+             model_name=model_name_id,
+             version_name=version_name_id,
+             statement_params=statement_params,
+         )
+
+         return model_version_impl.ModelVersion._ref(
+             self._model_ops,
+             model_name=model_name_id,
+             version_name=version_name_id,
+         )
+
+     @telemetry.send_api_usage_telemetry(
+         project=_TELEMETRY_PROJECT,
+         subproject=_MODEL_TELEMETRY_SUBPROJECT,
+     )
+     def get_model(self, model_name: str) -> model_impl.Model:
+         """Get the model object.
+
+         Args:
+             model_name: The model name.
+
+         Raises:
+             ValueError: Raised when the model requested does not exist.
+
+         Returns:
+             The model object.
+         """
+         model_name_id = sql_identifier.SqlIdentifier(model_name)
+
+         statement_params = telemetry.get_statement_params(
+             project=_TELEMETRY_PROJECT,
+             subproject=_MODEL_TELEMETRY_SUBPROJECT,
+         )
+         if self._model_ops.validate_existence(
+             model_name=model_name_id,
+             statement_params=statement_params,
+         ):
+             return model_impl.Model._ref(
+                 self._model_ops,
+                 model_name=model_name_id,
+             )
+         else:
+             raise ValueError(f"Unable to find model {model_name}")
+
+     @telemetry.send_api_usage_telemetry(
+         project=_TELEMETRY_PROJECT,
+         subproject=_MODEL_TELEMETRY_SUBPROJECT,
+     )
+     def list_models(self) -> List[model_impl.Model]:
+         """List all models in the schema where the registry is opened.
+
+         Returns:
+             A list of Model objects representing all models in the schema where the registry is opened.
+         """
+         statement_params = telemetry.get_statement_params(
+             project=_TELEMETRY_PROJECT,
+             subproject=_MODEL_TELEMETRY_SUBPROJECT,
+         )
+         model_names = self._model_ops.list_models_or_versions(
+             statement_params=statement_params,
+         )
+         return [
+             model_impl.Model._ref(
+                 self._model_ops,
+                 model_name=model_name,
+             )
+             for model_name in model_names
+         ]
+
+     @telemetry.send_api_usage_telemetry(
+         project=_TELEMETRY_PROJECT,
+         subproject=_MODEL_TELEMETRY_SUBPROJECT,
+     )
+     def delete_model(self, model_name: str) -> None:
+         """Delete the model.
+
+         Args:
+             model_name: The model name, can be fully qualified one.
+                 If not, use database name and schema name of the registry.
+         """
+         model_name_id = sql_identifier.SqlIdentifier(model_name)
+
+         statement_params = telemetry.get_statement_params(
+             project=_TELEMETRY_PROJECT,
+             subproject=_MODEL_TELEMETRY_SUBPROJECT,
+         )
+
+         self._model_ops.delete_model_or_version(
+             model_name=model_name_id,
+             statement_params=statement_params,
+         )
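Editor's note: the new `Registry` class in snowflake/ml/registry/registry.py is the user-facing entry point added in 1.1.2. A hedged usage sketch, assuming an existing Snowpark `session` and an already-trained `model` object (database, schema, model, and DataFrame names below are illustrative; the import path follows the new module shown in the diff):

```python
from snowflake.ml.registry.registry import Registry

# Open the registry in a specific database/schema (hypothetical names).
reg = Registry(session=session, database_name="ML_DB", schema_name="MODELS")

# Log a trained model as a named version; sample_input_data drives signature inference.
mv = reg.log_model(
    model,
    model_name="CHURN_MODEL",
    version_name="V1",
    sample_input_data=train_features_df,   # hypothetical sample DataFrame
)

# Look up, enumerate, and delete models in the registry's schema.
churn_model = reg.get_model("CHURN_MODEL")
all_models = reg.list_models()
reg.delete_model("CHURN_MODEL")
```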
snowflake/ml/version.py CHANGED
@@ -1 +1 @@
- VERSION="1.1.1"
+ VERSION="1.1.2"
{snowflake_ml_python-1.1.1.dist-info → snowflake_ml_python-1.1.2.dist-info}/METADATA CHANGED
@@ -38,8 +38,10 @@ Requires-Dist: importlib_resources>=5.1.4, <6
  Requires-Dist: numpy>=1.23,<2
  Requires-Dist: packaging>=20.9,<24
  Requires-Dist: pandas>=1.0.0,<2
+ Requires-Dist: pyarrow
  Requires-Dist: pytimeparse>=1.1.8,<2
  Requires-Dist: pyyaml>=6.0,<7
+ Requires-Dist: retrying>=1.3.3,<2
  Requires-Dist: s3fs>=2022.11,<2024
  Requires-Dist: scikit-learn>=1.2.1,<1.4
  Requires-Dist: scipy>=1.9,<2
@@ -74,7 +76,7 @@ Provides-Extra: transformers
  Requires-Dist: sentencepiece>=0.1.95,<0.2; extra == 'transformers'
  Requires-Dist: tokenizers>=0.10,<1; extra == 'transformers'
  Requires-Dist: transformers>=4.32.1,<5; extra == 'transformers'
- Version: 1.1.1
+ Version: 1.1.2

  # Snowpark ML

@@ -163,6 +165,24 @@ Note that until a `snowflake-ml-python` package version is available in the offi
  be compatibility issues. Server-side functionality that `snowflake-ml-python` depends on may not yet be released.
  # Release History

+ ## 1.1.2
+
+ ### Bug Fixes
+
+ - Generic: Fix the issue that stack trace is hidden by telemetry unexpectedly.
+ - Model Development: Execute model signature inference without materializing full dataframe in memory.
+ - Model Registry: Fix occasional 'snowflake-ml-python library does not exist' error when deploying to SPCS.
+
+ ### Behavior Changes
+
+ - Model Registry: When calling `predict` with Snowpark DataFrame, both inferred or normalized column names are accepted.
+ - Model Registry: When logging a Snowpark ML Modeling Model, sample input data or manually provided signature will be
+   ignored since they are not necessary.
+
+ ### New Features
+
+ - Model Development: SQL implementation of binary `precision_score` metric.
+
  ## 1.1.1

  ### Bug Fixes
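Editor's note: the 1.1.2 notes above mention a SQL-backed implementation of the binary `precision_score` metric. A hedged sketch of how that metric is typically invoked on a Snowpark DataFrame (the keyword names follow the `snowflake.ml.modeling.metrics` API as commonly documented and are an assumption here; the DataFrame and column names are hypothetical):

```python
from snowflake.ml.modeling.metrics import precision_score

# labeled_predictions_df is assumed to be a Snowpark DataFrame holding
# ground-truth and predicted label columns.
score = precision_score(
    df=labeled_predictions_df,
    y_true_col_names="LABEL",
    y_pred_col_names="PREDICTION",
    average="binary",
)
print(score)
```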
@@ -170,8 +190,6 @@ be compatibility issues. Server-side functionality that `snowflake-ml-python` de
  - Model Registry: The `predict` target method on registered models is now compatible with unsupervised estimators.
  - Model Development: Fix confusion_matrix incorrect results when the row number cannot be divided by the batch size.

- ### Behavior Changes
-

  ### New Features

  - Introduced passthrough_col param in Modeling API. This new param is helpful in scenarios