snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +35 -40
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/identifier.py +74 -7
  5. snowflake/ml/_internal/utils/uri.py +7 -2
  6. snowflake/ml/model/_core_requirements.py +1 -1
  7. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  8. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  9. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  10. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  11. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  12. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  13. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  14. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  15. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  16. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  17. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  18. snowflake/ml/model/_deploy_client/warehouse/deploy.py +25 -28
  19. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +7 -4
  20. snowflake/ml/model/_deployer.py +14 -27
  21. snowflake/ml/model/_env.py +4 -4
  22. snowflake/ml/model/_handlers/_base.py +3 -1
  23. snowflake/ml/model/_handlers/custom.py +14 -2
  24. snowflake/ml/model/_handlers/pytorch.py +186 -0
  25. snowflake/ml/model/_handlers/sklearn.py +14 -8
  26. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  27. snowflake/ml/model/_handlers/torchscript.py +180 -0
  28. snowflake/ml/model/_handlers/xgboost.py +19 -9
  29. snowflake/ml/model/_model.py +27 -21
  30. snowflake/ml/model/_model_meta.py +33 -19
  31. snowflake/ml/model/model_signature.py +446 -66
  32. snowflake/ml/model/type_hints.py +28 -15
  33. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +79 -43
  34. snowflake/ml/modeling/cluster/affinity_propagation.py +79 -43
  35. snowflake/ml/modeling/cluster/agglomerative_clustering.py +79 -43
  36. snowflake/ml/modeling/cluster/birch.py +79 -43
  37. snowflake/ml/modeling/cluster/bisecting_k_means.py +79 -43
  38. snowflake/ml/modeling/cluster/dbscan.py +79 -43
  39. snowflake/ml/modeling/cluster/feature_agglomeration.py +79 -43
  40. snowflake/ml/modeling/cluster/k_means.py +79 -43
  41. snowflake/ml/modeling/cluster/mean_shift.py +79 -43
  42. snowflake/ml/modeling/cluster/mini_batch_k_means.py +79 -43
  43. snowflake/ml/modeling/cluster/optics.py +79 -43
  44. snowflake/ml/modeling/cluster/spectral_biclustering.py +79 -43
  45. snowflake/ml/modeling/cluster/spectral_clustering.py +79 -43
  46. snowflake/ml/modeling/cluster/spectral_coclustering.py +79 -43
  47. snowflake/ml/modeling/compose/column_transformer.py +79 -43
  48. snowflake/ml/modeling/compose/transformed_target_regressor.py +79 -43
  49. snowflake/ml/modeling/covariance/elliptic_envelope.py +79 -43
  50. snowflake/ml/modeling/covariance/empirical_covariance.py +79 -43
  51. snowflake/ml/modeling/covariance/graphical_lasso.py +79 -43
  52. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +79 -43
  53. snowflake/ml/modeling/covariance/ledoit_wolf.py +79 -43
  54. snowflake/ml/modeling/covariance/min_cov_det.py +79 -43
  55. snowflake/ml/modeling/covariance/oas.py +79 -43
  56. snowflake/ml/modeling/covariance/shrunk_covariance.py +79 -43
  57. snowflake/ml/modeling/decomposition/dictionary_learning.py +79 -43
  58. snowflake/ml/modeling/decomposition/factor_analysis.py +79 -43
  59. snowflake/ml/modeling/decomposition/fast_ica.py +79 -43
  60. snowflake/ml/modeling/decomposition/incremental_pca.py +79 -43
  61. snowflake/ml/modeling/decomposition/kernel_pca.py +79 -43
  62. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +79 -43
  63. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +79 -43
  64. snowflake/ml/modeling/decomposition/pca.py +79 -43
  65. snowflake/ml/modeling/decomposition/sparse_pca.py +79 -43
  66. snowflake/ml/modeling/decomposition/truncated_svd.py +79 -43
  67. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +79 -43
  68. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +79 -43
  69. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +79 -43
  70. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +79 -43
  71. snowflake/ml/modeling/ensemble/bagging_classifier.py +79 -43
  72. snowflake/ml/modeling/ensemble/bagging_regressor.py +79 -43
  73. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +79 -43
  74. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +79 -43
  75. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +79 -43
  76. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +79 -43
  77. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +79 -43
  78. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +79 -43
  79. snowflake/ml/modeling/ensemble/isolation_forest.py +79 -43
  80. snowflake/ml/modeling/ensemble/random_forest_classifier.py +79 -43
  81. snowflake/ml/modeling/ensemble/random_forest_regressor.py +79 -43
  82. snowflake/ml/modeling/ensemble/stacking_regressor.py +79 -43
  83. snowflake/ml/modeling/ensemble/voting_classifier.py +79 -43
  84. snowflake/ml/modeling/ensemble/voting_regressor.py +79 -43
  85. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +79 -43
  86. snowflake/ml/modeling/feature_selection/select_fdr.py +79 -43
  87. snowflake/ml/modeling/feature_selection/select_fpr.py +79 -43
  88. snowflake/ml/modeling/feature_selection/select_fwe.py +79 -43
  89. snowflake/ml/modeling/feature_selection/select_k_best.py +79 -43
  90. snowflake/ml/modeling/feature_selection/select_percentile.py +79 -43
  91. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +79 -43
  92. snowflake/ml/modeling/feature_selection/variance_threshold.py +79 -43
  93. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +79 -43
  94. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +79 -43
  95. snowflake/ml/modeling/impute/iterative_imputer.py +79 -43
  96. snowflake/ml/modeling/impute/knn_imputer.py +79 -43
  97. snowflake/ml/modeling/impute/missing_indicator.py +79 -43
  98. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +79 -43
  99. snowflake/ml/modeling/kernel_approximation/nystroem.py +79 -43
  100. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +79 -43
  101. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +79 -43
  102. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +79 -43
  103. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +79 -43
  104. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +79 -43
  105. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +79 -43
  106. snowflake/ml/modeling/linear_model/ard_regression.py +79 -43
  107. snowflake/ml/modeling/linear_model/bayesian_ridge.py +79 -43
  108. snowflake/ml/modeling/linear_model/elastic_net.py +79 -43
  109. snowflake/ml/modeling/linear_model/elastic_net_cv.py +79 -43
  110. snowflake/ml/modeling/linear_model/gamma_regressor.py +79 -43
  111. snowflake/ml/modeling/linear_model/huber_regressor.py +79 -43
  112. snowflake/ml/modeling/linear_model/lars.py +79 -43
  113. snowflake/ml/modeling/linear_model/lars_cv.py +79 -43
  114. snowflake/ml/modeling/linear_model/lasso.py +79 -43
  115. snowflake/ml/modeling/linear_model/lasso_cv.py +79 -43
  116. snowflake/ml/modeling/linear_model/lasso_lars.py +79 -43
  117. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +79 -43
  118. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +79 -43
  119. snowflake/ml/modeling/linear_model/linear_regression.py +79 -43
  120. snowflake/ml/modeling/linear_model/logistic_regression.py +79 -43
  121. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +79 -43
  122. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +79 -43
  123. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +79 -43
  124. snowflake/ml/modeling/linear_model/multi_task_lasso.py +79 -43
  125. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +79 -43
  126. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +79 -43
  127. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +79 -43
  128. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +79 -43
  129. snowflake/ml/modeling/linear_model/perceptron.py +79 -43
  130. snowflake/ml/modeling/linear_model/poisson_regressor.py +79 -43
  131. snowflake/ml/modeling/linear_model/ransac_regressor.py +79 -43
  132. snowflake/ml/modeling/linear_model/ridge.py +79 -43
  133. snowflake/ml/modeling/linear_model/ridge_classifier.py +79 -43
  134. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +79 -43
  135. snowflake/ml/modeling/linear_model/ridge_cv.py +79 -43
  136. snowflake/ml/modeling/linear_model/sgd_classifier.py +79 -43
  137. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +79 -43
  138. snowflake/ml/modeling/linear_model/sgd_regressor.py +79 -43
  139. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +79 -43
  140. snowflake/ml/modeling/linear_model/tweedie_regressor.py +79 -43
  141. snowflake/ml/modeling/manifold/isomap.py +79 -43
  142. snowflake/ml/modeling/manifold/mds.py +79 -43
  143. snowflake/ml/modeling/manifold/spectral_embedding.py +79 -43
  144. snowflake/ml/modeling/manifold/tsne.py +79 -43
  145. snowflake/ml/modeling/metrics/classification.py +6 -1
  146. snowflake/ml/modeling/metrics/regression.py +517 -9
  147. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +79 -43
  148. snowflake/ml/modeling/mixture/gaussian_mixture.py +79 -43
  149. snowflake/ml/modeling/model_selection/grid_search_cv.py +79 -43
  150. snowflake/ml/modeling/model_selection/randomized_search_cv.py +79 -43
  151. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +79 -43
  152. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +79 -43
  153. snowflake/ml/modeling/multiclass/output_code_classifier.py +79 -43
  154. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +79 -43
  155. snowflake/ml/modeling/naive_bayes/categorical_nb.py +79 -43
  156. snowflake/ml/modeling/naive_bayes/complement_nb.py +79 -43
  157. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -43
  158. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +79 -43
  159. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +79 -43
  160. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +79 -43
  161. snowflake/ml/modeling/neighbors/kernel_density.py +79 -43
  162. snowflake/ml/modeling/neighbors/local_outlier_factor.py +79 -43
  163. snowflake/ml/modeling/neighbors/nearest_centroid.py +79 -43
  164. snowflake/ml/modeling/neighbors/nearest_neighbors.py +79 -43
  165. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +79 -43
  166. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +79 -43
  167. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +79 -43
  168. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +79 -43
  169. snowflake/ml/modeling/neural_network/mlp_classifier.py +79 -43
  170. snowflake/ml/modeling/neural_network/mlp_regressor.py +79 -43
  171. snowflake/ml/modeling/pipeline/pipeline.py +24 -0
  172. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
  173. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  174. snowflake/ml/modeling/preprocessing/polynomial_features.py +79 -43
  175. snowflake/ml/modeling/semi_supervised/label_propagation.py +79 -43
  176. snowflake/ml/modeling/semi_supervised/label_spreading.py +79 -43
  177. snowflake/ml/modeling/svm/linear_svc.py +79 -43
  178. snowflake/ml/modeling/svm/linear_svr.py +79 -43
  179. snowflake/ml/modeling/svm/nu_svc.py +79 -43
  180. snowflake/ml/modeling/svm/nu_svr.py +79 -43
  181. snowflake/ml/modeling/svm/svc.py +79 -43
  182. snowflake/ml/modeling/svm/svr.py +79 -43
  183. snowflake/ml/modeling/tree/decision_tree_classifier.py +79 -43
  184. snowflake/ml/modeling/tree/decision_tree_regressor.py +79 -43
  185. snowflake/ml/modeling/tree/extra_tree_classifier.py +79 -43
  186. snowflake/ml/modeling/tree/extra_tree_regressor.py +79 -43
  187. snowflake/ml/modeling/xgboost/xgb_classifier.py +79 -43
  188. snowflake/ml/modeling/xgboost/xgb_regressor.py +79 -43
  189. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +79 -43
  190. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +79 -43
  191. snowflake/ml/registry/model_registry.py +123 -121
  192. snowflake/ml/version.py +1 -1
  193. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +50 -8
  194. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  195. snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
  196. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -3,19 +3,527 @@
3
3
  #
4
4
 
5
5
  import inspect
6
+ from typing import List, Optional, Union
6
7
 
8
+ import cloudpickle
9
+ import numpy as np
10
+ import numpy.typing as npt
11
+ import sklearn
12
+ from packaging import version
13
+ from sklearn import metrics
14
+
15
+ from snowflake import snowpark
7
16
  from snowflake.ml._internal import telemetry
8
- from snowflake.snowpark import DataFrame, functions as F
17
+ from snowflake.ml.modeling.metrics import metrics_utils
18
+ from snowflake.snowpark import functions as F
19
+ from snowflake.snowpark._internal import utils as snowpark_utils
9
20
 
10
21
  _PROJECT = "ModelDevelopment"
11
22
  _SUBPROJECT = "Metrics"
12
23
 
13
24
 
14
- @telemetry.send_api_usage_telemetry(
15
- project=_PROJECT,
16
- subproject=_SUBPROJECT,
17
- )
18
- def r2_score(*, df: DataFrame, y_true_col_name: str, y_pred_col_name: str) -> float:
25
+ @telemetry.send_api_usage_telemetry(project=_PROJECT, subproject=_SUBPROJECT)
26
+ def d2_absolute_error_score(
27
+ *,
28
+ df: snowpark.DataFrame,
29
+ y_true_col_names: Union[str, List[str]],
30
+ y_pred_col_names: Union[str, List[str]],
31
+ sample_weight_col_name: Optional[str] = None,
32
+ multioutput: Union[str, npt.ArrayLike] = "uniform_average",
33
+ ) -> Union[float, npt.NDArray[np.float_]]:
34
+ """
35
+ :math:`D^2` regression score function, \
36
+ fraction of absolute error explained.
37
+
38
+ Best possible score is 1.0 and it can be negative (because the model can be
39
+ arbitrarily worse). A model that always uses the empirical median of `y_true`
40
+ as constant prediction, disregarding the input features,
41
+ gets a :math:`D^2` score of 0.0.
42
+
43
+ Args:
44
+ df: Input dataframe.
45
+ y_true_col_names: Column name(s) representing actual values.
46
+ y_pred_col_names: Column name(s) representing predicted values.
47
+ sample_weight_col_name: Column name representing sample weights.
48
+ multioutput: {'raw_values', 'uniform_average'} or array-like of shape \
49
+ (n_outputs,), default='uniform_average'
50
+ Defines aggregating of multiple output values.
51
+ Array-like value defines weights used to average errors.
52
+ 'raw_values':
53
+ Returns a full set of errors in case of multioutput input.
54
+ 'uniform_average':
55
+ Errors of all outputs are averaged with uniform weight.
56
+
57
+ Returns:
58
+ score: float or ndarray of floats
59
+ The :math:`D^2` score with an absolute error deviance
60
+ or ndarray of scores if 'multioutput' is 'raw_values'.
61
+ """
62
+ metrics_utils.check_label_columns(y_true_col_names, y_pred_col_names)
63
+
64
+ session = df._session
65
+ assert session is not None
66
+ sproc_name = f"d2_absolute_error_score_{snowpark_utils.generate_random_alphanumeric()}"
67
+ sklearn_release = version.parse(sklearn.__version__).release
68
+ statement_params = telemetry.get_statement_params(_PROJECT, _SUBPROJECT)
69
+ cols = metrics_utils.flatten_cols([y_true_col_names, y_pred_col_names, sample_weight_col_name])
70
+ query = df[cols].queries["queries"][-1]
71
+
72
+ @F.sproc( # type: ignore[misc]
73
+ session=session,
74
+ name=sproc_name,
75
+ replace=True,
76
+ packages=[
77
+ "cloudpickle",
78
+ f"scikit-learn=={sklearn_release[0]}.{sklearn_release[1]}.*",
79
+ "snowflake-snowpark-python",
80
+ ],
81
+ statement_params=statement_params,
82
+ )
83
+ def d2_absolute_error_score_sproc(session: snowpark.Session) -> bytes:
84
+ df = session.sql(query).to_pandas(statement_params=statement_params)
85
+ y_true = df[y_true_col_names]
86
+ y_pred = df[y_pred_col_names]
87
+ sample_weight = df[sample_weight_col_name] if sample_weight_col_name else None
88
+
89
+ score = metrics.d2_absolute_error_score(
90
+ y_true,
91
+ y_pred,
92
+ sample_weight=sample_weight,
93
+ multioutput=multioutput,
94
+ )
95
+
96
+ return cloudpickle.dumps(score) # type: ignore[no-any-return]
97
+
98
+ score: Union[float, npt.NDArray[np.float_]] = cloudpickle.loads(
99
+ session.call(sproc_name, statement_params=statement_params)
100
+ )
101
+ return score
102
+
103
+
104
+ @telemetry.send_api_usage_telemetry(project=_PROJECT, subproject=_SUBPROJECT)
105
+ def d2_pinball_score(
106
+ *,
107
+ df: snowpark.DataFrame,
108
+ y_true_col_names: Union[str, List[str]],
109
+ y_pred_col_names: Union[str, List[str]],
110
+ sample_weight_col_name: Optional[str] = None,
111
+ alpha: float = 0.5,
112
+ multioutput: Union[str, npt.ArrayLike] = "uniform_average",
113
+ ) -> Union[float, npt.NDArray[np.float_]]:
114
+ """
115
+ :math:`D^2` regression score function, fraction of pinball loss explained.
116
+
117
+ Best possible score is 1.0 and it can be negative (because the model can be
118
+ arbitrarily worse). A model that always uses the empirical alpha-quantile of
119
+ `y_true` as constant prediction, disregarding the input features,
120
+ gets a :math:`D^2` score of 0.0.
121
+
122
+ Args:
123
+ df: Input dataframe.
124
+ y_true_col_names: Column name(s) representing actual values.
125
+ y_pred_col_names: Column name(s) representing predicted values.
126
+ sample_weight_col_name: Column name representing sample weights.
127
+ alpha: Slope of the pinball deviance. It determines the quantile level
128
+ alpha for which the pinball deviance and also D2 are optimal.
129
+ The default `alpha=0.5` is equivalent to `d2_absolute_error_score`.
130
+ multioutput: {'raw_values', 'uniform_average'} or array-like of shape \
131
+ (n_outputs,), default='uniform_average'
132
+ Defines aggregating of multiple output values.
133
+ Array-like value defines weights used to average errors.
134
+ 'raw_values':
135
+ Returns a full set of errors in case of multioutput input.
136
+ 'uniform_average':
137
+ Scores of all outputs are averaged with uniform weight.
138
+
139
+ Returns:
140
+ score: float or ndarray of floats
141
+ The :math:`D^2` score with a pinball deviance
142
+ or ndarray of scores if `multioutput='raw_values'`.
143
+ """
144
+ metrics_utils.check_label_columns(y_true_col_names, y_pred_col_names)
145
+
146
+ session = df._session
147
+ assert session is not None
148
+ sproc_name = f"d2_pinball_score_{snowpark_utils.generate_random_alphanumeric()}"
149
+ sklearn_release = version.parse(sklearn.__version__).release
150
+ statement_params = telemetry.get_statement_params(_PROJECT, _SUBPROJECT)
151
+ cols = metrics_utils.flatten_cols([y_true_col_names, y_pred_col_names, sample_weight_col_name])
152
+ query = df[cols].queries["queries"][-1]
153
+
154
+ @F.sproc( # type: ignore[misc]
155
+ session=session,
156
+ name=sproc_name,
157
+ replace=True,
158
+ packages=[
159
+ "cloudpickle",
160
+ f"scikit-learn=={sklearn_release[0]}.{sklearn_release[1]}.*",
161
+ "snowflake-snowpark-python",
162
+ ],
163
+ statement_params=statement_params,
164
+ )
165
+ def d2_pinball_score_sproc(session: snowpark.Session) -> bytes:
166
+ df = session.sql(query).to_pandas(statement_params=statement_params)
167
+ y_true = df[y_true_col_names]
168
+ y_pred = df[y_pred_col_names]
169
+ sample_weight = df[sample_weight_col_name] if sample_weight_col_name else None
170
+
171
+ score = metrics.d2_pinball_score(
172
+ y_true,
173
+ y_pred,
174
+ sample_weight=sample_weight,
175
+ alpha=alpha,
176
+ multioutput=multioutput,
177
+ )
178
+
179
+ return cloudpickle.dumps(score) # type: ignore[no-any-return]
180
+
181
+ score: Union[float, npt.NDArray[np.float_]] = cloudpickle.loads(
182
+ session.call(sproc_name, statement_params=statement_params)
183
+ )
184
+ return score
185
+
186
+
187
+ @telemetry.send_api_usage_telemetry(project=_PROJECT, subproject=_SUBPROJECT)
188
+ def explained_variance_score(
189
+ *,
190
+ df: snowpark.DataFrame,
191
+ y_true_col_names: Union[str, List[str]],
192
+ y_pred_col_names: Union[str, List[str]],
193
+ sample_weight_col_name: Optional[str] = None,
194
+ multioutput: Union[str, npt.ArrayLike] = "uniform_average",
195
+ force_finite: bool = True,
196
+ ) -> Union[float, npt.NDArray[np.float_]]:
197
+ """
198
+ Explained variance regression score function.
199
+
200
+ Best possible score is 1.0, lower values are worse.
201
+
202
+ In the particular case when ``y_true`` is constant, the explained variance
203
+ score is not finite: it is either ``NaN`` (perfect predictions) or
204
+ ``-Inf`` (imperfect predictions). To prevent such non-finite numbers to
205
+ pollute higher-level experiments such as a grid search cross-validation,
206
+ by default these cases are replaced with 1.0 (perfect predictions) or 0.0
207
+ (imperfect predictions) respectively. If ``force_finite``
208
+ is set to ``False``, this score falls back on the original :math:`R^2`
209
+ definition.
210
+
211
+ Note:
212
+ The Explained Variance score is similar to the
213
+ :func:`R^2 score <r2_score>`, with the notable difference that it
214
+ does not account for systematic offsets in the prediction. Most often
215
+ the :func:`R^2 score <r2_score>` should be preferred.
216
+
217
+ Args:
218
+ df: Input dataframe.
219
+ y_true_col_names: Column name(s) representing actual values.
220
+ y_pred_col_names: Column name(s) representing predicted values.
221
+ sample_weight_col_name: Column name representing sample weights.
222
+ multioutput: {'raw_values', 'uniform_average', 'variance_weighted'} or \
223
+ array-like of shape (n_outputs,), default='uniform_average'
224
+ Defines aggregating of multiple output values.
225
+ Array-like value defines weights used to average errors.
226
+ 'raw_values':
227
+ Returns a full set of scores in case of multioutput input.
228
+ 'uniform_average':
229
+ Scores of all outputs are averaged with uniform weight.
230
+ 'variance_weighted':
231
+ Scores of all outputs are averaged, weighted by the variances
232
+ of each individual output.
233
+ force_finite: Flag indicating if ``NaN`` and ``-Inf`` scores resulting
234
+ from constant data should be replaced with real numbers (``1.0`` if
235
+ prediction is perfect, ``0.0`` otherwise). Default is ``True``, a
236
+ convenient setting for hyperparameters' search procedures (e.g. grid
237
+ search cross-validation).
238
+
239
+ Returns:
240
+ score: float or ndarray of floats
241
+ The explained variance or ndarray if 'multioutput' is 'raw_values'.
242
+ """
243
+ metrics_utils.check_label_columns(y_true_col_names, y_pred_col_names)
244
+
245
+ session = df._session
246
+ assert session is not None
247
+ sproc_name = f"explained_variance_score_{snowpark_utils.generate_random_alphanumeric()}"
248
+ sklearn_release = version.parse(sklearn.__version__).release
249
+ statement_params = telemetry.get_statement_params(_PROJECT, _SUBPROJECT)
250
+ cols = metrics_utils.flatten_cols([y_true_col_names, y_pred_col_names, sample_weight_col_name])
251
+ query = df[cols].queries["queries"][-1]
252
+
253
+ @F.sproc( # type: ignore[misc]
254
+ session=session,
255
+ name=sproc_name,
256
+ replace=True,
257
+ packages=[
258
+ "cloudpickle",
259
+ f"scikit-learn=={sklearn_release[0]}.{sklearn_release[1]}.*",
260
+ "snowflake-snowpark-python",
261
+ ],
262
+ statement_params=statement_params,
263
+ )
264
+ def explained_variance_score_sproc(session: snowpark.Session) -> bytes:
265
+ df = session.sql(query).to_pandas(statement_params=statement_params)
266
+ y_true = df[y_true_col_names]
267
+ y_pred = df[y_pred_col_names]
268
+ sample_weight = df[sample_weight_col_name] if sample_weight_col_name else None
269
+
270
+ score = metrics.explained_variance_score(
271
+ y_true,
272
+ y_pred,
273
+ sample_weight=sample_weight,
274
+ multioutput=multioutput,
275
+ force_finite=force_finite,
276
+ )
277
+
278
+ return cloudpickle.dumps(score) # type: ignore[no-any-return]
279
+
280
+ score: Union[float, npt.NDArray[np.float_]] = cloudpickle.loads(
281
+ session.call(sproc_name, statement_params=statement_params)
282
+ )
283
+ return score
284
+
285
+
286
+ @telemetry.send_api_usage_telemetry(project=_PROJECT, subproject=_SUBPROJECT)
287
+ def mean_absolute_error(
288
+ *,
289
+ df: snowpark.DataFrame,
290
+ y_true_col_names: Union[str, List[str]],
291
+ y_pred_col_names: Union[str, List[str]],
292
+ sample_weight_col_name: Optional[str] = None,
293
+ multioutput: Union[str, npt.ArrayLike] = "uniform_average",
294
+ ) -> Union[float, npt.NDArray[np.float_]]:
295
+ """
296
+ Mean absolute error regression loss.
297
+
298
+ Args:
299
+ df: Input dataframe.
300
+ y_true_col_names: Column name(s) representing actual values.
301
+ y_pred_col_names: Column name(s) representing predicted values.
302
+ sample_weight_col_name: Column name representing sample weights.
303
+ multioutput: {'raw_values', 'uniform_average'} or array-like of shape \
304
+ (n_outputs,), default='uniform_average'
305
+ Defines aggregating of multiple output values.
306
+ Array-like value defines weights used to average errors.
307
+ 'raw_values':
308
+ Returns a full set of errors in case of multioutput input.
309
+ 'uniform_average':
310
+ Errors of all outputs are averaged with uniform weight.
311
+
312
+ Returns:
313
+ loss: float or ndarray of floats
314
+ If multioutput is 'raw_values', then mean absolute error is returned
315
+ for each output separately.
316
+ If multioutput is 'uniform_average' or an ndarray of weights, then the
317
+ weighted average of all output errors is returned.
318
+
319
+ MAE output is non-negative floating point. The best value is 0.0.
320
+ """
321
+ metrics_utils.check_label_columns(y_true_col_names, y_pred_col_names)
322
+
323
+ session = df._session
324
+ assert session is not None
325
+ sproc_name = f"mean_absolute_error_{snowpark_utils.generate_random_alphanumeric()}"
326
+ sklearn_release = version.parse(sklearn.__version__).release
327
+ statement_params = telemetry.get_statement_params(_PROJECT, _SUBPROJECT)
328
+ cols = metrics_utils.flatten_cols([y_true_col_names, y_pred_col_names, sample_weight_col_name])
329
+ query = df[cols].queries["queries"][-1]
330
+
331
+ @F.sproc( # type: ignore[misc]
332
+ session=session,
333
+ name=sproc_name,
334
+ replace=True,
335
+ packages=[
336
+ "cloudpickle",
337
+ f"scikit-learn=={sklearn_release[0]}.{sklearn_release[1]}.*",
338
+ "snowflake-snowpark-python",
339
+ ],
340
+ statement_params=statement_params,
341
+ )
342
+ def mean_absolute_error_sproc(session: snowpark.Session) -> bytes:
343
+ df = session.sql(query).to_pandas(statement_params=statement_params)
344
+ y_true = df[y_true_col_names]
345
+ y_pred = df[y_pred_col_names]
346
+ sample_weight = df[sample_weight_col_name] if sample_weight_col_name else None
347
+
348
+ loss = metrics.mean_absolute_error(
349
+ y_true,
350
+ y_pred,
351
+ sample_weight=sample_weight,
352
+ multioutput=multioutput,
353
+ )
354
+
355
+ return cloudpickle.dumps(loss) # type: ignore[no-any-return]
356
+
357
+ loss: Union[float, npt.NDArray[np.float_]] = cloudpickle.loads(
358
+ session.call(sproc_name, statement_params=statement_params)
359
+ )
360
+ return loss
361
+
362
+
363
+ @telemetry.send_api_usage_telemetry(project=_PROJECT, subproject=_SUBPROJECT)
364
+ def mean_absolute_percentage_error(
365
+ *,
366
+ df: snowpark.DataFrame,
367
+ y_true_col_names: Union[str, List[str]],
368
+ y_pred_col_names: Union[str, List[str]],
369
+ sample_weight_col_name: Optional[str] = None,
370
+ multioutput: Union[str, npt.ArrayLike] = "uniform_average",
371
+ ) -> Union[float, npt.NDArray[np.float_]]:
372
+ """
373
+ Mean absolute percentage error (MAPE) regression loss.
374
+
375
+ Note here that the output is not a percentage in the range [0, 100]
376
+ and a value of 100 does not mean 100% but 1e2. Furthermore, the output
377
+ can be arbitrarily high when `y_true` is small (which is specific to the
378
+ metric) or when `abs(y_true - y_pred)` is large (which is common for most
379
+ regression metrics).
380
+
381
+ Args:
382
+ df: Input dataframe.
383
+ y_true_col_names: Column name(s) representing actual values.
384
+ y_pred_col_names: Column name(s) representing predicted values.
385
+ sample_weight_col_name: Column name representing sample weights.
386
+ multioutput: {'raw_values', 'uniform_average'} or array-like of shape \
387
+ (n_outputs,), default='uniform_average'
388
+ Defines aggregating of multiple output values.
389
+ Array-like value defines weights used to average errors.
390
+ 'raw_values':
391
+ Returns a full set of errors in case of multioutput input.
392
+ 'uniform_average':
393
+ Errors of all outputs are averaged with uniform weight.
394
+
395
+ Returns:
396
+ loss: float or ndarray of floats
397
+ If multioutput is 'raw_values', then mean absolute percentage error
398
+ is returned for each output separately.
399
+ If multioutput is 'uniform_average' or an ndarray of weights, then the
400
+ weighted average of all output errors is returned.
401
+
402
+ MAPE output is non-negative floating point. The best value is 0.0.
403
+ But note that bad predictions can lead to arbitrarily large
404
+ MAPE values, especially if some `y_true` values are very close to zero.
405
+ Note that we return a large value instead of `inf` when `y_true` is zero.
406
+ """
407
+ metrics_utils.check_label_columns(y_true_col_names, y_pred_col_names)
408
+
409
+ session = df._session
410
+ assert session is not None
411
+ sproc_name = f"mean_absolute_percentage_error_{snowpark_utils.generate_random_alphanumeric()}"
412
+ sklearn_release = version.parse(sklearn.__version__).release
413
+ statement_params = telemetry.get_statement_params(_PROJECT, _SUBPROJECT)
414
+ cols = metrics_utils.flatten_cols([y_true_col_names, y_pred_col_names, sample_weight_col_name])
415
+ query = df[cols].queries["queries"][-1]
416
+
417
+ @F.sproc( # type: ignore[misc]
418
+ session=session,
419
+ name=sproc_name,
420
+ replace=True,
421
+ packages=[
422
+ "cloudpickle",
423
+ f"scikit-learn=={sklearn_release[0]}.{sklearn_release[1]}.*",
424
+ "snowflake-snowpark-python",
425
+ ],
426
+ statement_params=statement_params,
427
+ )
428
+ def mean_absolute_percentage_error_sproc(session: snowpark.Session) -> bytes:
429
+ df = session.sql(query).to_pandas(statement_params=statement_params)
430
+ y_true = df[y_true_col_names]
431
+ y_pred = df[y_pred_col_names]
432
+ sample_weight = df[sample_weight_col_name] if sample_weight_col_name else None
433
+
434
+ loss = metrics.mean_absolute_percentage_error(
435
+ y_true,
436
+ y_pred,
437
+ sample_weight=sample_weight,
438
+ multioutput=multioutput,
439
+ )
440
+
441
+ return cloudpickle.dumps(loss) # type: ignore[no-any-return]
442
+
443
+ loss: Union[float, npt.NDArray[np.float_]] = cloudpickle.loads(
444
+ session.call(sproc_name, statement_params=statement_params)
445
+ )
446
+ return loss
447
+
448
+
449
+ @telemetry.send_api_usage_telemetry(project=_PROJECT, subproject=_SUBPROJECT)
450
+ def mean_squared_error(
451
+ *,
452
+ df: snowpark.DataFrame,
453
+ y_true_col_names: Union[str, List[str]],
454
+ y_pred_col_names: Union[str, List[str]],
455
+ sample_weight_col_name: Optional[str] = None,
456
+ multioutput: Union[str, npt.ArrayLike] = "uniform_average",
457
+ squared: bool = True,
458
+ ) -> Union[float, npt.NDArray[np.float_]]:
459
+ """
460
+ Mean squared error regression loss.
461
+
462
+ Args:
463
+ df: Input dataframe.
464
+ y_true_col_names: Column name(s) representing actual values.
465
+ y_pred_col_names: Column name(s) representing predicted values.
466
+ sample_weight_col_name: Column name representing sample weights.
467
+ multioutput: {'raw_values', 'uniform_average'} or array-like of shape \
468
+ (n_outputs,), default='uniform_average'
469
+ Defines aggregating of multiple output values.
470
+ Array-like value defines weights used to average errors.
471
+ 'raw_values':
472
+ Returns a full set of errors in case of multioutput input.
473
+ 'uniform_average':
474
+ Errors of all outputs are averaged with uniform weight.
475
+ squared: If True returns MSE value, if False returns RMSE value.
476
+
477
+ Returns:
478
+ loss: float or ndarray of floats
479
+ A non-negative floating point value (the best value is 0.0), or an
480
+ array of floating point values, one for each individual target.
481
+ """
482
+ metrics_utils.check_label_columns(y_true_col_names, y_pred_col_names)
483
+
484
+ session = df._session
485
+ assert session is not None
486
+ sproc_name = f"mean_squared_error_{snowpark_utils.generate_random_alphanumeric()}"
487
+ sklearn_release = version.parse(sklearn.__version__).release
488
+ statement_params = telemetry.get_statement_params(_PROJECT, _SUBPROJECT)
489
+ cols = metrics_utils.flatten_cols([y_true_col_names, y_pred_col_names, sample_weight_col_name])
490
+ query = df[cols].queries["queries"][-1]
491
+
492
+ @F.sproc( # type: ignore[misc]
493
+ session=session,
494
+ name=sproc_name,
495
+ replace=True,
496
+ packages=[
497
+ "cloudpickle",
498
+ f"scikit-learn=={sklearn_release[0]}.{sklearn_release[1]}.*",
499
+ "snowflake-snowpark-python",
500
+ ],
501
+ statement_params=statement_params,
502
+ )
503
+ def mean_squared_error_sproc(session: snowpark.Session) -> bytes:
504
+ df = session.sql(query).to_pandas(statement_params=statement_params)
505
+ y_true = df[y_true_col_names]
506
+ y_pred = df[y_pred_col_names]
507
+ sample_weight = df[sample_weight_col_name] if sample_weight_col_name else None
508
+
509
+ loss = metrics.mean_squared_error(
510
+ y_true,
511
+ y_pred,
512
+ sample_weight=sample_weight,
513
+ multioutput=multioutput,
514
+ squared=squared,
515
+ )
516
+
517
+ return cloudpickle.dumps(loss) # type: ignore[no-any-return]
518
+
519
+ loss: Union[float, npt.NDArray[np.float_]] = cloudpickle.loads(
520
+ session.call(sproc_name, statement_params=statement_params)
521
+ )
522
+ return loss
523
+
524
+
525
+ @telemetry.send_api_usage_telemetry(project=_PROJECT, subproject=_SUBPROJECT)
526
+ def r2_score(*, df: snowpark.DataFrame, y_true_col_name: str, y_pred_col_name: str) -> float:
19
527
  """:math:`R^2` (coefficient of determination) regression score function.
20
528
  Returns R squared metric on 2 columns in the dataframe.
21
529
 
@@ -27,9 +535,9 @@ def r2_score(*, df: DataFrame, y_true_col_name: str, y_pred_col_name: str) -> fl
27
535
  TODO(pdorairaj): Implement other params from sklearn - sample_weight, multi_output, force_finite.
28
536
 
29
537
  Args:
30
- df (DataFrame): Input dataframe.
31
- y_true_col_name (str): Column name representing actual values.
32
- y_pred_col_name (str): Column name representing predicted values.
538
+ df: Input dataframe.
539
+ y_true_col_name: Column name representing actual values.
540
+ y_pred_col_name: Column name representing predicted values.
33
541
 
34
542
  Returns:
35
543
  R squared metric.