snowflake-ml-python 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +77 -32
- snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
- snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
- snowflake/ml/_internal/exceptions/error_codes.py +3 -0
- snowflake/ml/_internal/lineage/data_source.py +10 -0
- snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/dataset/__init__.py +10 -0
- snowflake/ml/dataset/dataset.py +454 -129
- snowflake/ml/dataset/dataset_factory.py +53 -0
- snowflake/ml/dataset/dataset_metadata.py +103 -0
- snowflake/ml/dataset/dataset_reader.py +202 -0
- snowflake/ml/feature_store/feature_store.py +531 -332
- snowflake/ml/feature_store/feature_view.py +40 -23
- snowflake/ml/fileset/embedded_stage_fs.py +146 -0
- snowflake/ml/fileset/sfcfs.py +56 -54
- snowflake/ml/fileset/snowfs.py +159 -0
- snowflake/ml/fileset/stage_fs.py +49 -17
- snowflake/ml/model/__init__.py +2 -2
- snowflake/ml/model/_api.py +16 -1
- snowflake/ml/model/_client/model/model_impl.py +27 -0
- snowflake/ml/model/_client/model/model_version_impl.py +137 -50
- snowflake/ml/model/_client/ops/model_ops.py +159 -40
- snowflake/ml/model/_client/sql/model.py +25 -2
- snowflake/ml/model/_client/sql/model_version.py +131 -2
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
- snowflake/ml/model/_model_composer/model_composer.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +38 -51
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +19 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_env/model_env.py +41 -0
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +37 -11
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -5
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
- snowflake/ml/modeling/_internal/model_trainer.py +7 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +29 -7
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +246 -175
- snowflake/ml/modeling/cluster/affinity_propagation.py +246 -175
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +246 -175
- snowflake/ml/modeling/cluster/birch.py +248 -175
- snowflake/ml/modeling/cluster/bisecting_k_means.py +248 -175
- snowflake/ml/modeling/cluster/dbscan.py +246 -175
- snowflake/ml/modeling/cluster/feature_agglomeration.py +248 -175
- snowflake/ml/modeling/cluster/k_means.py +248 -175
- snowflake/ml/modeling/cluster/mean_shift.py +246 -175
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +248 -175
- snowflake/ml/modeling/cluster/optics.py +246 -175
- snowflake/ml/modeling/cluster/spectral_biclustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_clustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_coclustering.py +246 -175
- snowflake/ml/modeling/compose/column_transformer.py +248 -175
- snowflake/ml/modeling/compose/transformed_target_regressor.py +246 -175
- snowflake/ml/modeling/covariance/elliptic_envelope.py +246 -175
- snowflake/ml/modeling/covariance/empirical_covariance.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +246 -175
- snowflake/ml/modeling/covariance/ledoit_wolf.py +246 -175
- snowflake/ml/modeling/covariance/min_cov_det.py +246 -175
- snowflake/ml/modeling/covariance/oas.py +246 -175
- snowflake/ml/modeling/covariance/shrunk_covariance.py +246 -175
- snowflake/ml/modeling/decomposition/dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/factor_analysis.py +248 -175
- snowflake/ml/modeling/decomposition/fast_ica.py +248 -175
- snowflake/ml/modeling/decomposition/incremental_pca.py +248 -175
- snowflake/ml/modeling/decomposition/kernel_pca.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/pca.py +248 -175
- snowflake/ml/modeling/decomposition/sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/truncated_svd.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/isolation_forest.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/stacking_regressor.py +248 -175
- snowflake/ml/modeling/ensemble/voting_classifier.py +248 -175
- snowflake/ml/modeling/ensemble/voting_regressor.py +248 -175
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fdr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fpr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fwe.py +248 -175
- snowflake/ml/modeling/feature_selection/select_k_best.py +248 -175
- snowflake/ml/modeling/feature_selection/select_percentile.py +248 -175
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +248 -175
- snowflake/ml/modeling/feature_selection/variance_threshold.py +248 -175
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +72 -37
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +246 -175
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +246 -175
- snowflake/ml/modeling/impute/iterative_imputer.py +248 -175
- snowflake/ml/modeling/impute/knn_imputer.py +248 -175
- snowflake/ml/modeling/impute/missing_indicator.py +248 -175
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/nystroem.py +248 -175
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +248 -175
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ard_regression.py +246 -175
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/gamma_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/huber_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/lars.py +246 -175
- snowflake/ml/modeling/linear_model/lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +246 -175
- snowflake/ml/modeling/linear_model/linear_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/perceptron.py +246 -175
- snowflake/ml/modeling/linear_model/poisson_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ransac_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ridge.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_cv.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +246 -175
- snowflake/ml/modeling/manifold/isomap.py +248 -175
- snowflake/ml/modeling/manifold/mds.py +248 -175
- snowflake/ml/modeling/manifold/spectral_embedding.py +248 -175
- snowflake/ml/modeling/manifold/tsne.py +248 -175
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +246 -175
- snowflake/ml/modeling/mixture/gaussian_mixture.py +246 -175
- snowflake/ml/modeling/model_selection/grid_search_cv.py +63 -41
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +80 -38
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/output_code_classifier.py +246 -175
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/complement_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neighbors/kernel_density.py +246 -175
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_centroid.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +246 -175
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +248 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +248 -175
- snowflake/ml/modeling/neural_network/mlp_classifier.py +246 -175
- snowflake/ml/modeling/neural_network/mlp_regressor.py +246 -175
- snowflake/ml/modeling/pipeline/pipeline.py +517 -35
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +13 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +248 -175
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +246 -175
- snowflake/ml/modeling/semi_supervised/label_spreading.py +246 -175
- snowflake/ml/modeling/svm/linear_svc.py +246 -175
- snowflake/ml/modeling/svm/linear_svr.py +246 -175
- snowflake/ml/modeling/svm/nu_svc.py +246 -175
- snowflake/ml/modeling/svm/nu_svr.py +246 -175
- snowflake/ml/modeling/svm/svc.py +246 -175
- snowflake/ml/modeling/svm/svr.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_regressor.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +246 -175
- snowflake/ml/registry/model_registry.py +3 -149
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +129 -57
- snowflake_ml_python-1.5.0.dist-info/RECORD +380 -0
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- snowflake/ml/registry/_artifact_manager.py +0 -156
- snowflake/ml/registry/artifact.py +0 -46
- snowflake_ml_python-1.4.0.dist-info/RECORD +0 -370
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
|
|
33
33
|
BatchInferenceKwargsTypedDict,
|
34
34
|
ScoreKwargsTypedDict
|
35
35
|
)
|
36
|
+
from snowflake.ml.model._signatures import utils as model_signature_utils
|
37
|
+
from snowflake.ml.model.model_signature import (
|
38
|
+
BaseFeatureSpec,
|
39
|
+
DataType,
|
40
|
+
FeatureSpec,
|
41
|
+
ModelSignature,
|
42
|
+
_infer_signature,
|
43
|
+
_rename_signature_with_snowflake_identifiers,
|
44
|
+
)
|
36
45
|
|
37
46
|
from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
|
38
47
|
|
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
|
|
43
52
|
validate_sklearn_args,
|
44
53
|
)
|
45
54
|
|
46
|
-
from snowflake.ml.model.model_signature import (
|
47
|
-
DataType,
|
48
|
-
FeatureSpec,
|
49
|
-
ModelSignature,
|
50
|
-
_infer_signature,
|
51
|
-
_rename_signature_with_snowflake_identifiers,
|
52
|
-
BaseFeatureSpec,
|
53
|
-
)
|
54
|
-
from snowflake.ml.model._signatures import utils as model_signature_utils
|
55
|
-
|
56
55
|
_PROJECT = "ModelDevelopment"
|
57
56
|
# Derive subproject from module name by removing "sklearn"
|
58
57
|
# and converting module name from underscore to CamelCase
|
@@ -61,12 +60,6 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.svm".replace("sklearn.",
|
|
61
60
|
|
62
61
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
63
62
|
|
64
|
-
def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
|
65
|
-
def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
|
66
|
-
return False and callable(getattr(self._sklearn_object, "fit_transform", None))
|
67
|
-
return check
|
68
|
-
|
69
|
-
|
70
63
|
class NuSVR(BaseTransformer):
|
71
64
|
r"""Nu Support Vector Regression
|
72
65
|
For more details on this class, see [sklearn.svm.NuSVR]
|
@@ -260,12 +253,7 @@ class NuSVR(BaseTransformer):
|
|
260
253
|
)
|
261
254
|
return selected_cols
|
262
255
|
|
263
|
-
|
264
|
-
project=_PROJECT,
|
265
|
-
subproject=_SUBPROJECT,
|
266
|
-
custom_tags=dict([("autogen", True)]),
|
267
|
-
)
|
268
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "NuSVR":
|
256
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "NuSVR":
|
269
257
|
"""Fit the SVM model according to the given training data
|
270
258
|
For more details on this function, see [sklearn.svm.NuSVR.fit]
|
271
259
|
(https://scikit-learn.org/stable/modules/generated/sklearn.svm.NuSVR.html#sklearn.svm.NuSVR.fit)
|
@@ -292,12 +280,14 @@ class NuSVR(BaseTransformer):
|
|
292
280
|
|
293
281
|
self._snowpark_cols = dataset.select(self.input_cols).columns
|
294
282
|
|
295
|
-
|
283
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
296
284
|
if SNOWML_SPROC_ENV in os.environ:
|
297
285
|
statement_params = telemetry.get_function_usage_statement_params(
|
298
286
|
project=_PROJECT,
|
299
287
|
subproject=_SUBPROJECT,
|
300
|
-
function_name=telemetry.get_statement_params_full_func_name(
|
288
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
289
|
+
inspect.currentframe(), NuSVR.__class__.__name__
|
290
|
+
),
|
301
291
|
api_calls=[Session.call],
|
302
292
|
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
303
293
|
)
|
@@ -318,27 +308,24 @@ class NuSVR(BaseTransformer):
|
|
318
308
|
)
|
319
309
|
self._sklearn_object = model_trainer.train()
|
320
310
|
self._is_fitted = True
|
321
|
-
self.
|
311
|
+
self._generate_model_signatures(dataset)
|
322
312
|
return self
|
323
313
|
|
324
314
|
def _batch_inference_validate_snowpark(
|
325
315
|
self,
|
326
316
|
dataset: DataFrame,
|
327
317
|
inference_method: str,
|
328
|
-
) ->
|
329
|
-
"""Util method to run validate that batch inference can be run on a snowpark dataframe
|
330
|
-
return the available package that exists in the snowflake anaconda channel
|
318
|
+
) -> None:
|
319
|
+
"""Util method to run validate that batch inference can be run on a snowpark dataframe.
|
331
320
|
|
332
321
|
Args:
|
333
322
|
dataset: snowpark dataframe
|
334
323
|
inference_method: the inference method such as predict, score...
|
335
|
-
|
324
|
+
|
336
325
|
Raises:
|
337
326
|
SnowflakeMLException: If the estimator is not fitted, raise error
|
338
327
|
SnowflakeMLException: If the session is None, raise error
|
339
328
|
|
340
|
-
Returns:
|
341
|
-
A list of available package that exists in the snowflake anaconda channel
|
342
329
|
"""
|
343
330
|
if not self._is_fitted:
|
344
331
|
raise exceptions.SnowflakeMLException(
|
@@ -356,9 +343,7 @@ class NuSVR(BaseTransformer):
|
|
356
343
|
"Session must not specified for snowpark dataset."
|
357
344
|
),
|
358
345
|
)
|
359
|
-
|
360
|
-
return pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
|
361
|
-
pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
|
346
|
+
|
362
347
|
|
363
348
|
@available_if(original_estimator_has_callable("predict")) # type: ignore[misc]
|
364
349
|
@telemetry.send_api_usage_telemetry(
|
@@ -394,7 +379,9 @@ class NuSVR(BaseTransformer):
|
|
394
379
|
# when it is classifier, infer the datatype from label columns
|
395
380
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
396
381
|
# Batch inference takes a single expected output column type. Use the first columns type for now.
|
397
|
-
label_cols_signatures = [
|
382
|
+
label_cols_signatures = [
|
383
|
+
row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
|
384
|
+
]
|
398
385
|
if len(label_cols_signatures) == 0:
|
399
386
|
error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
|
400
387
|
raise exceptions.SnowflakeMLException(
|
@@ -402,25 +389,23 @@ class NuSVR(BaseTransformer):
|
|
402
389
|
original_exception=ValueError(error_str),
|
403
390
|
)
|
404
391
|
|
405
|
-
expected_type_inferred = convert_sp_to_sf_type(
|
406
|
-
label_cols_signatures[0].as_snowpark_type()
|
407
|
-
)
|
392
|
+
expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
|
408
393
|
|
409
|
-
self.
|
410
|
-
|
394
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
395
|
+
self._deps = self._get_dependencies()
|
396
|
+
assert isinstance(
|
397
|
+
dataset._session, Session
|
398
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
411
399
|
|
412
400
|
transform_kwargs = dict(
|
413
|
-
session
|
414
|
-
dependencies
|
415
|
-
drop_input_cols
|
416
|
-
expected_output_cols_type
|
401
|
+
session=dataset._session,
|
402
|
+
dependencies=self._deps,
|
403
|
+
drop_input_cols=self._drop_input_cols,
|
404
|
+
expected_output_cols_type=expected_type_inferred,
|
417
405
|
)
|
418
406
|
|
419
407
|
elif isinstance(dataset, pd.DataFrame):
|
420
|
-
transform_kwargs = dict(
|
421
|
-
snowpark_input_cols = self._snowpark_cols,
|
422
|
-
drop_input_cols = self._drop_input_cols
|
423
|
-
)
|
408
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
424
409
|
|
425
410
|
transform_handlers = ModelTransformerBuilder.build(
|
426
411
|
dataset=dataset,
|
@@ -460,7 +445,7 @@ class NuSVR(BaseTransformer):
|
|
460
445
|
Transformed dataset.
|
461
446
|
"""
|
462
447
|
super()._check_dataset_type(dataset)
|
463
|
-
inference_method="transform"
|
448
|
+
inference_method = "transform"
|
464
449
|
|
465
450
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
466
451
|
# are specific to the type of dataset used.
|
@@ -490,24 +475,19 @@ class NuSVR(BaseTransformer):
|
|
490
475
|
if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
|
491
476
|
expected_dtype = convert_sp_to_sf_type(output_types[0])
|
492
477
|
|
493
|
-
self.
|
494
|
-
|
495
|
-
inference_method=inference_method,
|
496
|
-
)
|
478
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
479
|
+
self._deps = self._get_dependencies()
|
497
480
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
498
481
|
|
499
482
|
transform_kwargs = dict(
|
500
|
-
session
|
501
|
-
dependencies
|
502
|
-
drop_input_cols
|
503
|
-
expected_output_cols_type
|
483
|
+
session=dataset._session,
|
484
|
+
dependencies=self._deps,
|
485
|
+
drop_input_cols=self._drop_input_cols,
|
486
|
+
expected_output_cols_type=expected_dtype,
|
504
487
|
)
|
505
488
|
|
506
489
|
elif isinstance(dataset, pd.DataFrame):
|
507
|
-
transform_kwargs = dict(
|
508
|
-
snowpark_input_cols = self._snowpark_cols,
|
509
|
-
drop_input_cols = self._drop_input_cols
|
510
|
-
)
|
490
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
511
491
|
|
512
492
|
transform_handlers = ModelTransformerBuilder.build(
|
513
493
|
dataset=dataset,
|
@@ -526,7 +506,11 @@ class NuSVR(BaseTransformer):
|
|
526
506
|
return output_df
|
527
507
|
|
528
508
|
@available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
|
529
|
-
def fit_predict(
|
509
|
+
def fit_predict(
|
510
|
+
self,
|
511
|
+
dataset: Union[DataFrame, pd.DataFrame],
|
512
|
+
output_cols_prefix: str = "fit_predict_",
|
513
|
+
) -> Union[DataFrame, pd.DataFrame]:
|
530
514
|
""" Method not supported for this class.
|
531
515
|
|
532
516
|
|
@@ -551,22 +535,104 @@ class NuSVR(BaseTransformer):
|
|
551
535
|
)
|
552
536
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
553
537
|
drop_input_cols=self._drop_input_cols,
|
554
|
-
expected_output_cols_list=
|
538
|
+
expected_output_cols_list=(
|
539
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
540
|
+
),
|
555
541
|
)
|
556
542
|
self._sklearn_object = fitted_estimator
|
557
543
|
self._is_fitted = True
|
558
544
|
return output_result
|
559
545
|
|
546
|
+
|
547
|
+
@available_if(original_estimator_has_callable("fit_transform")) # type: ignore[misc]
|
548
|
+
def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "fit_transform_",) -> Union[DataFrame, pd.DataFrame]:
|
549
|
+
""" Method not supported for this class.
|
550
|
+
|
560
551
|
|
561
|
-
|
562
|
-
|
563
|
-
|
552
|
+
Raises:
|
553
|
+
TypeError: Supported dataset types: snowpark.DataFrame, pandas.DataFrame.
|
554
|
+
|
555
|
+
Args:
|
556
|
+
dataset: Union[snowflake.snowpark.DataFrame, pandas.DataFrame]
|
557
|
+
Snowpark or Pandas DataFrame.
|
558
|
+
output_cols_prefix: Prefix for the response columns
|
564
559
|
Returns:
|
565
560
|
Transformed dataset.
|
566
561
|
"""
|
567
|
-
self.
|
568
|
-
|
569
|
-
|
562
|
+
self._infer_input_output_cols(dataset)
|
563
|
+
super()._check_dataset_type(dataset)
|
564
|
+
model_trainer = ModelTrainerBuilder.build_fit_transform(
|
565
|
+
estimator=self._sklearn_object,
|
566
|
+
dataset=dataset,
|
567
|
+
input_cols=self.input_cols,
|
568
|
+
label_cols=self.label_cols,
|
569
|
+
sample_weight_col=self.sample_weight_col,
|
570
|
+
autogenerated=self._autogenerated,
|
571
|
+
subproject=_SUBPROJECT,
|
572
|
+
)
|
573
|
+
output_result, fitted_estimator = model_trainer.train_fit_transform(
|
574
|
+
drop_input_cols=self._drop_input_cols,
|
575
|
+
expected_output_cols_list=self.output_cols,
|
576
|
+
)
|
577
|
+
self._sklearn_object = fitted_estimator
|
578
|
+
self._is_fitted = True
|
579
|
+
return output_result
|
580
|
+
|
581
|
+
|
582
|
+
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
583
|
+
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
584
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
585
|
+
"""
|
586
|
+
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
587
|
+
# The following condition is introduced for kneighbors methods, and not used in other methods
|
588
|
+
if output_cols:
|
589
|
+
output_cols = [
|
590
|
+
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
591
|
+
for c in output_cols
|
592
|
+
]
|
593
|
+
elif getattr(self._sklearn_object, "classes_", None) is None:
|
594
|
+
output_cols = [output_cols_prefix]
|
595
|
+
elif self._sklearn_object is not None:
|
596
|
+
classes = self._sklearn_object.classes_
|
597
|
+
if isinstance(classes, numpy.ndarray):
|
598
|
+
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
599
|
+
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
600
|
+
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
601
|
+
output_cols = []
|
602
|
+
for i, cl in enumerate(classes):
|
603
|
+
# For binary classification, there is only one output column for each class
|
604
|
+
# ndarray as the two classes are complementary.
|
605
|
+
if len(cl) == 2:
|
606
|
+
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
607
|
+
else:
|
608
|
+
output_cols.extend([
|
609
|
+
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
610
|
+
])
|
611
|
+
else:
|
612
|
+
output_cols = []
|
613
|
+
|
614
|
+
# Make sure column names are valid snowflake identifiers.
|
615
|
+
assert output_cols is not None # Make MyPy happy
|
616
|
+
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
617
|
+
|
618
|
+
return rv
|
619
|
+
|
620
|
+
def _align_expected_output_names(
|
621
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
622
|
+
) -> List[str]:
|
623
|
+
# in case the inferred output column names dimension is different
|
624
|
+
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
625
|
+
output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
|
626
|
+
output_df_columns = list(output_df_pd.columns)
|
627
|
+
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
628
|
+
if self.sample_weight_col:
|
629
|
+
output_df_columns_set -= set(self.sample_weight_col)
|
630
|
+
# if the dimension of inferred output column names is correct; use it
|
631
|
+
if len(expected_output_cols_list) == len(output_df_columns_set):
|
632
|
+
return expected_output_cols_list
|
633
|
+
# otherwise, use the sklearn estimator's output
|
634
|
+
else:
|
635
|
+
return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
570
636
|
|
571
637
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
572
638
|
@telemetry.send_api_usage_telemetry(
|
@@ -598,24 +664,26 @@ class NuSVR(BaseTransformer):
|
|
598
664
|
# are specific to the type of dataset used.
|
599
665
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
600
666
|
|
667
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
668
|
+
|
601
669
|
if isinstance(dataset, DataFrame):
|
602
|
-
self.
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
670
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
671
|
+
self._deps = self._get_dependencies()
|
672
|
+
assert isinstance(
|
673
|
+
dataset._session, Session
|
674
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
607
675
|
transform_kwargs = dict(
|
608
676
|
session=dataset._session,
|
609
677
|
dependencies=self._deps,
|
610
|
-
drop_input_cols
|
678
|
+
drop_input_cols=self._drop_input_cols,
|
611
679
|
expected_output_cols_type="float",
|
612
680
|
)
|
681
|
+
expected_output_cols = self._align_expected_output_names(
|
682
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
683
|
+
)
|
613
684
|
|
614
685
|
elif isinstance(dataset, pd.DataFrame):
|
615
|
-
transform_kwargs = dict(
|
616
|
-
snowpark_input_cols = self._snowpark_cols,
|
617
|
-
drop_input_cols = self._drop_input_cols
|
618
|
-
)
|
686
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
619
687
|
|
620
688
|
transform_handlers = ModelTransformerBuilder.build(
|
621
689
|
dataset=dataset,
|
@@ -627,7 +695,7 @@ class NuSVR(BaseTransformer):
|
|
627
695
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
628
696
|
inference_method=inference_method,
|
629
697
|
input_cols=self.input_cols,
|
630
|
-
expected_output_cols=
|
698
|
+
expected_output_cols=expected_output_cols,
|
631
699
|
**transform_kwargs
|
632
700
|
)
|
633
701
|
return output_df
|
@@ -657,29 +725,30 @@ class NuSVR(BaseTransformer):
|
|
657
725
|
Output dataset with log probability of the sample for each class in the model.
|
658
726
|
"""
|
659
727
|
super()._check_dataset_type(dataset)
|
660
|
-
inference_method="predict_log_proba"
|
728
|
+
inference_method = "predict_log_proba"
|
729
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
661
730
|
|
662
731
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
663
732
|
# are specific to the type of dataset used.
|
664
733
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
665
734
|
|
666
735
|
if isinstance(dataset, DataFrame):
|
667
|
-
self.
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
736
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
737
|
+
self._deps = self._get_dependencies()
|
738
|
+
assert isinstance(
|
739
|
+
dataset._session, Session
|
740
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
672
741
|
transform_kwargs = dict(
|
673
742
|
session=dataset._session,
|
674
743
|
dependencies=self._deps,
|
675
|
-
drop_input_cols
|
744
|
+
drop_input_cols=self._drop_input_cols,
|
676
745
|
expected_output_cols_type="float",
|
677
746
|
)
|
747
|
+
expected_output_cols = self._align_expected_output_names(
|
748
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
749
|
+
)
|
678
750
|
elif isinstance(dataset, pd.DataFrame):
|
679
|
-
transform_kwargs = dict(
|
680
|
-
snowpark_input_cols = self._snowpark_cols,
|
681
|
-
drop_input_cols = self._drop_input_cols
|
682
|
-
)
|
751
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
683
752
|
|
684
753
|
transform_handlers = ModelTransformerBuilder.build(
|
685
754
|
dataset=dataset,
|
@@ -692,7 +761,7 @@ class NuSVR(BaseTransformer):
|
|
692
761
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
693
762
|
inference_method=inference_method,
|
694
763
|
input_cols=self.input_cols,
|
695
|
-
expected_output_cols=
|
764
|
+
expected_output_cols=expected_output_cols,
|
696
765
|
**transform_kwargs
|
697
766
|
)
|
698
767
|
return output_df
|
@@ -718,30 +787,32 @@ class NuSVR(BaseTransformer):
|
|
718
787
|
Output dataset with results of the decision function for the samples in input dataset.
|
719
788
|
"""
|
720
789
|
super()._check_dataset_type(dataset)
|
721
|
-
inference_method="decision_function"
|
790
|
+
inference_method = "decision_function"
|
722
791
|
|
723
792
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
724
793
|
# are specific to the type of dataset used.
|
725
794
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
726
795
|
|
796
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
797
|
+
|
727
798
|
if isinstance(dataset, DataFrame):
|
728
|
-
self.
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
799
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
800
|
+
self._deps = self._get_dependencies()
|
801
|
+
assert isinstance(
|
802
|
+
dataset._session, Session
|
803
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
733
804
|
transform_kwargs = dict(
|
734
805
|
session=dataset._session,
|
735
806
|
dependencies=self._deps,
|
736
|
-
drop_input_cols
|
807
|
+
drop_input_cols=self._drop_input_cols,
|
737
808
|
expected_output_cols_type="float",
|
738
809
|
)
|
810
|
+
expected_output_cols = self._align_expected_output_names(
|
811
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
812
|
+
)
|
739
813
|
|
740
814
|
elif isinstance(dataset, pd.DataFrame):
|
741
|
-
transform_kwargs = dict(
|
742
|
-
snowpark_input_cols = self._snowpark_cols,
|
743
|
-
drop_input_cols = self._drop_input_cols
|
744
|
-
)
|
815
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
745
816
|
|
746
817
|
transform_handlers = ModelTransformerBuilder.build(
|
747
818
|
dataset=dataset,
|
@@ -754,7 +825,7 @@ class NuSVR(BaseTransformer):
|
|
754
825
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
755
826
|
inference_method=inference_method,
|
756
827
|
input_cols=self.input_cols,
|
757
|
-
expected_output_cols=
|
828
|
+
expected_output_cols=expected_output_cols,
|
758
829
|
**transform_kwargs
|
759
830
|
)
|
760
831
|
return output_df
|
@@ -783,17 +854,17 @@ class NuSVR(BaseTransformer):
|
|
783
854
|
Output dataset with probability of the sample for each class in the model.
|
784
855
|
"""
|
785
856
|
super()._check_dataset_type(dataset)
|
786
|
-
inference_method="score_samples"
|
857
|
+
inference_method = "score_samples"
|
787
858
|
|
788
859
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
789
860
|
# are specific to the type of dataset used.
|
790
861
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
791
862
|
|
863
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
864
|
+
|
792
865
|
if isinstance(dataset, DataFrame):
|
793
|
-
self.
|
794
|
-
|
795
|
-
inference_method=inference_method,
|
796
|
-
)
|
866
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
867
|
+
self._deps = self._get_dependencies()
|
797
868
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
798
869
|
transform_kwargs = dict(
|
799
870
|
session=dataset._session,
|
@@ -801,6 +872,9 @@ class NuSVR(BaseTransformer):
|
|
801
872
|
drop_input_cols = self._drop_input_cols,
|
802
873
|
expected_output_cols_type="float",
|
803
874
|
)
|
875
|
+
expected_output_cols = self._align_expected_output_names(
|
876
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
877
|
+
)
|
804
878
|
|
805
879
|
elif isinstance(dataset, pd.DataFrame):
|
806
880
|
transform_kwargs = dict(
|
@@ -819,7 +893,7 @@ class NuSVR(BaseTransformer):
|
|
819
893
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
820
894
|
inference_method=inference_method,
|
821
895
|
input_cols=self.input_cols,
|
822
|
-
expected_output_cols=
|
896
|
+
expected_output_cols=expected_output_cols,
|
823
897
|
**transform_kwargs
|
824
898
|
)
|
825
899
|
return output_df
|
@@ -854,17 +928,15 @@ class NuSVR(BaseTransformer):
|
|
854
928
|
transform_kwargs: ScoreKwargsTypedDict = dict()
|
855
929
|
|
856
930
|
if isinstance(dataset, DataFrame):
|
857
|
-
self.
|
858
|
-
|
859
|
-
inference_method="score",
|
860
|
-
)
|
931
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method="score")
|
932
|
+
self._deps = self._get_dependencies()
|
861
933
|
selected_cols = self._get_active_columns()
|
862
934
|
if len(selected_cols) > 0:
|
863
935
|
dataset = dataset.select(selected_cols)
|
864
936
|
assert isinstance(dataset._session, Session) # keep mypy happy
|
865
937
|
transform_kwargs = dict(
|
866
938
|
session=dataset._session,
|
867
|
-
dependencies=
|
939
|
+
dependencies=self._deps,
|
868
940
|
score_sproc_imports=['sklearn'],
|
869
941
|
)
|
870
942
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -929,11 +1001,8 @@ class NuSVR(BaseTransformer):
|
|
929
1001
|
|
930
1002
|
if isinstance(dataset, DataFrame):
|
931
1003
|
|
932
|
-
self.
|
933
|
-
|
934
|
-
inference_method=inference_method,
|
935
|
-
|
936
|
-
)
|
1004
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
1005
|
+
self._deps = self._get_dependencies()
|
937
1006
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
938
1007
|
transform_kwargs = dict(
|
939
1008
|
session = dataset._session,
|
@@ -966,50 +1035,84 @@ class NuSVR(BaseTransformer):
|
|
966
1035
|
)
|
967
1036
|
return output_df
|
968
1037
|
|
1038
|
+
|
1039
|
+
|
1040
|
+
def to_sklearn(self) -> Any:
|
1041
|
+
"""Get sklearn.svm.NuSVR object.
|
1042
|
+
"""
|
1043
|
+
if self._sklearn_object is None:
|
1044
|
+
self._sklearn_object = self._create_sklearn_object()
|
1045
|
+
return self._sklearn_object
|
1046
|
+
|
1047
|
+
def to_xgboost(self) -> Any:
|
1048
|
+
raise exceptions.SnowflakeMLException(
|
1049
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1050
|
+
original_exception=AttributeError(
|
1051
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1052
|
+
"to_xgboost()",
|
1053
|
+
"to_sklearn()"
|
1054
|
+
)
|
1055
|
+
),
|
1056
|
+
)
|
1057
|
+
|
1058
|
+
def to_lightgbm(self) -> Any:
|
1059
|
+
raise exceptions.SnowflakeMLException(
|
1060
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1061
|
+
original_exception=AttributeError(
|
1062
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1063
|
+
"to_lightgbm()",
|
1064
|
+
"to_sklearn()"
|
1065
|
+
)
|
1066
|
+
),
|
1067
|
+
)
|
1068
|
+
|
1069
|
+
def _get_dependencies(self) -> List[str]:
|
1070
|
+
return self._deps
|
1071
|
+
|
969
1072
|
|
970
|
-
def
|
1073
|
+
def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
971
1074
|
self._model_signature_dict = dict()
|
972
1075
|
|
973
1076
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
974
1077
|
|
975
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input"))
|
1078
|
+
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
976
1079
|
outputs: List[BaseFeatureSpec] = []
|
977
1080
|
if hasattr(self, "predict"):
|
978
1081
|
# keep mypy happy
|
979
|
-
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1082
|
+
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
980
1083
|
# For classifier, the type of predict is the same as the type of label
|
981
|
-
if self._sklearn_object._estimator_type ==
|
982
|
-
|
1084
|
+
if self._sklearn_object._estimator_type == "classifier":
|
1085
|
+
# label columns is the desired type for output
|
983
1086
|
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
984
1087
|
# rename the output columns
|
985
1088
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
986
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
987
|
-
|
988
|
-
|
1089
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1090
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1091
|
+
)
|
989
1092
|
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
990
1093
|
# For outlier models, returns -1 for outliers and 1 for inliers.
|
991
|
-
# Clusterer returns int64 cluster labels.
|
1094
|
+
# Clusterer returns int64 cluster labels.
|
992
1095
|
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
993
1096
|
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
994
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
995
|
-
|
996
|
-
|
997
|
-
|
1097
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1098
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1099
|
+
)
|
1100
|
+
|
998
1101
|
# For regressor, the type of predict is float64
|
999
|
-
elif self._sklearn_object._estimator_type ==
|
1102
|
+
elif self._sklearn_object._estimator_type == "regressor":
|
1000
1103
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1001
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1002
|
-
|
1003
|
-
|
1004
|
-
|
1104
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1105
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1106
|
+
)
|
1107
|
+
|
1005
1108
|
for prob_func in PROB_FUNCTIONS:
|
1006
1109
|
if hasattr(self, prob_func):
|
1007
1110
|
output_cols_prefix: str = f"{prob_func}_"
|
1008
1111
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1009
1112
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1010
|
-
self._model_signature_dict[prob_func] = ModelSignature(
|
1011
|
-
|
1012
|
-
|
1113
|
+
self._model_signature_dict[prob_func] = ModelSignature(
|
1114
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1115
|
+
)
|
1013
1116
|
|
1014
1117
|
# Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
|
1015
1118
|
items = list(self._model_signature_dict.items())
|
@@ -1022,10 +1125,10 @@ class NuSVR(BaseTransformer):
|
|
1022
1125
|
"""Returns model signature of current class.
|
1023
1126
|
|
1024
1127
|
Raises:
|
1025
|
-
|
1128
|
+
SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
|
1026
1129
|
|
1027
1130
|
Returns:
|
1028
|
-
Dict
|
1131
|
+
Dict with each method and its input output signature
|
1029
1132
|
"""
|
1030
1133
|
if self._model_signature_dict is None:
|
1031
1134
|
raise exceptions.SnowflakeMLException(
|
@@ -1033,35 +1136,3 @@ class NuSVR(BaseTransformer):
|
|
1033
1136
|
original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
|
1034
1137
|
)
|
1035
1138
|
return self._model_signature_dict
|
1036
|
-
|
1037
|
-
def to_sklearn(self) -> Any:
|
1038
|
-
"""Get sklearn.svm.NuSVR object.
|
1039
|
-
"""
|
1040
|
-
if self._sklearn_object is None:
|
1041
|
-
self._sklearn_object = self._create_sklearn_object()
|
1042
|
-
return self._sklearn_object
|
1043
|
-
|
1044
|
-
def to_xgboost(self) -> Any:
|
1045
|
-
raise exceptions.SnowflakeMLException(
|
1046
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1047
|
-
original_exception=AttributeError(
|
1048
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1049
|
-
"to_xgboost()",
|
1050
|
-
"to_sklearn()"
|
1051
|
-
)
|
1052
|
-
),
|
1053
|
-
)
|
1054
|
-
|
1055
|
-
def to_lightgbm(self) -> Any:
|
1056
|
-
raise exceptions.SnowflakeMLException(
|
1057
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1058
|
-
original_exception=AttributeError(
|
1059
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1060
|
-
"to_lightgbm()",
|
1061
|
-
"to_sklearn()"
|
1062
|
-
)
|
1063
|
-
),
|
1064
|
-
)
|
1065
|
-
|
1066
|
-
def _get_dependencies(self) -> List[str]:
|
1067
|
-
return self._deps
|