snowflake-ml-python 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +77 -32
- snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
- snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
- snowflake/ml/_internal/exceptions/error_codes.py +3 -0
- snowflake/ml/_internal/lineage/data_source.py +10 -0
- snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/dataset/__init__.py +10 -0
- snowflake/ml/dataset/dataset.py +454 -129
- snowflake/ml/dataset/dataset_factory.py +53 -0
- snowflake/ml/dataset/dataset_metadata.py +103 -0
- snowflake/ml/dataset/dataset_reader.py +202 -0
- snowflake/ml/feature_store/feature_store.py +531 -332
- snowflake/ml/feature_store/feature_view.py +40 -23
- snowflake/ml/fileset/embedded_stage_fs.py +146 -0
- snowflake/ml/fileset/sfcfs.py +56 -54
- snowflake/ml/fileset/snowfs.py +159 -0
- snowflake/ml/fileset/stage_fs.py +49 -17
- snowflake/ml/model/__init__.py +2 -2
- snowflake/ml/model/_api.py +16 -1
- snowflake/ml/model/_client/model/model_impl.py +27 -0
- snowflake/ml/model/_client/model/model_version_impl.py +137 -50
- snowflake/ml/model/_client/ops/model_ops.py +159 -40
- snowflake/ml/model/_client/sql/model.py +25 -2
- snowflake/ml/model/_client/sql/model_version.py +131 -2
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
- snowflake/ml/model/_model_composer/model_composer.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +38 -51
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +19 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_env/model_env.py +41 -0
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +37 -11
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -5
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
- snowflake/ml/modeling/_internal/model_trainer.py +7 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +29 -7
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +246 -175
- snowflake/ml/modeling/cluster/affinity_propagation.py +246 -175
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +246 -175
- snowflake/ml/modeling/cluster/birch.py +248 -175
- snowflake/ml/modeling/cluster/bisecting_k_means.py +248 -175
- snowflake/ml/modeling/cluster/dbscan.py +246 -175
- snowflake/ml/modeling/cluster/feature_agglomeration.py +248 -175
- snowflake/ml/modeling/cluster/k_means.py +248 -175
- snowflake/ml/modeling/cluster/mean_shift.py +246 -175
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +248 -175
- snowflake/ml/modeling/cluster/optics.py +246 -175
- snowflake/ml/modeling/cluster/spectral_biclustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_clustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_coclustering.py +246 -175
- snowflake/ml/modeling/compose/column_transformer.py +248 -175
- snowflake/ml/modeling/compose/transformed_target_regressor.py +246 -175
- snowflake/ml/modeling/covariance/elliptic_envelope.py +246 -175
- snowflake/ml/modeling/covariance/empirical_covariance.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +246 -175
- snowflake/ml/modeling/covariance/ledoit_wolf.py +246 -175
- snowflake/ml/modeling/covariance/min_cov_det.py +246 -175
- snowflake/ml/modeling/covariance/oas.py +246 -175
- snowflake/ml/modeling/covariance/shrunk_covariance.py +246 -175
- snowflake/ml/modeling/decomposition/dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/factor_analysis.py +248 -175
- snowflake/ml/modeling/decomposition/fast_ica.py +248 -175
- snowflake/ml/modeling/decomposition/incremental_pca.py +248 -175
- snowflake/ml/modeling/decomposition/kernel_pca.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/pca.py +248 -175
- snowflake/ml/modeling/decomposition/sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/truncated_svd.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/isolation_forest.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/stacking_regressor.py +248 -175
- snowflake/ml/modeling/ensemble/voting_classifier.py +248 -175
- snowflake/ml/modeling/ensemble/voting_regressor.py +248 -175
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fdr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fpr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fwe.py +248 -175
- snowflake/ml/modeling/feature_selection/select_k_best.py +248 -175
- snowflake/ml/modeling/feature_selection/select_percentile.py +248 -175
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +248 -175
- snowflake/ml/modeling/feature_selection/variance_threshold.py +248 -175
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +72 -37
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +246 -175
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +246 -175
- snowflake/ml/modeling/impute/iterative_imputer.py +248 -175
- snowflake/ml/modeling/impute/knn_imputer.py +248 -175
- snowflake/ml/modeling/impute/missing_indicator.py +248 -175
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/nystroem.py +248 -175
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +248 -175
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ard_regression.py +246 -175
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/gamma_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/huber_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/lars.py +246 -175
- snowflake/ml/modeling/linear_model/lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +246 -175
- snowflake/ml/modeling/linear_model/linear_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/perceptron.py +246 -175
- snowflake/ml/modeling/linear_model/poisson_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ransac_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ridge.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_cv.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +246 -175
- snowflake/ml/modeling/manifold/isomap.py +248 -175
- snowflake/ml/modeling/manifold/mds.py +248 -175
- snowflake/ml/modeling/manifold/spectral_embedding.py +248 -175
- snowflake/ml/modeling/manifold/tsne.py +248 -175
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +246 -175
- snowflake/ml/modeling/mixture/gaussian_mixture.py +246 -175
- snowflake/ml/modeling/model_selection/grid_search_cv.py +63 -41
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +80 -38
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/output_code_classifier.py +246 -175
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/complement_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neighbors/kernel_density.py +246 -175
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_centroid.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +246 -175
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +248 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +248 -175
- snowflake/ml/modeling/neural_network/mlp_classifier.py +246 -175
- snowflake/ml/modeling/neural_network/mlp_regressor.py +246 -175
- snowflake/ml/modeling/pipeline/pipeline.py +517 -35
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +13 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +248 -175
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +246 -175
- snowflake/ml/modeling/semi_supervised/label_spreading.py +246 -175
- snowflake/ml/modeling/svm/linear_svc.py +246 -175
- snowflake/ml/modeling/svm/linear_svr.py +246 -175
- snowflake/ml/modeling/svm/nu_svc.py +246 -175
- snowflake/ml/modeling/svm/nu_svr.py +246 -175
- snowflake/ml/modeling/svm/svc.py +246 -175
- snowflake/ml/modeling/svm/svr.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_regressor.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +246 -175
- snowflake/ml/registry/model_registry.py +3 -149
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +129 -57
- snowflake_ml_python-1.5.0.dist-info/RECORD +380 -0
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- snowflake/ml/registry/_artifact_manager.py +0 -156
- snowflake/ml/registry/artifact.py +0 -46
- snowflake_ml_python-1.4.0.dist-info/RECORD +0 -370
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
|
|
33
33
|
BatchInferenceKwargsTypedDict,
|
34
34
|
ScoreKwargsTypedDict
|
35
35
|
)
|
36
|
+
from snowflake.ml.model._signatures import utils as model_signature_utils
|
37
|
+
from snowflake.ml.model.model_signature import (
|
38
|
+
BaseFeatureSpec,
|
39
|
+
DataType,
|
40
|
+
FeatureSpec,
|
41
|
+
ModelSignature,
|
42
|
+
_infer_signature,
|
43
|
+
_rename_signature_with_snowflake_identifiers,
|
44
|
+
)
|
36
45
|
|
37
46
|
from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
|
38
47
|
|
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
|
|
43
52
|
validate_sklearn_args,
|
44
53
|
)
|
45
54
|
|
46
|
-
from snowflake.ml.model.model_signature import (
|
47
|
-
DataType,
|
48
|
-
FeatureSpec,
|
49
|
-
ModelSignature,
|
50
|
-
_infer_signature,
|
51
|
-
_rename_signature_with_snowflake_identifiers,
|
52
|
-
BaseFeatureSpec,
|
53
|
-
)
|
54
|
-
from snowflake.ml.model._signatures import utils as model_signature_utils
|
55
|
-
|
56
55
|
_PROJECT = "ModelDevelopment"
|
57
56
|
# Derive subproject from module name by removing "sklearn"
|
58
57
|
# and converting module name from underscore to CamelCase
|
@@ -61,12 +60,6 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
|
|
61
60
|
|
62
61
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
63
62
|
|
64
|
-
def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
|
65
|
-
def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
|
66
|
-
return False and callable(getattr(self._sklearn_object, "fit_transform", None))
|
67
|
-
return check
|
68
|
-
|
69
|
-
|
70
63
|
class Ridge(BaseTransformer):
|
71
64
|
r"""Linear least squares with l2 regularization
|
72
65
|
For more details on this class, see [sklearn.linear_model.Ridge]
|
@@ -297,12 +290,7 @@ class Ridge(BaseTransformer):
|
|
297
290
|
)
|
298
291
|
return selected_cols
|
299
292
|
|
300
|
-
|
301
|
-
project=_PROJECT,
|
302
|
-
subproject=_SUBPROJECT,
|
303
|
-
custom_tags=dict([("autogen", True)]),
|
304
|
-
)
|
305
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "Ridge":
|
293
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "Ridge":
|
306
294
|
"""Fit Ridge regression model
|
307
295
|
For more details on this function, see [sklearn.linear_model.Ridge.fit]
|
308
296
|
(https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html#sklearn.linear_model.Ridge.fit)
|
@@ -329,12 +317,14 @@ class Ridge(BaseTransformer):
|
|
329
317
|
|
330
318
|
self._snowpark_cols = dataset.select(self.input_cols).columns
|
331
319
|
|
332
|
-
|
320
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
333
321
|
if SNOWML_SPROC_ENV in os.environ:
|
334
322
|
statement_params = telemetry.get_function_usage_statement_params(
|
335
323
|
project=_PROJECT,
|
336
324
|
subproject=_SUBPROJECT,
|
337
|
-
function_name=telemetry.get_statement_params_full_func_name(
|
325
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
326
|
+
inspect.currentframe(), Ridge.__class__.__name__
|
327
|
+
),
|
338
328
|
api_calls=[Session.call],
|
339
329
|
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
340
330
|
)
|
@@ -355,27 +345,24 @@ class Ridge(BaseTransformer):
|
|
355
345
|
)
|
356
346
|
self._sklearn_object = model_trainer.train()
|
357
347
|
self._is_fitted = True
|
358
|
-
self.
|
348
|
+
self._generate_model_signatures(dataset)
|
359
349
|
return self
|
360
350
|
|
361
351
|
def _batch_inference_validate_snowpark(
|
362
352
|
self,
|
363
353
|
dataset: DataFrame,
|
364
354
|
inference_method: str,
|
365
|
-
) ->
|
366
|
-
"""Util method to run validate that batch inference can be run on a snowpark dataframe
|
367
|
-
return the available package that exists in the snowflake anaconda channel
|
355
|
+
) -> None:
|
356
|
+
"""Util method to run validate that batch inference can be run on a snowpark dataframe.
|
368
357
|
|
369
358
|
Args:
|
370
359
|
dataset: snowpark dataframe
|
371
360
|
inference_method: the inference method such as predict, score...
|
372
|
-
|
361
|
+
|
373
362
|
Raises:
|
374
363
|
SnowflakeMLException: If the estimator is not fitted, raise error
|
375
364
|
SnowflakeMLException: If the session is None, raise error
|
376
365
|
|
377
|
-
Returns:
|
378
|
-
A list of available package that exists in the snowflake anaconda channel
|
379
366
|
"""
|
380
367
|
if not self._is_fitted:
|
381
368
|
raise exceptions.SnowflakeMLException(
|
@@ -393,9 +380,7 @@ class Ridge(BaseTransformer):
|
|
393
380
|
"Session must not specified for snowpark dataset."
|
394
381
|
),
|
395
382
|
)
|
396
|
-
|
397
|
-
return pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
|
398
|
-
pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
|
383
|
+
|
399
384
|
|
400
385
|
@available_if(original_estimator_has_callable("predict")) # type: ignore[misc]
|
401
386
|
@telemetry.send_api_usage_telemetry(
|
@@ -431,7 +416,9 @@ class Ridge(BaseTransformer):
|
|
431
416
|
# when it is classifier, infer the datatype from label columns
|
432
417
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
433
418
|
# Batch inference takes a single expected output column type. Use the first columns type for now.
|
434
|
-
label_cols_signatures = [
|
419
|
+
label_cols_signatures = [
|
420
|
+
row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
|
421
|
+
]
|
435
422
|
if len(label_cols_signatures) == 0:
|
436
423
|
error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
|
437
424
|
raise exceptions.SnowflakeMLException(
|
@@ -439,25 +426,23 @@ class Ridge(BaseTransformer):
|
|
439
426
|
original_exception=ValueError(error_str),
|
440
427
|
)
|
441
428
|
|
442
|
-
expected_type_inferred = convert_sp_to_sf_type(
|
443
|
-
label_cols_signatures[0].as_snowpark_type()
|
444
|
-
)
|
429
|
+
expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
|
445
430
|
|
446
|
-
self.
|
447
|
-
|
431
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
432
|
+
self._deps = self._get_dependencies()
|
433
|
+
assert isinstance(
|
434
|
+
dataset._session, Session
|
435
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
448
436
|
|
449
437
|
transform_kwargs = dict(
|
450
|
-
session
|
451
|
-
dependencies
|
452
|
-
drop_input_cols
|
453
|
-
expected_output_cols_type
|
438
|
+
session=dataset._session,
|
439
|
+
dependencies=self._deps,
|
440
|
+
drop_input_cols=self._drop_input_cols,
|
441
|
+
expected_output_cols_type=expected_type_inferred,
|
454
442
|
)
|
455
443
|
|
456
444
|
elif isinstance(dataset, pd.DataFrame):
|
457
|
-
transform_kwargs = dict(
|
458
|
-
snowpark_input_cols = self._snowpark_cols,
|
459
|
-
drop_input_cols = self._drop_input_cols
|
460
|
-
)
|
445
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
461
446
|
|
462
447
|
transform_handlers = ModelTransformerBuilder.build(
|
463
448
|
dataset=dataset,
|
@@ -497,7 +482,7 @@ class Ridge(BaseTransformer):
|
|
497
482
|
Transformed dataset.
|
498
483
|
"""
|
499
484
|
super()._check_dataset_type(dataset)
|
500
|
-
inference_method="transform"
|
485
|
+
inference_method = "transform"
|
501
486
|
|
502
487
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
503
488
|
# are specific to the type of dataset used.
|
@@ -527,24 +512,19 @@ class Ridge(BaseTransformer):
|
|
527
512
|
if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
|
528
513
|
expected_dtype = convert_sp_to_sf_type(output_types[0])
|
529
514
|
|
530
|
-
self.
|
531
|
-
|
532
|
-
inference_method=inference_method,
|
533
|
-
)
|
515
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
516
|
+
self._deps = self._get_dependencies()
|
534
517
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
535
518
|
|
536
519
|
transform_kwargs = dict(
|
537
|
-
session
|
538
|
-
dependencies
|
539
|
-
drop_input_cols
|
540
|
-
expected_output_cols_type
|
520
|
+
session=dataset._session,
|
521
|
+
dependencies=self._deps,
|
522
|
+
drop_input_cols=self._drop_input_cols,
|
523
|
+
expected_output_cols_type=expected_dtype,
|
541
524
|
)
|
542
525
|
|
543
526
|
elif isinstance(dataset, pd.DataFrame):
|
544
|
-
transform_kwargs = dict(
|
545
|
-
snowpark_input_cols = self._snowpark_cols,
|
546
|
-
drop_input_cols = self._drop_input_cols
|
547
|
-
)
|
527
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
548
528
|
|
549
529
|
transform_handlers = ModelTransformerBuilder.build(
|
550
530
|
dataset=dataset,
|
@@ -563,7 +543,11 @@ class Ridge(BaseTransformer):
|
|
563
543
|
return output_df
|
564
544
|
|
565
545
|
@available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
|
566
|
-
def fit_predict(
|
546
|
+
def fit_predict(
|
547
|
+
self,
|
548
|
+
dataset: Union[DataFrame, pd.DataFrame],
|
549
|
+
output_cols_prefix: str = "fit_predict_",
|
550
|
+
) -> Union[DataFrame, pd.DataFrame]:
|
567
551
|
""" Method not supported for this class.
|
568
552
|
|
569
553
|
|
@@ -588,22 +572,104 @@ class Ridge(BaseTransformer):
|
|
588
572
|
)
|
589
573
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
590
574
|
drop_input_cols=self._drop_input_cols,
|
591
|
-
expected_output_cols_list=
|
575
|
+
expected_output_cols_list=(
|
576
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
577
|
+
),
|
592
578
|
)
|
593
579
|
self._sklearn_object = fitted_estimator
|
594
580
|
self._is_fitted = True
|
595
581
|
return output_result
|
596
582
|
|
583
|
+
|
584
|
+
@available_if(original_estimator_has_callable("fit_transform")) # type: ignore[misc]
|
585
|
+
def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "fit_transform_",) -> Union[DataFrame, pd.DataFrame]:
|
586
|
+
""" Method not supported for this class.
|
587
|
+
|
597
588
|
|
598
|
-
|
599
|
-
|
600
|
-
|
589
|
+
Raises:
|
590
|
+
TypeError: Supported dataset types: snowpark.DataFrame, pandas.DataFrame.
|
591
|
+
|
592
|
+
Args:
|
593
|
+
dataset: Union[snowflake.snowpark.DataFrame, pandas.DataFrame]
|
594
|
+
Snowpark or Pandas DataFrame.
|
595
|
+
output_cols_prefix: Prefix for the response columns
|
601
596
|
Returns:
|
602
597
|
Transformed dataset.
|
603
598
|
"""
|
604
|
-
self.
|
605
|
-
|
606
|
-
|
599
|
+
self._infer_input_output_cols(dataset)
|
600
|
+
super()._check_dataset_type(dataset)
|
601
|
+
model_trainer = ModelTrainerBuilder.build_fit_transform(
|
602
|
+
estimator=self._sklearn_object,
|
603
|
+
dataset=dataset,
|
604
|
+
input_cols=self.input_cols,
|
605
|
+
label_cols=self.label_cols,
|
606
|
+
sample_weight_col=self.sample_weight_col,
|
607
|
+
autogenerated=self._autogenerated,
|
608
|
+
subproject=_SUBPROJECT,
|
609
|
+
)
|
610
|
+
output_result, fitted_estimator = model_trainer.train_fit_transform(
|
611
|
+
drop_input_cols=self._drop_input_cols,
|
612
|
+
expected_output_cols_list=self.output_cols,
|
613
|
+
)
|
614
|
+
self._sklearn_object = fitted_estimator
|
615
|
+
self._is_fitted = True
|
616
|
+
return output_result
|
617
|
+
|
618
|
+
|
619
|
+
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
620
|
+
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
621
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
622
|
+
"""
|
623
|
+
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
624
|
+
# The following condition is introduced for kneighbors methods, and not used in other methods
|
625
|
+
if output_cols:
|
626
|
+
output_cols = [
|
627
|
+
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
628
|
+
for c in output_cols
|
629
|
+
]
|
630
|
+
elif getattr(self._sklearn_object, "classes_", None) is None:
|
631
|
+
output_cols = [output_cols_prefix]
|
632
|
+
elif self._sklearn_object is not None:
|
633
|
+
classes = self._sklearn_object.classes_
|
634
|
+
if isinstance(classes, numpy.ndarray):
|
635
|
+
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
636
|
+
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
637
|
+
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
638
|
+
output_cols = []
|
639
|
+
for i, cl in enumerate(classes):
|
640
|
+
# For binary classification, there is only one output column for each class
|
641
|
+
# ndarray as the two classes are complementary.
|
642
|
+
if len(cl) == 2:
|
643
|
+
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
644
|
+
else:
|
645
|
+
output_cols.extend([
|
646
|
+
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
647
|
+
])
|
648
|
+
else:
|
649
|
+
output_cols = []
|
650
|
+
|
651
|
+
# Make sure column names are valid snowflake identifiers.
|
652
|
+
assert output_cols is not None # Make MyPy happy
|
653
|
+
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
654
|
+
|
655
|
+
return rv
|
656
|
+
|
657
|
+
def _align_expected_output_names(
|
658
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
659
|
+
) -> List[str]:
|
660
|
+
# in case the inferred output column names dimension is different
|
661
|
+
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
662
|
+
output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
|
663
|
+
output_df_columns = list(output_df_pd.columns)
|
664
|
+
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
665
|
+
if self.sample_weight_col:
|
666
|
+
output_df_columns_set -= set(self.sample_weight_col)
|
667
|
+
# if the dimension of inferred output column names is correct; use it
|
668
|
+
if len(expected_output_cols_list) == len(output_df_columns_set):
|
669
|
+
return expected_output_cols_list
|
670
|
+
# otherwise, use the sklearn estimator's output
|
671
|
+
else:
|
672
|
+
return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
607
673
|
|
608
674
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
609
675
|
@telemetry.send_api_usage_telemetry(
|
@@ -635,24 +701,26 @@ class Ridge(BaseTransformer):
|
|
635
701
|
# are specific to the type of dataset used.
|
636
702
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
637
703
|
|
704
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
705
|
+
|
638
706
|
if isinstance(dataset, DataFrame):
|
639
|
-
self.
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
707
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
708
|
+
self._deps = self._get_dependencies()
|
709
|
+
assert isinstance(
|
710
|
+
dataset._session, Session
|
711
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
644
712
|
transform_kwargs = dict(
|
645
713
|
session=dataset._session,
|
646
714
|
dependencies=self._deps,
|
647
|
-
drop_input_cols
|
715
|
+
drop_input_cols=self._drop_input_cols,
|
648
716
|
expected_output_cols_type="float",
|
649
717
|
)
|
718
|
+
expected_output_cols = self._align_expected_output_names(
|
719
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
720
|
+
)
|
650
721
|
|
651
722
|
elif isinstance(dataset, pd.DataFrame):
|
652
|
-
transform_kwargs = dict(
|
653
|
-
snowpark_input_cols = self._snowpark_cols,
|
654
|
-
drop_input_cols = self._drop_input_cols
|
655
|
-
)
|
723
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
656
724
|
|
657
725
|
transform_handlers = ModelTransformerBuilder.build(
|
658
726
|
dataset=dataset,
|
@@ -664,7 +732,7 @@ class Ridge(BaseTransformer):
|
|
664
732
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
665
733
|
inference_method=inference_method,
|
666
734
|
input_cols=self.input_cols,
|
667
|
-
expected_output_cols=
|
735
|
+
expected_output_cols=expected_output_cols,
|
668
736
|
**transform_kwargs
|
669
737
|
)
|
670
738
|
return output_df
|
@@ -694,29 +762,30 @@ class Ridge(BaseTransformer):
|
|
694
762
|
Output dataset with log probability of the sample for each class in the model.
|
695
763
|
"""
|
696
764
|
super()._check_dataset_type(dataset)
|
697
|
-
inference_method="predict_log_proba"
|
765
|
+
inference_method = "predict_log_proba"
|
766
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
698
767
|
|
699
768
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
700
769
|
# are specific to the type of dataset used.
|
701
770
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
702
771
|
|
703
772
|
if isinstance(dataset, DataFrame):
|
704
|
-
self.
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
773
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
774
|
+
self._deps = self._get_dependencies()
|
775
|
+
assert isinstance(
|
776
|
+
dataset._session, Session
|
777
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
709
778
|
transform_kwargs = dict(
|
710
779
|
session=dataset._session,
|
711
780
|
dependencies=self._deps,
|
712
|
-
drop_input_cols
|
781
|
+
drop_input_cols=self._drop_input_cols,
|
713
782
|
expected_output_cols_type="float",
|
714
783
|
)
|
784
|
+
expected_output_cols = self._align_expected_output_names(
|
785
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
786
|
+
)
|
715
787
|
elif isinstance(dataset, pd.DataFrame):
|
716
|
-
transform_kwargs = dict(
|
717
|
-
snowpark_input_cols = self._snowpark_cols,
|
718
|
-
drop_input_cols = self._drop_input_cols
|
719
|
-
)
|
788
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
720
789
|
|
721
790
|
transform_handlers = ModelTransformerBuilder.build(
|
722
791
|
dataset=dataset,
|
@@ -729,7 +798,7 @@ class Ridge(BaseTransformer):
|
|
729
798
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
730
799
|
inference_method=inference_method,
|
731
800
|
input_cols=self.input_cols,
|
732
|
-
expected_output_cols=
|
801
|
+
expected_output_cols=expected_output_cols,
|
733
802
|
**transform_kwargs
|
734
803
|
)
|
735
804
|
return output_df
|
@@ -755,30 +824,32 @@ class Ridge(BaseTransformer):
|
|
755
824
|
Output dataset with results of the decision function for the samples in input dataset.
|
756
825
|
"""
|
757
826
|
super()._check_dataset_type(dataset)
|
758
|
-
inference_method="decision_function"
|
827
|
+
inference_method = "decision_function"
|
759
828
|
|
760
829
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
761
830
|
# are specific to the type of dataset used.
|
762
831
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
763
832
|
|
833
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
834
|
+
|
764
835
|
if isinstance(dataset, DataFrame):
|
765
|
-
self.
|
766
|
-
|
767
|
-
|
768
|
-
|
769
|
-
|
836
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
837
|
+
self._deps = self._get_dependencies()
|
838
|
+
assert isinstance(
|
839
|
+
dataset._session, Session
|
840
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
770
841
|
transform_kwargs = dict(
|
771
842
|
session=dataset._session,
|
772
843
|
dependencies=self._deps,
|
773
|
-
drop_input_cols
|
844
|
+
drop_input_cols=self._drop_input_cols,
|
774
845
|
expected_output_cols_type="float",
|
775
846
|
)
|
847
|
+
expected_output_cols = self._align_expected_output_names(
|
848
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
849
|
+
)
|
776
850
|
|
777
851
|
elif isinstance(dataset, pd.DataFrame):
|
778
|
-
transform_kwargs = dict(
|
779
|
-
snowpark_input_cols = self._snowpark_cols,
|
780
|
-
drop_input_cols = self._drop_input_cols
|
781
|
-
)
|
852
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
782
853
|
|
783
854
|
transform_handlers = ModelTransformerBuilder.build(
|
784
855
|
dataset=dataset,
|
@@ -791,7 +862,7 @@ class Ridge(BaseTransformer):
|
|
791
862
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
792
863
|
inference_method=inference_method,
|
793
864
|
input_cols=self.input_cols,
|
794
|
-
expected_output_cols=
|
865
|
+
expected_output_cols=expected_output_cols,
|
795
866
|
**transform_kwargs
|
796
867
|
)
|
797
868
|
return output_df
|
@@ -820,17 +891,17 @@ class Ridge(BaseTransformer):
|
|
820
891
|
Output dataset with probability of the sample for each class in the model.
|
821
892
|
"""
|
822
893
|
super()._check_dataset_type(dataset)
|
823
|
-
inference_method="score_samples"
|
894
|
+
inference_method = "score_samples"
|
824
895
|
|
825
896
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
826
897
|
# are specific to the type of dataset used.
|
827
898
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
828
899
|
|
900
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
901
|
+
|
829
902
|
if isinstance(dataset, DataFrame):
|
830
|
-
self.
|
831
|
-
|
832
|
-
inference_method=inference_method,
|
833
|
-
)
|
903
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
904
|
+
self._deps = self._get_dependencies()
|
834
905
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
835
906
|
transform_kwargs = dict(
|
836
907
|
session=dataset._session,
|
@@ -838,6 +909,9 @@ class Ridge(BaseTransformer):
|
|
838
909
|
drop_input_cols = self._drop_input_cols,
|
839
910
|
expected_output_cols_type="float",
|
840
911
|
)
|
912
|
+
expected_output_cols = self._align_expected_output_names(
|
913
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
914
|
+
)
|
841
915
|
|
842
916
|
elif isinstance(dataset, pd.DataFrame):
|
843
917
|
transform_kwargs = dict(
|
@@ -856,7 +930,7 @@ class Ridge(BaseTransformer):
|
|
856
930
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
857
931
|
inference_method=inference_method,
|
858
932
|
input_cols=self.input_cols,
|
859
|
-
expected_output_cols=
|
933
|
+
expected_output_cols=expected_output_cols,
|
860
934
|
**transform_kwargs
|
861
935
|
)
|
862
936
|
return output_df
|
@@ -891,17 +965,15 @@ class Ridge(BaseTransformer):
|
|
891
965
|
transform_kwargs: ScoreKwargsTypedDict = dict()
|
892
966
|
|
893
967
|
if isinstance(dataset, DataFrame):
|
894
|
-
self.
|
895
|
-
|
896
|
-
inference_method="score",
|
897
|
-
)
|
968
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method="score")
|
969
|
+
self._deps = self._get_dependencies()
|
898
970
|
selected_cols = self._get_active_columns()
|
899
971
|
if len(selected_cols) > 0:
|
900
972
|
dataset = dataset.select(selected_cols)
|
901
973
|
assert isinstance(dataset._session, Session) # keep mypy happy
|
902
974
|
transform_kwargs = dict(
|
903
975
|
session=dataset._session,
|
904
|
-
dependencies=
|
976
|
+
dependencies=self._deps,
|
905
977
|
score_sproc_imports=['sklearn'],
|
906
978
|
)
|
907
979
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -966,11 +1038,8 @@ class Ridge(BaseTransformer):
|
|
966
1038
|
|
967
1039
|
if isinstance(dataset, DataFrame):
|
968
1040
|
|
969
|
-
self.
|
970
|
-
|
971
|
-
inference_method=inference_method,
|
972
|
-
|
973
|
-
)
|
1041
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
1042
|
+
self._deps = self._get_dependencies()
|
974
1043
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
975
1044
|
transform_kwargs = dict(
|
976
1045
|
session = dataset._session,
|
@@ -1003,50 +1072,84 @@ class Ridge(BaseTransformer):
|
|
1003
1072
|
)
|
1004
1073
|
return output_df
|
1005
1074
|
|
1075
|
+
|
1076
|
+
|
1077
|
+
def to_sklearn(self) -> Any:
|
1078
|
+
"""Get sklearn.linear_model.Ridge object.
|
1079
|
+
"""
|
1080
|
+
if self._sklearn_object is None:
|
1081
|
+
self._sklearn_object = self._create_sklearn_object()
|
1082
|
+
return self._sklearn_object
|
1083
|
+
|
1084
|
+
def to_xgboost(self) -> Any:
|
1085
|
+
raise exceptions.SnowflakeMLException(
|
1086
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1087
|
+
original_exception=AttributeError(
|
1088
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1089
|
+
"to_xgboost()",
|
1090
|
+
"to_sklearn()"
|
1091
|
+
)
|
1092
|
+
),
|
1093
|
+
)
|
1094
|
+
|
1095
|
+
def to_lightgbm(self) -> Any:
|
1096
|
+
raise exceptions.SnowflakeMLException(
|
1097
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1098
|
+
original_exception=AttributeError(
|
1099
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1100
|
+
"to_lightgbm()",
|
1101
|
+
"to_sklearn()"
|
1102
|
+
)
|
1103
|
+
),
|
1104
|
+
)
|
1105
|
+
|
1106
|
+
def _get_dependencies(self) -> List[str]:
|
1107
|
+
return self._deps
|
1108
|
+
|
1006
1109
|
|
1007
|
-
def
|
1110
|
+
def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
1008
1111
|
self._model_signature_dict = dict()
|
1009
1112
|
|
1010
1113
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1011
1114
|
|
1012
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input"))
|
1115
|
+
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1013
1116
|
outputs: List[BaseFeatureSpec] = []
|
1014
1117
|
if hasattr(self, "predict"):
|
1015
1118
|
# keep mypy happy
|
1016
|
-
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1119
|
+
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1017
1120
|
# For classifier, the type of predict is the same as the type of label
|
1018
|
-
if self._sklearn_object._estimator_type ==
|
1019
|
-
|
1121
|
+
if self._sklearn_object._estimator_type == "classifier":
|
1122
|
+
# label columns is the desired type for output
|
1020
1123
|
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1021
1124
|
# rename the output columns
|
1022
1125
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1023
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1024
|
-
|
1025
|
-
|
1126
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1127
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1128
|
+
)
|
1026
1129
|
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
1027
1130
|
# For outlier models, returns -1 for outliers and 1 for inliers.
|
1028
|
-
# Clusterer returns int64 cluster labels.
|
1131
|
+
# Clusterer returns int64 cluster labels.
|
1029
1132
|
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
1030
1133
|
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
1031
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1032
|
-
|
1033
|
-
|
1034
|
-
|
1134
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1135
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1136
|
+
)
|
1137
|
+
|
1035
1138
|
# For regressor, the type of predict is float64
|
1036
|
-
elif self._sklearn_object._estimator_type ==
|
1139
|
+
elif self._sklearn_object._estimator_type == "regressor":
|
1037
1140
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1038
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1039
|
-
|
1040
|
-
|
1041
|
-
|
1141
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1142
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1143
|
+
)
|
1144
|
+
|
1042
1145
|
for prob_func in PROB_FUNCTIONS:
|
1043
1146
|
if hasattr(self, prob_func):
|
1044
1147
|
output_cols_prefix: str = f"{prob_func}_"
|
1045
1148
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1046
1149
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1047
|
-
self._model_signature_dict[prob_func] = ModelSignature(
|
1048
|
-
|
1049
|
-
|
1150
|
+
self._model_signature_dict[prob_func] = ModelSignature(
|
1151
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1152
|
+
)
|
1050
1153
|
|
1051
1154
|
# Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
|
1052
1155
|
items = list(self._model_signature_dict.items())
|
@@ -1059,10 +1162,10 @@ class Ridge(BaseTransformer):
|
|
1059
1162
|
"""Returns model signature of current class.
|
1060
1163
|
|
1061
1164
|
Raises:
|
1062
|
-
|
1165
|
+
SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
|
1063
1166
|
|
1064
1167
|
Returns:
|
1065
|
-
Dict
|
1168
|
+
Dict with each method and its input output signature
|
1066
1169
|
"""
|
1067
1170
|
if self._model_signature_dict is None:
|
1068
1171
|
raise exceptions.SnowflakeMLException(
|
@@ -1070,35 +1173,3 @@ class Ridge(BaseTransformer):
|
|
1070
1173
|
original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
|
1071
1174
|
)
|
1072
1175
|
return self._model_signature_dict
|
1073
|
-
|
1074
|
-
def to_sklearn(self) -> Any:
|
1075
|
-
"""Get sklearn.linear_model.Ridge object.
|
1076
|
-
"""
|
1077
|
-
if self._sklearn_object is None:
|
1078
|
-
self._sklearn_object = self._create_sklearn_object()
|
1079
|
-
return self._sklearn_object
|
1080
|
-
|
1081
|
-
def to_xgboost(self) -> Any:
|
1082
|
-
raise exceptions.SnowflakeMLException(
|
1083
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1084
|
-
original_exception=AttributeError(
|
1085
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1086
|
-
"to_xgboost()",
|
1087
|
-
"to_sklearn()"
|
1088
|
-
)
|
1089
|
-
),
|
1090
|
-
)
|
1091
|
-
|
1092
|
-
def to_lightgbm(self) -> Any:
|
1093
|
-
raise exceptions.SnowflakeMLException(
|
1094
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1095
|
-
original_exception=AttributeError(
|
1096
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1097
|
-
"to_lightgbm()",
|
1098
|
-
"to_sklearn()"
|
1099
|
-
)
|
1100
|
-
),
|
1101
|
-
)
|
1102
|
-
|
1103
|
-
def _get_dependencies(self) -> List[str]:
|
1104
|
-
return self._deps
|