snowflake-ml-python 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +77 -32
- snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
- snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
- snowflake/ml/_internal/exceptions/error_codes.py +3 -0
- snowflake/ml/_internal/lineage/data_source.py +10 -0
- snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/dataset/__init__.py +10 -0
- snowflake/ml/dataset/dataset.py +454 -129
- snowflake/ml/dataset/dataset_factory.py +53 -0
- snowflake/ml/dataset/dataset_metadata.py +103 -0
- snowflake/ml/dataset/dataset_reader.py +202 -0
- snowflake/ml/feature_store/feature_store.py +531 -332
- snowflake/ml/feature_store/feature_view.py +40 -23
- snowflake/ml/fileset/embedded_stage_fs.py +146 -0
- snowflake/ml/fileset/sfcfs.py +56 -54
- snowflake/ml/fileset/snowfs.py +159 -0
- snowflake/ml/fileset/stage_fs.py +49 -17
- snowflake/ml/model/__init__.py +2 -2
- snowflake/ml/model/_api.py +16 -1
- snowflake/ml/model/_client/model/model_impl.py +27 -0
- snowflake/ml/model/_client/model/model_version_impl.py +137 -50
- snowflake/ml/model/_client/ops/model_ops.py +159 -40
- snowflake/ml/model/_client/sql/model.py +25 -2
- snowflake/ml/model/_client/sql/model_version.py +131 -2
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
- snowflake/ml/model/_model_composer/model_composer.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +38 -51
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +19 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_env/model_env.py +41 -0
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +37 -11
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -5
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
- snowflake/ml/modeling/_internal/model_trainer.py +7 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +29 -7
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +246 -175
- snowflake/ml/modeling/cluster/affinity_propagation.py +246 -175
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +246 -175
- snowflake/ml/modeling/cluster/birch.py +248 -175
- snowflake/ml/modeling/cluster/bisecting_k_means.py +248 -175
- snowflake/ml/modeling/cluster/dbscan.py +246 -175
- snowflake/ml/modeling/cluster/feature_agglomeration.py +248 -175
- snowflake/ml/modeling/cluster/k_means.py +248 -175
- snowflake/ml/modeling/cluster/mean_shift.py +246 -175
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +248 -175
- snowflake/ml/modeling/cluster/optics.py +246 -175
- snowflake/ml/modeling/cluster/spectral_biclustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_clustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_coclustering.py +246 -175
- snowflake/ml/modeling/compose/column_transformer.py +248 -175
- snowflake/ml/modeling/compose/transformed_target_regressor.py +246 -175
- snowflake/ml/modeling/covariance/elliptic_envelope.py +246 -175
- snowflake/ml/modeling/covariance/empirical_covariance.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +246 -175
- snowflake/ml/modeling/covariance/ledoit_wolf.py +246 -175
- snowflake/ml/modeling/covariance/min_cov_det.py +246 -175
- snowflake/ml/modeling/covariance/oas.py +246 -175
- snowflake/ml/modeling/covariance/shrunk_covariance.py +246 -175
- snowflake/ml/modeling/decomposition/dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/factor_analysis.py +248 -175
- snowflake/ml/modeling/decomposition/fast_ica.py +248 -175
- snowflake/ml/modeling/decomposition/incremental_pca.py +248 -175
- snowflake/ml/modeling/decomposition/kernel_pca.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/pca.py +248 -175
- snowflake/ml/modeling/decomposition/sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/truncated_svd.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/isolation_forest.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/stacking_regressor.py +248 -175
- snowflake/ml/modeling/ensemble/voting_classifier.py +248 -175
- snowflake/ml/modeling/ensemble/voting_regressor.py +248 -175
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fdr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fpr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fwe.py +248 -175
- snowflake/ml/modeling/feature_selection/select_k_best.py +248 -175
- snowflake/ml/modeling/feature_selection/select_percentile.py +248 -175
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +248 -175
- snowflake/ml/modeling/feature_selection/variance_threshold.py +248 -175
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +72 -37
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +246 -175
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +246 -175
- snowflake/ml/modeling/impute/iterative_imputer.py +248 -175
- snowflake/ml/modeling/impute/knn_imputer.py +248 -175
- snowflake/ml/modeling/impute/missing_indicator.py +248 -175
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/nystroem.py +248 -175
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +248 -175
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ard_regression.py +246 -175
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/gamma_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/huber_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/lars.py +246 -175
- snowflake/ml/modeling/linear_model/lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +246 -175
- snowflake/ml/modeling/linear_model/linear_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/perceptron.py +246 -175
- snowflake/ml/modeling/linear_model/poisson_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ransac_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ridge.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_cv.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +246 -175
- snowflake/ml/modeling/manifold/isomap.py +248 -175
- snowflake/ml/modeling/manifold/mds.py +248 -175
- snowflake/ml/modeling/manifold/spectral_embedding.py +248 -175
- snowflake/ml/modeling/manifold/tsne.py +248 -175
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +246 -175
- snowflake/ml/modeling/mixture/gaussian_mixture.py +246 -175
- snowflake/ml/modeling/model_selection/grid_search_cv.py +63 -41
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +80 -38
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/output_code_classifier.py +246 -175
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/complement_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neighbors/kernel_density.py +246 -175
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_centroid.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +246 -175
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +248 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +248 -175
- snowflake/ml/modeling/neural_network/mlp_classifier.py +246 -175
- snowflake/ml/modeling/neural_network/mlp_regressor.py +246 -175
- snowflake/ml/modeling/pipeline/pipeline.py +517 -35
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +13 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +248 -175
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +246 -175
- snowflake/ml/modeling/semi_supervised/label_spreading.py +246 -175
- snowflake/ml/modeling/svm/linear_svc.py +246 -175
- snowflake/ml/modeling/svm/linear_svr.py +246 -175
- snowflake/ml/modeling/svm/nu_svc.py +246 -175
- snowflake/ml/modeling/svm/nu_svr.py +246 -175
- snowflake/ml/modeling/svm/svc.py +246 -175
- snowflake/ml/modeling/svm/svr.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_regressor.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +246 -175
- snowflake/ml/registry/model_registry.py +3 -149
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +129 -57
- snowflake_ml_python-1.5.0.dist-info/RECORD +380 -0
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- snowflake/ml/registry/_artifact_manager.py +0 -156
- snowflake/ml/registry/artifact.py +0 -46
- snowflake_ml_python-1.4.0.dist-info/RECORD +0 -370
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
|
|
33
33
|
BatchInferenceKwargsTypedDict,
|
34
34
|
ScoreKwargsTypedDict
|
35
35
|
)
|
36
|
+
from snowflake.ml.model._signatures import utils as model_signature_utils
|
37
|
+
from snowflake.ml.model.model_signature import (
|
38
|
+
BaseFeatureSpec,
|
39
|
+
DataType,
|
40
|
+
FeatureSpec,
|
41
|
+
ModelSignature,
|
42
|
+
_infer_signature,
|
43
|
+
_rename_signature_with_snowflake_identifiers,
|
44
|
+
)
|
36
45
|
|
37
46
|
from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
|
38
47
|
|
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
|
|
43
52
|
validate_sklearn_args,
|
44
53
|
)
|
45
54
|
|
46
|
-
from snowflake.ml.model.model_signature import (
|
47
|
-
DataType,
|
48
|
-
FeatureSpec,
|
49
|
-
ModelSignature,
|
50
|
-
_infer_signature,
|
51
|
-
_rename_signature_with_snowflake_identifiers,
|
52
|
-
BaseFeatureSpec,
|
53
|
-
)
|
54
|
-
from snowflake.ml.model._signatures import utils as model_signature_utils
|
55
|
-
|
56
55
|
_PROJECT = "ModelDevelopment"
|
57
56
|
# Derive subproject from module name by removing "sklearn"
|
58
57
|
# and converting module name from underscore to CamelCase
|
@@ -61,12 +60,6 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
|
|
61
60
|
|
62
61
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
63
62
|
|
64
|
-
def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
|
65
|
-
def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
|
66
|
-
return False and callable(getattr(self._sklearn_object, "fit_transform", None))
|
67
|
-
return check
|
68
|
-
|
69
|
-
|
70
63
|
class RANSACRegressor(BaseTransformer):
|
71
64
|
r"""RANSAC (RANdom SAmple Consensus) algorithm
|
72
65
|
For more details on this class, see [sklearn.linear_model.RANSACRegressor]
|
@@ -305,12 +298,7 @@ class RANSACRegressor(BaseTransformer):
|
|
305
298
|
)
|
306
299
|
return selected_cols
|
307
300
|
|
308
|
-
|
309
|
-
project=_PROJECT,
|
310
|
-
subproject=_SUBPROJECT,
|
311
|
-
custom_tags=dict([("autogen", True)]),
|
312
|
-
)
|
313
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "RANSACRegressor":
|
301
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "RANSACRegressor":
|
314
302
|
"""Fit estimator using RANSAC algorithm
|
315
303
|
For more details on this function, see [sklearn.linear_model.RANSACRegressor.fit]
|
316
304
|
(https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.RANSACRegressor.html#sklearn.linear_model.RANSACRegressor.fit)
|
@@ -337,12 +325,14 @@ class RANSACRegressor(BaseTransformer):
|
|
337
325
|
|
338
326
|
self._snowpark_cols = dataset.select(self.input_cols).columns
|
339
327
|
|
340
|
-
|
328
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
341
329
|
if SNOWML_SPROC_ENV in os.environ:
|
342
330
|
statement_params = telemetry.get_function_usage_statement_params(
|
343
331
|
project=_PROJECT,
|
344
332
|
subproject=_SUBPROJECT,
|
345
|
-
function_name=telemetry.get_statement_params_full_func_name(
|
333
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
334
|
+
inspect.currentframe(), RANSACRegressor.__class__.__name__
|
335
|
+
),
|
346
336
|
api_calls=[Session.call],
|
347
337
|
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
348
338
|
)
|
@@ -363,27 +353,24 @@ class RANSACRegressor(BaseTransformer):
|
|
363
353
|
)
|
364
354
|
self._sklearn_object = model_trainer.train()
|
365
355
|
self._is_fitted = True
|
366
|
-
self.
|
356
|
+
self._generate_model_signatures(dataset)
|
367
357
|
return self
|
368
358
|
|
369
359
|
def _batch_inference_validate_snowpark(
|
370
360
|
self,
|
371
361
|
dataset: DataFrame,
|
372
362
|
inference_method: str,
|
373
|
-
) ->
|
374
|
-
"""Util method to run validate that batch inference can be run on a snowpark dataframe
|
375
|
-
return the available package that exists in the snowflake anaconda channel
|
363
|
+
) -> None:
|
364
|
+
"""Util method to run validate that batch inference can be run on a snowpark dataframe.
|
376
365
|
|
377
366
|
Args:
|
378
367
|
dataset: snowpark dataframe
|
379
368
|
inference_method: the inference method such as predict, score...
|
380
|
-
|
369
|
+
|
381
370
|
Raises:
|
382
371
|
SnowflakeMLException: If the estimator is not fitted, raise error
|
383
372
|
SnowflakeMLException: If the session is None, raise error
|
384
373
|
|
385
|
-
Returns:
|
386
|
-
A list of available package that exists in the snowflake anaconda channel
|
387
374
|
"""
|
388
375
|
if not self._is_fitted:
|
389
376
|
raise exceptions.SnowflakeMLException(
|
@@ -401,9 +388,7 @@ class RANSACRegressor(BaseTransformer):
|
|
401
388
|
"Session must not specified for snowpark dataset."
|
402
389
|
),
|
403
390
|
)
|
404
|
-
|
405
|
-
return pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
|
406
|
-
pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
|
391
|
+
|
407
392
|
|
408
393
|
@available_if(original_estimator_has_callable("predict")) # type: ignore[misc]
|
409
394
|
@telemetry.send_api_usage_telemetry(
|
@@ -439,7 +424,9 @@ class RANSACRegressor(BaseTransformer):
|
|
439
424
|
# when it is classifier, infer the datatype from label columns
|
440
425
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
441
426
|
# Batch inference takes a single expected output column type. Use the first columns type for now.
|
442
|
-
label_cols_signatures = [
|
427
|
+
label_cols_signatures = [
|
428
|
+
row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
|
429
|
+
]
|
443
430
|
if len(label_cols_signatures) == 0:
|
444
431
|
error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
|
445
432
|
raise exceptions.SnowflakeMLException(
|
@@ -447,25 +434,23 @@ class RANSACRegressor(BaseTransformer):
|
|
447
434
|
original_exception=ValueError(error_str),
|
448
435
|
)
|
449
436
|
|
450
|
-
expected_type_inferred = convert_sp_to_sf_type(
|
451
|
-
label_cols_signatures[0].as_snowpark_type()
|
452
|
-
)
|
437
|
+
expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
|
453
438
|
|
454
|
-
self.
|
455
|
-
|
439
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
440
|
+
self._deps = self._get_dependencies()
|
441
|
+
assert isinstance(
|
442
|
+
dataset._session, Session
|
443
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
456
444
|
|
457
445
|
transform_kwargs = dict(
|
458
|
-
session
|
459
|
-
dependencies
|
460
|
-
drop_input_cols
|
461
|
-
expected_output_cols_type
|
446
|
+
session=dataset._session,
|
447
|
+
dependencies=self._deps,
|
448
|
+
drop_input_cols=self._drop_input_cols,
|
449
|
+
expected_output_cols_type=expected_type_inferred,
|
462
450
|
)
|
463
451
|
|
464
452
|
elif isinstance(dataset, pd.DataFrame):
|
465
|
-
transform_kwargs = dict(
|
466
|
-
snowpark_input_cols = self._snowpark_cols,
|
467
|
-
drop_input_cols = self._drop_input_cols
|
468
|
-
)
|
453
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
469
454
|
|
470
455
|
transform_handlers = ModelTransformerBuilder.build(
|
471
456
|
dataset=dataset,
|
@@ -505,7 +490,7 @@ class RANSACRegressor(BaseTransformer):
|
|
505
490
|
Transformed dataset.
|
506
491
|
"""
|
507
492
|
super()._check_dataset_type(dataset)
|
508
|
-
inference_method="transform"
|
493
|
+
inference_method = "transform"
|
509
494
|
|
510
495
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
511
496
|
# are specific to the type of dataset used.
|
@@ -535,24 +520,19 @@ class RANSACRegressor(BaseTransformer):
|
|
535
520
|
if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
|
536
521
|
expected_dtype = convert_sp_to_sf_type(output_types[0])
|
537
522
|
|
538
|
-
self.
|
539
|
-
|
540
|
-
inference_method=inference_method,
|
541
|
-
)
|
523
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
524
|
+
self._deps = self._get_dependencies()
|
542
525
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
543
526
|
|
544
527
|
transform_kwargs = dict(
|
545
|
-
session
|
546
|
-
dependencies
|
547
|
-
drop_input_cols
|
548
|
-
expected_output_cols_type
|
528
|
+
session=dataset._session,
|
529
|
+
dependencies=self._deps,
|
530
|
+
drop_input_cols=self._drop_input_cols,
|
531
|
+
expected_output_cols_type=expected_dtype,
|
549
532
|
)
|
550
533
|
|
551
534
|
elif isinstance(dataset, pd.DataFrame):
|
552
|
-
transform_kwargs = dict(
|
553
|
-
snowpark_input_cols = self._snowpark_cols,
|
554
|
-
drop_input_cols = self._drop_input_cols
|
555
|
-
)
|
535
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
556
536
|
|
557
537
|
transform_handlers = ModelTransformerBuilder.build(
|
558
538
|
dataset=dataset,
|
@@ -571,7 +551,11 @@ class RANSACRegressor(BaseTransformer):
|
|
571
551
|
return output_df
|
572
552
|
|
573
553
|
@available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
|
574
|
-
def fit_predict(
|
554
|
+
def fit_predict(
|
555
|
+
self,
|
556
|
+
dataset: Union[DataFrame, pd.DataFrame],
|
557
|
+
output_cols_prefix: str = "fit_predict_",
|
558
|
+
) -> Union[DataFrame, pd.DataFrame]:
|
575
559
|
""" Method not supported for this class.
|
576
560
|
|
577
561
|
|
@@ -596,22 +580,104 @@ class RANSACRegressor(BaseTransformer):
|
|
596
580
|
)
|
597
581
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
598
582
|
drop_input_cols=self._drop_input_cols,
|
599
|
-
expected_output_cols_list=
|
583
|
+
expected_output_cols_list=(
|
584
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
585
|
+
),
|
600
586
|
)
|
601
587
|
self._sklearn_object = fitted_estimator
|
602
588
|
self._is_fitted = True
|
603
589
|
return output_result
|
604
590
|
|
591
|
+
|
592
|
+
@available_if(original_estimator_has_callable("fit_transform")) # type: ignore[misc]
|
593
|
+
def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "fit_transform_",) -> Union[DataFrame, pd.DataFrame]:
|
594
|
+
""" Method not supported for this class.
|
595
|
+
|
605
596
|
|
606
|
-
|
607
|
-
|
608
|
-
|
597
|
+
Raises:
|
598
|
+
TypeError: Supported dataset types: snowpark.DataFrame, pandas.DataFrame.
|
599
|
+
|
600
|
+
Args:
|
601
|
+
dataset: Union[snowflake.snowpark.DataFrame, pandas.DataFrame]
|
602
|
+
Snowpark or Pandas DataFrame.
|
603
|
+
output_cols_prefix: Prefix for the response columns
|
609
604
|
Returns:
|
610
605
|
Transformed dataset.
|
611
606
|
"""
|
612
|
-
self.
|
613
|
-
|
614
|
-
|
607
|
+
self._infer_input_output_cols(dataset)
|
608
|
+
super()._check_dataset_type(dataset)
|
609
|
+
model_trainer = ModelTrainerBuilder.build_fit_transform(
|
610
|
+
estimator=self._sklearn_object,
|
611
|
+
dataset=dataset,
|
612
|
+
input_cols=self.input_cols,
|
613
|
+
label_cols=self.label_cols,
|
614
|
+
sample_weight_col=self.sample_weight_col,
|
615
|
+
autogenerated=self._autogenerated,
|
616
|
+
subproject=_SUBPROJECT,
|
617
|
+
)
|
618
|
+
output_result, fitted_estimator = model_trainer.train_fit_transform(
|
619
|
+
drop_input_cols=self._drop_input_cols,
|
620
|
+
expected_output_cols_list=self.output_cols,
|
621
|
+
)
|
622
|
+
self._sklearn_object = fitted_estimator
|
623
|
+
self._is_fitted = True
|
624
|
+
return output_result
|
625
|
+
|
626
|
+
|
627
|
+
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
628
|
+
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
629
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
630
|
+
"""
|
631
|
+
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
632
|
+
# The following condition is introduced for kneighbors methods, and not used in other methods
|
633
|
+
if output_cols:
|
634
|
+
output_cols = [
|
635
|
+
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
636
|
+
for c in output_cols
|
637
|
+
]
|
638
|
+
elif getattr(self._sklearn_object, "classes_", None) is None:
|
639
|
+
output_cols = [output_cols_prefix]
|
640
|
+
elif self._sklearn_object is not None:
|
641
|
+
classes = self._sklearn_object.classes_
|
642
|
+
if isinstance(classes, numpy.ndarray):
|
643
|
+
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
644
|
+
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
645
|
+
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
646
|
+
output_cols = []
|
647
|
+
for i, cl in enumerate(classes):
|
648
|
+
# For binary classification, there is only one output column for each class
|
649
|
+
# ndarray as the two classes are complementary.
|
650
|
+
if len(cl) == 2:
|
651
|
+
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
652
|
+
else:
|
653
|
+
output_cols.extend([
|
654
|
+
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
655
|
+
])
|
656
|
+
else:
|
657
|
+
output_cols = []
|
658
|
+
|
659
|
+
# Make sure column names are valid snowflake identifiers.
|
660
|
+
assert output_cols is not None # Make MyPy happy
|
661
|
+
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
662
|
+
|
663
|
+
return rv
|
664
|
+
|
665
|
+
def _align_expected_output_names(
|
666
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
667
|
+
) -> List[str]:
|
668
|
+
# in case the inferred output column names dimension is different
|
669
|
+
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
670
|
+
output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
|
671
|
+
output_df_columns = list(output_df_pd.columns)
|
672
|
+
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
673
|
+
if self.sample_weight_col:
|
674
|
+
output_df_columns_set -= set(self.sample_weight_col)
|
675
|
+
# if the dimension of inferred output column names is correct; use it
|
676
|
+
if len(expected_output_cols_list) == len(output_df_columns_set):
|
677
|
+
return expected_output_cols_list
|
678
|
+
# otherwise, use the sklearn estimator's output
|
679
|
+
else:
|
680
|
+
return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
615
681
|
|
616
682
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
617
683
|
@telemetry.send_api_usage_telemetry(
|
@@ -643,24 +709,26 @@ class RANSACRegressor(BaseTransformer):
|
|
643
709
|
# are specific to the type of dataset used.
|
644
710
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
645
711
|
|
712
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
713
|
+
|
646
714
|
if isinstance(dataset, DataFrame):
|
647
|
-
self.
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
715
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
716
|
+
self._deps = self._get_dependencies()
|
717
|
+
assert isinstance(
|
718
|
+
dataset._session, Session
|
719
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
652
720
|
transform_kwargs = dict(
|
653
721
|
session=dataset._session,
|
654
722
|
dependencies=self._deps,
|
655
|
-
drop_input_cols
|
723
|
+
drop_input_cols=self._drop_input_cols,
|
656
724
|
expected_output_cols_type="float",
|
657
725
|
)
|
726
|
+
expected_output_cols = self._align_expected_output_names(
|
727
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
728
|
+
)
|
658
729
|
|
659
730
|
elif isinstance(dataset, pd.DataFrame):
|
660
|
-
transform_kwargs = dict(
|
661
|
-
snowpark_input_cols = self._snowpark_cols,
|
662
|
-
drop_input_cols = self._drop_input_cols
|
663
|
-
)
|
731
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
664
732
|
|
665
733
|
transform_handlers = ModelTransformerBuilder.build(
|
666
734
|
dataset=dataset,
|
@@ -672,7 +740,7 @@ class RANSACRegressor(BaseTransformer):
|
|
672
740
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
673
741
|
inference_method=inference_method,
|
674
742
|
input_cols=self.input_cols,
|
675
|
-
expected_output_cols=
|
743
|
+
expected_output_cols=expected_output_cols,
|
676
744
|
**transform_kwargs
|
677
745
|
)
|
678
746
|
return output_df
|
@@ -702,29 +770,30 @@ class RANSACRegressor(BaseTransformer):
|
|
702
770
|
Output dataset with log probability of the sample for each class in the model.
|
703
771
|
"""
|
704
772
|
super()._check_dataset_type(dataset)
|
705
|
-
inference_method="predict_log_proba"
|
773
|
+
inference_method = "predict_log_proba"
|
774
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
706
775
|
|
707
776
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
708
777
|
# are specific to the type of dataset used.
|
709
778
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
710
779
|
|
711
780
|
if isinstance(dataset, DataFrame):
|
712
|
-
self.
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
781
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
782
|
+
self._deps = self._get_dependencies()
|
783
|
+
assert isinstance(
|
784
|
+
dataset._session, Session
|
785
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
717
786
|
transform_kwargs = dict(
|
718
787
|
session=dataset._session,
|
719
788
|
dependencies=self._deps,
|
720
|
-
drop_input_cols
|
789
|
+
drop_input_cols=self._drop_input_cols,
|
721
790
|
expected_output_cols_type="float",
|
722
791
|
)
|
792
|
+
expected_output_cols = self._align_expected_output_names(
|
793
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
794
|
+
)
|
723
795
|
elif isinstance(dataset, pd.DataFrame):
|
724
|
-
transform_kwargs = dict(
|
725
|
-
snowpark_input_cols = self._snowpark_cols,
|
726
|
-
drop_input_cols = self._drop_input_cols
|
727
|
-
)
|
796
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
728
797
|
|
729
798
|
transform_handlers = ModelTransformerBuilder.build(
|
730
799
|
dataset=dataset,
|
@@ -737,7 +806,7 @@ class RANSACRegressor(BaseTransformer):
|
|
737
806
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
738
807
|
inference_method=inference_method,
|
739
808
|
input_cols=self.input_cols,
|
740
|
-
expected_output_cols=
|
809
|
+
expected_output_cols=expected_output_cols,
|
741
810
|
**transform_kwargs
|
742
811
|
)
|
743
812
|
return output_df
|
@@ -763,30 +832,32 @@ class RANSACRegressor(BaseTransformer):
|
|
763
832
|
Output dataset with results of the decision function for the samples in input dataset.
|
764
833
|
"""
|
765
834
|
super()._check_dataset_type(dataset)
|
766
|
-
inference_method="decision_function"
|
835
|
+
inference_method = "decision_function"
|
767
836
|
|
768
837
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
769
838
|
# are specific to the type of dataset used.
|
770
839
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
771
840
|
|
841
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
842
|
+
|
772
843
|
if isinstance(dataset, DataFrame):
|
773
|
-
self.
|
774
|
-
|
775
|
-
|
776
|
-
|
777
|
-
|
844
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
845
|
+
self._deps = self._get_dependencies()
|
846
|
+
assert isinstance(
|
847
|
+
dataset._session, Session
|
848
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
778
849
|
transform_kwargs = dict(
|
779
850
|
session=dataset._session,
|
780
851
|
dependencies=self._deps,
|
781
|
-
drop_input_cols
|
852
|
+
drop_input_cols=self._drop_input_cols,
|
782
853
|
expected_output_cols_type="float",
|
783
854
|
)
|
855
|
+
expected_output_cols = self._align_expected_output_names(
|
856
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
857
|
+
)
|
784
858
|
|
785
859
|
elif isinstance(dataset, pd.DataFrame):
|
786
|
-
transform_kwargs = dict(
|
787
|
-
snowpark_input_cols = self._snowpark_cols,
|
788
|
-
drop_input_cols = self._drop_input_cols
|
789
|
-
)
|
860
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
790
861
|
|
791
862
|
transform_handlers = ModelTransformerBuilder.build(
|
792
863
|
dataset=dataset,
|
@@ -799,7 +870,7 @@ class RANSACRegressor(BaseTransformer):
|
|
799
870
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
800
871
|
inference_method=inference_method,
|
801
872
|
input_cols=self.input_cols,
|
802
|
-
expected_output_cols=
|
873
|
+
expected_output_cols=expected_output_cols,
|
803
874
|
**transform_kwargs
|
804
875
|
)
|
805
876
|
return output_df
|
@@ -828,17 +899,17 @@ class RANSACRegressor(BaseTransformer):
|
|
828
899
|
Output dataset with probability of the sample for each class in the model.
|
829
900
|
"""
|
830
901
|
super()._check_dataset_type(dataset)
|
831
|
-
inference_method="score_samples"
|
902
|
+
inference_method = "score_samples"
|
832
903
|
|
833
904
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
834
905
|
# are specific to the type of dataset used.
|
835
906
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
836
907
|
|
908
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
909
|
+
|
837
910
|
if isinstance(dataset, DataFrame):
|
838
|
-
self.
|
839
|
-
|
840
|
-
inference_method=inference_method,
|
841
|
-
)
|
911
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
912
|
+
self._deps = self._get_dependencies()
|
842
913
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
843
914
|
transform_kwargs = dict(
|
844
915
|
session=dataset._session,
|
@@ -846,6 +917,9 @@ class RANSACRegressor(BaseTransformer):
|
|
846
917
|
drop_input_cols = self._drop_input_cols,
|
847
918
|
expected_output_cols_type="float",
|
848
919
|
)
|
920
|
+
expected_output_cols = self._align_expected_output_names(
|
921
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
922
|
+
)
|
849
923
|
|
850
924
|
elif isinstance(dataset, pd.DataFrame):
|
851
925
|
transform_kwargs = dict(
|
@@ -864,7 +938,7 @@ class RANSACRegressor(BaseTransformer):
|
|
864
938
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
865
939
|
inference_method=inference_method,
|
866
940
|
input_cols=self.input_cols,
|
867
|
-
expected_output_cols=
|
941
|
+
expected_output_cols=expected_output_cols,
|
868
942
|
**transform_kwargs
|
869
943
|
)
|
870
944
|
return output_df
|
@@ -899,17 +973,15 @@ class RANSACRegressor(BaseTransformer):
|
|
899
973
|
transform_kwargs: ScoreKwargsTypedDict = dict()
|
900
974
|
|
901
975
|
if isinstance(dataset, DataFrame):
|
902
|
-
self.
|
903
|
-
|
904
|
-
inference_method="score",
|
905
|
-
)
|
976
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method="score")
|
977
|
+
self._deps = self._get_dependencies()
|
906
978
|
selected_cols = self._get_active_columns()
|
907
979
|
if len(selected_cols) > 0:
|
908
980
|
dataset = dataset.select(selected_cols)
|
909
981
|
assert isinstance(dataset._session, Session) # keep mypy happy
|
910
982
|
transform_kwargs = dict(
|
911
983
|
session=dataset._session,
|
912
|
-
dependencies=
|
984
|
+
dependencies=self._deps,
|
913
985
|
score_sproc_imports=['sklearn'],
|
914
986
|
)
|
915
987
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -974,11 +1046,8 @@ class RANSACRegressor(BaseTransformer):
|
|
974
1046
|
|
975
1047
|
if isinstance(dataset, DataFrame):
|
976
1048
|
|
977
|
-
self.
|
978
|
-
|
979
|
-
inference_method=inference_method,
|
980
|
-
|
981
|
-
)
|
1049
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
1050
|
+
self._deps = self._get_dependencies()
|
982
1051
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
983
1052
|
transform_kwargs = dict(
|
984
1053
|
session = dataset._session,
|
@@ -1011,50 +1080,84 @@ class RANSACRegressor(BaseTransformer):
|
|
1011
1080
|
)
|
1012
1081
|
return output_df
|
1013
1082
|
|
1083
|
+
|
1084
|
+
|
1085
|
+
def to_sklearn(self) -> Any:
|
1086
|
+
"""Get sklearn.linear_model.RANSACRegressor object.
|
1087
|
+
"""
|
1088
|
+
if self._sklearn_object is None:
|
1089
|
+
self._sklearn_object = self._create_sklearn_object()
|
1090
|
+
return self._sklearn_object
|
1091
|
+
|
1092
|
+
def to_xgboost(self) -> Any:
|
1093
|
+
raise exceptions.SnowflakeMLException(
|
1094
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1095
|
+
original_exception=AttributeError(
|
1096
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1097
|
+
"to_xgboost()",
|
1098
|
+
"to_sklearn()"
|
1099
|
+
)
|
1100
|
+
),
|
1101
|
+
)
|
1102
|
+
|
1103
|
+
def to_lightgbm(self) -> Any:
|
1104
|
+
raise exceptions.SnowflakeMLException(
|
1105
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1106
|
+
original_exception=AttributeError(
|
1107
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1108
|
+
"to_lightgbm()",
|
1109
|
+
"to_sklearn()"
|
1110
|
+
)
|
1111
|
+
),
|
1112
|
+
)
|
1113
|
+
|
1114
|
+
def _get_dependencies(self) -> List[str]:
|
1115
|
+
return self._deps
|
1116
|
+
|
1014
1117
|
|
1015
|
-
def
|
1118
|
+
def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
1016
1119
|
self._model_signature_dict = dict()
|
1017
1120
|
|
1018
1121
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1019
1122
|
|
1020
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input"))
|
1123
|
+
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1021
1124
|
outputs: List[BaseFeatureSpec] = []
|
1022
1125
|
if hasattr(self, "predict"):
|
1023
1126
|
# keep mypy happy
|
1024
|
-
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1127
|
+
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1025
1128
|
# For classifier, the type of predict is the same as the type of label
|
1026
|
-
if self._sklearn_object._estimator_type ==
|
1027
|
-
|
1129
|
+
if self._sklearn_object._estimator_type == "classifier":
|
1130
|
+
# label columns is the desired type for output
|
1028
1131
|
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1029
1132
|
# rename the output columns
|
1030
1133
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1031
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1032
|
-
|
1033
|
-
|
1134
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1135
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1136
|
+
)
|
1034
1137
|
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
1035
1138
|
# For outlier models, returns -1 for outliers and 1 for inliers.
|
1036
|
-
# Clusterer returns int64 cluster labels.
|
1139
|
+
# Clusterer returns int64 cluster labels.
|
1037
1140
|
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
1038
1141
|
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
1039
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1040
|
-
|
1041
|
-
|
1042
|
-
|
1142
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1143
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1144
|
+
)
|
1145
|
+
|
1043
1146
|
# For regressor, the type of predict is float64
|
1044
|
-
elif self._sklearn_object._estimator_type ==
|
1147
|
+
elif self._sklearn_object._estimator_type == "regressor":
|
1045
1148
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1046
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1047
|
-
|
1048
|
-
|
1049
|
-
|
1149
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1150
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1151
|
+
)
|
1152
|
+
|
1050
1153
|
for prob_func in PROB_FUNCTIONS:
|
1051
1154
|
if hasattr(self, prob_func):
|
1052
1155
|
output_cols_prefix: str = f"{prob_func}_"
|
1053
1156
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1054
1157
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1055
|
-
self._model_signature_dict[prob_func] = ModelSignature(
|
1056
|
-
|
1057
|
-
|
1158
|
+
self._model_signature_dict[prob_func] = ModelSignature(
|
1159
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1160
|
+
)
|
1058
1161
|
|
1059
1162
|
# Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
|
1060
1163
|
items = list(self._model_signature_dict.items())
|
@@ -1067,10 +1170,10 @@ class RANSACRegressor(BaseTransformer):
|
|
1067
1170
|
"""Returns model signature of current class.
|
1068
1171
|
|
1069
1172
|
Raises:
|
1070
|
-
|
1173
|
+
SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
|
1071
1174
|
|
1072
1175
|
Returns:
|
1073
|
-
Dict
|
1176
|
+
Dict with each method and its input output signature
|
1074
1177
|
"""
|
1075
1178
|
if self._model_signature_dict is None:
|
1076
1179
|
raise exceptions.SnowflakeMLException(
|
@@ -1078,35 +1181,3 @@ class RANSACRegressor(BaseTransformer):
|
|
1078
1181
|
original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
|
1079
1182
|
)
|
1080
1183
|
return self._model_signature_dict
|
1081
|
-
|
1082
|
-
def to_sklearn(self) -> Any:
|
1083
|
-
"""Get sklearn.linear_model.RANSACRegressor object.
|
1084
|
-
"""
|
1085
|
-
if self._sklearn_object is None:
|
1086
|
-
self._sklearn_object = self._create_sklearn_object()
|
1087
|
-
return self._sklearn_object
|
1088
|
-
|
1089
|
-
def to_xgboost(self) -> Any:
|
1090
|
-
raise exceptions.SnowflakeMLException(
|
1091
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1092
|
-
original_exception=AttributeError(
|
1093
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1094
|
-
"to_xgboost()",
|
1095
|
-
"to_sklearn()"
|
1096
|
-
)
|
1097
|
-
),
|
1098
|
-
)
|
1099
|
-
|
1100
|
-
def to_lightgbm(self) -> Any:
|
1101
|
-
raise exceptions.SnowflakeMLException(
|
1102
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1103
|
-
original_exception=AttributeError(
|
1104
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1105
|
-
"to_lightgbm()",
|
1106
|
-
"to_sklearn()"
|
1107
|
-
)
|
1108
|
-
),
|
1109
|
-
)
|
1110
|
-
|
1111
|
-
def _get_dependencies(self) -> List[str]:
|
1112
|
-
return self._deps
|