snowflake-ml-python 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +77 -32
- snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
- snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
- snowflake/ml/_internal/exceptions/error_codes.py +3 -0
- snowflake/ml/_internal/lineage/data_source.py +10 -0
- snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/dataset/__init__.py +10 -0
- snowflake/ml/dataset/dataset.py +454 -129
- snowflake/ml/dataset/dataset_factory.py +53 -0
- snowflake/ml/dataset/dataset_metadata.py +103 -0
- snowflake/ml/dataset/dataset_reader.py +202 -0
- snowflake/ml/feature_store/feature_store.py +531 -332
- snowflake/ml/feature_store/feature_view.py +40 -23
- snowflake/ml/fileset/embedded_stage_fs.py +146 -0
- snowflake/ml/fileset/sfcfs.py +56 -54
- snowflake/ml/fileset/snowfs.py +159 -0
- snowflake/ml/fileset/stage_fs.py +49 -17
- snowflake/ml/model/__init__.py +2 -2
- snowflake/ml/model/_api.py +16 -1
- snowflake/ml/model/_client/model/model_impl.py +27 -0
- snowflake/ml/model/_client/model/model_version_impl.py +137 -50
- snowflake/ml/model/_client/ops/model_ops.py +159 -40
- snowflake/ml/model/_client/sql/model.py +25 -2
- snowflake/ml/model/_client/sql/model_version.py +131 -2
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
- snowflake/ml/model/_model_composer/model_composer.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +38 -51
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +19 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_env/model_env.py +41 -0
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +37 -11
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -5
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
- snowflake/ml/modeling/_internal/model_trainer.py +7 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +29 -7
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +246 -175
- snowflake/ml/modeling/cluster/affinity_propagation.py +246 -175
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +246 -175
- snowflake/ml/modeling/cluster/birch.py +248 -175
- snowflake/ml/modeling/cluster/bisecting_k_means.py +248 -175
- snowflake/ml/modeling/cluster/dbscan.py +246 -175
- snowflake/ml/modeling/cluster/feature_agglomeration.py +248 -175
- snowflake/ml/modeling/cluster/k_means.py +248 -175
- snowflake/ml/modeling/cluster/mean_shift.py +246 -175
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +248 -175
- snowflake/ml/modeling/cluster/optics.py +246 -175
- snowflake/ml/modeling/cluster/spectral_biclustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_clustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_coclustering.py +246 -175
- snowflake/ml/modeling/compose/column_transformer.py +248 -175
- snowflake/ml/modeling/compose/transformed_target_regressor.py +246 -175
- snowflake/ml/modeling/covariance/elliptic_envelope.py +246 -175
- snowflake/ml/modeling/covariance/empirical_covariance.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +246 -175
- snowflake/ml/modeling/covariance/ledoit_wolf.py +246 -175
- snowflake/ml/modeling/covariance/min_cov_det.py +246 -175
- snowflake/ml/modeling/covariance/oas.py +246 -175
- snowflake/ml/modeling/covariance/shrunk_covariance.py +246 -175
- snowflake/ml/modeling/decomposition/dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/factor_analysis.py +248 -175
- snowflake/ml/modeling/decomposition/fast_ica.py +248 -175
- snowflake/ml/modeling/decomposition/incremental_pca.py +248 -175
- snowflake/ml/modeling/decomposition/kernel_pca.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/pca.py +248 -175
- snowflake/ml/modeling/decomposition/sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/truncated_svd.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/isolation_forest.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/stacking_regressor.py +248 -175
- snowflake/ml/modeling/ensemble/voting_classifier.py +248 -175
- snowflake/ml/modeling/ensemble/voting_regressor.py +248 -175
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fdr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fpr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fwe.py +248 -175
- snowflake/ml/modeling/feature_selection/select_k_best.py +248 -175
- snowflake/ml/modeling/feature_selection/select_percentile.py +248 -175
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +248 -175
- snowflake/ml/modeling/feature_selection/variance_threshold.py +248 -175
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +72 -37
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +246 -175
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +246 -175
- snowflake/ml/modeling/impute/iterative_imputer.py +248 -175
- snowflake/ml/modeling/impute/knn_imputer.py +248 -175
- snowflake/ml/modeling/impute/missing_indicator.py +248 -175
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/nystroem.py +248 -175
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +248 -175
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ard_regression.py +246 -175
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/gamma_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/huber_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/lars.py +246 -175
- snowflake/ml/modeling/linear_model/lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +246 -175
- snowflake/ml/modeling/linear_model/linear_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/perceptron.py +246 -175
- snowflake/ml/modeling/linear_model/poisson_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ransac_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ridge.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_cv.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +246 -175
- snowflake/ml/modeling/manifold/isomap.py +248 -175
- snowflake/ml/modeling/manifold/mds.py +248 -175
- snowflake/ml/modeling/manifold/spectral_embedding.py +248 -175
- snowflake/ml/modeling/manifold/tsne.py +248 -175
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +246 -175
- snowflake/ml/modeling/mixture/gaussian_mixture.py +246 -175
- snowflake/ml/modeling/model_selection/grid_search_cv.py +63 -41
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +80 -38
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/output_code_classifier.py +246 -175
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/complement_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neighbors/kernel_density.py +246 -175
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_centroid.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +246 -175
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +248 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +248 -175
- snowflake/ml/modeling/neural_network/mlp_classifier.py +246 -175
- snowflake/ml/modeling/neural_network/mlp_regressor.py +246 -175
- snowflake/ml/modeling/pipeline/pipeline.py +517 -35
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +13 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +248 -175
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +246 -175
- snowflake/ml/modeling/semi_supervised/label_spreading.py +246 -175
- snowflake/ml/modeling/svm/linear_svc.py +246 -175
- snowflake/ml/modeling/svm/linear_svr.py +246 -175
- snowflake/ml/modeling/svm/nu_svc.py +246 -175
- snowflake/ml/modeling/svm/nu_svr.py +246 -175
- snowflake/ml/modeling/svm/svc.py +246 -175
- snowflake/ml/modeling/svm/svr.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_regressor.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +246 -175
- snowflake/ml/registry/model_registry.py +3 -149
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +129 -57
- snowflake_ml_python-1.5.0.dist-info/RECORD +380 -0
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- snowflake/ml/registry/_artifact_manager.py +0 -156
- snowflake/ml/registry/artifact.py +0 -46
- snowflake_ml_python-1.4.0.dist-info/RECORD +0 -370
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
|
|
33
33
|
BatchInferenceKwargsTypedDict,
|
34
34
|
ScoreKwargsTypedDict
|
35
35
|
)
|
36
|
+
from snowflake.ml.model._signatures import utils as model_signature_utils
|
37
|
+
from snowflake.ml.model.model_signature import (
|
38
|
+
BaseFeatureSpec,
|
39
|
+
DataType,
|
40
|
+
FeatureSpec,
|
41
|
+
ModelSignature,
|
42
|
+
_infer_signature,
|
43
|
+
_rename_signature_with_snowflake_identifiers,
|
44
|
+
)
|
36
45
|
|
37
46
|
from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
|
38
47
|
|
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
|
|
43
52
|
validate_sklearn_args,
|
44
53
|
)
|
45
54
|
|
46
|
-
from snowflake.ml.model.model_signature import (
|
47
|
-
DataType,
|
48
|
-
FeatureSpec,
|
49
|
-
ModelSignature,
|
50
|
-
_infer_signature,
|
51
|
-
_rename_signature_with_snowflake_identifiers,
|
52
|
-
BaseFeatureSpec,
|
53
|
-
)
|
54
|
-
from snowflake.ml.model._signatures import utils as model_signature_utils
|
55
|
-
|
56
55
|
_PROJECT = "ModelDevelopment"
|
57
56
|
# Derive subproject from module name by removing "sklearn"
|
58
57
|
# and converting module name from underscore to CamelCase
|
@@ -61,12 +60,6 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.neural_network".replace(
|
|
61
60
|
|
62
61
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
63
62
|
|
64
|
-
def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
|
65
|
-
def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
|
66
|
-
return False and callable(getattr(self._sklearn_object, "fit_transform", None))
|
67
|
-
return check
|
68
|
-
|
69
|
-
|
70
63
|
class MLPRegressor(BaseTransformer):
|
71
64
|
r"""Multi-layer Perceptron regressor
|
72
65
|
For more details on this class, see [sklearn.neural_network.MLPRegressor]
|
@@ -383,12 +376,7 @@ class MLPRegressor(BaseTransformer):
|
|
383
376
|
)
|
384
377
|
return selected_cols
|
385
378
|
|
386
|
-
|
387
|
-
project=_PROJECT,
|
388
|
-
subproject=_SUBPROJECT,
|
389
|
-
custom_tags=dict([("autogen", True)]),
|
390
|
-
)
|
391
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "MLPRegressor":
|
379
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "MLPRegressor":
|
392
380
|
"""Fit the model to data matrix X and target(s) y
|
393
381
|
For more details on this function, see [sklearn.neural_network.MLPRegressor.fit]
|
394
382
|
(https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPRegressor.html#sklearn.neural_network.MLPRegressor.fit)
|
@@ -415,12 +403,14 @@ class MLPRegressor(BaseTransformer):
|
|
415
403
|
|
416
404
|
self._snowpark_cols = dataset.select(self.input_cols).columns
|
417
405
|
|
418
|
-
|
406
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
419
407
|
if SNOWML_SPROC_ENV in os.environ:
|
420
408
|
statement_params = telemetry.get_function_usage_statement_params(
|
421
409
|
project=_PROJECT,
|
422
410
|
subproject=_SUBPROJECT,
|
423
|
-
function_name=telemetry.get_statement_params_full_func_name(
|
411
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
412
|
+
inspect.currentframe(), MLPRegressor.__class__.__name__
|
413
|
+
),
|
424
414
|
api_calls=[Session.call],
|
425
415
|
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
426
416
|
)
|
@@ -441,27 +431,24 @@ class MLPRegressor(BaseTransformer):
|
|
441
431
|
)
|
442
432
|
self._sklearn_object = model_trainer.train()
|
443
433
|
self._is_fitted = True
|
444
|
-
self.
|
434
|
+
self._generate_model_signatures(dataset)
|
445
435
|
return self
|
446
436
|
|
447
437
|
def _batch_inference_validate_snowpark(
|
448
438
|
self,
|
449
439
|
dataset: DataFrame,
|
450
440
|
inference_method: str,
|
451
|
-
) ->
|
452
|
-
"""Util method to run validate that batch inference can be run on a snowpark dataframe
|
453
|
-
return the available package that exists in the snowflake anaconda channel
|
441
|
+
) -> None:
|
442
|
+
"""Util method to run validate that batch inference can be run on a snowpark dataframe.
|
454
443
|
|
455
444
|
Args:
|
456
445
|
dataset: snowpark dataframe
|
457
446
|
inference_method: the inference method such as predict, score...
|
458
|
-
|
447
|
+
|
459
448
|
Raises:
|
460
449
|
SnowflakeMLException: If the estimator is not fitted, raise error
|
461
450
|
SnowflakeMLException: If the session is None, raise error
|
462
451
|
|
463
|
-
Returns:
|
464
|
-
A list of available package that exists in the snowflake anaconda channel
|
465
452
|
"""
|
466
453
|
if not self._is_fitted:
|
467
454
|
raise exceptions.SnowflakeMLException(
|
@@ -479,9 +466,7 @@ class MLPRegressor(BaseTransformer):
|
|
479
466
|
"Session must not specified for snowpark dataset."
|
480
467
|
),
|
481
468
|
)
|
482
|
-
|
483
|
-
return pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
|
484
|
-
pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
|
469
|
+
|
485
470
|
|
486
471
|
@available_if(original_estimator_has_callable("predict")) # type: ignore[misc]
|
487
472
|
@telemetry.send_api_usage_telemetry(
|
@@ -517,7 +502,9 @@ class MLPRegressor(BaseTransformer):
|
|
517
502
|
# when it is classifier, infer the datatype from label columns
|
518
503
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
519
504
|
# Batch inference takes a single expected output column type. Use the first columns type for now.
|
520
|
-
label_cols_signatures = [
|
505
|
+
label_cols_signatures = [
|
506
|
+
row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
|
507
|
+
]
|
521
508
|
if len(label_cols_signatures) == 0:
|
522
509
|
error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
|
523
510
|
raise exceptions.SnowflakeMLException(
|
@@ -525,25 +512,23 @@ class MLPRegressor(BaseTransformer):
|
|
525
512
|
original_exception=ValueError(error_str),
|
526
513
|
)
|
527
514
|
|
528
|
-
expected_type_inferred = convert_sp_to_sf_type(
|
529
|
-
label_cols_signatures[0].as_snowpark_type()
|
530
|
-
)
|
515
|
+
expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
|
531
516
|
|
532
|
-
self.
|
533
|
-
|
517
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
518
|
+
self._deps = self._get_dependencies()
|
519
|
+
assert isinstance(
|
520
|
+
dataset._session, Session
|
521
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
534
522
|
|
535
523
|
transform_kwargs = dict(
|
536
|
-
session
|
537
|
-
dependencies
|
538
|
-
drop_input_cols
|
539
|
-
expected_output_cols_type
|
524
|
+
session=dataset._session,
|
525
|
+
dependencies=self._deps,
|
526
|
+
drop_input_cols=self._drop_input_cols,
|
527
|
+
expected_output_cols_type=expected_type_inferred,
|
540
528
|
)
|
541
529
|
|
542
530
|
elif isinstance(dataset, pd.DataFrame):
|
543
|
-
transform_kwargs = dict(
|
544
|
-
snowpark_input_cols = self._snowpark_cols,
|
545
|
-
drop_input_cols = self._drop_input_cols
|
546
|
-
)
|
531
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
547
532
|
|
548
533
|
transform_handlers = ModelTransformerBuilder.build(
|
549
534
|
dataset=dataset,
|
@@ -583,7 +568,7 @@ class MLPRegressor(BaseTransformer):
|
|
583
568
|
Transformed dataset.
|
584
569
|
"""
|
585
570
|
super()._check_dataset_type(dataset)
|
586
|
-
inference_method="transform"
|
571
|
+
inference_method = "transform"
|
587
572
|
|
588
573
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
589
574
|
# are specific to the type of dataset used.
|
@@ -613,24 +598,19 @@ class MLPRegressor(BaseTransformer):
|
|
613
598
|
if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
|
614
599
|
expected_dtype = convert_sp_to_sf_type(output_types[0])
|
615
600
|
|
616
|
-
self.
|
617
|
-
|
618
|
-
inference_method=inference_method,
|
619
|
-
)
|
601
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
602
|
+
self._deps = self._get_dependencies()
|
620
603
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
621
604
|
|
622
605
|
transform_kwargs = dict(
|
623
|
-
session
|
624
|
-
dependencies
|
625
|
-
drop_input_cols
|
626
|
-
expected_output_cols_type
|
606
|
+
session=dataset._session,
|
607
|
+
dependencies=self._deps,
|
608
|
+
drop_input_cols=self._drop_input_cols,
|
609
|
+
expected_output_cols_type=expected_dtype,
|
627
610
|
)
|
628
611
|
|
629
612
|
elif isinstance(dataset, pd.DataFrame):
|
630
|
-
transform_kwargs = dict(
|
631
|
-
snowpark_input_cols = self._snowpark_cols,
|
632
|
-
drop_input_cols = self._drop_input_cols
|
633
|
-
)
|
613
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
634
614
|
|
635
615
|
transform_handlers = ModelTransformerBuilder.build(
|
636
616
|
dataset=dataset,
|
@@ -649,7 +629,11 @@ class MLPRegressor(BaseTransformer):
|
|
649
629
|
return output_df
|
650
630
|
|
651
631
|
@available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
|
652
|
-
def fit_predict(
|
632
|
+
def fit_predict(
|
633
|
+
self,
|
634
|
+
dataset: Union[DataFrame, pd.DataFrame],
|
635
|
+
output_cols_prefix: str = "fit_predict_",
|
636
|
+
) -> Union[DataFrame, pd.DataFrame]:
|
653
637
|
""" Method not supported for this class.
|
654
638
|
|
655
639
|
|
@@ -674,22 +658,104 @@ class MLPRegressor(BaseTransformer):
|
|
674
658
|
)
|
675
659
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
676
660
|
drop_input_cols=self._drop_input_cols,
|
677
|
-
expected_output_cols_list=
|
661
|
+
expected_output_cols_list=(
|
662
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
663
|
+
),
|
678
664
|
)
|
679
665
|
self._sklearn_object = fitted_estimator
|
680
666
|
self._is_fitted = True
|
681
667
|
return output_result
|
682
668
|
|
669
|
+
|
670
|
+
@available_if(original_estimator_has_callable("fit_transform")) # type: ignore[misc]
|
671
|
+
def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "fit_transform_",) -> Union[DataFrame, pd.DataFrame]:
|
672
|
+
""" Method not supported for this class.
|
673
|
+
|
683
674
|
|
684
|
-
|
685
|
-
|
686
|
-
|
675
|
+
Raises:
|
676
|
+
TypeError: Supported dataset types: snowpark.DataFrame, pandas.DataFrame.
|
677
|
+
|
678
|
+
Args:
|
679
|
+
dataset: Union[snowflake.snowpark.DataFrame, pandas.DataFrame]
|
680
|
+
Snowpark or Pandas DataFrame.
|
681
|
+
output_cols_prefix: Prefix for the response columns
|
687
682
|
Returns:
|
688
683
|
Transformed dataset.
|
689
684
|
"""
|
690
|
-
self.
|
691
|
-
|
692
|
-
|
685
|
+
self._infer_input_output_cols(dataset)
|
686
|
+
super()._check_dataset_type(dataset)
|
687
|
+
model_trainer = ModelTrainerBuilder.build_fit_transform(
|
688
|
+
estimator=self._sklearn_object,
|
689
|
+
dataset=dataset,
|
690
|
+
input_cols=self.input_cols,
|
691
|
+
label_cols=self.label_cols,
|
692
|
+
sample_weight_col=self.sample_weight_col,
|
693
|
+
autogenerated=self._autogenerated,
|
694
|
+
subproject=_SUBPROJECT,
|
695
|
+
)
|
696
|
+
output_result, fitted_estimator = model_trainer.train_fit_transform(
|
697
|
+
drop_input_cols=self._drop_input_cols,
|
698
|
+
expected_output_cols_list=self.output_cols,
|
699
|
+
)
|
700
|
+
self._sklearn_object = fitted_estimator
|
701
|
+
self._is_fitted = True
|
702
|
+
return output_result
|
703
|
+
|
704
|
+
|
705
|
+
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
706
|
+
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
707
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
708
|
+
"""
|
709
|
+
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
710
|
+
# The following condition is introduced for kneighbors methods, and not used in other methods
|
711
|
+
if output_cols:
|
712
|
+
output_cols = [
|
713
|
+
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
714
|
+
for c in output_cols
|
715
|
+
]
|
716
|
+
elif getattr(self._sklearn_object, "classes_", None) is None:
|
717
|
+
output_cols = [output_cols_prefix]
|
718
|
+
elif self._sklearn_object is not None:
|
719
|
+
classes = self._sklearn_object.classes_
|
720
|
+
if isinstance(classes, numpy.ndarray):
|
721
|
+
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
722
|
+
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
723
|
+
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
724
|
+
output_cols = []
|
725
|
+
for i, cl in enumerate(classes):
|
726
|
+
# For binary classification, there is only one output column for each class
|
727
|
+
# ndarray as the two classes are complementary.
|
728
|
+
if len(cl) == 2:
|
729
|
+
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
730
|
+
else:
|
731
|
+
output_cols.extend([
|
732
|
+
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
733
|
+
])
|
734
|
+
else:
|
735
|
+
output_cols = []
|
736
|
+
|
737
|
+
# Make sure column names are valid snowflake identifiers.
|
738
|
+
assert output_cols is not None # Make MyPy happy
|
739
|
+
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
740
|
+
|
741
|
+
return rv
|
742
|
+
|
743
|
+
def _align_expected_output_names(
|
744
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
745
|
+
) -> List[str]:
|
746
|
+
# in case the inferred output column names dimension is different
|
747
|
+
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
748
|
+
output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
|
749
|
+
output_df_columns = list(output_df_pd.columns)
|
750
|
+
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
751
|
+
if self.sample_weight_col:
|
752
|
+
output_df_columns_set -= set(self.sample_weight_col)
|
753
|
+
# if the dimension of inferred output column names is correct; use it
|
754
|
+
if len(expected_output_cols_list) == len(output_df_columns_set):
|
755
|
+
return expected_output_cols_list
|
756
|
+
# otherwise, use the sklearn estimator's output
|
757
|
+
else:
|
758
|
+
return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
693
759
|
|
694
760
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
695
761
|
@telemetry.send_api_usage_telemetry(
|
@@ -721,24 +787,26 @@ class MLPRegressor(BaseTransformer):
|
|
721
787
|
# are specific to the type of dataset used.
|
722
788
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
723
789
|
|
790
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
791
|
+
|
724
792
|
if isinstance(dataset, DataFrame):
|
725
|
-
self.
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
793
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
794
|
+
self._deps = self._get_dependencies()
|
795
|
+
assert isinstance(
|
796
|
+
dataset._session, Session
|
797
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
730
798
|
transform_kwargs = dict(
|
731
799
|
session=dataset._session,
|
732
800
|
dependencies=self._deps,
|
733
|
-
drop_input_cols
|
801
|
+
drop_input_cols=self._drop_input_cols,
|
734
802
|
expected_output_cols_type="float",
|
735
803
|
)
|
804
|
+
expected_output_cols = self._align_expected_output_names(
|
805
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
806
|
+
)
|
736
807
|
|
737
808
|
elif isinstance(dataset, pd.DataFrame):
|
738
|
-
transform_kwargs = dict(
|
739
|
-
snowpark_input_cols = self._snowpark_cols,
|
740
|
-
drop_input_cols = self._drop_input_cols
|
741
|
-
)
|
809
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
742
810
|
|
743
811
|
transform_handlers = ModelTransformerBuilder.build(
|
744
812
|
dataset=dataset,
|
@@ -750,7 +818,7 @@ class MLPRegressor(BaseTransformer):
|
|
750
818
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
751
819
|
inference_method=inference_method,
|
752
820
|
input_cols=self.input_cols,
|
753
|
-
expected_output_cols=
|
821
|
+
expected_output_cols=expected_output_cols,
|
754
822
|
**transform_kwargs
|
755
823
|
)
|
756
824
|
return output_df
|
@@ -780,29 +848,30 @@ class MLPRegressor(BaseTransformer):
|
|
780
848
|
Output dataset with log probability of the sample for each class in the model.
|
781
849
|
"""
|
782
850
|
super()._check_dataset_type(dataset)
|
783
|
-
inference_method="predict_log_proba"
|
851
|
+
inference_method = "predict_log_proba"
|
852
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
784
853
|
|
785
854
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
786
855
|
# are specific to the type of dataset used.
|
787
856
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
788
857
|
|
789
858
|
if isinstance(dataset, DataFrame):
|
790
|
-
self.
|
791
|
-
|
792
|
-
|
793
|
-
|
794
|
-
|
859
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
860
|
+
self._deps = self._get_dependencies()
|
861
|
+
assert isinstance(
|
862
|
+
dataset._session, Session
|
863
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
795
864
|
transform_kwargs = dict(
|
796
865
|
session=dataset._session,
|
797
866
|
dependencies=self._deps,
|
798
|
-
drop_input_cols
|
867
|
+
drop_input_cols=self._drop_input_cols,
|
799
868
|
expected_output_cols_type="float",
|
800
869
|
)
|
870
|
+
expected_output_cols = self._align_expected_output_names(
|
871
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
872
|
+
)
|
801
873
|
elif isinstance(dataset, pd.DataFrame):
|
802
|
-
transform_kwargs = dict(
|
803
|
-
snowpark_input_cols = self._snowpark_cols,
|
804
|
-
drop_input_cols = self._drop_input_cols
|
805
|
-
)
|
874
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
806
875
|
|
807
876
|
transform_handlers = ModelTransformerBuilder.build(
|
808
877
|
dataset=dataset,
|
@@ -815,7 +884,7 @@ class MLPRegressor(BaseTransformer):
|
|
815
884
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
816
885
|
inference_method=inference_method,
|
817
886
|
input_cols=self.input_cols,
|
818
|
-
expected_output_cols=
|
887
|
+
expected_output_cols=expected_output_cols,
|
819
888
|
**transform_kwargs
|
820
889
|
)
|
821
890
|
return output_df
|
@@ -841,30 +910,32 @@ class MLPRegressor(BaseTransformer):
|
|
841
910
|
Output dataset with results of the decision function for the samples in input dataset.
|
842
911
|
"""
|
843
912
|
super()._check_dataset_type(dataset)
|
844
|
-
inference_method="decision_function"
|
913
|
+
inference_method = "decision_function"
|
845
914
|
|
846
915
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
847
916
|
# are specific to the type of dataset used.
|
848
917
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
849
918
|
|
919
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
920
|
+
|
850
921
|
if isinstance(dataset, DataFrame):
|
851
|
-
self.
|
852
|
-
|
853
|
-
|
854
|
-
|
855
|
-
|
922
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
923
|
+
self._deps = self._get_dependencies()
|
924
|
+
assert isinstance(
|
925
|
+
dataset._session, Session
|
926
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
856
927
|
transform_kwargs = dict(
|
857
928
|
session=dataset._session,
|
858
929
|
dependencies=self._deps,
|
859
|
-
drop_input_cols
|
930
|
+
drop_input_cols=self._drop_input_cols,
|
860
931
|
expected_output_cols_type="float",
|
861
932
|
)
|
933
|
+
expected_output_cols = self._align_expected_output_names(
|
934
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
935
|
+
)
|
862
936
|
|
863
937
|
elif isinstance(dataset, pd.DataFrame):
|
864
|
-
transform_kwargs = dict(
|
865
|
-
snowpark_input_cols = self._snowpark_cols,
|
866
|
-
drop_input_cols = self._drop_input_cols
|
867
|
-
)
|
938
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
868
939
|
|
869
940
|
transform_handlers = ModelTransformerBuilder.build(
|
870
941
|
dataset=dataset,
|
@@ -877,7 +948,7 @@ class MLPRegressor(BaseTransformer):
|
|
877
948
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
878
949
|
inference_method=inference_method,
|
879
950
|
input_cols=self.input_cols,
|
880
|
-
expected_output_cols=
|
951
|
+
expected_output_cols=expected_output_cols,
|
881
952
|
**transform_kwargs
|
882
953
|
)
|
883
954
|
return output_df
|
@@ -906,17 +977,17 @@ class MLPRegressor(BaseTransformer):
|
|
906
977
|
Output dataset with probability of the sample for each class in the model.
|
907
978
|
"""
|
908
979
|
super()._check_dataset_type(dataset)
|
909
|
-
inference_method="score_samples"
|
980
|
+
inference_method = "score_samples"
|
910
981
|
|
911
982
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
912
983
|
# are specific to the type of dataset used.
|
913
984
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
914
985
|
|
986
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
987
|
+
|
915
988
|
if isinstance(dataset, DataFrame):
|
916
|
-
self.
|
917
|
-
|
918
|
-
inference_method=inference_method,
|
919
|
-
)
|
989
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
990
|
+
self._deps = self._get_dependencies()
|
920
991
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
921
992
|
transform_kwargs = dict(
|
922
993
|
session=dataset._session,
|
@@ -924,6 +995,9 @@ class MLPRegressor(BaseTransformer):
|
|
924
995
|
drop_input_cols = self._drop_input_cols,
|
925
996
|
expected_output_cols_type="float",
|
926
997
|
)
|
998
|
+
expected_output_cols = self._align_expected_output_names(
|
999
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
1000
|
+
)
|
927
1001
|
|
928
1002
|
elif isinstance(dataset, pd.DataFrame):
|
929
1003
|
transform_kwargs = dict(
|
@@ -942,7 +1016,7 @@ class MLPRegressor(BaseTransformer):
|
|
942
1016
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
943
1017
|
inference_method=inference_method,
|
944
1018
|
input_cols=self.input_cols,
|
945
|
-
expected_output_cols=
|
1019
|
+
expected_output_cols=expected_output_cols,
|
946
1020
|
**transform_kwargs
|
947
1021
|
)
|
948
1022
|
return output_df
|
@@ -977,17 +1051,15 @@ class MLPRegressor(BaseTransformer):
|
|
977
1051
|
transform_kwargs: ScoreKwargsTypedDict = dict()
|
978
1052
|
|
979
1053
|
if isinstance(dataset, DataFrame):
|
980
|
-
self.
|
981
|
-
|
982
|
-
inference_method="score",
|
983
|
-
)
|
1054
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method="score")
|
1055
|
+
self._deps = self._get_dependencies()
|
984
1056
|
selected_cols = self._get_active_columns()
|
985
1057
|
if len(selected_cols) > 0:
|
986
1058
|
dataset = dataset.select(selected_cols)
|
987
1059
|
assert isinstance(dataset._session, Session) # keep mypy happy
|
988
1060
|
transform_kwargs = dict(
|
989
1061
|
session=dataset._session,
|
990
|
-
dependencies=
|
1062
|
+
dependencies=self._deps,
|
991
1063
|
score_sproc_imports=['sklearn'],
|
992
1064
|
)
|
993
1065
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -1052,11 +1124,8 @@ class MLPRegressor(BaseTransformer):
|
|
1052
1124
|
|
1053
1125
|
if isinstance(dataset, DataFrame):
|
1054
1126
|
|
1055
|
-
self.
|
1056
|
-
|
1057
|
-
inference_method=inference_method,
|
1058
|
-
|
1059
|
-
)
|
1127
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
1128
|
+
self._deps = self._get_dependencies()
|
1060
1129
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
1061
1130
|
transform_kwargs = dict(
|
1062
1131
|
session = dataset._session,
|
@@ -1089,50 +1158,84 @@ class MLPRegressor(BaseTransformer):
|
|
1089
1158
|
)
|
1090
1159
|
return output_df
|
1091
1160
|
|
1161
|
+
|
1162
|
+
|
1163
|
+
def to_sklearn(self) -> Any:
|
1164
|
+
"""Get sklearn.neural_network.MLPRegressor object.
|
1165
|
+
"""
|
1166
|
+
if self._sklearn_object is None:
|
1167
|
+
self._sklearn_object = self._create_sklearn_object()
|
1168
|
+
return self._sklearn_object
|
1169
|
+
|
1170
|
+
def to_xgboost(self) -> Any:
|
1171
|
+
raise exceptions.SnowflakeMLException(
|
1172
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1173
|
+
original_exception=AttributeError(
|
1174
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1175
|
+
"to_xgboost()",
|
1176
|
+
"to_sklearn()"
|
1177
|
+
)
|
1178
|
+
),
|
1179
|
+
)
|
1180
|
+
|
1181
|
+
def to_lightgbm(self) -> Any:
|
1182
|
+
raise exceptions.SnowflakeMLException(
|
1183
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1184
|
+
original_exception=AttributeError(
|
1185
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1186
|
+
"to_lightgbm()",
|
1187
|
+
"to_sklearn()"
|
1188
|
+
)
|
1189
|
+
),
|
1190
|
+
)
|
1191
|
+
|
1192
|
+
def _get_dependencies(self) -> List[str]:
|
1193
|
+
return self._deps
|
1194
|
+
|
1092
1195
|
|
1093
|
-
def
|
1196
|
+
def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
1094
1197
|
self._model_signature_dict = dict()
|
1095
1198
|
|
1096
1199
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1097
1200
|
|
1098
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input"))
|
1201
|
+
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1099
1202
|
outputs: List[BaseFeatureSpec] = []
|
1100
1203
|
if hasattr(self, "predict"):
|
1101
1204
|
# keep mypy happy
|
1102
|
-
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1205
|
+
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1103
1206
|
# For classifier, the type of predict is the same as the type of label
|
1104
|
-
if self._sklearn_object._estimator_type ==
|
1105
|
-
|
1207
|
+
if self._sklearn_object._estimator_type == "classifier":
|
1208
|
+
# label columns is the desired type for output
|
1106
1209
|
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1107
1210
|
# rename the output columns
|
1108
1211
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1109
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1110
|
-
|
1111
|
-
|
1212
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1213
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1214
|
+
)
|
1112
1215
|
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
1113
1216
|
# For outlier models, returns -1 for outliers and 1 for inliers.
|
1114
|
-
# Clusterer returns int64 cluster labels.
|
1217
|
+
# Clusterer returns int64 cluster labels.
|
1115
1218
|
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
1116
1219
|
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
1117
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1118
|
-
|
1119
|
-
|
1120
|
-
|
1220
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1221
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1222
|
+
)
|
1223
|
+
|
1121
1224
|
# For regressor, the type of predict is float64
|
1122
|
-
elif self._sklearn_object._estimator_type ==
|
1225
|
+
elif self._sklearn_object._estimator_type == "regressor":
|
1123
1226
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1124
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1125
|
-
|
1126
|
-
|
1127
|
-
|
1227
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1228
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1229
|
+
)
|
1230
|
+
|
1128
1231
|
for prob_func in PROB_FUNCTIONS:
|
1129
1232
|
if hasattr(self, prob_func):
|
1130
1233
|
output_cols_prefix: str = f"{prob_func}_"
|
1131
1234
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1132
1235
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1133
|
-
self._model_signature_dict[prob_func] = ModelSignature(
|
1134
|
-
|
1135
|
-
|
1236
|
+
self._model_signature_dict[prob_func] = ModelSignature(
|
1237
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1238
|
+
)
|
1136
1239
|
|
1137
1240
|
# Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
|
1138
1241
|
items = list(self._model_signature_dict.items())
|
@@ -1145,10 +1248,10 @@ class MLPRegressor(BaseTransformer):
|
|
1145
1248
|
"""Returns model signature of current class.
|
1146
1249
|
|
1147
1250
|
Raises:
|
1148
|
-
|
1251
|
+
SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
|
1149
1252
|
|
1150
1253
|
Returns:
|
1151
|
-
Dict
|
1254
|
+
Dict with each method and its input output signature
|
1152
1255
|
"""
|
1153
1256
|
if self._model_signature_dict is None:
|
1154
1257
|
raise exceptions.SnowflakeMLException(
|
@@ -1156,35 +1259,3 @@ class MLPRegressor(BaseTransformer):
|
|
1156
1259
|
original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
|
1157
1260
|
)
|
1158
1261
|
return self._model_signature_dict
|
1159
|
-
|
1160
|
-
def to_sklearn(self) -> Any:
|
1161
|
-
"""Get sklearn.neural_network.MLPRegressor object.
|
1162
|
-
"""
|
1163
|
-
if self._sklearn_object is None:
|
1164
|
-
self._sklearn_object = self._create_sklearn_object()
|
1165
|
-
return self._sklearn_object
|
1166
|
-
|
1167
|
-
def to_xgboost(self) -> Any:
|
1168
|
-
raise exceptions.SnowflakeMLException(
|
1169
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1170
|
-
original_exception=AttributeError(
|
1171
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1172
|
-
"to_xgboost()",
|
1173
|
-
"to_sklearn()"
|
1174
|
-
)
|
1175
|
-
),
|
1176
|
-
)
|
1177
|
-
|
1178
|
-
def to_lightgbm(self) -> Any:
|
1179
|
-
raise exceptions.SnowflakeMLException(
|
1180
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1181
|
-
original_exception=AttributeError(
|
1182
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1183
|
-
"to_lightgbm()",
|
1184
|
-
"to_sklearn()"
|
1185
|
-
)
|
1186
|
-
),
|
1187
|
-
)
|
1188
|
-
|
1189
|
-
def _get_dependencies(self) -> List[str]:
|
1190
|
-
return self._deps
|