snowflake-ml-python 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +11 -1
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/feature_store/feature_store.py +151 -78
- snowflake/ml/feature_store/feature_view.py +12 -24
- snowflake/ml/fileset/sfcfs.py +56 -50
- snowflake/ml/fileset/stage_fs.py +48 -13
- snowflake/ml/model/_client/model/model_version_impl.py +2 -50
- snowflake/ml/model/_client/ops/model_ops.py +78 -29
- snowflake/ml/model/_client/sql/model.py +23 -2
- snowflake/ml/model/_client/sql/model_version.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +19 -54
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +8 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +36 -6
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -2
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +5 -5
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +195 -123
- snowflake/ml/modeling/cluster/affinity_propagation.py +195 -123
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +195 -123
- snowflake/ml/modeling/cluster/birch.py +195 -123
- snowflake/ml/modeling/cluster/bisecting_k_means.py +195 -123
- snowflake/ml/modeling/cluster/dbscan.py +195 -123
- snowflake/ml/modeling/cluster/feature_agglomeration.py +195 -123
- snowflake/ml/modeling/cluster/k_means.py +195 -123
- snowflake/ml/modeling/cluster/mean_shift.py +195 -123
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +195 -123
- snowflake/ml/modeling/cluster/optics.py +195 -123
- snowflake/ml/modeling/cluster/spectral_biclustering.py +195 -123
- snowflake/ml/modeling/cluster/spectral_clustering.py +195 -123
- snowflake/ml/modeling/cluster/spectral_coclustering.py +195 -123
- snowflake/ml/modeling/compose/column_transformer.py +195 -123
- snowflake/ml/modeling/compose/transformed_target_regressor.py +195 -123
- snowflake/ml/modeling/covariance/elliptic_envelope.py +195 -123
- snowflake/ml/modeling/covariance/empirical_covariance.py +195 -123
- snowflake/ml/modeling/covariance/graphical_lasso.py +195 -123
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +195 -123
- snowflake/ml/modeling/covariance/ledoit_wolf.py +195 -123
- snowflake/ml/modeling/covariance/min_cov_det.py +195 -123
- snowflake/ml/modeling/covariance/oas.py +195 -123
- snowflake/ml/modeling/covariance/shrunk_covariance.py +195 -123
- snowflake/ml/modeling/decomposition/dictionary_learning.py +195 -123
- snowflake/ml/modeling/decomposition/factor_analysis.py +195 -123
- snowflake/ml/modeling/decomposition/fast_ica.py +195 -123
- snowflake/ml/modeling/decomposition/incremental_pca.py +195 -123
- snowflake/ml/modeling/decomposition/kernel_pca.py +195 -123
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +195 -123
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +195 -123
- snowflake/ml/modeling/decomposition/pca.py +195 -123
- snowflake/ml/modeling/decomposition/sparse_pca.py +195 -123
- snowflake/ml/modeling/decomposition/truncated_svd.py +195 -123
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +195 -123
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +195 -123
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/bagging_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/bagging_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/isolation_forest.py +195 -123
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/stacking_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/voting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/voting_regressor.py +195 -123
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fdr.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fpr.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fwe.py +195 -123
- snowflake/ml/modeling/feature_selection/select_k_best.py +195 -123
- snowflake/ml/modeling/feature_selection/select_percentile.py +195 -123
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +195 -123
- snowflake/ml/modeling/feature_selection/variance_threshold.py +195 -123
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +9 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +195 -123
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +195 -123
- snowflake/ml/modeling/impute/iterative_imputer.py +195 -123
- snowflake/ml/modeling/impute/knn_imputer.py +195 -123
- snowflake/ml/modeling/impute/missing_indicator.py +195 -123
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +195 -123
- snowflake/ml/modeling/kernel_approximation/nystroem.py +195 -123
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +195 -123
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +195 -123
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +195 -123
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +195 -123
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +195 -123
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ard_regression.py +195 -123
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +195 -123
- snowflake/ml/modeling/linear_model/elastic_net.py +195 -123
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +195 -123
- snowflake/ml/modeling/linear_model/gamma_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/huber_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/lars.py +195 -123
- snowflake/ml/modeling/linear_model/lars_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +195 -123
- snowflake/ml/modeling/linear_model/linear_regression.py +195 -123
- snowflake/ml/modeling/linear_model/logistic_regression.py +195 -123
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +195 -123
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +195 -123
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/perceptron.py +195 -123
- snowflake/ml/modeling/linear_model/poisson_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ransac_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ridge.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_cv.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +195 -123
- snowflake/ml/modeling/manifold/isomap.py +195 -123
- snowflake/ml/modeling/manifold/mds.py +195 -123
- snowflake/ml/modeling/manifold/spectral_embedding.py +195 -123
- snowflake/ml/modeling/manifold/tsne.py +195 -123
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +195 -123
- snowflake/ml/modeling/mixture/gaussian_mixture.py +195 -123
- snowflake/ml/modeling/model_selection/grid_search_cv.py +42 -18
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +42 -18
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +195 -123
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +195 -123
- snowflake/ml/modeling/multiclass/output_code_classifier.py +195 -123
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/complement_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +195 -123
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +195 -123
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +195 -123
- snowflake/ml/modeling/neighbors/kernel_density.py +195 -123
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +195 -123
- snowflake/ml/modeling/neighbors/nearest_centroid.py +195 -123
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +195 -123
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +195 -123
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +195 -123
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +195 -123
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +195 -123
- snowflake/ml/modeling/neural_network/mlp_classifier.py +195 -123
- snowflake/ml/modeling/neural_network/mlp_regressor.py +195 -123
- snowflake/ml/modeling/pipeline/pipeline.py +4 -4
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +195 -123
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +195 -123
- snowflake/ml/modeling/semi_supervised/label_spreading.py +195 -123
- snowflake/ml/modeling/svm/linear_svc.py +195 -123
- snowflake/ml/modeling/svm/linear_svr.py +195 -123
- snowflake/ml/modeling/svm/nu_svc.py +195 -123
- snowflake/ml/modeling/svm/nu_svr.py +195 -123
- snowflake/ml/modeling/svm/svc.py +195 -123
- snowflake/ml/modeling/svm/svr.py +195 -123
- snowflake/ml/modeling/tree/decision_tree_classifier.py +195 -123
- snowflake/ml/modeling/tree/decision_tree_regressor.py +195 -123
- snowflake/ml/modeling/tree/extra_tree_classifier.py +195 -123
- snowflake/ml/modeling/tree/extra_tree_regressor.py +195 -123
- snowflake/ml/modeling/xgboost/xgb_classifier.py +195 -123
- snowflake/ml/modeling/xgboost/xgb_regressor.py +195 -123
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +195 -123
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +195 -123
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/METADATA +68 -57
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/RECORD +202 -200
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
|
|
33
33
|
BatchInferenceKwargsTypedDict,
|
34
34
|
ScoreKwargsTypedDict
|
35
35
|
)
|
36
|
+
from snowflake.ml.model._signatures import utils as model_signature_utils
|
37
|
+
from snowflake.ml.model.model_signature import (
|
38
|
+
BaseFeatureSpec,
|
39
|
+
DataType,
|
40
|
+
FeatureSpec,
|
41
|
+
ModelSignature,
|
42
|
+
_infer_signature,
|
43
|
+
_rename_signature_with_snowflake_identifiers,
|
44
|
+
)
|
36
45
|
|
37
46
|
from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
|
38
47
|
|
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
|
|
43
52
|
validate_sklearn_args,
|
44
53
|
)
|
45
54
|
|
46
|
-
from snowflake.ml.model.model_signature import (
|
47
|
-
DataType,
|
48
|
-
FeatureSpec,
|
49
|
-
ModelSignature,
|
50
|
-
_infer_signature,
|
51
|
-
_rename_signature_with_snowflake_identifiers,
|
52
|
-
BaseFeatureSpec,
|
53
|
-
)
|
54
|
-
from snowflake.ml.model._signatures import utils as model_signature_utils
|
55
|
-
|
56
55
|
_PROJECT = "ModelDevelopment"
|
57
56
|
# Derive subproject from module name by removing "sklearn"
|
58
57
|
# and converting module name from underscore to CamelCase
|
@@ -383,12 +382,7 @@ class MLPRegressor(BaseTransformer):
|
|
383
382
|
)
|
384
383
|
return selected_cols
|
385
384
|
|
386
|
-
|
387
|
-
project=_PROJECT,
|
388
|
-
subproject=_SUBPROJECT,
|
389
|
-
custom_tags=dict([("autogen", True)]),
|
390
|
-
)
|
391
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "MLPRegressor":
|
385
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "MLPRegressor":
|
392
386
|
"""Fit the model to data matrix X and target(s) y
|
393
387
|
For more details on this function, see [sklearn.neural_network.MLPRegressor.fit]
|
394
388
|
(https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPRegressor.html#sklearn.neural_network.MLPRegressor.fit)
|
@@ -415,12 +409,14 @@ class MLPRegressor(BaseTransformer):
|
|
415
409
|
|
416
410
|
self._snowpark_cols = dataset.select(self.input_cols).columns
|
417
411
|
|
418
|
-
|
412
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
419
413
|
if SNOWML_SPROC_ENV in os.environ:
|
420
414
|
statement_params = telemetry.get_function_usage_statement_params(
|
421
415
|
project=_PROJECT,
|
422
416
|
subproject=_SUBPROJECT,
|
423
|
-
function_name=telemetry.get_statement_params_full_func_name(
|
417
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
418
|
+
inspect.currentframe(), MLPRegressor.__class__.__name__
|
419
|
+
),
|
424
420
|
api_calls=[Session.call],
|
425
421
|
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
426
422
|
)
|
@@ -441,7 +437,7 @@ class MLPRegressor(BaseTransformer):
|
|
441
437
|
)
|
442
438
|
self._sklearn_object = model_trainer.train()
|
443
439
|
self._is_fitted = True
|
444
|
-
self.
|
440
|
+
self._generate_model_signatures(dataset)
|
445
441
|
return self
|
446
442
|
|
447
443
|
def _batch_inference_validate_snowpark(
|
@@ -517,7 +513,9 @@ class MLPRegressor(BaseTransformer):
|
|
517
513
|
# when it is classifier, infer the datatype from label columns
|
518
514
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
519
515
|
# Batch inference takes a single expected output column type. Use the first columns type for now.
|
520
|
-
label_cols_signatures = [
|
516
|
+
label_cols_signatures = [
|
517
|
+
row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
|
518
|
+
]
|
521
519
|
if len(label_cols_signatures) == 0:
|
522
520
|
error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
|
523
521
|
raise exceptions.SnowflakeMLException(
|
@@ -525,25 +523,22 @@ class MLPRegressor(BaseTransformer):
|
|
525
523
|
original_exception=ValueError(error_str),
|
526
524
|
)
|
527
525
|
|
528
|
-
expected_type_inferred = convert_sp_to_sf_type(
|
529
|
-
label_cols_signatures[0].as_snowpark_type()
|
530
|
-
)
|
526
|
+
expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
|
531
527
|
|
532
528
|
self._deps = self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
533
|
-
assert isinstance(
|
529
|
+
assert isinstance(
|
530
|
+
dataset._session, Session
|
531
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
534
532
|
|
535
533
|
transform_kwargs = dict(
|
536
|
-
session
|
537
|
-
dependencies
|
538
|
-
drop_input_cols
|
539
|
-
expected_output_cols_type
|
534
|
+
session=dataset._session,
|
535
|
+
dependencies=self._deps,
|
536
|
+
drop_input_cols=self._drop_input_cols,
|
537
|
+
expected_output_cols_type=expected_type_inferred,
|
540
538
|
)
|
541
539
|
|
542
540
|
elif isinstance(dataset, pd.DataFrame):
|
543
|
-
transform_kwargs = dict(
|
544
|
-
snowpark_input_cols = self._snowpark_cols,
|
545
|
-
drop_input_cols = self._drop_input_cols
|
546
|
-
)
|
541
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
547
542
|
|
548
543
|
transform_handlers = ModelTransformerBuilder.build(
|
549
544
|
dataset=dataset,
|
@@ -583,7 +578,7 @@ class MLPRegressor(BaseTransformer):
|
|
583
578
|
Transformed dataset.
|
584
579
|
"""
|
585
580
|
super()._check_dataset_type(dataset)
|
586
|
-
inference_method="transform"
|
581
|
+
inference_method = "transform"
|
587
582
|
|
588
583
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
589
584
|
# are specific to the type of dataset used.
|
@@ -620,17 +615,14 @@ class MLPRegressor(BaseTransformer):
|
|
620
615
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
621
616
|
|
622
617
|
transform_kwargs = dict(
|
623
|
-
session
|
624
|
-
dependencies
|
625
|
-
drop_input_cols
|
626
|
-
expected_output_cols_type
|
618
|
+
session=dataset._session,
|
619
|
+
dependencies=self._deps,
|
620
|
+
drop_input_cols=self._drop_input_cols,
|
621
|
+
expected_output_cols_type=expected_dtype,
|
627
622
|
)
|
628
623
|
|
629
624
|
elif isinstance(dataset, pd.DataFrame):
|
630
|
-
transform_kwargs = dict(
|
631
|
-
snowpark_input_cols = self._snowpark_cols,
|
632
|
-
drop_input_cols = self._drop_input_cols
|
633
|
-
)
|
625
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
634
626
|
|
635
627
|
transform_handlers = ModelTransformerBuilder.build(
|
636
628
|
dataset=dataset,
|
@@ -649,7 +641,11 @@ class MLPRegressor(BaseTransformer):
|
|
649
641
|
return output_df
|
650
642
|
|
651
643
|
@available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
|
652
|
-
def fit_predict(
|
644
|
+
def fit_predict(
|
645
|
+
self,
|
646
|
+
dataset: Union[DataFrame, pd.DataFrame],
|
647
|
+
output_cols_prefix: str = "fit_predict_",
|
648
|
+
) -> Union[DataFrame, pd.DataFrame]:
|
653
649
|
""" Method not supported for this class.
|
654
650
|
|
655
651
|
|
@@ -674,7 +670,9 @@ class MLPRegressor(BaseTransformer):
|
|
674
670
|
)
|
675
671
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
676
672
|
drop_input_cols=self._drop_input_cols,
|
677
|
-
expected_output_cols_list=
|
673
|
+
expected_output_cols_list=(
|
674
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
675
|
+
),
|
678
676
|
)
|
679
677
|
self._sklearn_object = fitted_estimator
|
680
678
|
self._is_fitted = True
|
@@ -691,6 +689,62 @@ class MLPRegressor(BaseTransformer):
|
|
691
689
|
assert self._sklearn_object is not None
|
692
690
|
return self._sklearn_object.embedding_
|
693
691
|
|
692
|
+
|
693
|
+
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
694
|
+
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
695
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
696
|
+
"""
|
697
|
+
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
698
|
+
# The following condition is introduced for kneighbors methods, and not used in other methods
|
699
|
+
if output_cols:
|
700
|
+
output_cols = [
|
701
|
+
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
702
|
+
for c in output_cols
|
703
|
+
]
|
704
|
+
elif getattr(self._sklearn_object, "classes_", None) is None:
|
705
|
+
output_cols = [output_cols_prefix]
|
706
|
+
elif self._sklearn_object is not None:
|
707
|
+
classes = self._sklearn_object.classes_
|
708
|
+
if isinstance(classes, numpy.ndarray):
|
709
|
+
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
710
|
+
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
711
|
+
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
712
|
+
output_cols = []
|
713
|
+
for i, cl in enumerate(classes):
|
714
|
+
# For binary classification, there is only one output column for each class
|
715
|
+
# ndarray as the two classes are complementary.
|
716
|
+
if len(cl) == 2:
|
717
|
+
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
718
|
+
else:
|
719
|
+
output_cols.extend([
|
720
|
+
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
721
|
+
])
|
722
|
+
else:
|
723
|
+
output_cols = []
|
724
|
+
|
725
|
+
# Make sure column names are valid snowflake identifiers.
|
726
|
+
assert output_cols is not None # Make MyPy happy
|
727
|
+
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
728
|
+
|
729
|
+
return rv
|
730
|
+
|
731
|
+
def _align_expected_output_names(
|
732
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
733
|
+
) -> List[str]:
|
734
|
+
# in case the inferred output column names dimension is different
|
735
|
+
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
736
|
+
output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
|
737
|
+
output_df_columns = list(output_df_pd.columns)
|
738
|
+
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
739
|
+
if self.sample_weight_col:
|
740
|
+
output_df_columns_set -= set(self.sample_weight_col)
|
741
|
+
# if the dimension of inferred output column names is correct; use it
|
742
|
+
if len(expected_output_cols_list) == len(output_df_columns_set):
|
743
|
+
return expected_output_cols_list
|
744
|
+
# otherwise, use the sklearn estimator's output
|
745
|
+
else:
|
746
|
+
return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
747
|
+
|
694
748
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
695
749
|
@telemetry.send_api_usage_telemetry(
|
696
750
|
project=_PROJECT,
|
@@ -721,24 +775,28 @@ class MLPRegressor(BaseTransformer):
|
|
721
775
|
# are specific to the type of dataset used.
|
722
776
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
723
777
|
|
778
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
779
|
+
|
724
780
|
if isinstance(dataset, DataFrame):
|
725
781
|
self._deps = self._batch_inference_validate_snowpark(
|
726
782
|
dataset=dataset,
|
727
783
|
inference_method=inference_method,
|
728
784
|
)
|
729
|
-
assert isinstance(
|
785
|
+
assert isinstance(
|
786
|
+
dataset._session, Session
|
787
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
730
788
|
transform_kwargs = dict(
|
731
789
|
session=dataset._session,
|
732
790
|
dependencies=self._deps,
|
733
|
-
drop_input_cols
|
791
|
+
drop_input_cols=self._drop_input_cols,
|
734
792
|
expected_output_cols_type="float",
|
735
793
|
)
|
794
|
+
expected_output_cols = self._align_expected_output_names(
|
795
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
796
|
+
)
|
736
797
|
|
737
798
|
elif isinstance(dataset, pd.DataFrame):
|
738
|
-
transform_kwargs = dict(
|
739
|
-
snowpark_input_cols = self._snowpark_cols,
|
740
|
-
drop_input_cols = self._drop_input_cols
|
741
|
-
)
|
799
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
742
800
|
|
743
801
|
transform_handlers = ModelTransformerBuilder.build(
|
744
802
|
dataset=dataset,
|
@@ -750,7 +808,7 @@ class MLPRegressor(BaseTransformer):
|
|
750
808
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
751
809
|
inference_method=inference_method,
|
752
810
|
input_cols=self.input_cols,
|
753
|
-
expected_output_cols=
|
811
|
+
expected_output_cols=expected_output_cols,
|
754
812
|
**transform_kwargs
|
755
813
|
)
|
756
814
|
return output_df
|
@@ -780,7 +838,8 @@ class MLPRegressor(BaseTransformer):
|
|
780
838
|
Output dataset with log probability of the sample for each class in the model.
|
781
839
|
"""
|
782
840
|
super()._check_dataset_type(dataset)
|
783
|
-
inference_method="predict_log_proba"
|
841
|
+
inference_method = "predict_log_proba"
|
842
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
784
843
|
|
785
844
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
786
845
|
# are specific to the type of dataset used.
|
@@ -791,18 +850,20 @@ class MLPRegressor(BaseTransformer):
|
|
791
850
|
dataset=dataset,
|
792
851
|
inference_method=inference_method,
|
793
852
|
)
|
794
|
-
assert isinstance(
|
853
|
+
assert isinstance(
|
854
|
+
dataset._session, Session
|
855
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
795
856
|
transform_kwargs = dict(
|
796
857
|
session=dataset._session,
|
797
858
|
dependencies=self._deps,
|
798
|
-
drop_input_cols
|
859
|
+
drop_input_cols=self._drop_input_cols,
|
799
860
|
expected_output_cols_type="float",
|
800
861
|
)
|
862
|
+
expected_output_cols = self._align_expected_output_names(
|
863
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
864
|
+
)
|
801
865
|
elif isinstance(dataset, pd.DataFrame):
|
802
|
-
transform_kwargs = dict(
|
803
|
-
snowpark_input_cols = self._snowpark_cols,
|
804
|
-
drop_input_cols = self._drop_input_cols
|
805
|
-
)
|
866
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
806
867
|
|
807
868
|
transform_handlers = ModelTransformerBuilder.build(
|
808
869
|
dataset=dataset,
|
@@ -815,7 +876,7 @@ class MLPRegressor(BaseTransformer):
|
|
815
876
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
816
877
|
inference_method=inference_method,
|
817
878
|
input_cols=self.input_cols,
|
818
|
-
expected_output_cols=
|
879
|
+
expected_output_cols=expected_output_cols,
|
819
880
|
**transform_kwargs
|
820
881
|
)
|
821
882
|
return output_df
|
@@ -841,30 +902,34 @@ class MLPRegressor(BaseTransformer):
|
|
841
902
|
Output dataset with results of the decision function for the samples in input dataset.
|
842
903
|
"""
|
843
904
|
super()._check_dataset_type(dataset)
|
844
|
-
inference_method="decision_function"
|
905
|
+
inference_method = "decision_function"
|
845
906
|
|
846
907
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
847
908
|
# are specific to the type of dataset used.
|
848
909
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
849
910
|
|
911
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
912
|
+
|
850
913
|
if isinstance(dataset, DataFrame):
|
851
914
|
self._deps = self._batch_inference_validate_snowpark(
|
852
915
|
dataset=dataset,
|
853
916
|
inference_method=inference_method,
|
854
917
|
)
|
855
|
-
assert isinstance(
|
918
|
+
assert isinstance(
|
919
|
+
dataset._session, Session
|
920
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
856
921
|
transform_kwargs = dict(
|
857
922
|
session=dataset._session,
|
858
923
|
dependencies=self._deps,
|
859
|
-
drop_input_cols
|
924
|
+
drop_input_cols=self._drop_input_cols,
|
860
925
|
expected_output_cols_type="float",
|
861
926
|
)
|
927
|
+
expected_output_cols = self._align_expected_output_names(
|
928
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
929
|
+
)
|
862
930
|
|
863
931
|
elif isinstance(dataset, pd.DataFrame):
|
864
|
-
transform_kwargs = dict(
|
865
|
-
snowpark_input_cols = self._snowpark_cols,
|
866
|
-
drop_input_cols = self._drop_input_cols
|
867
|
-
)
|
932
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
868
933
|
|
869
934
|
transform_handlers = ModelTransformerBuilder.build(
|
870
935
|
dataset=dataset,
|
@@ -877,7 +942,7 @@ class MLPRegressor(BaseTransformer):
|
|
877
942
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
878
943
|
inference_method=inference_method,
|
879
944
|
input_cols=self.input_cols,
|
880
|
-
expected_output_cols=
|
945
|
+
expected_output_cols=expected_output_cols,
|
881
946
|
**transform_kwargs
|
882
947
|
)
|
883
948
|
return output_df
|
@@ -906,12 +971,14 @@ class MLPRegressor(BaseTransformer):
|
|
906
971
|
Output dataset with probability of the sample for each class in the model.
|
907
972
|
"""
|
908
973
|
super()._check_dataset_type(dataset)
|
909
|
-
inference_method="score_samples"
|
974
|
+
inference_method = "score_samples"
|
910
975
|
|
911
976
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
912
977
|
# are specific to the type of dataset used.
|
913
978
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
914
979
|
|
980
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
981
|
+
|
915
982
|
if isinstance(dataset, DataFrame):
|
916
983
|
self._deps = self._batch_inference_validate_snowpark(
|
917
984
|
dataset=dataset,
|
@@ -924,6 +991,9 @@ class MLPRegressor(BaseTransformer):
|
|
924
991
|
drop_input_cols = self._drop_input_cols,
|
925
992
|
expected_output_cols_type="float",
|
926
993
|
)
|
994
|
+
expected_output_cols = self._align_expected_output_names(
|
995
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
996
|
+
)
|
927
997
|
|
928
998
|
elif isinstance(dataset, pd.DataFrame):
|
929
999
|
transform_kwargs = dict(
|
@@ -942,7 +1012,7 @@ class MLPRegressor(BaseTransformer):
|
|
942
1012
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
943
1013
|
inference_method=inference_method,
|
944
1014
|
input_cols=self.input_cols,
|
945
|
-
expected_output_cols=
|
1015
|
+
expected_output_cols=expected_output_cols,
|
946
1016
|
**transform_kwargs
|
947
1017
|
)
|
948
1018
|
return output_df
|
@@ -1089,50 +1159,84 @@ class MLPRegressor(BaseTransformer):
|
|
1089
1159
|
)
|
1090
1160
|
return output_df
|
1091
1161
|
|
1162
|
+
|
1163
|
+
|
1164
|
+
def to_sklearn(self) -> Any:
|
1165
|
+
"""Get sklearn.neural_network.MLPRegressor object.
|
1166
|
+
"""
|
1167
|
+
if self._sklearn_object is None:
|
1168
|
+
self._sklearn_object = self._create_sklearn_object()
|
1169
|
+
return self._sklearn_object
|
1170
|
+
|
1171
|
+
def to_xgboost(self) -> Any:
|
1172
|
+
raise exceptions.SnowflakeMLException(
|
1173
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1174
|
+
original_exception=AttributeError(
|
1175
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1176
|
+
"to_xgboost()",
|
1177
|
+
"to_sklearn()"
|
1178
|
+
)
|
1179
|
+
),
|
1180
|
+
)
|
1181
|
+
|
1182
|
+
def to_lightgbm(self) -> Any:
|
1183
|
+
raise exceptions.SnowflakeMLException(
|
1184
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1185
|
+
original_exception=AttributeError(
|
1186
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1187
|
+
"to_lightgbm()",
|
1188
|
+
"to_sklearn()"
|
1189
|
+
)
|
1190
|
+
),
|
1191
|
+
)
|
1092
1192
|
|
1093
|
-
def
|
1193
|
+
def _get_dependencies(self) -> List[str]:
|
1194
|
+
return self._deps
|
1195
|
+
|
1196
|
+
|
1197
|
+
def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
1094
1198
|
self._model_signature_dict = dict()
|
1095
1199
|
|
1096
1200
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1097
1201
|
|
1098
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input"))
|
1202
|
+
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1099
1203
|
outputs: List[BaseFeatureSpec] = []
|
1100
1204
|
if hasattr(self, "predict"):
|
1101
1205
|
# keep mypy happy
|
1102
|
-
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1206
|
+
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1103
1207
|
# For classifier, the type of predict is the same as the type of label
|
1104
|
-
if self._sklearn_object._estimator_type ==
|
1105
|
-
|
1208
|
+
if self._sklearn_object._estimator_type == "classifier":
|
1209
|
+
# label columns is the desired type for output
|
1106
1210
|
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1107
1211
|
# rename the output columns
|
1108
1212
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1109
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1110
|
-
|
1111
|
-
|
1213
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1214
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1215
|
+
)
|
1112
1216
|
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
1113
1217
|
# For outlier models, returns -1 for outliers and 1 for inliers.
|
1114
|
-
# Clusterer returns int64 cluster labels.
|
1218
|
+
# Clusterer returns int64 cluster labels.
|
1115
1219
|
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
1116
1220
|
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
1117
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1118
|
-
|
1119
|
-
|
1120
|
-
|
1221
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1222
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1223
|
+
)
|
1224
|
+
|
1121
1225
|
# For regressor, the type of predict is float64
|
1122
|
-
elif self._sklearn_object._estimator_type ==
|
1226
|
+
elif self._sklearn_object._estimator_type == "regressor":
|
1123
1227
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1124
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1125
|
-
|
1126
|
-
|
1127
|
-
|
1228
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1229
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1230
|
+
)
|
1231
|
+
|
1128
1232
|
for prob_func in PROB_FUNCTIONS:
|
1129
1233
|
if hasattr(self, prob_func):
|
1130
1234
|
output_cols_prefix: str = f"{prob_func}_"
|
1131
1235
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1132
1236
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1133
|
-
self._model_signature_dict[prob_func] = ModelSignature(
|
1134
|
-
|
1135
|
-
|
1237
|
+
self._model_signature_dict[prob_func] = ModelSignature(
|
1238
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1239
|
+
)
|
1136
1240
|
|
1137
1241
|
# Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
|
1138
1242
|
items = list(self._model_signature_dict.items())
|
@@ -1145,10 +1249,10 @@ class MLPRegressor(BaseTransformer):
|
|
1145
1249
|
"""Returns model signature of current class.
|
1146
1250
|
|
1147
1251
|
Raises:
|
1148
|
-
|
1252
|
+
SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
|
1149
1253
|
|
1150
1254
|
Returns:
|
1151
|
-
Dict
|
1255
|
+
Dict with each method and its input output signature
|
1152
1256
|
"""
|
1153
1257
|
if self._model_signature_dict is None:
|
1154
1258
|
raise exceptions.SnowflakeMLException(
|
@@ -1156,35 +1260,3 @@ class MLPRegressor(BaseTransformer):
|
|
1156
1260
|
original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
|
1157
1261
|
)
|
1158
1262
|
return self._model_signature_dict
|
1159
|
-
|
1160
|
-
def to_sklearn(self) -> Any:
|
1161
|
-
"""Get sklearn.neural_network.MLPRegressor object.
|
1162
|
-
"""
|
1163
|
-
if self._sklearn_object is None:
|
1164
|
-
self._sklearn_object = self._create_sklearn_object()
|
1165
|
-
return self._sklearn_object
|
1166
|
-
|
1167
|
-
def to_xgboost(self) -> Any:
|
1168
|
-
raise exceptions.SnowflakeMLException(
|
1169
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1170
|
-
original_exception=AttributeError(
|
1171
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1172
|
-
"to_xgboost()",
|
1173
|
-
"to_sklearn()"
|
1174
|
-
)
|
1175
|
-
),
|
1176
|
-
)
|
1177
|
-
|
1178
|
-
def to_lightgbm(self) -> Any:
|
1179
|
-
raise exceptions.SnowflakeMLException(
|
1180
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1181
|
-
original_exception=AttributeError(
|
1182
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1183
|
-
"to_lightgbm()",
|
1184
|
-
"to_sklearn()"
|
1185
|
-
)
|
1186
|
-
),
|
1187
|
-
)
|
1188
|
-
|
1189
|
-
def _get_dependencies(self) -> List[str]:
|
1190
|
-
return self._deps
|
@@ -254,7 +254,7 @@ class Pipeline(base.BaseTransformer):
|
|
254
254
|
step_name=estimator[0], all_cols=all_cols, input_cols=estimator[1].get_input_cols()
|
255
255
|
)
|
256
256
|
|
257
|
-
self.
|
257
|
+
self._generate_model_signatures(dataset=dataset)
|
258
258
|
self._is_fitted = True
|
259
259
|
return self
|
260
260
|
|
@@ -328,7 +328,7 @@ class Pipeline(base.BaseTransformer):
|
|
328
328
|
res = estimator[1].fit(transformed_dataset).transform(transformed_dataset)
|
329
329
|
return res
|
330
330
|
|
331
|
-
self.
|
331
|
+
self._generate_model_signatures(dataset=dataset)
|
332
332
|
self._is_fitted = True
|
333
333
|
return transformed_dataset
|
334
334
|
|
@@ -371,7 +371,7 @@ class Pipeline(base.BaseTransformer):
|
|
371
371
|
else:
|
372
372
|
transformed_dataset = estimator[1].fit(transformed_dataset).predict(transformed_dataset)
|
373
373
|
|
374
|
-
self.
|
374
|
+
self._generate_model_signatures(dataset=dataset)
|
375
375
|
self._is_fitted = True
|
376
376
|
return transformed_dataset
|
377
377
|
|
@@ -611,7 +611,7 @@ class Pipeline(base.BaseTransformer):
|
|
611
611
|
def _get_dependencies(self) -> List[str]:
|
612
612
|
return self._deps
|
613
613
|
|
614
|
-
def
|
614
|
+
def _generate_model_signatures(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> None:
|
615
615
|
self._model_signature_dict = dict()
|
616
616
|
|
617
617
|
input_columns = self._get_sanitized_list_of_columns(dataset.columns)
|
@@ -86,11 +86,7 @@ class Binarizer(base.BaseTransformer):
|
|
86
86
|
"""
|
87
87
|
super()._reset()
|
88
88
|
|
89
|
-
|
90
|
-
project=base.PROJECT,
|
91
|
-
subproject=base.SUBPROJECT,
|
92
|
-
)
|
93
|
-
def fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> "Binarizer":
|
89
|
+
def _fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> "Binarizer":
|
94
90
|
"""
|
95
91
|
This is a stateless transformer, so there is nothing to fit. Validates the transformer arguments.
|
96
92
|
Returns the transformer instance.
|
@@ -146,11 +146,7 @@ class KBinsDiscretizer(base.BaseTransformer):
|
|
146
146
|
self.bin_edges_: Optional[npt.NDArray[np.float32]] = None
|
147
147
|
self.n_bins_: Optional[npt.NDArray[np.int32]] = None
|
148
148
|
|
149
|
-
|
150
|
-
project=base.PROJECT,
|
151
|
-
subproject=base.SUBPROJECT,
|
152
|
-
)
|
153
|
-
def fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> KBinsDiscretizer:
|
149
|
+
def _fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> KBinsDiscretizer:
|
154
150
|
"""
|
155
151
|
Fit KBinsDiscretizer with dataset.
|
156
152
|
|
@@ -87,11 +87,7 @@ class LabelEncoder(base.BaseTransformer):
|
|
87
87
|
self._ordinal_encoder = None
|
88
88
|
self.classes_ = None
|
89
89
|
|
90
|
-
|
91
|
-
project=base.PROJECT,
|
92
|
-
subproject=base.SUBPROJECT,
|
93
|
-
)
|
94
|
-
def fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> "LabelEncoder":
|
90
|
+
def _fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> "LabelEncoder":
|
95
91
|
"""
|
96
92
|
Fit label encoder with label column in dataset.
|
97
93
|
|
@@ -106,11 +106,7 @@ class MaxAbsScaler(base.BaseTransformer):
|
|
106
106
|
self.scale_ = {}
|
107
107
|
self.max_abs_ = {}
|
108
108
|
|
109
|
-
|
110
|
-
project=base.PROJECT,
|
111
|
-
subproject=base.SUBPROJECT,
|
112
|
-
)
|
113
|
-
def fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> "MaxAbsScaler":
|
109
|
+
def _fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> "MaxAbsScaler":
|
114
110
|
"""
|
115
111
|
Compute the maximum absolute value to be used for later scaling.
|
116
112
|
|