snowflake-ml-python 1.6.1__py3-none-any.whl → 1.6.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/__init__.py +4 -0
- snowflake/cortex/_classify_text.py +2 -2
- snowflake/cortex/_embed_text_1024.py +37 -0
- snowflake/cortex/_embed_text_768.py +37 -0
- snowflake/cortex/_extract_answer.py +2 -2
- snowflake/cortex/_sentiment.py +2 -2
- snowflake/cortex/_summarize.py +2 -2
- snowflake/cortex/_translate.py +2 -2
- snowflake/cortex/_util.py +4 -4
- snowflake/ml/_internal/env_utils.py +5 -5
- snowflake/ml/_internal/exceptions/error_codes.py +2 -0
- snowflake/ml/_internal/telemetry.py +142 -20
- snowflake/ml/_internal/utils/db_utils.py +50 -0
- snowflake/ml/_internal/utils/identifier.py +48 -11
- snowflake/ml/_internal/utils/service_logger.py +63 -0
- snowflake/ml/_internal/utils/snowflake_env.py +23 -13
- snowflake/ml/_internal/utils/sql_identifier.py +26 -2
- snowflake/ml/_internal/utils/table_manager.py +19 -1
- snowflake/ml/data/_internal/arrow_ingestor.py +1 -11
- snowflake/ml/data/data_connector.py +33 -7
- snowflake/ml/data/ingestor_utils.py +20 -10
- snowflake/ml/data/torch_utils.py +68 -0
- snowflake/ml/dataset/dataset.py +1 -3
- snowflake/ml/feature_store/access_manager.py +3 -3
- snowflake/ml/feature_store/feature_store.py +60 -19
- snowflake/ml/feature_store/feature_view.py +84 -30
- snowflake/ml/fileset/embedded_stage_fs.py +1 -1
- snowflake/ml/fileset/fileset.py +1 -1
- snowflake/ml/fileset/sfcfs.py +9 -3
- snowflake/ml/fileset/stage_fs.py +2 -1
- snowflake/ml/lineage/lineage_node.py +7 -2
- snowflake/ml/model/__init__.py +1 -2
- snowflake/ml/model/_client/model/model_version_impl.py +96 -12
- snowflake/ml/model/_client/ops/model_ops.py +124 -6
- snowflake/ml/model/_client/ops/service_ops.py +309 -9
- snowflake/ml/model/_client/service/model_deployment_spec.py +8 -5
- snowflake/ml/model/_client/service/model_deployment_spec_schema.py +2 -2
- snowflake/ml/model/_client/sql/_base.py +5 -0
- snowflake/ml/model/_client/sql/model.py +1 -0
- snowflake/ml/model/_client/sql/model_version.py +9 -5
- snowflake/ml/model/_client/sql/service.py +121 -20
- snowflake/ml/model/_model_composer/model_composer.py +11 -39
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +31 -11
- snowflake/ml/model/_packager/model_env/model_env.py +4 -38
- snowflake/ml/model/_packager/model_handlers/_utils.py +134 -28
- snowflake/ml/model/_packager/model_handlers/catboost.py +31 -30
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +26 -18
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +31 -58
- snowflake/ml/model/_packager/model_handlers/mlflow.py +3 -5
- snowflake/ml/model/_packager/model_handlers/model_objective_utils.py +169 -0
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +15 -8
- snowflake/ml/model/_packager/model_handlers/sklearn.py +56 -60
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +141 -9
- snowflake/ml/model/_packager/model_handlers/torchscript.py +2 -2
- snowflake/ml/model/_packager/model_handlers/xgboost.py +63 -48
- snowflake/ml/model/_packager/model_meta/model_meta.py +16 -42
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +1 -14
- snowflake/ml/model/_packager/model_packager.py +14 -8
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +11 -0
- snowflake/ml/model/_signatures/pytorch_handler.py +1 -1
- snowflake/ml/model/_signatures/snowpark_handler.py +3 -2
- snowflake/ml/model/_signatures/utils.py +9 -0
- snowflake/ml/model/type_hints.py +12 -145
- snowflake/ml/modeling/_internal/constants.py +1 -0
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +5 -5
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +9 -6
- snowflake/ml/modeling/_internal/model_specifications.py +2 -0
- snowflake/ml/modeling/_internal/model_trainer.py +1 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +2 -4
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +5 -5
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +130 -166
- snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +0 -1
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +61 -21
- snowflake/ml/modeling/cluster/affinity_propagation.py +61 -21
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +61 -21
- snowflake/ml/modeling/cluster/birch.py +61 -21
- snowflake/ml/modeling/cluster/bisecting_k_means.py +61 -21
- snowflake/ml/modeling/cluster/dbscan.py +61 -21
- snowflake/ml/modeling/cluster/feature_agglomeration.py +61 -21
- snowflake/ml/modeling/cluster/k_means.py +61 -21
- snowflake/ml/modeling/cluster/mean_shift.py +61 -21
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +61 -21
- snowflake/ml/modeling/cluster/optics.py +61 -21
- snowflake/ml/modeling/cluster/spectral_biclustering.py +61 -21
- snowflake/ml/modeling/cluster/spectral_clustering.py +61 -21
- snowflake/ml/modeling/cluster/spectral_coclustering.py +61 -21
- snowflake/ml/modeling/compose/column_transformer.py +61 -21
- snowflake/ml/modeling/compose/transformed_target_regressor.py +61 -21
- snowflake/ml/modeling/covariance/elliptic_envelope.py +61 -21
- snowflake/ml/modeling/covariance/empirical_covariance.py +61 -21
- snowflake/ml/modeling/covariance/graphical_lasso.py +61 -21
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +61 -21
- snowflake/ml/modeling/covariance/ledoit_wolf.py +61 -21
- snowflake/ml/modeling/covariance/min_cov_det.py +61 -21
- snowflake/ml/modeling/covariance/oas.py +61 -21
- snowflake/ml/modeling/covariance/shrunk_covariance.py +61 -21
- snowflake/ml/modeling/decomposition/dictionary_learning.py +61 -21
- snowflake/ml/modeling/decomposition/factor_analysis.py +61 -21
- snowflake/ml/modeling/decomposition/fast_ica.py +61 -21
- snowflake/ml/modeling/decomposition/incremental_pca.py +61 -21
- snowflake/ml/modeling/decomposition/kernel_pca.py +61 -21
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +61 -21
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +61 -21
- snowflake/ml/modeling/decomposition/pca.py +61 -21
- snowflake/ml/modeling/decomposition/sparse_pca.py +61 -21
- snowflake/ml/modeling/decomposition/truncated_svd.py +61 -21
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +61 -21
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +61 -21
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +61 -21
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +61 -21
- snowflake/ml/modeling/ensemble/bagging_classifier.py +61 -21
- snowflake/ml/modeling/ensemble/bagging_regressor.py +61 -21
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +61 -21
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +61 -21
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +61 -21
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +61 -21
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +61 -21
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +61 -21
- snowflake/ml/modeling/ensemble/isolation_forest.py +61 -21
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +61 -21
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +61 -21
- snowflake/ml/modeling/ensemble/stacking_regressor.py +61 -21
- snowflake/ml/modeling/ensemble/voting_classifier.py +61 -21
- snowflake/ml/modeling/ensemble/voting_regressor.py +61 -21
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +61 -21
- snowflake/ml/modeling/feature_selection/select_fdr.py +61 -21
- snowflake/ml/modeling/feature_selection/select_fpr.py +61 -21
- snowflake/ml/modeling/feature_selection/select_fwe.py +61 -21
- snowflake/ml/modeling/feature_selection/select_k_best.py +61 -21
- snowflake/ml/modeling/feature_selection/select_percentile.py +61 -21
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +61 -21
- snowflake/ml/modeling/feature_selection/variance_threshold.py +61 -21
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +61 -21
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +61 -21
- snowflake/ml/modeling/impute/iterative_imputer.py +61 -21
- snowflake/ml/modeling/impute/knn_imputer.py +61 -21
- snowflake/ml/modeling/impute/missing_indicator.py +61 -21
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +61 -21
- snowflake/ml/modeling/kernel_approximation/nystroem.py +61 -21
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +61 -21
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +61 -21
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +61 -21
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +61 -21
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +61 -21
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +61 -21
- snowflake/ml/modeling/linear_model/ard_regression.py +61 -21
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +61 -21
- snowflake/ml/modeling/linear_model/elastic_net.py +61 -21
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +61 -21
- snowflake/ml/modeling/linear_model/gamma_regressor.py +61 -21
- snowflake/ml/modeling/linear_model/huber_regressor.py +61 -21
- snowflake/ml/modeling/linear_model/lars.py +61 -21
- snowflake/ml/modeling/linear_model/lars_cv.py +61 -21
- snowflake/ml/modeling/linear_model/lasso.py +61 -21
- snowflake/ml/modeling/linear_model/lasso_cv.py +61 -21
- snowflake/ml/modeling/linear_model/lasso_lars.py +61 -21
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +61 -21
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +61 -21
- snowflake/ml/modeling/linear_model/linear_regression.py +61 -21
- snowflake/ml/modeling/linear_model/logistic_regression.py +61 -21
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +61 -21
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +61 -21
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +61 -21
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +61 -21
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +61 -21
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +61 -21
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +61 -21
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +61 -21
- snowflake/ml/modeling/linear_model/perceptron.py +61 -21
- snowflake/ml/modeling/linear_model/poisson_regressor.py +61 -21
- snowflake/ml/modeling/linear_model/ransac_regressor.py +61 -21
- snowflake/ml/modeling/linear_model/ridge.py +61 -21
- snowflake/ml/modeling/linear_model/ridge_classifier.py +61 -21
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +61 -21
- snowflake/ml/modeling/linear_model/ridge_cv.py +61 -21
- snowflake/ml/modeling/linear_model/sgd_classifier.py +61 -21
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +61 -21
- snowflake/ml/modeling/linear_model/sgd_regressor.py +61 -21
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +61 -21
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +61 -21
- snowflake/ml/modeling/manifold/isomap.py +61 -21
- snowflake/ml/modeling/manifold/mds.py +61 -21
- snowflake/ml/modeling/manifold/spectral_embedding.py +61 -21
- snowflake/ml/modeling/manifold/tsne.py +61 -21
- snowflake/ml/modeling/metrics/metrics_utils.py +2 -2
- snowflake/ml/modeling/metrics/ranking.py +0 -3
- snowflake/ml/modeling/metrics/regression.py +0 -3
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +61 -21
- snowflake/ml/modeling/mixture/gaussian_mixture.py +61 -21
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +61 -21
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +61 -21
- snowflake/ml/modeling/multiclass/output_code_classifier.py +61 -21
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +61 -21
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +61 -21
- snowflake/ml/modeling/naive_bayes/complement_nb.py +61 -21
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +61 -21
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +61 -21
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +61 -21
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +61 -21
- snowflake/ml/modeling/neighbors/kernel_density.py +61 -21
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +61 -21
- snowflake/ml/modeling/neighbors/nearest_centroid.py +61 -21
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +61 -21
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +61 -21
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +61 -21
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +61 -21
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +61 -21
- snowflake/ml/modeling/neural_network/mlp_classifier.py +61 -21
- snowflake/ml/modeling/neural_network/mlp_regressor.py +61 -21
- snowflake/ml/modeling/parameters/disable_model_tracer.py +5 -0
- snowflake/ml/modeling/pipeline/pipeline.py +1 -13
- snowflake/ml/modeling/preprocessing/polynomial_features.py +61 -21
- snowflake/ml/modeling/semi_supervised/label_propagation.py +61 -21
- snowflake/ml/modeling/semi_supervised/label_spreading.py +61 -21
- snowflake/ml/modeling/svm/linear_svc.py +61 -21
- snowflake/ml/modeling/svm/linear_svr.py +61 -21
- snowflake/ml/modeling/svm/nu_svc.py +61 -21
- snowflake/ml/modeling/svm/nu_svr.py +61 -21
- snowflake/ml/modeling/svm/svc.py +61 -21
- snowflake/ml/modeling/svm/svr.py +61 -21
- snowflake/ml/modeling/tree/decision_tree_classifier.py +61 -21
- snowflake/ml/modeling/tree/decision_tree_regressor.py +61 -21
- snowflake/ml/modeling/tree/extra_tree_classifier.py +61 -21
- snowflake/ml/modeling/tree/extra_tree_regressor.py +61 -21
- snowflake/ml/modeling/xgboost/xgb_classifier.py +64 -23
- snowflake/ml/modeling/xgboost/xgb_regressor.py +64 -23
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +64 -23
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +64 -23
- snowflake/ml/monitoring/_client/model_monitor.py +126 -0
- snowflake/ml/monitoring/_client/model_monitor_manager.py +361 -0
- snowflake/ml/monitoring/_client/model_monitor_version.py +1 -0
- snowflake/ml/monitoring/_client/monitor_sql_client.py +1335 -0
- snowflake/ml/monitoring/_client/queries/record_count.ssql +14 -0
- snowflake/ml/monitoring/_client/queries/rmse.ssql +28 -0
- snowflake/ml/monitoring/entities/model_monitor_config.py +28 -0
- snowflake/ml/monitoring/entities/model_monitor_interval.py +46 -0
- snowflake/ml/monitoring/entities/output_score_type.py +90 -0
- snowflake/ml/registry/_manager/model_manager.py +4 -0
- snowflake/ml/registry/registry.py +166 -8
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.3.dist-info}/METADATA +43 -9
- snowflake_ml_python-1.6.3.dist-info/RECORD +400 -0
- {snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.3.dist-info}/WHEEL +1 -1
- snowflake/ml/_internal/container_services/image_registry/credential.py +0 -84
- snowflake/ml/_internal/container_services/image_registry/http_client.py +0 -127
- snowflake/ml/_internal/container_services/image_registry/imagelib.py +0 -400
- snowflake/ml/_internal/container_services/image_registry/registry_client.py +0 -212
- snowflake/ml/_internal/utils/log_stream_processor.py +0 -30
- snowflake/ml/_internal/utils/session_token_manager.py +0 -46
- snowflake/ml/_internal/utils/spcs_attribution_utils.py +0 -122
- snowflake/ml/_internal/utils/uri.py +0 -77
- snowflake/ml/data/torch_dataset.py +0 -33
- snowflake/ml/model/_api.py +0 -568
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +0 -12
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +0 -249
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +0 -130
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +0 -36
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +0 -268
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +0 -215
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +0 -53
- snowflake/ml/model/_deploy_client/image_builds/templates/image_build_job_spec_template +0 -38
- snowflake/ml/model/_deploy_client/image_builds/templates/kaniko_shell_script_template +0 -105
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +0 -611
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +0 -116
- snowflake/ml/model/_deploy_client/snowservice/instance_types.py +0 -10
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +0 -28
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template_with_model +0 -21
- snowflake/ml/model/_deploy_client/utils/constants.py +0 -48
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +0 -280
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +0 -202
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +0 -99
- snowflake/ml/model/_packager/model_handlers/llm.py +0 -267
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +0 -11
- snowflake/ml/model/deploy_platforms.py +0 -6
- snowflake/ml/model/models/llm.py +0 -104
- snowflake/ml/monitoring/monitor.py +0 -203
- snowflake/ml/registry/_initial_schema.py +0 -142
- snowflake/ml/registry/_schema.py +0 -82
- snowflake/ml/registry/_schema_upgrade_plans.py +0 -116
- snowflake/ml/registry/_schema_version_manager.py +0 -163
- snowflake/ml/registry/model_registry.py +0 -2048
- snowflake_ml_python-1.6.1.dist-info/RECORD +0 -422
- {snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.3.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.3.dist-info}/top_level.txt +0 -0
@@ -4,14 +4,12 @@
|
|
4
4
|
#
|
5
5
|
import inspect
|
6
6
|
import os
|
7
|
-
import
|
8
|
-
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
9
|
-
from typing_extensions import TypeGuard
|
7
|
+
from typing import Iterable, Optional, Union, List, Any, Dict, Set, Tuple
|
10
8
|
from uuid import uuid4
|
11
9
|
|
12
10
|
import cloudpickle as cp
|
13
|
-
import pandas as pd
|
14
11
|
import numpy as np
|
12
|
+
import pandas as pd
|
15
13
|
from numpy import typing as npt
|
16
14
|
|
17
15
|
|
@@ -24,12 +22,11 @@ from snowflake.ml.modeling.framework.base import BaseTransformer, _process_cols
|
|
24
22
|
from snowflake.ml._internal import telemetry
|
25
23
|
from snowflake.ml._internal.exceptions import error_codes, exceptions, modeling_error_messages
|
26
24
|
from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
|
27
|
-
from snowflake.ml._internal.utils import
|
25
|
+
from snowflake.ml._internal.utils import identifier
|
28
26
|
from snowflake.snowpark import DataFrame, Session
|
29
27
|
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
28
|
from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
|
31
29
|
from snowflake.ml.modeling._internal.transformer_protocols import (
|
32
|
-
ModelTransformHandlers,
|
33
30
|
BatchInferenceKwargsTypedDict,
|
34
31
|
ScoreKwargsTypedDict
|
35
32
|
)
|
@@ -507,12 +504,23 @@ class BernoulliRBM(BaseTransformer):
|
|
507
504
|
autogenerated=self._autogenerated,
|
508
505
|
subproject=_SUBPROJECT,
|
509
506
|
)
|
510
|
-
|
511
|
-
|
512
|
-
expected_output_cols_list=(
|
513
|
-
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
514
|
-
),
|
507
|
+
expected_output_cols = (
|
508
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
515
509
|
)
|
510
|
+
if isinstance(dataset, DataFrame):
|
511
|
+
expected_output_cols, example_output_pd_df = self._align_expected_output(
|
512
|
+
"fit_predict", dataset, expected_output_cols, output_cols_prefix
|
513
|
+
)
|
514
|
+
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
515
|
+
drop_input_cols=self._drop_input_cols,
|
516
|
+
expected_output_cols_list=expected_output_cols,
|
517
|
+
example_output_pd_df=example_output_pd_df,
|
518
|
+
)
|
519
|
+
else:
|
520
|
+
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
521
|
+
drop_input_cols=self._drop_input_cols,
|
522
|
+
expected_output_cols_list=expected_output_cols,
|
523
|
+
)
|
516
524
|
self._sklearn_object = fitted_estimator
|
517
525
|
self._is_fitted = True
|
518
526
|
return output_result
|
@@ -537,6 +545,7 @@ class BernoulliRBM(BaseTransformer):
|
|
537
545
|
"""
|
538
546
|
self._infer_input_output_cols(dataset)
|
539
547
|
super()._check_dataset_type(dataset)
|
548
|
+
|
540
549
|
model_trainer = ModelTrainerBuilder.build_fit_transform(
|
541
550
|
estimator=self._sklearn_object,
|
542
551
|
dataset=dataset,
|
@@ -593,12 +602,41 @@ class BernoulliRBM(BaseTransformer):
|
|
593
602
|
|
594
603
|
return rv
|
595
604
|
|
596
|
-
def
|
597
|
-
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
598
|
-
) -> List[str]:
|
605
|
+
def _align_expected_output(
|
606
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str,
|
607
|
+
) -> Tuple[List[str], pd.DataFrame]:
|
608
|
+
""" Run 1 line of data with the desired method, and return one tuple that consists of the output column names
|
609
|
+
and output dataframe with 1 line.
|
610
|
+
If the method is fit_predict, run 2 lines of data.
|
611
|
+
"""
|
599
612
|
# in case the inferred output column names dimension is different
|
600
613
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
601
|
-
|
614
|
+
|
615
|
+
# For fit_predict method, a minimum of 2 is required by MinCovDet, BayesianGaussianMixture
|
616
|
+
# so change the minimum of number of rows to 2
|
617
|
+
num_examples = 2
|
618
|
+
statement_params = telemetry.get_function_usage_statement_params(
|
619
|
+
project=_PROJECT,
|
620
|
+
subproject=_SUBPROJECT,
|
621
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
622
|
+
inspect.currentframe(), BernoulliRBM.__class__.__name__
|
623
|
+
),
|
624
|
+
api_calls=[Session.call],
|
625
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
626
|
+
)
|
627
|
+
if output_cols_prefix == "fit_predict_":
|
628
|
+
if hasattr(self._sklearn_object, "n_clusters"):
|
629
|
+
# cluster classes such as BisectingKMeansTest requires # of examples >= n_clusters
|
630
|
+
num_examples = self._sklearn_object.n_clusters
|
631
|
+
elif hasattr(self._sklearn_object, "min_samples"):
|
632
|
+
# OPTICS default min_samples 5, which requires at least 5 lines of data
|
633
|
+
num_examples = self._sklearn_object.min_samples
|
634
|
+
elif hasattr(self._sklearn_object, "n_neighbors") and hasattr(self._sklearn_object, "n_samples"):
|
635
|
+
# LocalOutlierFactor expects n_neighbors <= n_samples
|
636
|
+
num_examples = self._sklearn_object.n_neighbors
|
637
|
+
sample_pd_df = dataset.select(self.input_cols).limit(num_examples).to_pandas(statement_params=statement_params)
|
638
|
+
else:
|
639
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas(statement_params=statement_params)
|
602
640
|
|
603
641
|
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
604
642
|
# seen during the fit.
|
@@ -610,12 +648,14 @@ class BernoulliRBM(BaseTransformer):
|
|
610
648
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
611
649
|
if self.sample_weight_col:
|
612
650
|
output_df_columns_set -= set(self.sample_weight_col)
|
651
|
+
|
613
652
|
# if the dimension of inferred output column names is correct; use it
|
614
653
|
if len(expected_output_cols_list) == len(output_df_columns_set):
|
615
|
-
return expected_output_cols_list
|
654
|
+
return expected_output_cols_list, output_df_pd
|
616
655
|
# otherwise, use the sklearn estimator's output
|
617
656
|
else:
|
618
|
-
|
657
|
+
expected_output_cols_list = sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
658
|
+
return expected_output_cols_list, output_df_pd[expected_output_cols_list]
|
619
659
|
|
620
660
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
621
661
|
@telemetry.send_api_usage_telemetry(
|
@@ -661,7 +701,7 @@ class BernoulliRBM(BaseTransformer):
|
|
661
701
|
drop_input_cols=self._drop_input_cols,
|
662
702
|
expected_output_cols_type="float",
|
663
703
|
)
|
664
|
-
expected_output_cols = self.
|
704
|
+
expected_output_cols, _ = self._align_expected_output(
|
665
705
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
666
706
|
)
|
667
707
|
|
@@ -727,7 +767,7 @@ class BernoulliRBM(BaseTransformer):
|
|
727
767
|
drop_input_cols=self._drop_input_cols,
|
728
768
|
expected_output_cols_type="float",
|
729
769
|
)
|
730
|
-
expected_output_cols = self.
|
770
|
+
expected_output_cols, _ = self._align_expected_output(
|
731
771
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
732
772
|
)
|
733
773
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -790,7 +830,7 @@ class BernoulliRBM(BaseTransformer):
|
|
790
830
|
drop_input_cols=self._drop_input_cols,
|
791
831
|
expected_output_cols_type="float",
|
792
832
|
)
|
793
|
-
expected_output_cols = self.
|
833
|
+
expected_output_cols, _ = self._align_expected_output(
|
794
834
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
795
835
|
)
|
796
836
|
|
@@ -857,7 +897,7 @@ class BernoulliRBM(BaseTransformer):
|
|
857
897
|
drop_input_cols = self._drop_input_cols,
|
858
898
|
expected_output_cols_type="float",
|
859
899
|
)
|
860
|
-
expected_output_cols = self.
|
900
|
+
expected_output_cols, _ = self._align_expected_output(
|
861
901
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
862
902
|
)
|
863
903
|
|
@@ -4,14 +4,12 @@
|
|
4
4
|
#
|
5
5
|
import inspect
|
6
6
|
import os
|
7
|
-
import
|
8
|
-
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
9
|
-
from typing_extensions import TypeGuard
|
7
|
+
from typing import Iterable, Optional, Union, List, Any, Dict, Set, Tuple
|
10
8
|
from uuid import uuid4
|
11
9
|
|
12
10
|
import cloudpickle as cp
|
13
|
-
import pandas as pd
|
14
11
|
import numpy as np
|
12
|
+
import pandas as pd
|
15
13
|
from numpy import typing as npt
|
16
14
|
|
17
15
|
|
@@ -24,12 +22,11 @@ from snowflake.ml.modeling.framework.base import BaseTransformer, _process_cols
|
|
24
22
|
from snowflake.ml._internal import telemetry
|
25
23
|
from snowflake.ml._internal.exceptions import error_codes, exceptions, modeling_error_messages
|
26
24
|
from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
|
27
|
-
from snowflake.ml._internal.utils import
|
25
|
+
from snowflake.ml._internal.utils import identifier
|
28
26
|
from snowflake.snowpark import DataFrame, Session
|
29
27
|
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
28
|
from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
|
31
29
|
from snowflake.ml.modeling._internal.transformer_protocols import (
|
32
|
-
ModelTransformHandlers,
|
33
30
|
BatchInferenceKwargsTypedDict,
|
34
31
|
ScoreKwargsTypedDict
|
35
32
|
)
|
@@ -660,12 +657,23 @@ class MLPClassifier(BaseTransformer):
|
|
660
657
|
autogenerated=self._autogenerated,
|
661
658
|
subproject=_SUBPROJECT,
|
662
659
|
)
|
663
|
-
|
664
|
-
|
665
|
-
expected_output_cols_list=(
|
666
|
-
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
667
|
-
),
|
660
|
+
expected_output_cols = (
|
661
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
668
662
|
)
|
663
|
+
if isinstance(dataset, DataFrame):
|
664
|
+
expected_output_cols, example_output_pd_df = self._align_expected_output(
|
665
|
+
"fit_predict", dataset, expected_output_cols, output_cols_prefix
|
666
|
+
)
|
667
|
+
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
668
|
+
drop_input_cols=self._drop_input_cols,
|
669
|
+
expected_output_cols_list=expected_output_cols,
|
670
|
+
example_output_pd_df=example_output_pd_df,
|
671
|
+
)
|
672
|
+
else:
|
673
|
+
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
674
|
+
drop_input_cols=self._drop_input_cols,
|
675
|
+
expected_output_cols_list=expected_output_cols,
|
676
|
+
)
|
669
677
|
self._sklearn_object = fitted_estimator
|
670
678
|
self._is_fitted = True
|
671
679
|
return output_result
|
@@ -688,6 +696,7 @@ class MLPClassifier(BaseTransformer):
|
|
688
696
|
"""
|
689
697
|
self._infer_input_output_cols(dataset)
|
690
698
|
super()._check_dataset_type(dataset)
|
699
|
+
|
691
700
|
model_trainer = ModelTrainerBuilder.build_fit_transform(
|
692
701
|
estimator=self._sklearn_object,
|
693
702
|
dataset=dataset,
|
@@ -744,12 +753,41 @@ class MLPClassifier(BaseTransformer):
|
|
744
753
|
|
745
754
|
return rv
|
746
755
|
|
747
|
-
def
|
748
|
-
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
749
|
-
) -> List[str]:
|
756
|
+
def _align_expected_output(
|
757
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str,
|
758
|
+
) -> Tuple[List[str], pd.DataFrame]:
|
759
|
+
""" Run 1 line of data with the desired method, and return one tuple that consists of the output column names
|
760
|
+
and output dataframe with 1 line.
|
761
|
+
If the method is fit_predict, run 2 lines of data.
|
762
|
+
"""
|
750
763
|
# in case the inferred output column names dimension is different
|
751
764
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
752
|
-
|
765
|
+
|
766
|
+
# For fit_predict method, a minimum of 2 is required by MinCovDet, BayesianGaussianMixture
|
767
|
+
# so change the minimum of number of rows to 2
|
768
|
+
num_examples = 2
|
769
|
+
statement_params = telemetry.get_function_usage_statement_params(
|
770
|
+
project=_PROJECT,
|
771
|
+
subproject=_SUBPROJECT,
|
772
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
773
|
+
inspect.currentframe(), MLPClassifier.__class__.__name__
|
774
|
+
),
|
775
|
+
api_calls=[Session.call],
|
776
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
777
|
+
)
|
778
|
+
if output_cols_prefix == "fit_predict_":
|
779
|
+
if hasattr(self._sklearn_object, "n_clusters"):
|
780
|
+
# cluster classes such as BisectingKMeansTest requires # of examples >= n_clusters
|
781
|
+
num_examples = self._sklearn_object.n_clusters
|
782
|
+
elif hasattr(self._sklearn_object, "min_samples"):
|
783
|
+
# OPTICS default min_samples 5, which requires at least 5 lines of data
|
784
|
+
num_examples = self._sklearn_object.min_samples
|
785
|
+
elif hasattr(self._sklearn_object, "n_neighbors") and hasattr(self._sklearn_object, "n_samples"):
|
786
|
+
# LocalOutlierFactor expects n_neighbors <= n_samples
|
787
|
+
num_examples = self._sklearn_object.n_neighbors
|
788
|
+
sample_pd_df = dataset.select(self.input_cols).limit(num_examples).to_pandas(statement_params=statement_params)
|
789
|
+
else:
|
790
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas(statement_params=statement_params)
|
753
791
|
|
754
792
|
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
755
793
|
# seen during the fit.
|
@@ -761,12 +799,14 @@ class MLPClassifier(BaseTransformer):
|
|
761
799
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
762
800
|
if self.sample_weight_col:
|
763
801
|
output_df_columns_set -= set(self.sample_weight_col)
|
802
|
+
|
764
803
|
# if the dimension of inferred output column names is correct; use it
|
765
804
|
if len(expected_output_cols_list) == len(output_df_columns_set):
|
766
|
-
return expected_output_cols_list
|
805
|
+
return expected_output_cols_list, output_df_pd
|
767
806
|
# otherwise, use the sklearn estimator's output
|
768
807
|
else:
|
769
|
-
|
808
|
+
expected_output_cols_list = sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
809
|
+
return expected_output_cols_list, output_df_pd[expected_output_cols_list]
|
770
810
|
|
771
811
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
772
812
|
@telemetry.send_api_usage_telemetry(
|
@@ -814,7 +854,7 @@ class MLPClassifier(BaseTransformer):
|
|
814
854
|
drop_input_cols=self._drop_input_cols,
|
815
855
|
expected_output_cols_type="float",
|
816
856
|
)
|
817
|
-
expected_output_cols = self.
|
857
|
+
expected_output_cols, _ = self._align_expected_output(
|
818
858
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
819
859
|
)
|
820
860
|
|
@@ -882,7 +922,7 @@ class MLPClassifier(BaseTransformer):
|
|
882
922
|
drop_input_cols=self._drop_input_cols,
|
883
923
|
expected_output_cols_type="float",
|
884
924
|
)
|
885
|
-
expected_output_cols = self.
|
925
|
+
expected_output_cols, _ = self._align_expected_output(
|
886
926
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
887
927
|
)
|
888
928
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -945,7 +985,7 @@ class MLPClassifier(BaseTransformer):
|
|
945
985
|
drop_input_cols=self._drop_input_cols,
|
946
986
|
expected_output_cols_type="float",
|
947
987
|
)
|
948
|
-
expected_output_cols = self.
|
988
|
+
expected_output_cols, _ = self._align_expected_output(
|
949
989
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
950
990
|
)
|
951
991
|
|
@@ -1010,7 +1050,7 @@ class MLPClassifier(BaseTransformer):
|
|
1010
1050
|
drop_input_cols = self._drop_input_cols,
|
1011
1051
|
expected_output_cols_type="float",
|
1012
1052
|
)
|
1013
|
-
expected_output_cols = self.
|
1053
|
+
expected_output_cols, _ = self._align_expected_output(
|
1014
1054
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
1015
1055
|
)
|
1016
1056
|
|
@@ -4,14 +4,12 @@
|
|
4
4
|
#
|
5
5
|
import inspect
|
6
6
|
import os
|
7
|
-
import
|
8
|
-
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
9
|
-
from typing_extensions import TypeGuard
|
7
|
+
from typing import Iterable, Optional, Union, List, Any, Dict, Set, Tuple
|
10
8
|
from uuid import uuid4
|
11
9
|
|
12
10
|
import cloudpickle as cp
|
13
|
-
import pandas as pd
|
14
11
|
import numpy as np
|
12
|
+
import pandas as pd
|
15
13
|
from numpy import typing as npt
|
16
14
|
|
17
15
|
|
@@ -24,12 +22,11 @@ from snowflake.ml.modeling.framework.base import BaseTransformer, _process_cols
|
|
24
22
|
from snowflake.ml._internal import telemetry
|
25
23
|
from snowflake.ml._internal.exceptions import error_codes, exceptions, modeling_error_messages
|
26
24
|
from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
|
27
|
-
from snowflake.ml._internal.utils import
|
25
|
+
from snowflake.ml._internal.utils import identifier
|
28
26
|
from snowflake.snowpark import DataFrame, Session
|
29
27
|
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
28
|
from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
|
31
29
|
from snowflake.ml.modeling._internal.transformer_protocols import (
|
32
|
-
ModelTransformHandlers,
|
33
30
|
BatchInferenceKwargsTypedDict,
|
34
31
|
ScoreKwargsTypedDict
|
35
32
|
)
|
@@ -656,12 +653,23 @@ class MLPRegressor(BaseTransformer):
|
|
656
653
|
autogenerated=self._autogenerated,
|
657
654
|
subproject=_SUBPROJECT,
|
658
655
|
)
|
659
|
-
|
660
|
-
|
661
|
-
expected_output_cols_list=(
|
662
|
-
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
663
|
-
),
|
656
|
+
expected_output_cols = (
|
657
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
664
658
|
)
|
659
|
+
if isinstance(dataset, DataFrame):
|
660
|
+
expected_output_cols, example_output_pd_df = self._align_expected_output(
|
661
|
+
"fit_predict", dataset, expected_output_cols, output_cols_prefix
|
662
|
+
)
|
663
|
+
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
664
|
+
drop_input_cols=self._drop_input_cols,
|
665
|
+
expected_output_cols_list=expected_output_cols,
|
666
|
+
example_output_pd_df=example_output_pd_df,
|
667
|
+
)
|
668
|
+
else:
|
669
|
+
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
670
|
+
drop_input_cols=self._drop_input_cols,
|
671
|
+
expected_output_cols_list=expected_output_cols,
|
672
|
+
)
|
665
673
|
self._sklearn_object = fitted_estimator
|
666
674
|
self._is_fitted = True
|
667
675
|
return output_result
|
@@ -684,6 +692,7 @@ class MLPRegressor(BaseTransformer):
|
|
684
692
|
"""
|
685
693
|
self._infer_input_output_cols(dataset)
|
686
694
|
super()._check_dataset_type(dataset)
|
695
|
+
|
687
696
|
model_trainer = ModelTrainerBuilder.build_fit_transform(
|
688
697
|
estimator=self._sklearn_object,
|
689
698
|
dataset=dataset,
|
@@ -740,12 +749,41 @@ class MLPRegressor(BaseTransformer):
|
|
740
749
|
|
741
750
|
return rv
|
742
751
|
|
743
|
-
def
|
744
|
-
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
745
|
-
) -> List[str]:
|
752
|
+
def _align_expected_output(
|
753
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str,
|
754
|
+
) -> Tuple[List[str], pd.DataFrame]:
|
755
|
+
""" Run 1 line of data with the desired method, and return one tuple that consists of the output column names
|
756
|
+
and output dataframe with 1 line.
|
757
|
+
If the method is fit_predict, run 2 lines of data.
|
758
|
+
"""
|
746
759
|
# in case the inferred output column names dimension is different
|
747
760
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
748
|
-
|
761
|
+
|
762
|
+
# For fit_predict method, a minimum of 2 is required by MinCovDet, BayesianGaussianMixture
|
763
|
+
# so change the minimum of number of rows to 2
|
764
|
+
num_examples = 2
|
765
|
+
statement_params = telemetry.get_function_usage_statement_params(
|
766
|
+
project=_PROJECT,
|
767
|
+
subproject=_SUBPROJECT,
|
768
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
769
|
+
inspect.currentframe(), MLPRegressor.__class__.__name__
|
770
|
+
),
|
771
|
+
api_calls=[Session.call],
|
772
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
773
|
+
)
|
774
|
+
if output_cols_prefix == "fit_predict_":
|
775
|
+
if hasattr(self._sklearn_object, "n_clusters"):
|
776
|
+
# cluster classes such as BisectingKMeansTest requires # of examples >= n_clusters
|
777
|
+
num_examples = self._sklearn_object.n_clusters
|
778
|
+
elif hasattr(self._sklearn_object, "min_samples"):
|
779
|
+
# OPTICS default min_samples 5, which requires at least 5 lines of data
|
780
|
+
num_examples = self._sklearn_object.min_samples
|
781
|
+
elif hasattr(self._sklearn_object, "n_neighbors") and hasattr(self._sklearn_object, "n_samples"):
|
782
|
+
# LocalOutlierFactor expects n_neighbors <= n_samples
|
783
|
+
num_examples = self._sklearn_object.n_neighbors
|
784
|
+
sample_pd_df = dataset.select(self.input_cols).limit(num_examples).to_pandas(statement_params=statement_params)
|
785
|
+
else:
|
786
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas(statement_params=statement_params)
|
749
787
|
|
750
788
|
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
751
789
|
# seen during the fit.
|
@@ -757,12 +795,14 @@ class MLPRegressor(BaseTransformer):
|
|
757
795
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
758
796
|
if self.sample_weight_col:
|
759
797
|
output_df_columns_set -= set(self.sample_weight_col)
|
798
|
+
|
760
799
|
# if the dimension of inferred output column names is correct; use it
|
761
800
|
if len(expected_output_cols_list) == len(output_df_columns_set):
|
762
|
-
return expected_output_cols_list
|
801
|
+
return expected_output_cols_list, output_df_pd
|
763
802
|
# otherwise, use the sklearn estimator's output
|
764
803
|
else:
|
765
|
-
|
804
|
+
expected_output_cols_list = sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
805
|
+
return expected_output_cols_list, output_df_pd[expected_output_cols_list]
|
766
806
|
|
767
807
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
768
808
|
@telemetry.send_api_usage_telemetry(
|
@@ -808,7 +848,7 @@ class MLPRegressor(BaseTransformer):
|
|
808
848
|
drop_input_cols=self._drop_input_cols,
|
809
849
|
expected_output_cols_type="float",
|
810
850
|
)
|
811
|
-
expected_output_cols = self.
|
851
|
+
expected_output_cols, _ = self._align_expected_output(
|
812
852
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
813
853
|
)
|
814
854
|
|
@@ -874,7 +914,7 @@ class MLPRegressor(BaseTransformer):
|
|
874
914
|
drop_input_cols=self._drop_input_cols,
|
875
915
|
expected_output_cols_type="float",
|
876
916
|
)
|
877
|
-
expected_output_cols = self.
|
917
|
+
expected_output_cols, _ = self._align_expected_output(
|
878
918
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
879
919
|
)
|
880
920
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -937,7 +977,7 @@ class MLPRegressor(BaseTransformer):
|
|
937
977
|
drop_input_cols=self._drop_input_cols,
|
938
978
|
expected_output_cols_type="float",
|
939
979
|
)
|
940
|
-
expected_output_cols = self.
|
980
|
+
expected_output_cols, _ = self._align_expected_output(
|
941
981
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
942
982
|
)
|
943
983
|
|
@@ -1002,7 +1042,7 @@ class MLPRegressor(BaseTransformer):
|
|
1002
1042
|
drop_input_cols = self._drop_input_cols,
|
1003
1043
|
expected_output_cols_type="float",
|
1004
1044
|
)
|
1005
|
-
expected_output_cols = self.
|
1045
|
+
expected_output_cols, _ = self._align_expected_output(
|
1006
1046
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
1007
1047
|
)
|
1008
1048
|
|
@@ -379,7 +379,6 @@ class Pipeline(base.BaseTransformer):
|
|
379
379
|
anonymous=True,
|
380
380
|
imports=imports, # type: ignore[arg-type]
|
381
381
|
statement_params=sproc_statement_params,
|
382
|
-
execute_as="caller",
|
383
382
|
)
|
384
383
|
|
385
384
|
sproc_export_file_name: str = pipeline_within_one_sproc(
|
@@ -418,9 +417,6 @@ class Pipeline(base.BaseTransformer):
|
|
418
417
|
|
419
418
|
Returns:
|
420
419
|
Fitted pipeline.
|
421
|
-
|
422
|
-
Raises:
|
423
|
-
ValueError: A pipeline incompatible with sklearn is used on MLRS
|
424
420
|
"""
|
425
421
|
|
426
422
|
self._validate_steps()
|
@@ -437,8 +433,6 @@ class Pipeline(base.BaseTransformer):
|
|
437
433
|
lineage_utils.set_data_sources(self, data_sources)
|
438
434
|
|
439
435
|
if self._can_be_trained_in_ml_runtime(dataset):
|
440
|
-
if not self._is_convertible_to_sklearn:
|
441
|
-
raise ValueError("This pipeline cannot be converted to an sklearn pipeline.")
|
442
436
|
self._fit_ml_runtime(dataset)
|
443
437
|
|
444
438
|
elif squash and isinstance(dataset, snowpark.DataFrame):
|
@@ -611,14 +605,8 @@ class Pipeline(base.BaseTransformer):
|
|
611
605
|
|
612
606
|
Returns:
|
613
607
|
Output dataset.
|
614
|
-
|
615
|
-
Raises:
|
616
|
-
ValueError: An sklearn object has not been fit and stored before calling this function.
|
617
608
|
"""
|
618
|
-
if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
|
619
|
-
if self._sklearn_object is None:
|
620
|
-
raise ValueError("Model must be fit before inference.")
|
621
|
-
|
609
|
+
if os.environ.get(IN_ML_RUNTIME_ENV_VAR) and self._sklearn_object is not None:
|
622
610
|
expected_output_cols = self._infer_output_cols()
|
623
611
|
handler = ModelTransformerBuilder.build(
|
624
612
|
dataset=dataset,
|