snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +35 -40
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/identifier.py +74 -7
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_core_requirements.py +1 -1
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +25 -28
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +7 -4
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/_base.py +3 -1
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -8
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +27 -21
- snowflake/ml/model/_model_meta.py +33 -19
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +28 -15
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +79 -43
- snowflake/ml/modeling/cluster/affinity_propagation.py +79 -43
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +79 -43
- snowflake/ml/modeling/cluster/birch.py +79 -43
- snowflake/ml/modeling/cluster/bisecting_k_means.py +79 -43
- snowflake/ml/modeling/cluster/dbscan.py +79 -43
- snowflake/ml/modeling/cluster/feature_agglomeration.py +79 -43
- snowflake/ml/modeling/cluster/k_means.py +79 -43
- snowflake/ml/modeling/cluster/mean_shift.py +79 -43
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +79 -43
- snowflake/ml/modeling/cluster/optics.py +79 -43
- snowflake/ml/modeling/cluster/spectral_biclustering.py +79 -43
- snowflake/ml/modeling/cluster/spectral_clustering.py +79 -43
- snowflake/ml/modeling/cluster/spectral_coclustering.py +79 -43
- snowflake/ml/modeling/compose/column_transformer.py +79 -43
- snowflake/ml/modeling/compose/transformed_target_regressor.py +79 -43
- snowflake/ml/modeling/covariance/elliptic_envelope.py +79 -43
- snowflake/ml/modeling/covariance/empirical_covariance.py +79 -43
- snowflake/ml/modeling/covariance/graphical_lasso.py +79 -43
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +79 -43
- snowflake/ml/modeling/covariance/ledoit_wolf.py +79 -43
- snowflake/ml/modeling/covariance/min_cov_det.py +79 -43
- snowflake/ml/modeling/covariance/oas.py +79 -43
- snowflake/ml/modeling/covariance/shrunk_covariance.py +79 -43
- snowflake/ml/modeling/decomposition/dictionary_learning.py +79 -43
- snowflake/ml/modeling/decomposition/factor_analysis.py +79 -43
- snowflake/ml/modeling/decomposition/fast_ica.py +79 -43
- snowflake/ml/modeling/decomposition/incremental_pca.py +79 -43
- snowflake/ml/modeling/decomposition/kernel_pca.py +79 -43
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +79 -43
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +79 -43
- snowflake/ml/modeling/decomposition/pca.py +79 -43
- snowflake/ml/modeling/decomposition/sparse_pca.py +79 -43
- snowflake/ml/modeling/decomposition/truncated_svd.py +79 -43
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +79 -43
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +79 -43
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/bagging_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/bagging_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/isolation_forest.py +79 -43
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/stacking_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/voting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/voting_regressor.py +79 -43
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fdr.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fpr.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fwe.py +79 -43
- snowflake/ml/modeling/feature_selection/select_k_best.py +79 -43
- snowflake/ml/modeling/feature_selection/select_percentile.py +79 -43
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +79 -43
- snowflake/ml/modeling/feature_selection/variance_threshold.py +79 -43
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +79 -43
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +79 -43
- snowflake/ml/modeling/impute/iterative_imputer.py +79 -43
- snowflake/ml/modeling/impute/knn_imputer.py +79 -43
- snowflake/ml/modeling/impute/missing_indicator.py +79 -43
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +79 -43
- snowflake/ml/modeling/kernel_approximation/nystroem.py +79 -43
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +79 -43
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +79 -43
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +79 -43
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +79 -43
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +79 -43
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ard_regression.py +79 -43
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +79 -43
- snowflake/ml/modeling/linear_model/elastic_net.py +79 -43
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +79 -43
- snowflake/ml/modeling/linear_model/gamma_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/huber_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/lars.py +79 -43
- snowflake/ml/modeling/linear_model/lars_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +79 -43
- snowflake/ml/modeling/linear_model/linear_regression.py +79 -43
- snowflake/ml/modeling/linear_model/logistic_regression.py +79 -43
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +79 -43
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +79 -43
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/perceptron.py +79 -43
- snowflake/ml/modeling/linear_model/poisson_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ransac_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ridge.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_cv.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +79 -43
- snowflake/ml/modeling/manifold/isomap.py +79 -43
- snowflake/ml/modeling/manifold/mds.py +79 -43
- snowflake/ml/modeling/manifold/spectral_embedding.py +79 -43
- snowflake/ml/modeling/manifold/tsne.py +79 -43
- snowflake/ml/modeling/metrics/classification.py +6 -1
- snowflake/ml/modeling/metrics/regression.py +517 -9
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +79 -43
- snowflake/ml/modeling/mixture/gaussian_mixture.py +79 -43
- snowflake/ml/modeling/model_selection/grid_search_cv.py +79 -43
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +79 -43
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +79 -43
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +79 -43
- snowflake/ml/modeling/multiclass/output_code_classifier.py +79 -43
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/complement_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +79 -43
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +79 -43
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +79 -43
- snowflake/ml/modeling/neighbors/kernel_density.py +79 -43
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +79 -43
- snowflake/ml/modeling/neighbors/nearest_centroid.py +79 -43
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +79 -43
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +79 -43
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +79 -43
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +79 -43
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +79 -43
- snowflake/ml/modeling/neural_network/mlp_classifier.py +79 -43
- snowflake/ml/modeling/neural_network/mlp_regressor.py +79 -43
- snowflake/ml/modeling/pipeline/pipeline.py +24 -0
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +79 -43
- snowflake/ml/modeling/semi_supervised/label_propagation.py +79 -43
- snowflake/ml/modeling/semi_supervised/label_spreading.py +79 -43
- snowflake/ml/modeling/svm/linear_svc.py +79 -43
- snowflake/ml/modeling/svm/linear_svr.py +79 -43
- snowflake/ml/modeling/svm/nu_svc.py +79 -43
- snowflake/ml/modeling/svm/nu_svr.py +79 -43
- snowflake/ml/modeling/svm/svc.py +79 -43
- snowflake/ml/modeling/svm/svr.py +79 -43
- snowflake/ml/modeling/tree/decision_tree_classifier.py +79 -43
- snowflake/ml/modeling/tree/decision_tree_regressor.py +79 -43
- snowflake/ml/modeling/tree/extra_tree_classifier.py +79 -43
- snowflake/ml/modeling/tree/extra_tree_regressor.py +79 -43
- snowflake/ml/modeling/xgboost/xgb_classifier.py +79 -43
- snowflake/ml/modeling/xgboost/xgb_regressor.py +79 -43
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +79 -43
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +79 -43
- snowflake/ml/registry/model_registry.py +123 -121
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +50 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +79 -43

(Note: the diff viewer truncates each removed line at the first character that differs from its replacement, so some `-` lines below are incomplete.)

```diff
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -368,7 +370,6 @@ class GradientBoostingRegressor(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
```
```diff
@@ -408,6 +409,15 @@ class GradientBoostingRegressor(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
```
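Together with the removal of `self.id` in `__init__` above, this moves id generation from construction time to call time: every stage, sproc, and table name now gets a fresh id. A minimal runnable sketch of the effect (plain Python, names illustrative):

```python
from uuid import uuid4

def _get_rand_id() -> str:
    # Same body as the method added in the diff.
    return str(uuid4()).replace("-", "_").upper()

# Two fit() calls on the same estimator now use distinct temp-stage names,
# where 1.0.1 reused a single id fixed for the object's lifetime.
stage_a = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=_get_rand_id())
stage_b = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=_get_rand_id())
assert stage_a != stage_b
```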
```diff
@@ -486,7 +496,7 @@ class GradientBoostingRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -499,11 +509,12 @@ class GradientBoostingRegressor(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
```
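The `os.path.join` → `posixpath.join` switch matters because Snowflake stage paths are always `/`-separated, while `os.path.join` uses the client OS separator. A runnable illustration (the stage name is a stand-in; `ntpath` reproduces Windows behavior on any OS):

```python
import ntpath      # os.path as it behaves on Windows
import posixpath

stage = "SNOWML_TRANSFORM_ABC123"
file_name = "model.pkl.zip"

# On a Windows client, os.path.join would produce a backslash-separated
# path, which is not a valid stage location:
print(ntpath.join(stage, file_name))     # SNOWML_TRANSFORM_ABC123\model.pkl.zip

# posixpath.join yields the same '/'-separated path on every OS:
print(posixpath.join(stage, file_name))  # SNOWML_TRANSFORM_ABC123/model.pkl.zip
```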
```diff
@@ -529,6 +540,7 @@ class GradientBoostingRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -537,7 +549,8 @@ class GradientBoostingRegressor(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
```
```diff
@@ -604,15 +617,15 @@ class GradientBoostingRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
```
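Two related changes meet in this hunk: the wrapper is registered with `anonymous=True` (previous hunk), and the call site now invokes the handle returned by the `@sproc` registration directly, passing `session` and `statement_params` as explicit arguments. A condensed sketch of the 1.0.3 pattern; the parameter list is abbreviated and an active Snowpark `session` is assumed, so this is not runnable standalone:

```python
from typing import Dict
from snowflake.snowpark import Session
from snowflake.snowpark.functions import sproc

@sproc(
    replace=True,
    session=session,                    # an existing Snowpark session
    statement_params=statement_params,
    anonymous=True,                     # no named procedure object is created
)
def fit_wrapper_sproc(session: Session, query: str, statement_params: Dict[str, str]) -> str:
    ...  # full parameter list and body as in the hunks above

# Call the returned handle directly; everything the server-side body needs,
# including statement_params, travels as an explicit argument.
sproc_export_file_name = fit_wrapper_sproc(session, query, statement_params)
```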
```diff
@@ -622,7 +635,7 @@ class GradientBoostingRegressor(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -668,7 +681,7 @@ class GradientBoostingRegressor(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -760,7 +773,7 @@ class GradientBoostingRegressor(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
```
```diff
@@ -816,26 +829,37 @@ class GradientBoostingRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
```
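The added block replaces the previous column handling in `_sklearn_inference` (the removed lines are truncated above): every feature the fitted estimator expects is matched against the pandas dataset under its original, unquoted, and quoted spellings, and a `ValueError` is raised if any feature seen at fit time is missing. A simplified, runnable sketch of the idea, with quoting reduced to a trivial double-quote rule for illustration:

```python
import pandas as pd

def select_features(dataset: pd.DataFrame, expected: list) -> pd.DataFrame:
    # Accept each expected feature under its raw or its quoted spelling.
    present = set(dataset.columns)
    missing, selected = [], []
    for f in expected:
        hit = next((n for n in (f, f'"{f}"') if n in present), None)
        if hit is None:
            missing.append(f)
        else:
            selected.append(hit)
    if missing:
        raise ValueError(
            "The feature names should match with those that were passed during fit.\n"
            f"Features seen during fit call but not present in the input: {missing}\n"
        )
    input_df = dataset[selected]
    input_df.columns = expected  # rename to what the estimator expects
    return input_df

df = pd.DataFrame({"A": [1], '"b"': [2]})
print(select_features(df, ["A", "b"]).columns.tolist())  # ['A', 'b']
```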
```diff
@@ -916,11 +940,18 @@ class GradientBoostingRegressor(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = "float"
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
```
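In this regressor the inferred type is pinned to `"float"`, so the signature-based branch can never fire here; it becomes live in the classifier variants (see `HistGradientBoostingClassifier` below), where the initial value is `""`. The control flow in miniature, with no Snowpark dependency:

```python
model_signatures = {"predict": "<signature stand-in>"}

def infer_output_type(pinned: str) -> str:
    expected_type_inferred = pinned
    # The branch only fires when no type was pinned up front:
    if expected_type_inferred == "" and "predict" in model_signatures:
        expected_type_inferred = "derived-from-model-signature"
    return expected_type_inferred

print(infer_output_type("float"))  # 'float' -- regressor: branch is dead
print(infer_output_type(""))       # 'derived-from-model-signature' -- classifier
```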
```diff
@@ -991,10 +1022,10 @@ class GradientBoostingRegressor(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
```
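A small but behavioral fix: an estimator without `classes_` (anything that is not a fitted classifier) previously produced no output columns at all; it now produces a single column named by the prefix. A schematic illustration; the classifier branch is condensed here, while the real per-class logic continues below the hunk:

```python
def get_output_column_names(classes, output_cols_prefix: str):
    if classes is None:
        # 1.0.1 returned [] here, leaving methods such as
        # decision_function() with no output column to write to.
        return [output_cols_prefix]
    # Classifier case, condensed: one column per class (illustrative only).
    return [f"{output_cols_prefix}{c}" for c in classes]

print(get_output_column_names(None, "decision_function_"))  # ['decision_function_']
print(get_output_column_names([0, 1], "predict_proba_"))    # ['predict_proba_0', 'predict_proba_1']
```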
```diff
@@ -1219,7 +1250,7 @@ class GradientBoostingRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1233,8 +1264,9 @@ class GradientBoostingRegressor(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-
-
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
```
```diff
@@ -1260,6 +1292,7 @@ class GradientBoostingRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1267,7 +1300,8 @@ class GradientBoostingRegressor(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
```
```diff
@@ -1317,14 +1351,14 @@ class GradientBoostingRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
```
```diff
@@ -1342,18 +1376,20 @@ class GradientBoostingRegressor(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
```
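The signature change is identical in all three branches: the outputs of each recorded signature now echo the input features only when `_drop_input_cols` is unset. Reduced to plain lists (strings stand in for `FeatureSpec` objects):

```python
inputs = ["SEPAL_LENGTH", "SEPAL_WIDTH"]  # stand-ins for FeatureSpec objects
outputs = ["OUTPUT_PREDICT"]

for drop_input_cols in (False, True):
    signature_outputs = ([] if drop_input_cols else inputs) + outputs
    print(drop_input_cols, signature_outputs)
# False ['SEPAL_LENGTH', 'SEPAL_WIDTH', 'OUTPUT_PREDICT']
# True ['OUTPUT_PREDICT']
```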
snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +79 -43

The same mechanical changes repeat in every generated estimator (compare the uniform +79 -43 deltas in the file list); this classifier is the second representative shown.

```diff
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -338,7 +340,6 @@ class HistGradientBoostingClassifier(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
```
```diff
@@ -377,6 +378,15 @@ class HistGradientBoostingClassifier(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
```
```diff
@@ -455,7 +465,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -468,11 +478,12 @@ class HistGradientBoostingClassifier(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
```
```diff
@@ -498,6 +509,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -506,7 +518,8 @@ class HistGradientBoostingClassifier(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
```
```diff
@@ -573,15 +586,15 @@ class HistGradientBoostingClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -591,7 +604,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
```
```diff
@@ -637,7 +650,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -729,7 +742,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
```
```diff
@@ -785,26 +798,37 @@ class HistGradientBoostingClassifier(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
```
```diff
@@ -885,11 +909,18 @@ class HistGradientBoostingClassifier(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
```
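Unlike the regressor above, the branch is live here: `expected_type_inferred` starts empty, so the SQL type of the predict output is derived from the model signature captured at fit time via `convert_sp_to_sf_type`. A hedged sketch of that mapping, assuming (as the import and the call above suggest) that `convert_sp_to_sf_type` takes a Snowpark `DataType` and returns the corresponding Snowflake SQL type name:

```python
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
from snowflake.snowpark.types import DoubleType, LongType, StringType

# Different label-column types lead to different predict() column types.
for sp_type in (DoubleType(), LongType(), StringType()):
    print(type(sp_type).__name__, "->", convert_sp_to_sf_type(sp_type))
# Expected output, approximately:
#   DoubleType -> DOUBLE
#   LongType -> BIGINT
#   StringType -> STRING
```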
```diff
@@ -960,10 +991,10 @@ class HistGradientBoostingClassifier(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1194,7 +1225,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
```
```diff
@@ -1208,8 +1239,9 @@ class HistGradientBoostingClassifier(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-
-
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1235,6 +1267,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1242,7 +1275,8 @@ class HistGradientBoostingClassifier(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
```
```diff
@@ -1292,14 +1326,14 @@ class HistGradientBoostingClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
```
```diff
@@ -1317,18 +1351,20 @@ class HistGradientBoostingClassifier(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
```