snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
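Such a comparison can be reproduced locally. A minimal sketch, assuming `pip` is on `PATH` and both versions are still published to PyPI; the file chosen to diff, `snowflake/ml/version.py`, is one of the changed files listed below:

```python
# Minimal reproduction sketch: download both wheels, unpack them, and diff one
# file. pip availability, PyPI availability, and the temp-dir layout are
# assumptions here, not something this page guarantees.
import difflib
import subprocess
import tempfile
import zipfile
from pathlib import Path

def fetch_and_extract(version: str, dest: Path) -> Path:
    dest.mkdir(parents=True, exist_ok=True)
    # Download just the wheel for this version, without dependencies.
    subprocess.run(
        ["pip", "download", f"snowflake-ml-python=={version}", "--no-deps", "-d", str(dest)],
        check=True,
    )
    wheel = next(dest.glob("snowflake_ml_python-*.whl"))
    extracted = dest / "unpacked"
    with zipfile.ZipFile(wheel) as zf:
        zf.extractall(extracted)
    return extracted

with tempfile.TemporaryDirectory() as tmp:
    old = fetch_and_extract("1.0.1", Path(tmp) / "old")
    new = fetch_and_extract("1.0.3", Path(tmp) / "new")
    rel = "snowflake/ml/version.py"  # +1 -1 in the listing below
    old_lines = (old / rel).read_text().splitlines(keepends=True)
    new_lines = (new / rel).read_text().splitlines(keepends=True)
    print("".join(difflib.unified_diff(old_lines, new_lines, "1.0.1/" + rel, "1.0.3/" + rel)))
```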
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +35 -40
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/identifier.py +74 -7
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_core_requirements.py +1 -1
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +25 -28
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +7 -4
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/_base.py +3 -1
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -8
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +27 -21
- snowflake/ml/model/_model_meta.py +33 -19
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +28 -15
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +79 -43
- snowflake/ml/modeling/cluster/affinity_propagation.py +79 -43
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +79 -43
- snowflake/ml/modeling/cluster/birch.py +79 -43
- snowflake/ml/modeling/cluster/bisecting_k_means.py +79 -43
- snowflake/ml/modeling/cluster/dbscan.py +79 -43
- snowflake/ml/modeling/cluster/feature_agglomeration.py +79 -43
- snowflake/ml/modeling/cluster/k_means.py +79 -43
- snowflake/ml/modeling/cluster/mean_shift.py +79 -43
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +79 -43
- snowflake/ml/modeling/cluster/optics.py +79 -43
- snowflake/ml/modeling/cluster/spectral_biclustering.py +79 -43
- snowflake/ml/modeling/cluster/spectral_clustering.py +79 -43
- snowflake/ml/modeling/cluster/spectral_coclustering.py +79 -43
- snowflake/ml/modeling/compose/column_transformer.py +79 -43
- snowflake/ml/modeling/compose/transformed_target_regressor.py +79 -43
- snowflake/ml/modeling/covariance/elliptic_envelope.py +79 -43
- snowflake/ml/modeling/covariance/empirical_covariance.py +79 -43
- snowflake/ml/modeling/covariance/graphical_lasso.py +79 -43
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +79 -43
- snowflake/ml/modeling/covariance/ledoit_wolf.py +79 -43
- snowflake/ml/modeling/covariance/min_cov_det.py +79 -43
- snowflake/ml/modeling/covariance/oas.py +79 -43
- snowflake/ml/modeling/covariance/shrunk_covariance.py +79 -43
- snowflake/ml/modeling/decomposition/dictionary_learning.py +79 -43
- snowflake/ml/modeling/decomposition/factor_analysis.py +79 -43
- snowflake/ml/modeling/decomposition/fast_ica.py +79 -43
- snowflake/ml/modeling/decomposition/incremental_pca.py +79 -43
- snowflake/ml/modeling/decomposition/kernel_pca.py +79 -43
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +79 -43
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +79 -43
- snowflake/ml/modeling/decomposition/pca.py +79 -43
- snowflake/ml/modeling/decomposition/sparse_pca.py +79 -43
- snowflake/ml/modeling/decomposition/truncated_svd.py +79 -43
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +79 -43
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +79 -43
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/bagging_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/bagging_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/isolation_forest.py +79 -43
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/stacking_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/voting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/voting_regressor.py +79 -43
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fdr.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fpr.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fwe.py +79 -43
- snowflake/ml/modeling/feature_selection/select_k_best.py +79 -43
- snowflake/ml/modeling/feature_selection/select_percentile.py +79 -43
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +79 -43
- snowflake/ml/modeling/feature_selection/variance_threshold.py +79 -43
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +79 -43
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +79 -43
- snowflake/ml/modeling/impute/iterative_imputer.py +79 -43
- snowflake/ml/modeling/impute/knn_imputer.py +79 -43
- snowflake/ml/modeling/impute/missing_indicator.py +79 -43
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +79 -43
- snowflake/ml/modeling/kernel_approximation/nystroem.py +79 -43
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +79 -43
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +79 -43
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +79 -43
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +79 -43
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +79 -43
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ard_regression.py +79 -43
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +79 -43
- snowflake/ml/modeling/linear_model/elastic_net.py +79 -43
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +79 -43
- snowflake/ml/modeling/linear_model/gamma_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/huber_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/lars.py +79 -43
- snowflake/ml/modeling/linear_model/lars_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +79 -43
- snowflake/ml/modeling/linear_model/linear_regression.py +79 -43
- snowflake/ml/modeling/linear_model/logistic_regression.py +79 -43
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +79 -43
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +79 -43
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/perceptron.py +79 -43
- snowflake/ml/modeling/linear_model/poisson_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ransac_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ridge.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_cv.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +79 -43
- snowflake/ml/modeling/manifold/isomap.py +79 -43
- snowflake/ml/modeling/manifold/mds.py +79 -43
- snowflake/ml/modeling/manifold/spectral_embedding.py +79 -43
- snowflake/ml/modeling/manifold/tsne.py +79 -43
- snowflake/ml/modeling/metrics/classification.py +6 -1
- snowflake/ml/modeling/metrics/regression.py +517 -9
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +79 -43
- snowflake/ml/modeling/mixture/gaussian_mixture.py +79 -43
- snowflake/ml/modeling/model_selection/grid_search_cv.py +79 -43
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +79 -43
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +79 -43
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +79 -43
- snowflake/ml/modeling/multiclass/output_code_classifier.py +79 -43
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/complement_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +79 -43
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +79 -43
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +79 -43
- snowflake/ml/modeling/neighbors/kernel_density.py +79 -43
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +79 -43
- snowflake/ml/modeling/neighbors/nearest_centroid.py +79 -43
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +79 -43
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +79 -43
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +79 -43
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +79 -43
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +79 -43
- snowflake/ml/modeling/neural_network/mlp_classifier.py +79 -43
- snowflake/ml/modeling/neural_network/mlp_regressor.py +79 -43
- snowflake/ml/modeling/pipeline/pipeline.py +24 -0
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +79 -43
- snowflake/ml/modeling/semi_supervised/label_propagation.py +79 -43
- snowflake/ml/modeling/semi_supervised/label_spreading.py +79 -43
- snowflake/ml/modeling/svm/linear_svc.py +79 -43
- snowflake/ml/modeling/svm/linear_svr.py +79 -43
- snowflake/ml/modeling/svm/nu_svc.py +79 -43
- snowflake/ml/modeling/svm/nu_svr.py +79 -43
- snowflake/ml/modeling/svm/svc.py +79 -43
- snowflake/ml/modeling/svm/svr.py +79 -43
- snowflake/ml/modeling/tree/decision_tree_classifier.py +79 -43
- snowflake/ml/modeling/tree/decision_tree_regressor.py +79 -43
- snowflake/ml/modeling/tree/extra_tree_classifier.py +79 -43
- snowflake/ml/modeling/tree/extra_tree_regressor.py +79 -43
- snowflake/ml/modeling/xgboost/xgb_classifier.py +79 -43
- snowflake/ml/modeling/xgboost/xgb_regressor.py +79 -43
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +79 -43
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +79 -43
- snowflake/ml/registry/model_registry.py +123 -121
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +50 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py (+79 -43)
(Several removed-side lines were truncated by the diff viewer; lines lost entirely are shown as `…`.)

```diff
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -328,7 +330,6 @@ class HistGradientBoostingRegressor(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -367,6 +368,15 @@ class HistGradientBoostingRegressor(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -445,7 +455,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -458,11 +468,12 @@ class HistGradientBoostingRegressor(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        …
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -488,6 +499,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -496,7 +508,8 @@ class HistGradientBoostingRegressor(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -563,15 +576,15 @@ class HistGradientBoostingRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-        …
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -581,7 +594,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            …
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
            local_result_file_name,
             statement_params=statement_params
         )
@@ -627,7 +640,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -719,7 +732,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -775,26 +788,37 @@ class HistGradientBoostingRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        …
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -875,11 +899,18 @@ class HistGradientBoostingRegressor(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = "float"
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -950,10 +981,10 @@ class HistGradientBoostingRegressor(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1178,7 +1209,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1192,8 +1223,9 @@ class HistGradientBoostingRegressor(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        …
-        …
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1219,6 +1251,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1226,7 +1259,8 @@ class HistGradientBoostingRegressor(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1276,14 +1310,14 @@ class HistGradientBoostingRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-        …
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1301,18 +1335,20 @@ class HistGradientBoostingRegressor(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-            …
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                        ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
```
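Every hunk above follows one autogenerated pattern: the per-instance `self.id` is replaced by a fresh `_get_rand_id()` at each sproc/stage registration, the fit and score sprocs are registered with `anonymous=True`, and stage paths are built with `posixpath.join` rather than `os.path.join`. The identical `+79 -43` counts across the modeling files listed earlier suggest the same update landed in each of them. The path change matters on Windows clients, where `os.path.join` emits backslashes that are not valid separators in a Snowflake stage path; a small illustration (the stage and file names are hypothetical):

```python
# posixpath always joins with "/", while os.path follows the client OS.
# ntpath is the implementation behind os.path on Windows.
import ntpath
import posixpath

stage_name = "SNOWML_TRANSFORM_ABC123"  # hypothetical temporary stage
file_name = "model.pkl"                 # hypothetical serialized estimator

print(ntpath.join(stage_name, file_name))     # SNOWML_TRANSFORM_ABC123\model.pkl  (broken stage path)
print(posixpath.join(stage_name, file_name))  # SNOWML_TRANSFORM_ABC123/model.pkl  (valid on any client)
```

The second estimator shown in this diff, IsolationForest, receives the identical change: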
snowflake/ml/modeling/ensemble/isolation_forest.py (+79 -43)

```diff
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -244,7 +246,6 @@ class IsolationForest(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -272,6 +273,15 @@ class IsolationForest(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -350,7 +360,7 @@ class IsolationForest(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -363,11 +373,12 @@ class IsolationForest(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        …
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -393,6 +404,7 @@ class IsolationForest(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -401,7 +413,8 @@ class IsolationForest(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -468,15 +481,15 @@ class IsolationForest(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-        …
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -486,7 +499,7 @@ class IsolationForest(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            …
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -532,7 +545,7 @@ class IsolationForest(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -624,7 +637,7 @@ class IsolationForest(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -680,26 +693,37 @@ class IsolationForest(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        …
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -780,11 +804,18 @@ class IsolationForest(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -855,10 +886,10 @@ class IsolationForest(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1085,7 +1116,7 @@ class IsolationForest(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1099,8 +1130,9 @@ class IsolationForest(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        …
-        …
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1126,6 +1158,7 @@ class IsolationForest(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1133,7 +1166,8 @@ class IsolationForest(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1183,14 +1217,14 @@ class IsolationForest(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-        …
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
        )
 
         cleanup_temp_files([local_score_file_name])
@@ -1208,18 +1242,20 @@ class IsolationForest(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-            …
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                        ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
```
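Beyond the mechanical renames, one behavioral fix stands out in both files: `_get_output_column_names` used to return an empty list when the fitted estimator has no `classes_` attribute, so prefix-named outputs for methods such as `decision_function` on an outlier detector like IsolationForest came up empty; it now falls back to the prefix itself. A minimal standalone sketch of the new behavior (the stand-in classes and the classifier branch are illustrative, not the library's exact code):

```python
from typing import List

class OutlierDetector:
    """Stand-in for a fitted sklearn object with no classes_ attribute."""

class Classifier:
    """Stand-in for a fitted classifier."""
    classes_ = [0, 1]

def get_output_column_names(sklearn_object, output_cols_prefix: str) -> List[str]:
    if getattr(sklearn_object, "classes_", None) is None:
        # New behavior: one column named by the prefix (previously: return []).
        return [output_cols_prefix]
    # Classifiers get one column per class, e.g. predict_proba_0, predict_proba_1.
    return [f"{output_cols_prefix}{c}" for c in sklearn_object.classes_]

print(get_output_column_names(OutlierDetector(), "decision_function_"))  # ['decision_function_']
print(get_output_column_names(Classifier(), "predict_proba_"))           # ['predict_proba_0', 'predict_proba_1']
```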