snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +35 -40
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/identifier.py +74 -7
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_core_requirements.py +1 -1
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +25 -28
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +7 -4
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/_base.py +3 -1
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -8
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +27 -21
- snowflake/ml/model/_model_meta.py +33 -19
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +28 -15
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +79 -43
- snowflake/ml/modeling/cluster/affinity_propagation.py +79 -43
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +79 -43
- snowflake/ml/modeling/cluster/birch.py +79 -43
- snowflake/ml/modeling/cluster/bisecting_k_means.py +79 -43
- snowflake/ml/modeling/cluster/dbscan.py +79 -43
- snowflake/ml/modeling/cluster/feature_agglomeration.py +79 -43
- snowflake/ml/modeling/cluster/k_means.py +79 -43
- snowflake/ml/modeling/cluster/mean_shift.py +79 -43
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +79 -43
- snowflake/ml/modeling/cluster/optics.py +79 -43
- snowflake/ml/modeling/cluster/spectral_biclustering.py +79 -43
- snowflake/ml/modeling/cluster/spectral_clustering.py +79 -43
- snowflake/ml/modeling/cluster/spectral_coclustering.py +79 -43
- snowflake/ml/modeling/compose/column_transformer.py +79 -43
- snowflake/ml/modeling/compose/transformed_target_regressor.py +79 -43
- snowflake/ml/modeling/covariance/elliptic_envelope.py +79 -43
- snowflake/ml/modeling/covariance/empirical_covariance.py +79 -43
- snowflake/ml/modeling/covariance/graphical_lasso.py +79 -43
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +79 -43
- snowflake/ml/modeling/covariance/ledoit_wolf.py +79 -43
- snowflake/ml/modeling/covariance/min_cov_det.py +79 -43
- snowflake/ml/modeling/covariance/oas.py +79 -43
- snowflake/ml/modeling/covariance/shrunk_covariance.py +79 -43
- snowflake/ml/modeling/decomposition/dictionary_learning.py +79 -43
- snowflake/ml/modeling/decomposition/factor_analysis.py +79 -43
- snowflake/ml/modeling/decomposition/fast_ica.py +79 -43
- snowflake/ml/modeling/decomposition/incremental_pca.py +79 -43
- snowflake/ml/modeling/decomposition/kernel_pca.py +79 -43
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +79 -43
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +79 -43
- snowflake/ml/modeling/decomposition/pca.py +79 -43
- snowflake/ml/modeling/decomposition/sparse_pca.py +79 -43
- snowflake/ml/modeling/decomposition/truncated_svd.py +79 -43
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +79 -43
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +79 -43
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/bagging_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/bagging_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/isolation_forest.py +79 -43
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/stacking_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/voting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/voting_regressor.py +79 -43
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fdr.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fpr.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fwe.py +79 -43
- snowflake/ml/modeling/feature_selection/select_k_best.py +79 -43
- snowflake/ml/modeling/feature_selection/select_percentile.py +79 -43
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +79 -43
- snowflake/ml/modeling/feature_selection/variance_threshold.py +79 -43
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +79 -43
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +79 -43
- snowflake/ml/modeling/impute/iterative_imputer.py +79 -43
- snowflake/ml/modeling/impute/knn_imputer.py +79 -43
- snowflake/ml/modeling/impute/missing_indicator.py +79 -43
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +79 -43
- snowflake/ml/modeling/kernel_approximation/nystroem.py +79 -43
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +79 -43
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +79 -43
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +79 -43
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +79 -43
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +79 -43
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ard_regression.py +79 -43
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +79 -43
- snowflake/ml/modeling/linear_model/elastic_net.py +79 -43
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +79 -43
- snowflake/ml/modeling/linear_model/gamma_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/huber_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/lars.py +79 -43
- snowflake/ml/modeling/linear_model/lars_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +79 -43
- snowflake/ml/modeling/linear_model/linear_regression.py +79 -43
- snowflake/ml/modeling/linear_model/logistic_regression.py +79 -43
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +79 -43
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +79 -43
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/perceptron.py +79 -43
- snowflake/ml/modeling/linear_model/poisson_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ransac_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ridge.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_cv.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +79 -43
- snowflake/ml/modeling/manifold/isomap.py +79 -43
- snowflake/ml/modeling/manifold/mds.py +79 -43
- snowflake/ml/modeling/manifold/spectral_embedding.py +79 -43
- snowflake/ml/modeling/manifold/tsne.py +79 -43
- snowflake/ml/modeling/metrics/classification.py +6 -1
- snowflake/ml/modeling/metrics/regression.py +517 -9
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +79 -43
- snowflake/ml/modeling/mixture/gaussian_mixture.py +79 -43
- snowflake/ml/modeling/model_selection/grid_search_cv.py +79 -43
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +79 -43
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +79 -43
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +79 -43
- snowflake/ml/modeling/multiclass/output_code_classifier.py +79 -43
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/complement_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +79 -43
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +79 -43
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +79 -43
- snowflake/ml/modeling/neighbors/kernel_density.py +79 -43
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +79 -43
- snowflake/ml/modeling/neighbors/nearest_centroid.py +79 -43
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +79 -43
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +79 -43
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +79 -43
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +79 -43
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +79 -43
- snowflake/ml/modeling/neural_network/mlp_classifier.py +79 -43
- snowflake/ml/modeling/neural_network/mlp_regressor.py +79 -43
- snowflake/ml/modeling/pipeline/pipeline.py +24 -0
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +79 -43
- snowflake/ml/modeling/semi_supervised/label_propagation.py +79 -43
- snowflake/ml/modeling/semi_supervised/label_spreading.py +79 -43
- snowflake/ml/modeling/svm/linear_svc.py +79 -43
- snowflake/ml/modeling/svm/linear_svr.py +79 -43
- snowflake/ml/modeling/svm/nu_svc.py +79 -43
- snowflake/ml/modeling/svm/nu_svr.py +79 -43
- snowflake/ml/modeling/svm/svc.py +79 -43
- snowflake/ml/modeling/svm/svr.py +79 -43
- snowflake/ml/modeling/tree/decision_tree_classifier.py +79 -43
- snowflake/ml/modeling/tree/decision_tree_regressor.py +79 -43
- snowflake/ml/modeling/tree/extra_tree_classifier.py +79 -43
- snowflake/ml/modeling/tree/extra_tree_regressor.py +79 -43
- snowflake/ml/modeling/xgboost/xgb_classifier.py +79 -43
- snowflake/ml/modeling/xgboost/xgb_regressor.py +79 -43
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +79 -43
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +79 -43
- snowflake/ml/registry/model_registry.py +123 -121
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +50 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
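The version bump itself is the one-line change in `snowflake/ml/version.py` above. A quick way to confirm which side of this diff an environment is on, using only the standard library (a minimal sketch; the distribution name is taken from the title of this page):

```python
import importlib.metadata

# Reads the installed distribution's version from its wheel metadata,
# without importing snowflake.ml itself.
version = importlib.metadata.version("snowflake-ml-python")
print(version)  # "1.0.1" before this upgrade, "1.0.3" after
```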
```diff
--- a/snowflake/ml/modeling/linear_model/sgd_regressor.py
+++ b/snowflake/ml/modeling/linear_model/sgd_regressor.py
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -323,7 +325,6 @@ class SGDRegressor(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -361,6 +362,15 @@ class SGDRegressor(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -439,7 +449,7 @@ class SGDRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -452,11 +462,12 @@ class SGDRegressor(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -482,6 +493,7 @@ class SGDRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -490,7 +502,8 @@ class SGDRegressor(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -557,15 +570,15 @@ class SGDRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -575,7 +588,7 @@ class SGDRegressor(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -621,7 +634,7 @@ class SGDRegressor(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -713,7 +726,7 @@ class SGDRegressor(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -769,26 +782,37 @@ class SGDRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        # [17 removed lines: previous input-column selection logic; content not preserved in this rendering]
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -869,11 +893,18 @@ class SGDRegressor(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = "float"
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="float",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -944,10 +975,10 @@ class SGDRegressor(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if the estimator is not a classifier.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1172,7 +1203,7 @@ class SGDRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1186,8 +1217,9 @@ class SGDRegressor(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1213,6 +1245,7 @@ class SGDRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1220,7 +1253,8 @@ class SGDRegressor(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1270,14 +1304,14 @@ class SGDRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
        )
 
         cleanup_temp_files([local_score_file_name])
@@ -1295,18 +1329,20 @@ class SGDRegressor(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, inputs + outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-                                                                   inputs + outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, inputs + outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
```
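A recurring change in this file (and, per the list above, in every `snowflake/ml/modeling/*` wrapper) is swapping `os.path.join` for `posixpath.join` when composing stage paths. A minimal sketch of why that matters: stage paths always use forward slashes, while `os.path.join` follows the client OS convention, so on Windows it would insert backslashes. The stage and file names below are illustrative only.

```python
import os
import posixpath

stage_name = "SNOWML_TRANSFORM_ABC123"  # illustrative stage name
file_name = "model.pkl"                 # illustrative file name

# posixpath.join always uses "/" regardless of the client OS, so the
# result is a well-formed stage path everywhere:
print(posixpath.join(stage_name, file_name))  # SNOWML_TRANSFORM_ABC123/model.pkl

# os.path.join delegates to the host convention; on Windows it yields
# "SNOWML_TRANSFORM_ABC123\\model.pkl", which uses the wrong separator
# for a stage path. On POSIX systems the two calls happen to agree.
print(os.path.join(stage_name, file_name))
```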
```diff
--- a/snowflake/ml/modeling/linear_model/theil_sen_regressor.py
+++ b/snowflake/ml/modeling/linear_model/theil_sen_regressor.py
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -235,7 +237,6 @@ class TheilSenRegressor(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -263,6 +264,15 @@ class TheilSenRegressor(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -341,7 +351,7 @@ class TheilSenRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -354,11 +364,12 @@ class TheilSenRegressor(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -384,6 +395,7 @@ class TheilSenRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -392,7 +404,8 @@ class TheilSenRegressor(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -459,15 +472,15 @@ class TheilSenRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -477,7 +490,7 @@ class TheilSenRegressor(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -523,7 +536,7 @@ class TheilSenRegressor(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -615,7 +628,7 @@ class TheilSenRegressor(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -671,26 +684,37 @@ class TheilSenRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        # [17 removed lines: previous input-column selection logic; content not preserved in this rendering]
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -771,11 +795,18 @@ class TheilSenRegressor(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = "float"
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="float",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -846,10 +877,10 @@ class TheilSenRegressor(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if the estimator is not a classifier.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1074,7 +1105,7 @@ class TheilSenRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1088,8 +1119,9 @@ class TheilSenRegressor(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1115,6 +1147,7 @@ class TheilSenRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1122,7 +1155,8 @@ class TheilSenRegressor(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1172,14 +1206,14 @@ class TheilSenRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1197,18 +1231,20 @@ class TheilSenRegressor(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, inputs + outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-                                                                   inputs + outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, inputs + outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
```
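The other substantive addition, repeated in both files, is the input-column selection logic in `_sklearn_inference`: each feature name the fitted estimator expects is matched against the raw, unquoted, and quoted spellings of the configured input columns before selecting from the pandas DataFrame. A self-contained sketch of the idea follows; the `unquote` and `quote` helpers are hypothetical stand-ins for `identifier.get_unescaped_names` and `identifier.get_escaped_names`.

```python
from typing import List
import pandas as pd

def unquote(name: str) -> str:
    # Hypothetical stand-in: drop surrounding double quotes, if any.
    return name[1:-1] if name.startswith('"') and name.endswith('"') else name

def quote(name: str) -> str:
    # Hypothetical stand-in: wrap a name in double quotes.
    return f'"{name}"'

def select_features(df: pd.DataFrame, input_cols: List[str], required: List[str]) -> pd.DataFrame:
    unquoted = [unquote(c) for c in input_cols]
    quoted = [quote(u) for u in unquoted]
    present = set(df.columns)
    missing, selected = [], []
    for i, feature in enumerate(required):
        candidates = (input_cols[i], unquoted[i], quoted[i]) if i < len(input_cols) else ()
        # A feature is missing if no spelling of the i-th input column names it,
        # or none of the spellings exists in the dataset.
        if feature not in candidates or not any(c in present for c in candidates):
            missing.append(feature)
        else:
            # Prefer the raw spelling, then unquoted, then quoted.
            selected.append(next(c for c in candidates if c in present))
    if missing:
        raise ValueError(
            "The feature names should match with those that were passed during fit.\n"
            f"Features seen during fit call but not present in the input: {missing}"
        )
    out = df[selected].copy()
    out.columns = required  # rename to the names the estimator saw during fit
    return out

# Usage: the second column is stored quoted, but the estimator saw it unquoted.
df = pd.DataFrame({"A": [1, 2], '"b"': [3, 4]})
print(select_features(df, ["A", '"b"'], ["A", "b"]))
```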