snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +35 -40
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/identifier.py +74 -7
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_core_requirements.py +1 -1
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +25 -28
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +7 -4
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/_base.py +3 -1
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -8
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +27 -21
- snowflake/ml/model/_model_meta.py +33 -19
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +28 -15
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +79 -43
- snowflake/ml/modeling/cluster/affinity_propagation.py +79 -43
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +79 -43
- snowflake/ml/modeling/cluster/birch.py +79 -43
- snowflake/ml/modeling/cluster/bisecting_k_means.py +79 -43
- snowflake/ml/modeling/cluster/dbscan.py +79 -43
- snowflake/ml/modeling/cluster/feature_agglomeration.py +79 -43
- snowflake/ml/modeling/cluster/k_means.py +79 -43
- snowflake/ml/modeling/cluster/mean_shift.py +79 -43
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +79 -43
- snowflake/ml/modeling/cluster/optics.py +79 -43
- snowflake/ml/modeling/cluster/spectral_biclustering.py +79 -43
- snowflake/ml/modeling/cluster/spectral_clustering.py +79 -43
- snowflake/ml/modeling/cluster/spectral_coclustering.py +79 -43
- snowflake/ml/modeling/compose/column_transformer.py +79 -43
- snowflake/ml/modeling/compose/transformed_target_regressor.py +79 -43
- snowflake/ml/modeling/covariance/elliptic_envelope.py +79 -43
- snowflake/ml/modeling/covariance/empirical_covariance.py +79 -43
- snowflake/ml/modeling/covariance/graphical_lasso.py +79 -43
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +79 -43
- snowflake/ml/modeling/covariance/ledoit_wolf.py +79 -43
- snowflake/ml/modeling/covariance/min_cov_det.py +79 -43
- snowflake/ml/modeling/covariance/oas.py +79 -43
- snowflake/ml/modeling/covariance/shrunk_covariance.py +79 -43
- snowflake/ml/modeling/decomposition/dictionary_learning.py +79 -43
- snowflake/ml/modeling/decomposition/factor_analysis.py +79 -43
- snowflake/ml/modeling/decomposition/fast_ica.py +79 -43
- snowflake/ml/modeling/decomposition/incremental_pca.py +79 -43
- snowflake/ml/modeling/decomposition/kernel_pca.py +79 -43
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +79 -43
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +79 -43
- snowflake/ml/modeling/decomposition/pca.py +79 -43
- snowflake/ml/modeling/decomposition/sparse_pca.py +79 -43
- snowflake/ml/modeling/decomposition/truncated_svd.py +79 -43
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +79 -43
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +79 -43
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/bagging_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/bagging_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/isolation_forest.py +79 -43
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/stacking_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/voting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/voting_regressor.py +79 -43
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fdr.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fpr.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fwe.py +79 -43
- snowflake/ml/modeling/feature_selection/select_k_best.py +79 -43
- snowflake/ml/modeling/feature_selection/select_percentile.py +79 -43
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +79 -43
- snowflake/ml/modeling/feature_selection/variance_threshold.py +79 -43
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +79 -43
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +79 -43
- snowflake/ml/modeling/impute/iterative_imputer.py +79 -43
- snowflake/ml/modeling/impute/knn_imputer.py +79 -43
- snowflake/ml/modeling/impute/missing_indicator.py +79 -43
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +79 -43
- snowflake/ml/modeling/kernel_approximation/nystroem.py +79 -43
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +79 -43
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +79 -43
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +79 -43
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +79 -43
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +79 -43
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ard_regression.py +79 -43
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +79 -43
- snowflake/ml/modeling/linear_model/elastic_net.py +79 -43
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +79 -43
- snowflake/ml/modeling/linear_model/gamma_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/huber_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/lars.py +79 -43
- snowflake/ml/modeling/linear_model/lars_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +79 -43
- snowflake/ml/modeling/linear_model/linear_regression.py +79 -43
- snowflake/ml/modeling/linear_model/logistic_regression.py +79 -43
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +79 -43
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +79 -43
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/perceptron.py +79 -43
- snowflake/ml/modeling/linear_model/poisson_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ransac_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ridge.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_cv.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +79 -43
- snowflake/ml/modeling/manifold/isomap.py +79 -43
- snowflake/ml/modeling/manifold/mds.py +79 -43
- snowflake/ml/modeling/manifold/spectral_embedding.py +79 -43
- snowflake/ml/modeling/manifold/tsne.py +79 -43
- snowflake/ml/modeling/metrics/classification.py +6 -1
- snowflake/ml/modeling/metrics/regression.py +517 -9
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +79 -43
- snowflake/ml/modeling/mixture/gaussian_mixture.py +79 -43
- snowflake/ml/modeling/model_selection/grid_search_cv.py +79 -43
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +79 -43
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +79 -43
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +79 -43
- snowflake/ml/modeling/multiclass/output_code_classifier.py +79 -43
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/complement_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +79 -43
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +79 -43
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +79 -43
- snowflake/ml/modeling/neighbors/kernel_density.py +79 -43
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +79 -43
- snowflake/ml/modeling/neighbors/nearest_centroid.py +79 -43
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +79 -43
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +79 -43
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +79 -43
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +79 -43
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +79 -43
- snowflake/ml/modeling/neural_network/mlp_classifier.py +79 -43
- snowflake/ml/modeling/neural_network/mlp_regressor.py +79 -43
- snowflake/ml/modeling/pipeline/pipeline.py +24 -0
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +79 -43
- snowflake/ml/modeling/semi_supervised/label_propagation.py +79 -43
- snowflake/ml/modeling/semi_supervised/label_spreading.py +79 -43
- snowflake/ml/modeling/svm/linear_svc.py +79 -43
- snowflake/ml/modeling/svm/linear_svr.py +79 -43
- snowflake/ml/modeling/svm/nu_svc.py +79 -43
- snowflake/ml/modeling/svm/nu_svr.py +79 -43
- snowflake/ml/modeling/svm/svc.py +79 -43
- snowflake/ml/modeling/svm/svr.py +79 -43
- snowflake/ml/modeling/tree/decision_tree_classifier.py +79 -43
- snowflake/ml/modeling/tree/decision_tree_regressor.py +79 -43
- snowflake/ml/modeling/tree/extra_tree_classifier.py +79 -43
- snowflake/ml/modeling/tree/extra_tree_regressor.py +79 -43
- snowflake/ml/modeling/xgboost/xgb_classifier.py +79 -43
- snowflake/ml/modeling/xgboost/xgb_regressor.py +79 -43
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +79 -43
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +79 -43
- snowflake/ml/registry/model_registry.py +123 -121
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +50 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -203,7 +205,6 @@ class VotingRegressor(BaseTransformer):
|
|
203
205
|
sample_weight_col: Optional[str] = None,
|
204
206
|
) -> None:
|
205
207
|
super().__init__()
|
206
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
207
208
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
208
209
|
deps = deps | _gather_dependencies(estimators)
|
209
210
|
self._deps = list(deps)
|
@@ -226,6 +227,15 @@ class VotingRegressor(BaseTransformer):
|
|
226
227
|
self.set_drop_input_cols(drop_input_cols)
|
227
228
|
self.set_sample_weight_col(sample_weight_col)
|
228
229
|
|
230
|
+
def _get_rand_id(self) -> str:
|
231
|
+
"""
|
232
|
+
Generate random id to be used in sproc and stage names.
|
233
|
+
|
234
|
+
Returns:
|
235
|
+
Random id string usable in sproc, table, and stage names.
|
236
|
+
"""
|
237
|
+
return str(uuid4()).replace("-", "_").upper()
|
238
|
+
|
229
239
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
230
240
|
"""
|
231
241
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -304,7 +314,7 @@ class VotingRegressor(BaseTransformer):
|
|
304
314
|
cp.dump(self._sklearn_object, local_transform_file)
|
305
315
|
|
306
316
|
# Create temp stage to run fit.
|
307
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
317
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
308
318
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
309
319
|
SqlResultValidator(
|
310
320
|
session=session,
|
@@ -317,11 +327,12 @@ class VotingRegressor(BaseTransformer):
|
|
317
327
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
318
328
|
).validate()
|
319
329
|
|
320
|
-
|
330
|
+
# Use posixpath to construct stage paths
|
331
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
332
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
321
333
|
local_result_file_name = get_temp_file_path()
|
322
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
323
334
|
|
324
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
335
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
325
336
|
statement_params = telemetry.get_function_usage_statement_params(
|
326
337
|
project=_PROJECT,
|
327
338
|
subproject=_SUBPROJECT,
|
@@ -347,6 +358,7 @@ class VotingRegressor(BaseTransformer):
|
|
347
358
|
replace=True,
|
348
359
|
session=session,
|
349
360
|
statement_params=statement_params,
|
361
|
+
anonymous=True
|
350
362
|
)
|
351
363
|
def fit_wrapper_sproc(
|
352
364
|
session: Session,
|
@@ -355,7 +367,8 @@ class VotingRegressor(BaseTransformer):
|
|
355
367
|
stage_result_file_name: str,
|
356
368
|
input_cols: List[str],
|
357
369
|
label_cols: List[str],
|
358
|
-
sample_weight_col: Optional[str]
|
370
|
+
sample_weight_col: Optional[str],
|
371
|
+
statement_params: Dict[str, str]
|
359
372
|
) -> str:
|
360
373
|
import cloudpickle as cp
|
361
374
|
import numpy as np
|
@@ -422,15 +435,15 @@ class VotingRegressor(BaseTransformer):
|
|
422
435
|
api_calls=[Session.call],
|
423
436
|
custom_tags=dict([("autogen", True)]),
|
424
437
|
)
|
425
|
-
sproc_export_file_name =
|
426
|
-
|
438
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
439
|
+
session,
|
427
440
|
query,
|
428
441
|
stage_transform_file_name,
|
429
442
|
stage_result_file_name,
|
430
443
|
identifier.get_unescaped_names(self.input_cols),
|
431
444
|
identifier.get_unescaped_names(self.label_cols),
|
432
445
|
identifier.get_unescaped_names(self.sample_weight_col),
|
433
|
-
statement_params
|
446
|
+
statement_params,
|
434
447
|
)
|
435
448
|
|
436
449
|
if "|" in sproc_export_file_name:
|
@@ -440,7 +453,7 @@ class VotingRegressor(BaseTransformer):
|
|
440
453
|
print("\n".join(fields[1:]))
|
441
454
|
|
442
455
|
session.file.get(
|
443
|
-
|
456
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
444
457
|
local_result_file_name,
|
445
458
|
statement_params=statement_params
|
446
459
|
)
|
@@ -486,7 +499,7 @@ class VotingRegressor(BaseTransformer):
|
|
486
499
|
|
487
500
|
# Register vectorized UDF for batch inference
|
488
501
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
489
|
-
safe_id=self.
|
502
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
490
503
|
|
491
504
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
492
505
|
# will try to pickle all of self which fails.
|
@@ -578,7 +591,7 @@ class VotingRegressor(BaseTransformer):
|
|
578
591
|
return transformed_pandas_df.to_dict("records")
|
579
592
|
|
580
593
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
581
|
-
safe_id=self.
|
594
|
+
safe_id=self._get_rand_id()
|
582
595
|
)
|
583
596
|
|
584
597
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -634,26 +647,37 @@ class VotingRegressor(BaseTransformer):
|
|
634
647
|
# input cols need to match unquoted / quoted
|
635
648
|
input_cols = self.input_cols
|
636
649
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
650
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
637
651
|
|
638
652
|
estimator = self._sklearn_object
|
639
653
|
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
654
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
655
|
+
missing_features = []
|
656
|
+
features_in_dataset = set(dataset.columns)
|
657
|
+
columns_to_select = []
|
658
|
+
for i, f in enumerate(features_required_by_estimator):
|
659
|
+
if (
|
660
|
+
i >= len(input_cols)
|
661
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
662
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
663
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
664
|
+
):
|
665
|
+
missing_features.append(f)
|
666
|
+
elif input_cols[i] in features_in_dataset:
|
667
|
+
columns_to_select.append(input_cols[i])
|
668
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
669
|
+
columns_to_select.append(unquoted_input_cols[i])
|
670
|
+
else:
|
671
|
+
columns_to_select.append(quoted_input_cols[i])
|
672
|
+
|
673
|
+
if len(missing_features) > 0:
|
674
|
+
raise ValueError(
|
675
|
+
"The feature names should match with those that were passed during fit.\n"
|
676
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
677
|
+
f"Features in the input dataframe : {input_cols}\n"
|
678
|
+
)
|
679
|
+
input_df = dataset[columns_to_select]
|
680
|
+
input_df.columns = features_required_by_estimator
|
657
681
|
|
658
682
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
659
683
|
input_df
|
@@ -734,11 +758,18 @@ class VotingRegressor(BaseTransformer):
|
|
734
758
|
Transformed dataset.
|
735
759
|
"""
|
736
760
|
if isinstance(dataset, DataFrame):
|
761
|
+
expected_type_inferred = "float"
|
762
|
+
# when it is classifier, infer the datatype from label columns
|
763
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
764
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
765
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
766
|
+
)
|
767
|
+
|
737
768
|
output_df = self._batch_inference(
|
738
769
|
dataset=dataset,
|
739
770
|
inference_method="predict",
|
740
771
|
expected_output_cols_list=self.output_cols,
|
741
|
-
expected_output_cols_type=
|
772
|
+
expected_output_cols_type=expected_type_inferred,
|
742
773
|
)
|
743
774
|
elif isinstance(dataset, pd.DataFrame):
|
744
775
|
output_df = self._sklearn_inference(
|
@@ -811,10 +842,10 @@ class VotingRegressor(BaseTransformer):
|
|
811
842
|
|
812
843
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
813
844
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
814
|
-
Returns
|
845
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
815
846
|
"""
|
816
847
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
817
|
-
return []
|
848
|
+
return [output_cols_prefix]
|
818
849
|
|
819
850
|
classes = self._sklearn_object.classes_
|
820
851
|
if isinstance(classes, numpy.ndarray):
|
@@ -1039,7 +1070,7 @@ class VotingRegressor(BaseTransformer):
|
|
1039
1070
|
cp.dump(self._sklearn_object, local_score_file)
|
1040
1071
|
|
1041
1072
|
# Create temp stage to run score.
|
1042
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1073
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1043
1074
|
session = dataset._session
|
1044
1075
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1045
1076
|
SqlResultValidator(
|
@@ -1053,8 +1084,9 @@ class VotingRegressor(BaseTransformer):
|
|
1053
1084
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1054
1085
|
).validate()
|
1055
1086
|
|
1056
|
-
|
1057
|
-
|
1087
|
+
# Use posixpath to construct stage paths
|
1088
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1089
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1058
1090
|
statement_params = telemetry.get_function_usage_statement_params(
|
1059
1091
|
project=_PROJECT,
|
1060
1092
|
subproject=_SUBPROJECT,
|
@@ -1080,6 +1112,7 @@ class VotingRegressor(BaseTransformer):
|
|
1080
1112
|
replace=True,
|
1081
1113
|
session=session,
|
1082
1114
|
statement_params=statement_params,
|
1115
|
+
anonymous=True
|
1083
1116
|
)
|
1084
1117
|
def score_wrapper_sproc(
|
1085
1118
|
session: Session,
|
@@ -1087,7 +1120,8 @@ class VotingRegressor(BaseTransformer):
|
|
1087
1120
|
stage_score_file_name: str,
|
1088
1121
|
input_cols: List[str],
|
1089
1122
|
label_cols: List[str],
|
1090
|
-
sample_weight_col: Optional[str]
|
1123
|
+
sample_weight_col: Optional[str],
|
1124
|
+
statement_params: Dict[str, str]
|
1091
1125
|
) -> float:
|
1092
1126
|
import cloudpickle as cp
|
1093
1127
|
import numpy as np
|
@@ -1137,14 +1171,14 @@ class VotingRegressor(BaseTransformer):
|
|
1137
1171
|
api_calls=[Session.call],
|
1138
1172
|
custom_tags=dict([("autogen", True)]),
|
1139
1173
|
)
|
1140
|
-
score =
|
1141
|
-
|
1174
|
+
score = score_wrapper_sproc(
|
1175
|
+
session,
|
1142
1176
|
query,
|
1143
1177
|
stage_score_file_name,
|
1144
1178
|
identifier.get_unescaped_names(self.input_cols),
|
1145
1179
|
identifier.get_unescaped_names(self.label_cols),
|
1146
1180
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1147
|
-
statement_params
|
1181
|
+
statement_params,
|
1148
1182
|
)
|
1149
1183
|
|
1150
1184
|
cleanup_temp_files([local_score_file_name])
|
@@ -1162,18 +1196,20 @@ class VotingRegressor(BaseTransformer):
|
|
1162
1196
|
if self._sklearn_object._estimator_type == 'classifier':
|
1163
1197
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1164
1198
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1165
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1199
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1200
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1166
1201
|
# For regressor, the type of predict is float64
|
1167
1202
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1168
1203
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1169
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1170
|
-
|
1204
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1205
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1171
1206
|
for prob_func in PROB_FUNCTIONS:
|
1172
1207
|
if hasattr(self, prob_func):
|
1173
1208
|
output_cols_prefix: str = f"{prob_func}_"
|
1174
1209
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1175
1210
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1176
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1211
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1212
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1177
1213
|
|
1178
1214
|
@property
|
1179
1215
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -28,6 +29,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
28
29
|
from snowflake.snowpark import DataFrame, Session
|
29
30
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
30
31
|
from snowflake.snowpark.types import PandasSeries
|
32
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
31
33
|
|
32
34
|
from snowflake.ml.model.model_signature import (
|
33
35
|
DataType,
|
@@ -194,7 +196,6 @@ class GenericUnivariateSelect(BaseTransformer):
|
|
194
196
|
sample_weight_col: Optional[str] = None,
|
195
197
|
) -> None:
|
196
198
|
super().__init__()
|
197
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
198
199
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
199
200
|
|
200
201
|
self._deps = list(deps)
|
@@ -216,6 +217,15 @@ class GenericUnivariateSelect(BaseTransformer):
|
|
216
217
|
self.set_drop_input_cols(drop_input_cols)
|
217
218
|
self.set_sample_weight_col(sample_weight_col)
|
218
219
|
|
220
|
+
def _get_rand_id(self) -> str:
|
221
|
+
"""
|
222
|
+
Generate random id to be used in sproc and stage names.
|
223
|
+
|
224
|
+
Returns:
|
225
|
+
Random id string usable in sproc, table, and stage names.
|
226
|
+
"""
|
227
|
+
return str(uuid4()).replace("-", "_").upper()
|
228
|
+
|
219
229
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
220
230
|
"""
|
221
231
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -294,7 +304,7 @@ class GenericUnivariateSelect(BaseTransformer):
|
|
294
304
|
cp.dump(self._sklearn_object, local_transform_file)
|
295
305
|
|
296
306
|
# Create temp stage to run fit.
|
297
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
307
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
298
308
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
299
309
|
SqlResultValidator(
|
300
310
|
session=session,
|
@@ -307,11 +317,12 @@ class GenericUnivariateSelect(BaseTransformer):
|
|
307
317
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
308
318
|
).validate()
|
309
319
|
|
310
|
-
|
320
|
+
# Use posixpath to construct stage paths
|
321
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
322
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
311
323
|
local_result_file_name = get_temp_file_path()
|
312
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
313
324
|
|
314
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
325
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
315
326
|
statement_params = telemetry.get_function_usage_statement_params(
|
316
327
|
project=_PROJECT,
|
317
328
|
subproject=_SUBPROJECT,
|
@@ -337,6 +348,7 @@ class GenericUnivariateSelect(BaseTransformer):
|
|
337
348
|
replace=True,
|
338
349
|
session=session,
|
339
350
|
statement_params=statement_params,
|
351
|
+
anonymous=True
|
340
352
|
)
|
341
353
|
def fit_wrapper_sproc(
|
342
354
|
session: Session,
|
@@ -345,7 +357,8 @@ class GenericUnivariateSelect(BaseTransformer):
|
|
345
357
|
stage_result_file_name: str,
|
346
358
|
input_cols: List[str],
|
347
359
|
label_cols: List[str],
|
348
|
-
sample_weight_col: Optional[str]
|
360
|
+
sample_weight_col: Optional[str],
|
361
|
+
statement_params: Dict[str, str]
|
349
362
|
) -> str:
|
350
363
|
import cloudpickle as cp
|
351
364
|
import numpy as np
|
@@ -412,15 +425,15 @@ class GenericUnivariateSelect(BaseTransformer):
|
|
412
425
|
api_calls=[Session.call],
|
413
426
|
custom_tags=dict([("autogen", True)]),
|
414
427
|
)
|
415
|
-
sproc_export_file_name =
|
416
|
-
|
428
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
429
|
+
session,
|
417
430
|
query,
|
418
431
|
stage_transform_file_name,
|
419
432
|
stage_result_file_name,
|
420
433
|
identifier.get_unescaped_names(self.input_cols),
|
421
434
|
identifier.get_unescaped_names(self.label_cols),
|
422
435
|
identifier.get_unescaped_names(self.sample_weight_col),
|
423
|
-
statement_params
|
436
|
+
statement_params,
|
424
437
|
)
|
425
438
|
|
426
439
|
if "|" in sproc_export_file_name:
|
@@ -430,7 +443,7 @@ class GenericUnivariateSelect(BaseTransformer):
|
|
430
443
|
print("\n".join(fields[1:]))
|
431
444
|
|
432
445
|
session.file.get(
|
433
|
-
|
446
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
434
447
|
local_result_file_name,
|
435
448
|
statement_params=statement_params
|
436
449
|
)
|
@@ -476,7 +489,7 @@ class GenericUnivariateSelect(BaseTransformer):
|
|
476
489
|
|
477
490
|
# Register vectorized UDF for batch inference
|
478
491
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
479
|
-
safe_id=self.
|
492
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
480
493
|
|
481
494
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
482
495
|
# will try to pickle all of self which fails.
|
@@ -568,7 +581,7 @@ class GenericUnivariateSelect(BaseTransformer):
|
|
568
581
|
return transformed_pandas_df.to_dict("records")
|
569
582
|
|
570
583
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
571
|
-
safe_id=self.
|
584
|
+
safe_id=self._get_rand_id()
|
572
585
|
)
|
573
586
|
|
574
587
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -624,26 +637,37 @@ class GenericUnivariateSelect(BaseTransformer):
|
|
624
637
|
# input cols need to match unquoted / quoted
|
625
638
|
input_cols = self.input_cols
|
626
639
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
640
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
627
641
|
|
628
642
|
estimator = self._sklearn_object
|
629
643
|
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
644
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
645
|
+
missing_features = []
|
646
|
+
features_in_dataset = set(dataset.columns)
|
647
|
+
columns_to_select = []
|
648
|
+
for i, f in enumerate(features_required_by_estimator):
|
649
|
+
if (
|
650
|
+
i >= len(input_cols)
|
651
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
652
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
653
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
654
|
+
):
|
655
|
+
missing_features.append(f)
|
656
|
+
elif input_cols[i] in features_in_dataset:
|
657
|
+
columns_to_select.append(input_cols[i])
|
658
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
659
|
+
columns_to_select.append(unquoted_input_cols[i])
|
660
|
+
else:
|
661
|
+
columns_to_select.append(quoted_input_cols[i])
|
662
|
+
|
663
|
+
if len(missing_features) > 0:
|
664
|
+
raise ValueError(
|
665
|
+
"The feature names should match with those that were passed during fit.\n"
|
666
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
667
|
+
f"Features in the input dataframe : {input_cols}\n"
|
668
|
+
)
|
669
|
+
input_df = dataset[columns_to_select]
|
670
|
+
input_df.columns = features_required_by_estimator
|
647
671
|
|
648
672
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
649
673
|
input_df
|
@@ -722,11 +746,18 @@ class GenericUnivariateSelect(BaseTransformer):
|
|
722
746
|
Transformed dataset.
|
723
747
|
"""
|
724
748
|
if isinstance(dataset, DataFrame):
|
749
|
+
expected_type_inferred = ""
|
750
|
+
# when it is classifier, infer the datatype from label columns
|
751
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
752
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
753
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
754
|
+
)
|
755
|
+
|
725
756
|
output_df = self._batch_inference(
|
726
757
|
dataset=dataset,
|
727
758
|
inference_method="predict",
|
728
759
|
expected_output_cols_list=self.output_cols,
|
729
|
-
expected_output_cols_type=
|
760
|
+
expected_output_cols_type=expected_type_inferred,
|
730
761
|
)
|
731
762
|
elif isinstance(dataset, pd.DataFrame):
|
732
763
|
output_df = self._sklearn_inference(
|
@@ -799,10 +830,10 @@ class GenericUnivariateSelect(BaseTransformer):
|
|
799
830
|
|
800
831
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
801
832
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
802
|
-
Returns
|
833
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
803
834
|
"""
|
804
835
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
805
|
-
return []
|
836
|
+
return [output_cols_prefix]
|
806
837
|
|
807
838
|
classes = self._sklearn_object.classes_
|
808
839
|
if isinstance(classes, numpy.ndarray):
|
@@ -1027,7 +1058,7 @@ class GenericUnivariateSelect(BaseTransformer):
|
|
1027
1058
|
cp.dump(self._sklearn_object, local_score_file)
|
1028
1059
|
|
1029
1060
|
# Create temp stage to run score.
|
1030
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1061
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1031
1062
|
session = dataset._session
|
1032
1063
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1033
1064
|
SqlResultValidator(
|
@@ -1041,8 +1072,9 @@ class GenericUnivariateSelect(BaseTransformer):
|
|
1041
1072
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1042
1073
|
).validate()
|
1043
1074
|
|
1044
|
-
|
1045
|
-
|
1075
|
+
# Use posixpath to construct stage paths
|
1076
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1077
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1046
1078
|
statement_params = telemetry.get_function_usage_statement_params(
|
1047
1079
|
project=_PROJECT,
|
1048
1080
|
subproject=_SUBPROJECT,
|
@@ -1068,6 +1100,7 @@ class GenericUnivariateSelect(BaseTransformer):
|
|
1068
1100
|
replace=True,
|
1069
1101
|
session=session,
|
1070
1102
|
statement_params=statement_params,
|
1103
|
+
anonymous=True
|
1071
1104
|
)
|
1072
1105
|
def score_wrapper_sproc(
|
1073
1106
|
session: Session,
|
@@ -1075,7 +1108,8 @@ class GenericUnivariateSelect(BaseTransformer):
|
|
1075
1108
|
stage_score_file_name: str,
|
1076
1109
|
input_cols: List[str],
|
1077
1110
|
label_cols: List[str],
|
1078
|
-
sample_weight_col: Optional[str]
|
1111
|
+
sample_weight_col: Optional[str],
|
1112
|
+
statement_params: Dict[str, str]
|
1079
1113
|
) -> float:
|
1080
1114
|
import cloudpickle as cp
|
1081
1115
|
import numpy as np
|
@@ -1125,14 +1159,14 @@ class GenericUnivariateSelect(BaseTransformer):
|
|
1125
1159
|
api_calls=[Session.call],
|
1126
1160
|
custom_tags=dict([("autogen", True)]),
|
1127
1161
|
)
|
1128
|
-
score =
|
1129
|
-
|
1162
|
+
score = score_wrapper_sproc(
|
1163
|
+
session,
|
1130
1164
|
query,
|
1131
1165
|
stage_score_file_name,
|
1132
1166
|
identifier.get_unescaped_names(self.input_cols),
|
1133
1167
|
identifier.get_unescaped_names(self.label_cols),
|
1134
1168
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1135
|
-
statement_params
|
1169
|
+
statement_params,
|
1136
1170
|
)
|
1137
1171
|
|
1138
1172
|
cleanup_temp_files([local_score_file_name])
|
@@ -1150,18 +1184,20 @@ class GenericUnivariateSelect(BaseTransformer):
|
|
1150
1184
|
if self._sklearn_object._estimator_type == 'classifier':
|
1151
1185
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1152
1186
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1153
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1187
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1188
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1154
1189
|
# For regressor, the type of predict is float64
|
1155
1190
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1156
1191
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1157
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1158
|
-
|
1192
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1193
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1159
1194
|
for prob_func in PROB_FUNCTIONS:
|
1160
1195
|
if hasattr(self, prob_func):
|
1161
1196
|
output_cols_prefix: str = f"{prob_func}_"
|
1162
1197
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1163
1198
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1164
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1199
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1200
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1165
1201
|
|
1166
1202
|
@property
|
1167
1203
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|