snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +35 -40
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/identifier.py +74 -7
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_core_requirements.py +1 -1
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +25 -28
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +7 -4
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/_base.py +3 -1
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -8
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +27 -21
- snowflake/ml/model/_model_meta.py +33 -19
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +28 -15
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +79 -43
- snowflake/ml/modeling/cluster/affinity_propagation.py +79 -43
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +79 -43
- snowflake/ml/modeling/cluster/birch.py +79 -43
- snowflake/ml/modeling/cluster/bisecting_k_means.py +79 -43
- snowflake/ml/modeling/cluster/dbscan.py +79 -43
- snowflake/ml/modeling/cluster/feature_agglomeration.py +79 -43
- snowflake/ml/modeling/cluster/k_means.py +79 -43
- snowflake/ml/modeling/cluster/mean_shift.py +79 -43
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +79 -43
- snowflake/ml/modeling/cluster/optics.py +79 -43
- snowflake/ml/modeling/cluster/spectral_biclustering.py +79 -43
- snowflake/ml/modeling/cluster/spectral_clustering.py +79 -43
- snowflake/ml/modeling/cluster/spectral_coclustering.py +79 -43
- snowflake/ml/modeling/compose/column_transformer.py +79 -43
- snowflake/ml/modeling/compose/transformed_target_regressor.py +79 -43
- snowflake/ml/modeling/covariance/elliptic_envelope.py +79 -43
- snowflake/ml/modeling/covariance/empirical_covariance.py +79 -43
- snowflake/ml/modeling/covariance/graphical_lasso.py +79 -43
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +79 -43
- snowflake/ml/modeling/covariance/ledoit_wolf.py +79 -43
- snowflake/ml/modeling/covariance/min_cov_det.py +79 -43
- snowflake/ml/modeling/covariance/oas.py +79 -43
- snowflake/ml/modeling/covariance/shrunk_covariance.py +79 -43
- snowflake/ml/modeling/decomposition/dictionary_learning.py +79 -43
- snowflake/ml/modeling/decomposition/factor_analysis.py +79 -43
- snowflake/ml/modeling/decomposition/fast_ica.py +79 -43
- snowflake/ml/modeling/decomposition/incremental_pca.py +79 -43
- snowflake/ml/modeling/decomposition/kernel_pca.py +79 -43
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +79 -43
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +79 -43
- snowflake/ml/modeling/decomposition/pca.py +79 -43
- snowflake/ml/modeling/decomposition/sparse_pca.py +79 -43
- snowflake/ml/modeling/decomposition/truncated_svd.py +79 -43
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +79 -43
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +79 -43
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/bagging_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/bagging_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/isolation_forest.py +79 -43
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/stacking_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/voting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/voting_regressor.py +79 -43
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fdr.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fpr.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fwe.py +79 -43
- snowflake/ml/modeling/feature_selection/select_k_best.py +79 -43
- snowflake/ml/modeling/feature_selection/select_percentile.py +79 -43
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +79 -43
- snowflake/ml/modeling/feature_selection/variance_threshold.py +79 -43
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +79 -43
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +79 -43
- snowflake/ml/modeling/impute/iterative_imputer.py +79 -43
- snowflake/ml/modeling/impute/knn_imputer.py +79 -43
- snowflake/ml/modeling/impute/missing_indicator.py +79 -43
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +79 -43
- snowflake/ml/modeling/kernel_approximation/nystroem.py +79 -43
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +79 -43
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +79 -43
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +79 -43
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +79 -43
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +79 -43
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ard_regression.py +79 -43
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +79 -43
- snowflake/ml/modeling/linear_model/elastic_net.py +79 -43
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +79 -43
- snowflake/ml/modeling/linear_model/gamma_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/huber_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/lars.py +79 -43
- snowflake/ml/modeling/linear_model/lars_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +79 -43
- snowflake/ml/modeling/linear_model/linear_regression.py +79 -43
- snowflake/ml/modeling/linear_model/logistic_regression.py +79 -43
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +79 -43
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +79 -43
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/perceptron.py +79 -43
- snowflake/ml/modeling/linear_model/poisson_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ransac_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ridge.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_cv.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +79 -43
- snowflake/ml/modeling/manifold/isomap.py +79 -43
- snowflake/ml/modeling/manifold/mds.py +79 -43
- snowflake/ml/modeling/manifold/spectral_embedding.py +79 -43
- snowflake/ml/modeling/manifold/tsne.py +79 -43
- snowflake/ml/modeling/metrics/classification.py +6 -1
- snowflake/ml/modeling/metrics/regression.py +517 -9
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +79 -43
- snowflake/ml/modeling/mixture/gaussian_mixture.py +79 -43
- snowflake/ml/modeling/model_selection/grid_search_cv.py +79 -43
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +79 -43
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +79 -43
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +79 -43
- snowflake/ml/modeling/multiclass/output_code_classifier.py +79 -43
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/complement_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +79 -43
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +79 -43
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +79 -43
- snowflake/ml/modeling/neighbors/kernel_density.py +79 -43
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +79 -43
- snowflake/ml/modeling/neighbors/nearest_centroid.py +79 -43
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +79 -43
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +79 -43
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +79 -43
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +79 -43
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +79 -43
- snowflake/ml/modeling/neural_network/mlp_classifier.py +79 -43
- snowflake/ml/modeling/neural_network/mlp_regressor.py +79 -43
- snowflake/ml/modeling/pipeline/pipeline.py +24 -0
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +79 -43
- snowflake/ml/modeling/semi_supervised/label_propagation.py +79 -43
- snowflake/ml/modeling/semi_supervised/label_spreading.py +79 -43
- snowflake/ml/modeling/svm/linear_svc.py +79 -43
- snowflake/ml/modeling/svm/linear_svr.py +79 -43
- snowflake/ml/modeling/svm/nu_svc.py +79 -43
- snowflake/ml/modeling/svm/nu_svr.py +79 -43
- snowflake/ml/modeling/svm/svc.py +79 -43
- snowflake/ml/modeling/svm/svr.py +79 -43
- snowflake/ml/modeling/tree/decision_tree_classifier.py +79 -43
- snowflake/ml/modeling/tree/decision_tree_regressor.py +79 -43
- snowflake/ml/modeling/tree/extra_tree_classifier.py +79 -43
- snowflake/ml/modeling/tree/extra_tree_regressor.py +79 -43
- snowflake/ml/modeling/xgboost/xgb_classifier.py +79 -43
- snowflake/ml/modeling/xgboost/xgb_regressor.py +79 -43
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +79 -43
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +79 -43
- snowflake/ml/registry/model_registry.py +123 -121
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +50 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
--- a/snowflake/ml/modeling/feature_selection/select_percentile.py
+++ b/snowflake/ml/modeling/feature_selection/select_percentile.py
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -28,6 +29,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
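Both modules gain a private Snowpark import, convert_sp_to_sf_type, which predict() later uses to turn the signature's Snowpark output type into the SQL type string handed to _batch_inference. A minimal sketch of the conversion, assuming snowflake-snowpark-python is installed (the helper is internal to Snowpark, so its location and exact output may vary by version):

from snowflake.snowpark.types import DoubleType, LongType
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

# Snowpark DataType -> SQL type name string
print(convert_sp_to_sf_type(DoubleType()))  # e.g. "DOUBLE"
print(convert_sp_to_sf_type(LongType()))    # e.g. "BIGINT"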
@@ -191,7 +193,6 @@ class SelectPercentile(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -212,6 +213,15 @@ class SelectPercentile(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
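The deleted instance attribute self.id pinned a single identifier to the whole estimator; _get_rand_id() instead draws a fresh UUID for each generated object name. The helper in isolation, copied from the hunk above into a standalone sketch:

from uuid import uuid4

def _get_rand_id() -> str:
    # UUID4 with dashes replaced and upper-cased, so the result is safe inside
    # unquoted Snowflake stage / sproc / table identifiers
    return str(uuid4()).replace("-", "_").upper()

print("SNOWML_TRANSFORM_{safe_id}".format(safe_id=_get_rand_id()))
# e.g. SNOWML_TRANSFORM_1B9F0E2C_4D3A_4F5B_9C1D_0A2B3C4D5E6F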
@@ -290,7 +300,7 @@ class SelectPercentile(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -303,11 +313,12 @@ class SelectPercentile(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
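The os.path.join -> posixpath.join swap matters for Windows clients: stage paths in Snowflake always use forward slashes, while os.path.join follows the host OS separator. A quick illustration, using ntpath as a portable stand-in for Windows' os.path:

import ntpath      # what os.path resolves to on Windows
import posixpath   # what Snowflake stage paths require on every platform

stage_name, file_name = "SNOWML_TRANSFORM_ABC123", "model.pkl.zip"
print(ntpath.join(stage_name, file_name))     # SNOWML_TRANSFORM_ABC123\model.pkl.zip
print(posixpath.join(stage_name, file_name))  # SNOWML_TRANSFORM_ABC123/model.pkl.zip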
@@ -333,6 +344,7 @@ class SelectPercentile(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -341,7 +353,8 @@ class SelectPercentile(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -408,15 +421,15 @@ class SelectPercentile(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
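Two related changes meet here: the fit sproc is registered with anonymous=True (session-scoped, leaving no named CREATE PROCEDURE object behind), and it is invoked through the Python handle returned by the sproc decorator instead of by name via Session.call. A reduced sketch of that pattern, assuming an active Snowpark session; add_one is a made-up stand-in for the fit wrapper:

from snowflake.snowpark import Session
from snowflake.snowpark.functions import sproc

# session = Session.builder.configs({...}).create()  # assumed to exist already

@sproc(replace=True, session=session, packages=["snowflake-snowpark-python"], anonymous=True)
def add_one(session: Session, x: int) -> int:
    return x + 1  # runs server-side

# The handle is called directly, session first, instead of
# session.call("SOME_REGISTERED_NAME", 41).
print(add_one(session, 41))  # 42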
@@ -426,7 +439,7 @@ class SelectPercentile(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -472,7 +485,7 @@ class SelectPercentile(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -564,7 +577,7 @@ class SelectPercentile(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -620,26 +633,37 @@ class SelectPercentile(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
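The rewritten block generalizes the old missing-feature check: every feature the fitted estimator expects may appear in the DataFrame under its raw, unquoted, or quoted spelling, and whichever spelling is present is used to select and reorder columns before inference. The same rule stripped to plain Python (a hypothetical helper, not part of the package):

def select_columns(features_required, input_cols, unquoted, quoted, dataset_cols):
    # The three parallel lists are the same input columns in raw / unquoted /
    # quoted spelling; features_required is what the estimator saw at fit time.
    present = set(dataset_cols)
    missing, selected = [], []
    for i, f in enumerate(features_required):
        candidates = None if i >= len(input_cols) else (input_cols[i], unquoted[i], quoted[i])
        if candidates is None or f not in candidates or not present.intersection(candidates):
            missing.append(f)
        elif input_cols[i] in present:
            selected.append(input_cols[i])
        elif unquoted[i] in present:
            selected.append(unquoted[i])
        else:
            selected.append(quoted[i])
    if missing:
        raise ValueError(f"Features seen during fit but not present in the input: {missing}")
    return selected

print(select_columns(["A", "B"], ['"a"', "B"], ["A", "B"], ['"A"', '"B"'], ['"a"', "B"]))
# ['"a"', 'B']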
@@ -718,11 +742,18 @@ class SelectPercentile(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -795,10 +826,10 @@ class SelectPercentile(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
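The return [] -> return [output_cols_prefix] fix means estimators without classes_ still yield one named output column for predict_proba()/decision_function() style methods instead of none. The naming scheme in miniature (a hypothetical reimplementation; the real method reads classes_ from the fitted sklearn object):

import numpy as np

def get_output_column_names(output_cols_prefix, classes):
    if classes is None:
        return [output_cols_prefix]  # 1.0.1 returned [] here
    return [f"{output_cols_prefix}{c}" for c in classes]

print(get_output_column_names("predict_proba_", np.array([0, 1, 2])))
# ['predict_proba_0', 'predict_proba_1', 'predict_proba_2']
print(get_output_column_names("decision_function_", None))
# ['decision_function_']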
@@ -1023,7 +1054,7 @@ class SelectPercentile(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1037,8 +1068,9 @@ class SelectPercentile(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1064,6 +1096,7 @@ class SelectPercentile(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1071,7 +1104,8 @@ class SelectPercentile(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1121,14 +1155,14 @@ class SelectPercentile(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1146,18 +1180,20 @@ class SelectPercentile(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
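The three signature hunks above make one change: with drop_input_cols set, input features are no longer echoed into each method's output signature. A condensed sketch using the public classes these modules import:

from snowflake.ml.model.model_signature import DataType, FeatureSpec, ModelSignature

inputs = [FeatureSpec(dtype=DataType.DOUBLE, name="SEPAL_LENGTH")]
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name="OUTPUT_PREDICT")]

drop_input_cols = True
# the new construction: outputs echo the inputs only when drop_input_cols is False
sig = ModelSignature(inputs, ([] if drop_input_cols else inputs) + outputs)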
--- a/snowflake/ml/modeling/feature_selection/sequential_feature_selector.py
+++ b/snowflake/ml/modeling/feature_selection/sequential_feature_selector.py
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -246,7 +248,6 @@ class SequentialFeatureSelector(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
         deps = deps | _gather_dependencies(estimator)
         self._deps = list(deps)
@@ -272,6 +273,15 @@ class SequentialFeatureSelector(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -350,7 +360,7 @@ class SequentialFeatureSelector(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -363,11 +373,12 @@ class SequentialFeatureSelector(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -393,6 +404,7 @@ class SequentialFeatureSelector(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -401,7 +413,8 @@ class SequentialFeatureSelector(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -468,15 +481,15 @@ class SequentialFeatureSelector(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -486,7 +499,7 @@ class SequentialFeatureSelector(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -532,7 +545,7 @@ class SequentialFeatureSelector(BaseTransformer):
 
         # Register vectorized UDF for batch inference
        batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -624,7 +637,7 @@ class SequentialFeatureSelector(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -680,26 +693,37 @@ class SequentialFeatureSelector(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -778,11 +802,18 @@ class SequentialFeatureSelector(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -855,10 +886,10 @@ class SequentialFeatureSelector(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1083,7 +1114,7 @@ class SequentialFeatureSelector(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1097,8 +1128,9 @@ class SequentialFeatureSelector(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1124,6 +1156,7 @@ class SequentialFeatureSelector(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1131,7 +1164,8 @@ class SequentialFeatureSelector(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1181,14 +1215,14 @@ class SequentialFeatureSelector(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1206,18 +1240,20 @@ class SequentialFeatureSelector(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]: