snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +35 -40
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/identifier.py +74 -7
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_core_requirements.py +1 -1
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +25 -28
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +7 -4
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/_base.py +3 -1
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -8
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +27 -21
- snowflake/ml/model/_model_meta.py +33 -19
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +28 -15
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +79 -43
- snowflake/ml/modeling/cluster/affinity_propagation.py +79 -43
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +79 -43
- snowflake/ml/modeling/cluster/birch.py +79 -43
- snowflake/ml/modeling/cluster/bisecting_k_means.py +79 -43
- snowflake/ml/modeling/cluster/dbscan.py +79 -43
- snowflake/ml/modeling/cluster/feature_agglomeration.py +79 -43
- snowflake/ml/modeling/cluster/k_means.py +79 -43
- snowflake/ml/modeling/cluster/mean_shift.py +79 -43
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +79 -43
- snowflake/ml/modeling/cluster/optics.py +79 -43
- snowflake/ml/modeling/cluster/spectral_biclustering.py +79 -43
- snowflake/ml/modeling/cluster/spectral_clustering.py +79 -43
- snowflake/ml/modeling/cluster/spectral_coclustering.py +79 -43
- snowflake/ml/modeling/compose/column_transformer.py +79 -43
- snowflake/ml/modeling/compose/transformed_target_regressor.py +79 -43
- snowflake/ml/modeling/covariance/elliptic_envelope.py +79 -43
- snowflake/ml/modeling/covariance/empirical_covariance.py +79 -43
- snowflake/ml/modeling/covariance/graphical_lasso.py +79 -43
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +79 -43
- snowflake/ml/modeling/covariance/ledoit_wolf.py +79 -43
- snowflake/ml/modeling/covariance/min_cov_det.py +79 -43
- snowflake/ml/modeling/covariance/oas.py +79 -43
- snowflake/ml/modeling/covariance/shrunk_covariance.py +79 -43
- snowflake/ml/modeling/decomposition/dictionary_learning.py +79 -43
- snowflake/ml/modeling/decomposition/factor_analysis.py +79 -43
- snowflake/ml/modeling/decomposition/fast_ica.py +79 -43
- snowflake/ml/modeling/decomposition/incremental_pca.py +79 -43
- snowflake/ml/modeling/decomposition/kernel_pca.py +79 -43
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +79 -43
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +79 -43
- snowflake/ml/modeling/decomposition/pca.py +79 -43
- snowflake/ml/modeling/decomposition/sparse_pca.py +79 -43
- snowflake/ml/modeling/decomposition/truncated_svd.py +79 -43
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +79 -43
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +79 -43
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/bagging_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/bagging_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/isolation_forest.py +79 -43
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/stacking_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/voting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/voting_regressor.py +79 -43
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fdr.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fpr.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fwe.py +79 -43
- snowflake/ml/modeling/feature_selection/select_k_best.py +79 -43
- snowflake/ml/modeling/feature_selection/select_percentile.py +79 -43
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +79 -43
- snowflake/ml/modeling/feature_selection/variance_threshold.py +79 -43
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +79 -43
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +79 -43
- snowflake/ml/modeling/impute/iterative_imputer.py +79 -43
- snowflake/ml/modeling/impute/knn_imputer.py +79 -43
- snowflake/ml/modeling/impute/missing_indicator.py +79 -43
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +79 -43
- snowflake/ml/modeling/kernel_approximation/nystroem.py +79 -43
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +79 -43
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +79 -43
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +79 -43
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +79 -43
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +79 -43
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ard_regression.py +79 -43
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +79 -43
- snowflake/ml/modeling/linear_model/elastic_net.py +79 -43
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +79 -43
- snowflake/ml/modeling/linear_model/gamma_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/huber_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/lars.py +79 -43
- snowflake/ml/modeling/linear_model/lars_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +79 -43
- snowflake/ml/modeling/linear_model/linear_regression.py +79 -43
- snowflake/ml/modeling/linear_model/logistic_regression.py +79 -43
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +79 -43
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +79 -43
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/perceptron.py +79 -43
- snowflake/ml/modeling/linear_model/poisson_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ransac_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ridge.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_cv.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +79 -43
- snowflake/ml/modeling/manifold/isomap.py +79 -43
- snowflake/ml/modeling/manifold/mds.py +79 -43
- snowflake/ml/modeling/manifold/spectral_embedding.py +79 -43
- snowflake/ml/modeling/manifold/tsne.py +79 -43
- snowflake/ml/modeling/metrics/classification.py +6 -1
- snowflake/ml/modeling/metrics/regression.py +517 -9
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +79 -43
- snowflake/ml/modeling/mixture/gaussian_mixture.py +79 -43
- snowflake/ml/modeling/model_selection/grid_search_cv.py +79 -43
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +79 -43
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +79 -43
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +79 -43
- snowflake/ml/modeling/multiclass/output_code_classifier.py +79 -43
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/complement_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +79 -43
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +79 -43
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +79 -43
- snowflake/ml/modeling/neighbors/kernel_density.py +79 -43
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +79 -43
- snowflake/ml/modeling/neighbors/nearest_centroid.py +79 -43
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +79 -43
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +79 -43
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +79 -43
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +79 -43
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +79 -43
- snowflake/ml/modeling/neural_network/mlp_classifier.py +79 -43
- snowflake/ml/modeling/neural_network/mlp_regressor.py +79 -43
- snowflake/ml/modeling/pipeline/pipeline.py +24 -0
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +79 -43
- snowflake/ml/modeling/semi_supervised/label_propagation.py +79 -43
- snowflake/ml/modeling/semi_supervised/label_spreading.py +79 -43
- snowflake/ml/modeling/svm/linear_svc.py +79 -43
- snowflake/ml/modeling/svm/linear_svr.py +79 -43
- snowflake/ml/modeling/svm/nu_svc.py +79 -43
- snowflake/ml/modeling/svm/nu_svr.py +79 -43
- snowflake/ml/modeling/svm/svc.py +79 -43
- snowflake/ml/modeling/svm/svr.py +79 -43
- snowflake/ml/modeling/tree/decision_tree_classifier.py +79 -43
- snowflake/ml/modeling/tree/decision_tree_regressor.py +79 -43
- snowflake/ml/modeling/tree/extra_tree_classifier.py +79 -43
- snowflake/ml/modeling/tree/extra_tree_regressor.py +79 -43
- snowflake/ml/modeling/xgboost/xgb_classifier.py +79 -43
- snowflake/ml/modeling/xgboost/xgb_regressor.py +79 -43
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +79 -43
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +79 -43
- snowflake/ml/registry/model_registry.py +123 -121
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +50 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/feature_selection/variance_threshold.py

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -185,7 +187,6 @@ class VarianceThreshold(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -205,6 +206,15 @@ class VarianceThreshold(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
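The `_get_rand_id` helper added here returns the same value the removed `self.id` attribute used to hold, but generates a fresh id on every call. Because the result is embedded in unquoted SQL identifiers (stage, sproc, and table names), the dashes in the UUID are swapped for underscores and the string is uppercased. A minimal standalone sketch of the transformation:

    from uuid import uuid4

    def get_rand_id() -> str:
        # uuid4() yields e.g. '1b9d6bcd-bbfd-4b2d-9b5d-ab8dfbbd4bed'; dashes are
        # not valid in unquoted Snowflake identifiers, so replace them with
        # underscores and uppercase the rest.
        return str(uuid4()).replace("-", "_").upper()

    print("SNOWML_TRANSFORM_{safe_id}".format(safe_id=get_rand_id()))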
@@ -283,7 +293,7 @@ class VarianceThreshold(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -296,11 +306,12 @@ class VarianceThreshold(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
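Snowflake stage paths are always forward-slash separated, so the old `os.path.join` call would have produced a broken path on a Windows client, where the separator is a backslash. A small illustration of the difference, with illustrative names, using `ntpath` (the Windows flavor of `os.path`) so it runs anywhere:

    import ntpath      # what os.path resolves to on Windows
    import posixpath

    stage_name = "SNOWML_TRANSFORM_ABC123"
    file_name = "model.pkl"

    print(ntpath.join(stage_name, file_name))    # SNOWML_TRANSFORM_ABC123\model.pkl
    print(posixpath.join(stage_name, file_name)) # SNOWML_TRANSFORM_ABC123/model.pkl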
@@ -326,6 +337,7 @@ class VarianceThreshold(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -334,7 +346,8 @@ class VarianceThreshold(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -401,15 +414,15 @@ class VarianceThreshold(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -419,7 +432,7 @@ class VarianceThreshold(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -465,7 +478,7 @@ class VarianceThreshold(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -557,7 +570,7 @@ class VarianceThreshold(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -613,26 +626,37 @@ class VarianceThreshold(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-  [17 removed lines whose content the diff viewer did not render]
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
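The new matching loop exists because the same column can surface under three spellings: the name as configured (`input_cols`), the unescaped form from `identifier.get_unescaped_names`, and the re-escaped, double-quoted form from `identifier.get_escaped_names`. A pandas frame converted from Snowpark may carry any of them, since Snowflake uppercases unquoted identifiers. A simplified, self-contained sketch of the selection logic, with toy names rather than the library helpers:

    def select_columns(input_cols, unquoted_cols, quoted_cols, dataset_cols):
        # For each position, keep whichever spelling the dataset actually uses;
        # a feature counts as missing only if no spelling is present.
        present = set(dataset_cols)
        chosen, missing = [], []
        for i in range(len(input_cols)):
            for candidate in (input_cols[i], unquoted_cols[i], quoted_cols[i]):
                if candidate in present:
                    chosen.append(candidate)
                    break
            else:
                missing.append(input_cols[i])
        return chosen, missing

    # The dataset came back with the unquoted, uppercased spelling.
    print(select_columns(['"Sepal_Length"'], ["SEPAL_LENGTH"], ['"SEPAL_LENGTH"'], ["SEPAL_LENGTH"]))
    # (['SEPAL_LENGTH'], [])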
@@ -711,11 +735,18 @@ class VarianceThreshold(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
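Rather than hard-coding the output column type, `predict` now derives it from the saved model signature: the first output feature's Snowpark type is translated to its Snowflake SQL name with `convert_sp_to_sf_type`. A rough sketch of just the conversion step, assuming a snowflake-snowpark-python installation, with `DoubleType` standing in for whatever the signature reports:

    from snowflake.snowpark.types import DoubleType
    from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

    # A regressor-style signature whose first output is a double would make
    # the batch-inference UDF declare a DOUBLE output column.
    print(convert_sp_to_sf_type(DoubleType()))  # expected: DOUBLE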
@@ -788,10 +819,10 @@ class VarianceThreshold(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
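The fallback changes from an empty list to `[output_cols_prefix]`, so callers of predict_proba-style methods always receive at least one expected output column even when the fitted estimator exposes no `classes_`. A sketch of the naming behavior; the per-class suffix scheme for the classifier branch is assumed, since this hunk cuts off before showing it:

    import numpy as np

    def get_output_column_names(prefix, classes=None):
        if classes is None:
            # non-classifier: one column named after the prefix itself
            return [prefix]
        # classifier: one column per class (assumed suffix scheme)
        return [f"{prefix}{c}" for c in classes]

    print(get_output_column_names("predict_proba_"))                    # ['predict_proba_']
    print(get_output_column_names("predict_proba_", np.array([0, 1])))  # ['predict_proba_0', 'predict_proba_1']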
@@ -1016,7 +1047,7 @@ class VarianceThreshold(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1030,8 +1061,9 @@ class VarianceThreshold(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-
-
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1057,6 +1089,7 @@ class VarianceThreshold(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1064,7 +1097,8 @@ class VarianceThreshold(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1114,14 +1148,14 @@ class VarianceThreshold(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1139,18 +1173,20 @@ class VarianceThreshold(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
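Each `ModelSignature` now gains a continuation line: the declared outputs are the estimator outputs alone when `_drop_input_cols` is set, otherwise the pass-through inputs come first. The composition is plain list arithmetic; a toy illustration with strings standing in for `FeatureSpec` objects:

    inputs = ["SEPAL_LENGTH", "SEPAL_WIDTH"]   # stand-ins for FeatureSpec objects
    outputs = ["OUTPUT_TARGET"]

    for drop_input_cols in (False, True):
        print(([] if drop_input_cols else inputs) + outputs)
    # ['SEPAL_LENGTH', 'SEPAL_WIDTH', 'OUTPUT_TARGET']
    # ['OUTPUT_TARGET']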
snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -270,7 +272,6 @@ class GaussianProcessClassifier(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -298,6 +299,15 @@ class GaussianProcessClassifier(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -376,7 +386,7 @@ class GaussianProcessClassifier(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -389,11 +399,12 @@ class GaussianProcessClassifier(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -419,6 +430,7 @@ class GaussianProcessClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -427,7 +439,8 @@ class GaussianProcessClassifier(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -494,15 +507,15 @@ class GaussianProcessClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -512,7 +525,7 @@ class GaussianProcessClassifier(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -558,7 +571,7 @@ class GaussianProcessClassifier(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -650,7 +663,7 @@ class GaussianProcessClassifier(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -706,26 +719,37 @@ class GaussianProcessClassifier(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-  [17 removed lines whose content the diff viewer did not render]
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -806,11 +830,18 @@ class GaussianProcessClassifier(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -881,10 +912,10 @@ class GaussianProcessClassifier(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1113,7 +1144,7 @@ class GaussianProcessClassifier(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1127,8 +1158,9 @@ class GaussianProcessClassifier(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-
-
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1154,6 +1186,7 @@ class GaussianProcessClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1161,7 +1194,8 @@ class GaussianProcessClassifier(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1211,14 +1245,14 @@ class GaussianProcessClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
        )
 
         cleanup_temp_files([local_score_file_name])
@@ -1236,18 +1270,20 @@ class GaussianProcessClassifier(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]: