snowflake-ml-python 1.0.1-py3-none-any.whl → 1.0.3-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only and reflects the changes between the package versions exactly as they appear in that registry.
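A diff like this can also be reproduced locally by fetching both wheels and comparing them member by member. The sketch below is illustrative only (it is not the tool that produced this page); it assumes `pip` is on PATH and that the downloaded wheels follow PyPI's standard filename convention.

```python
# Illustrative sketch: download both wheel versions and diff one member file.
import difflib
import subprocess
import zipfile

for version in ("1.0.1", "1.0.3"):
    subprocess.run(
        ["pip", "download", f"snowflake-ml-python=={version}",
         "--no-deps", "--dest", f"wheels/{version}"],
        check=True,
    )

def read_member(wheel_path: str, member: str) -> list:
    """Read one file out of a wheel (wheels are plain zip archives)."""
    with zipfile.ZipFile(wheel_path) as zf:
        return zf.read(member).decode("utf-8").splitlines(keepends=True)

member = "snowflake/ml/modeling/naive_bayes/bernoulli_nb.py"
old = read_member("wheels/1.0.1/snowflake_ml_python-1.0.1-py3-none-any.whl", member)
new = read_member("wheels/1.0.3/snowflake_ml_python-1.0.3-py3-none-any.whl", member)
print("".join(difflib.unified_diff(old, new, fromfile="1.0.1", tofile="1.0.3")))
```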
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +35 -40
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/identifier.py +74 -7
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_core_requirements.py +1 -1
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +25 -28
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +7 -4
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/_base.py +3 -1
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -8
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +27 -21
- snowflake/ml/model/_model_meta.py +33 -19
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +28 -15
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +79 -43
- snowflake/ml/modeling/cluster/affinity_propagation.py +79 -43
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +79 -43
- snowflake/ml/modeling/cluster/birch.py +79 -43
- snowflake/ml/modeling/cluster/bisecting_k_means.py +79 -43
- snowflake/ml/modeling/cluster/dbscan.py +79 -43
- snowflake/ml/modeling/cluster/feature_agglomeration.py +79 -43
- snowflake/ml/modeling/cluster/k_means.py +79 -43
- snowflake/ml/modeling/cluster/mean_shift.py +79 -43
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +79 -43
- snowflake/ml/modeling/cluster/optics.py +79 -43
- snowflake/ml/modeling/cluster/spectral_biclustering.py +79 -43
- snowflake/ml/modeling/cluster/spectral_clustering.py +79 -43
- snowflake/ml/modeling/cluster/spectral_coclustering.py +79 -43
- snowflake/ml/modeling/compose/column_transformer.py +79 -43
- snowflake/ml/modeling/compose/transformed_target_regressor.py +79 -43
- snowflake/ml/modeling/covariance/elliptic_envelope.py +79 -43
- snowflake/ml/modeling/covariance/empirical_covariance.py +79 -43
- snowflake/ml/modeling/covariance/graphical_lasso.py +79 -43
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +79 -43
- snowflake/ml/modeling/covariance/ledoit_wolf.py +79 -43
- snowflake/ml/modeling/covariance/min_cov_det.py +79 -43
- snowflake/ml/modeling/covariance/oas.py +79 -43
- snowflake/ml/modeling/covariance/shrunk_covariance.py +79 -43
- snowflake/ml/modeling/decomposition/dictionary_learning.py +79 -43
- snowflake/ml/modeling/decomposition/factor_analysis.py +79 -43
- snowflake/ml/modeling/decomposition/fast_ica.py +79 -43
- snowflake/ml/modeling/decomposition/incremental_pca.py +79 -43
- snowflake/ml/modeling/decomposition/kernel_pca.py +79 -43
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +79 -43
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +79 -43
- snowflake/ml/modeling/decomposition/pca.py +79 -43
- snowflake/ml/modeling/decomposition/sparse_pca.py +79 -43
- snowflake/ml/modeling/decomposition/truncated_svd.py +79 -43
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +79 -43
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +79 -43
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/bagging_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/bagging_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/isolation_forest.py +79 -43
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/stacking_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/voting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/voting_regressor.py +79 -43
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fdr.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fpr.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fwe.py +79 -43
- snowflake/ml/modeling/feature_selection/select_k_best.py +79 -43
- snowflake/ml/modeling/feature_selection/select_percentile.py +79 -43
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +79 -43
- snowflake/ml/modeling/feature_selection/variance_threshold.py +79 -43
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +79 -43
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +79 -43
- snowflake/ml/modeling/impute/iterative_imputer.py +79 -43
- snowflake/ml/modeling/impute/knn_imputer.py +79 -43
- snowflake/ml/modeling/impute/missing_indicator.py +79 -43
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +79 -43
- snowflake/ml/modeling/kernel_approximation/nystroem.py +79 -43
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +79 -43
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +79 -43
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +79 -43
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +79 -43
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +79 -43
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ard_regression.py +79 -43
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +79 -43
- snowflake/ml/modeling/linear_model/elastic_net.py +79 -43
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +79 -43
- snowflake/ml/modeling/linear_model/gamma_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/huber_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/lars.py +79 -43
- snowflake/ml/modeling/linear_model/lars_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +79 -43
- snowflake/ml/modeling/linear_model/linear_regression.py +79 -43
- snowflake/ml/modeling/linear_model/logistic_regression.py +79 -43
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +79 -43
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +79 -43
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/perceptron.py +79 -43
- snowflake/ml/modeling/linear_model/poisson_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ransac_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ridge.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_cv.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +79 -43
- snowflake/ml/modeling/manifold/isomap.py +79 -43
- snowflake/ml/modeling/manifold/mds.py +79 -43
- snowflake/ml/modeling/manifold/spectral_embedding.py +79 -43
- snowflake/ml/modeling/manifold/tsne.py +79 -43
- snowflake/ml/modeling/metrics/classification.py +6 -1
- snowflake/ml/modeling/metrics/regression.py +517 -9
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +79 -43
- snowflake/ml/modeling/mixture/gaussian_mixture.py +79 -43
- snowflake/ml/modeling/model_selection/grid_search_cv.py +79 -43
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +79 -43
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +79 -43
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +79 -43
- snowflake/ml/modeling/multiclass/output_code_classifier.py +79 -43
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/complement_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +79 -43
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +79 -43
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +79 -43
- snowflake/ml/modeling/neighbors/kernel_density.py +79 -43
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +79 -43
- snowflake/ml/modeling/neighbors/nearest_centroid.py +79 -43
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +79 -43
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +79 -43
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +79 -43
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +79 -43
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +79 -43
- snowflake/ml/modeling/neural_network/mlp_classifier.py +79 -43
- snowflake/ml/modeling/neural_network/mlp_regressor.py +79 -43
- snowflake/ml/modeling/pipeline/pipeline.py +24 -0
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +79 -43
- snowflake/ml/modeling/semi_supervised/label_propagation.py +79 -43
- snowflake/ml/modeling/semi_supervised/label_spreading.py +79 -43
- snowflake/ml/modeling/svm/linear_svc.py +79 -43
- snowflake/ml/modeling/svm/linear_svr.py +79 -43
- snowflake/ml/modeling/svm/nu_svc.py +79 -43
- snowflake/ml/modeling/svm/nu_svr.py +79 -43
- snowflake/ml/modeling/svm/svc.py +79 -43
- snowflake/ml/modeling/svm/svr.py +79 -43
- snowflake/ml/modeling/tree/decision_tree_classifier.py +79 -43
- snowflake/ml/modeling/tree/decision_tree_regressor.py +79 -43
- snowflake/ml/modeling/tree/extra_tree_classifier.py +79 -43
- snowflake/ml/modeling/tree/extra_tree_regressor.py +79 -43
- snowflake/ml/modeling/xgboost/xgb_classifier.py +79 -43
- snowflake/ml/modeling/xgboost/xgb_regressor.py +79 -43
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +79 -43
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +79 -43
- snowflake/ml/registry/model_registry.py +123 -121
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +50 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/multiclass/output_code_classifier.py (+79 -43)

```diff
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -206,7 +208,6 @@ class OutputCodeClassifier(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
         deps = deps | _gather_dependencies(estimator)
         self._deps = list(deps)
@@ -229,6 +230,15 @@ class OutputCodeClassifier(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)

+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
```
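The new `_get_rand_id` helper replaces the per-instance `self.id` attribute removed in the hunk above: each fit, score, or inference call now draws a fresh token instead of reusing one id for the lifetime of the transformer. The expression itself is unchanged; a quick illustration of the token format it yields:

```python
from uuid import uuid4

# Same expression as _get_rand_id: hyphens become underscores and the result
# is upper-cased, so the token is safe inside unquoted SQL identifiers such
# as SNOWML_TRANSFORM_{safe_id} or SNOWML_FIT_{safe_id}.
rand_id = str(uuid4()).replace("-", "_").upper()
print(rand_id)  # e.g. 0F8B2D71_4C9A_4E2B_B1D3_9A7C5E614F20 (random each run)
```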
```diff
@@ -307,7 +317,7 @@ class OutputCodeClassifier(BaseTransformer):
         cp.dump(self._sklearn_object, local_transform_file)

         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -320,11 +330,12 @@ class OutputCodeClassifier(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()

-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
```
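The `# Use posixpath to construct stage paths` change is a portability fix: on Windows clients, `os.path` is `ntpath` and joins with backslashes, while Snowflake stage paths always use forward slashes. `posixpath.join` produces the same result on every client OS:

```python
import ntpath     # what os.path resolves to on Windows
import posixpath  # forward-slash joins on every platform

stage = "SNOWML_TRANSFORM_0F8B2D71"  # hypothetical stage name for illustration
fname = "model.pkl"
print(ntpath.join(stage, fname))     # SNOWML_TRANSFORM_0F8B2D71\model.pkl  (broken stage path)
print(posixpath.join(stage, fname))  # SNOWML_TRANSFORM_0F8B2D71/model.pkl  (what a stage expects)
```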
```diff
@@ -350,6 +361,7 @@ class OutputCodeClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -358,7 +370,8 @@ class OutputCodeClassifier(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -425,15 +438,15 @@ class OutputCodeClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )

         if "|" in sproc_export_file_name:
```
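These hunks change both how the fit procedure is registered and how it is invoked: `anonymous=True` is added to the `sproc` registration, and the returned handle is called directly (`fit_wrapper_sproc(session, ...)`) rather than calling a named procedure through `session.call`. An anonymous sproc needs no CREATE PROCEDURE privilege and leaves no named object behind in the schema. A minimal sketch of the same pattern; the `add_one` body and the connection parameters are placeholders, not code from the wheel:

```python
from snowflake.snowpark import Session
from snowflake.snowpark.functions import sproc

connection_parameters = {}  # account, user, password, ... elided
session = Session.builder.configs(connection_parameters).create()

# anonymous=True: the procedure exists only for this session and has no name
# in the schema, mirroring the registration arguments in the hunk above.
@sproc(replace=True, session=session, anonymous=True,
       packages=["snowflake-snowpark-python"])
def add_one(session: Session, x: int) -> int:
    return x + 1

# The handle is invoked directly with the session as the first argument,
# just as fit_wrapper_sproc(session, ...) is above; no session.call needed.
print(add_one(session, 41))  # 42
```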
```diff
@@ -443,7 +456,7 @@ class OutputCodeClassifier(BaseTransformer):
             print("\n".join(fields[1:]))

         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -489,7 +502,7 @@ class OutputCodeClassifier(BaseTransformer):

         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)

         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -581,7 +594,7 @@ class OutputCodeClassifier(BaseTransformer):
             return transformed_pandas_df.to_dict("records")

         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )

         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -637,26 +650,37 @@ class OutputCodeClassifier(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
```
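The rewritten block above (its seventeen removed predecessor lines are rendered blank by the diff view) matches each feature the estimator saw at fit time against three spellings of the configured input columns: as given, unquoted, and quoted. A standalone toy run of the same selection rules, with hypothetical column lists standing in for the `identifier` helpers:

```python
# Toy walk-through of the selection rules above; the lists stand in for
# self.input_cols and identifier.get_unescaped_names / get_escaped_names.
features_required = ["SEPAL_LENGTH", "SEPAL_WIDTH"]    # estimator.feature_names_in_
input_cols = ['"SEPAL_LENGTH"', "SEPAL_WIDTH"]         # as configured
unquoted_input_cols = ["SEPAL_LENGTH", "SEPAL_WIDTH"]  # quotes stripped
quoted_input_cols = ['"SEPAL_LENGTH"', '"SEPAL_WIDTH"']
features_in_dataset = {'"SEPAL_LENGTH"', "SEPAL_WIDTH"}  # pandas columns

missing_features, columns_to_select = [], []
for i, f in enumerate(features_required):
    if i >= len(input_cols):
        missing_features.append(f)
        continue
    candidates = (input_cols[i], unquoted_input_cols[i], quoted_input_cols[i])
    # A feature is "missing" if no spelling matches it, or no spelling is
    # actually present in the dataset; otherwise pick the first present form.
    if f not in candidates or not any(c in features_in_dataset for c in candidates):
        missing_features.append(f)
    elif input_cols[i] in features_in_dataset:
        columns_to_select.append(input_cols[i])
    elif unquoted_input_cols[i] in features_in_dataset:
        columns_to_select.append(unquoted_input_cols[i])
    else:
        columns_to_select.append(quoted_input_cols[i])

print(missing_features)   # []
print(columns_to_select)  # ['"SEPAL_LENGTH"', 'SEPAL_WIDTH']
```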
```diff
@@ -737,11 +761,18 @@ class OutputCodeClassifier(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
```
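The Snowpark `predict` path now fills in `expected_output_cols_type` from the saved model signature instead of leaving it blank: the first output's Snowpark type is mapped to a Snowflake type name via the snowpark-internal `convert_sp_to_sf_type` helper imported at the top of the file. Roughly as below; the exact strings returned are internal snowpark behavior and are shown here as an assumption:

```python
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
from snowflake.snowpark.types import DoubleType, LongType

# Maps a Snowpark type object to the SQL type name used when declaring the
# inference UDF's output column; return values assumed, not verified here.
print(convert_sp_to_sf_type(DoubleType()))  # e.g. DOUBLE
print(convert_sp_to_sf_type(LongType()))    # e.g. BIGINT
```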
```diff
@@ -812,10 +843,10 @@ class OutputCodeClassifier(BaseTransformer):

     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]

         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1040,7 +1071,7 @@ class OutputCodeClassifier(BaseTransformer):
         cp.dump(self._sklearn_object, local_score_file)

         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1054,8 +1085,9 @@ class OutputCodeClassifier(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()

-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1081,6 +1113,7 @@ class OutputCodeClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1088,7 +1121,8 @@ class OutputCodeClassifier(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1138,14 +1172,14 @@ class OutputCodeClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )

         cleanup_temp_files([local_score_file_name])
@@ -1163,18 +1197,20 @@ class OutputCodeClassifier(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)

     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
```
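The signature hunks also change what `predict` advertises as its output schema: unless `drop_input_cols` is set, the input features are now listed ahead of the prediction columns, matching the fact that batch inference passes input columns through. A sketch with made-up feature specs; the constructor shapes mirror the diff, while the spec values are hypothetical:

```python
from snowflake.ml.model.model_signature import DataType, FeatureSpec, ModelSignature

inputs = [FeatureSpec(dtype=DataType.DOUBLE, name="SEPAL_LENGTH")]     # hypothetical
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name="OUTPUT_PREDICT")]  # hypothetical
drop_input_cols = False

# Same construction as the `+` lines above: inputs are prepended to the
# declared outputs unless the transformer drops its input columns.
sig = ModelSignature(inputs, ([] if drop_input_cols else inputs) + outputs)
print([spec.name for spec in sig.outputs])  # ['SEPAL_LENGTH', 'OUTPUT_PREDICT']
```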
The same generated changes are applied across the autogenerated estimator wrappers listed above (each shows +79 -43); the second file rendered on this page is the following.

snowflake/ml/modeling/naive_bayes/bernoulli_nb.py (+79 -43)

```diff
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -205,7 +207,6 @@ class BernoulliNB(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])

         self._deps = list(deps)
@@ -229,6 +230,15 @@ class BernoulliNB(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)

+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -307,7 +317,7 @@ class BernoulliNB(BaseTransformer):
         cp.dump(self._sklearn_object, local_transform_file)

         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -320,11 +330,12 @@ class BernoulliNB(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()

-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -350,6 +361,7 @@ class BernoulliNB(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -358,7 +370,8 @@ class BernoulliNB(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -425,15 +438,15 @@ class BernoulliNB(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )

         if "|" in sproc_export_file_name:
@@ -443,7 +456,7 @@ class BernoulliNB(BaseTransformer):
             print("\n".join(fields[1:]))

         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -489,7 +502,7 @@ class BernoulliNB(BaseTransformer):

         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)

         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -581,7 +594,7 @@ class BernoulliNB(BaseTransformer):
             return transformed_pandas_df.to_dict("records")

         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )

         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -637,26 +650,37 @@ class BernoulliNB(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -737,11 +761,18 @@ class BernoulliNB(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -812,10 +843,10 @@ class BernoulliNB(BaseTransformer):

     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]

         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1044,7 +1075,7 @@ class BernoulliNB(BaseTransformer):
         cp.dump(self._sklearn_object, local_score_file)

         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1058,8 +1089,9 @@ class BernoulliNB(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()

-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1085,6 +1117,7 @@ class BernoulliNB(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1092,7 +1125,8 @@ class BernoulliNB(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1142,14 +1176,14 @@ class BernoulliNB(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
        )

         cleanup_temp_files([local_score_file_name])
@@ -1167,18 +1201,20 @@ class BernoulliNB(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)

     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
```