snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +35 -40
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/identifier.py +74 -7
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_core_requirements.py +1 -1
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +25 -28
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +7 -4
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/_base.py +3 -1
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -8
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +27 -21
- snowflake/ml/model/_model_meta.py +33 -19
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +28 -15
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +79 -43
- snowflake/ml/modeling/cluster/affinity_propagation.py +79 -43
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +79 -43
- snowflake/ml/modeling/cluster/birch.py +79 -43
- snowflake/ml/modeling/cluster/bisecting_k_means.py +79 -43
- snowflake/ml/modeling/cluster/dbscan.py +79 -43
- snowflake/ml/modeling/cluster/feature_agglomeration.py +79 -43
- snowflake/ml/modeling/cluster/k_means.py +79 -43
- snowflake/ml/modeling/cluster/mean_shift.py +79 -43
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +79 -43
- snowflake/ml/modeling/cluster/optics.py +79 -43
- snowflake/ml/modeling/cluster/spectral_biclustering.py +79 -43
- snowflake/ml/modeling/cluster/spectral_clustering.py +79 -43
- snowflake/ml/modeling/cluster/spectral_coclustering.py +79 -43
- snowflake/ml/modeling/compose/column_transformer.py +79 -43
- snowflake/ml/modeling/compose/transformed_target_regressor.py +79 -43
- snowflake/ml/modeling/covariance/elliptic_envelope.py +79 -43
- snowflake/ml/modeling/covariance/empirical_covariance.py +79 -43
- snowflake/ml/modeling/covariance/graphical_lasso.py +79 -43
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +79 -43
- snowflake/ml/modeling/covariance/ledoit_wolf.py +79 -43
- snowflake/ml/modeling/covariance/min_cov_det.py +79 -43
- snowflake/ml/modeling/covariance/oas.py +79 -43
- snowflake/ml/modeling/covariance/shrunk_covariance.py +79 -43
- snowflake/ml/modeling/decomposition/dictionary_learning.py +79 -43
- snowflake/ml/modeling/decomposition/factor_analysis.py +79 -43
- snowflake/ml/modeling/decomposition/fast_ica.py +79 -43
- snowflake/ml/modeling/decomposition/incremental_pca.py +79 -43
- snowflake/ml/modeling/decomposition/kernel_pca.py +79 -43
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +79 -43
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +79 -43
- snowflake/ml/modeling/decomposition/pca.py +79 -43
- snowflake/ml/modeling/decomposition/sparse_pca.py +79 -43
- snowflake/ml/modeling/decomposition/truncated_svd.py +79 -43
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +79 -43
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +79 -43
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/bagging_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/bagging_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/isolation_forest.py +79 -43
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/stacking_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/voting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/voting_regressor.py +79 -43
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fdr.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fpr.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fwe.py +79 -43
- snowflake/ml/modeling/feature_selection/select_k_best.py +79 -43
- snowflake/ml/modeling/feature_selection/select_percentile.py +79 -43
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +79 -43
- snowflake/ml/modeling/feature_selection/variance_threshold.py +79 -43
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +79 -43
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +79 -43
- snowflake/ml/modeling/impute/iterative_imputer.py +79 -43
- snowflake/ml/modeling/impute/knn_imputer.py +79 -43
- snowflake/ml/modeling/impute/missing_indicator.py +79 -43
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +79 -43
- snowflake/ml/modeling/kernel_approximation/nystroem.py +79 -43
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +79 -43
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +79 -43
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +79 -43
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +79 -43
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +79 -43
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ard_regression.py +79 -43
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +79 -43
- snowflake/ml/modeling/linear_model/elastic_net.py +79 -43
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +79 -43
- snowflake/ml/modeling/linear_model/gamma_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/huber_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/lars.py +79 -43
- snowflake/ml/modeling/linear_model/lars_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +79 -43
- snowflake/ml/modeling/linear_model/linear_regression.py +79 -43
- snowflake/ml/modeling/linear_model/logistic_regression.py +79 -43
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +79 -43
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +79 -43
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/perceptron.py +79 -43
- snowflake/ml/modeling/linear_model/poisson_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ransac_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ridge.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_cv.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +79 -43
- snowflake/ml/modeling/manifold/isomap.py +79 -43
- snowflake/ml/modeling/manifold/mds.py +79 -43
- snowflake/ml/modeling/manifold/spectral_embedding.py +79 -43
- snowflake/ml/modeling/manifold/tsne.py +79 -43
- snowflake/ml/modeling/metrics/classification.py +6 -1
- snowflake/ml/modeling/metrics/regression.py +517 -9
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +79 -43
- snowflake/ml/modeling/mixture/gaussian_mixture.py +79 -43
- snowflake/ml/modeling/model_selection/grid_search_cv.py +79 -43
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +79 -43
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +79 -43
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +79 -43
- snowflake/ml/modeling/multiclass/output_code_classifier.py +79 -43
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/complement_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +79 -43
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +79 -43
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +79 -43
- snowflake/ml/modeling/neighbors/kernel_density.py +79 -43
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +79 -43
- snowflake/ml/modeling/neighbors/nearest_centroid.py +79 -43
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +79 -43
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +79 -43
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +79 -43
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +79 -43
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +79 -43
- snowflake/ml/modeling/neural_network/mlp_classifier.py +79 -43
- snowflake/ml/modeling/neural_network/mlp_regressor.py +79 -43
- snowflake/ml/modeling/pipeline/pipeline.py +24 -0
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +79 -43
- snowflake/ml/modeling/semi_supervised/label_propagation.py +79 -43
- snowflake/ml/modeling/semi_supervised/label_spreading.py +79 -43
- snowflake/ml/modeling/svm/linear_svc.py +79 -43
- snowflake/ml/modeling/svm/linear_svr.py +79 -43
- snowflake/ml/modeling/svm/nu_svc.py +79 -43
- snowflake/ml/modeling/svm/nu_svr.py +79 -43
- snowflake/ml/modeling/svm/svc.py +79 -43
- snowflake/ml/modeling/svm/svr.py +79 -43
- snowflake/ml/modeling/tree/decision_tree_classifier.py +79 -43
- snowflake/ml/modeling/tree/decision_tree_regressor.py +79 -43
- snowflake/ml/modeling/tree/extra_tree_classifier.py +79 -43
- snowflake/ml/modeling/tree/extra_tree_regressor.py +79 -43
- snowflake/ml/modeling/xgboost/xgb_classifier.py +79 -43
- snowflake/ml/modeling/xgboost/xgb_regressor.py +79 -43
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +79 -43
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +79 -43
- snowflake/ml/registry/model_registry.py +123 -121
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +50 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
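
Every wrapper under `snowflake/ml/modeling/` is generated from the same template, which is why they all show identical `+79 -43` counts. Two representative diffs are reproduced below, for `neighbors/kernel_density.py` and `neighbors/local_outlier_factor.py`. The template change: the per-instance `self.id` is replaced by a `_get_rand_id()` helper, stage paths are built with `posixpath` instead of `os.path`, the fit/score stored procedures are registered as anonymous and invoked directly, `statement_params` is threaded through the sproc wrappers, column matching against `feature_names_in_` is tightened, and `predict` infers its output column type from the stored model signature.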
Representative diff for `snowflake/ml/modeling/neighbors/kernel_density.py`:

```diff
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -233,7 +235,6 @@ class KernelDensity(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -261,6 +262,15 @@ class KernelDensity(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -339,7 +349,7 @@ class KernelDensity(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -352,11 +362,12 @@ class KernelDensity(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -382,6 +393,7 @@ class KernelDensity(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -390,7 +402,8 @@ class KernelDensity(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -457,15 +470,15 @@ class KernelDensity(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -475,7 +488,7 @@ class KernelDensity(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -521,7 +534,7 @@ class KernelDensity(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -613,7 +626,7 @@ class KernelDensity(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -669,26 +682,37 @@ class KernelDensity(BaseTransformer):
         # input cols need to match unquoted / quoted
        input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        [17 lines removed here; the previous column-selection logic was not captured in this view]
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -767,11 +791,18 @@ class KernelDensity(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -842,10 +873,10 @@ class KernelDensity(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1070,7 +1101,7 @@ class KernelDensity(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1084,8 +1115,9 @@ class KernelDensity(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1111,6 +1143,7 @@ class KernelDensity(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1118,7 +1151,8 @@ class KernelDensity(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1168,14 +1202,14 @@ class KernelDensity(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1193,18 +1227,20 @@ class KernelDensity(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
```
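The `# Use posixpath to construct stage paths` change is worth a note: Snowflake stage paths use forward slashes, while `os.path.join` delegates to `ntpath` on Windows and emits backslashes. A minimal sketch of the difference (the stage and file names here are made up for illustration):

```python
import ntpath      # what os.path resolves to on Windows
import posixpath   # what os.path resolves to on POSIX systems

stage_name = "SNOWML_TRANSFORM_ABC123"   # hypothetical temp stage
local_file = "/tmp/snowml/model.pkl"     # hypothetical local artifact

# os.path.join on a Windows client produces a backslash-separated
# path, which is not a valid stage path:
print(ntpath.join(stage_name, ntpath.basename(local_file)))
# -> SNOWML_TRANSFORM_ABC123\model.pkl

# posixpath.join yields the same forward-slash path on every OS:
print(posixpath.join(stage_name, posixpath.basename(local_file)))
# -> SNOWML_TRANSFORM_ABC123/model.pkl
```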
The same template change as applied to `snowflake/ml/modeling/neighbors/local_outlier_factor.py`:

```diff
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -261,7 +263,6 @@ class LocalOutlierFactor(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -289,6 +290,15 @@ class LocalOutlierFactor(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -367,7 +377,7 @@ class LocalOutlierFactor(BaseTransformer):
            cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -380,11 +390,12 @@ class LocalOutlierFactor(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -410,6 +421,7 @@ class LocalOutlierFactor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -418,7 +430,8 @@ class LocalOutlierFactor(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -485,15 +498,15 @@ class LocalOutlierFactor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -503,7 +516,7 @@ class LocalOutlierFactor(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -549,7 +562,7 @@ class LocalOutlierFactor(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -641,7 +654,7 @@ class LocalOutlierFactor(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -697,26 +710,37 @@ class LocalOutlierFactor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        [17 lines removed here; the previous column-selection logic was not captured in this view]
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -797,11 +821,18 @@ class LocalOutlierFactor(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -872,10 +903,10 @@ class LocalOutlierFactor(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1102,7 +1133,7 @@ class LocalOutlierFactor(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1116,8 +1147,9 @@ class LocalOutlierFactor(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1143,6 +1175,7 @@ class LocalOutlierFactor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1150,7 +1183,8 @@ class LocalOutlierFactor(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1200,14 +1234,14 @@ class LocalOutlierFactor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1225,18 +1259,20 @@ class LocalOutlierFactor(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
```
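The other recurring change, inferring `expected_output_cols_type` from the stored model signature, can be exercised in isolation. A rough sketch, assuming the toy signature below stands in for what the wrappers record in `self._model_signature_dict["predict"]` (the column names are invented, and the printed result is an educated guess at the string `convert_sp_to_sf_type` returns):

```python
from snowflake.ml.model.model_signature import DataType, FeatureSpec, ModelSignature
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

# Toy signature: one DOUBLE input, one DOUBLE output (names are illustrative).
inputs = [FeatureSpec(dtype=DataType.DOUBLE, name="FEATURE_0")]
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name="OUTPUT_0")]
signature = ModelSignature(inputs=inputs, outputs=outputs)

# The lookup predict() now performs before calling _batch_inference:
expected_type_inferred = convert_sp_to_sf_type(signature.outputs[0].as_snowpark_type())
print(expected_type_inferred)  # presumably "DOUBLE"
```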