snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff compares the contents of two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in the public registry.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +35 -40
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/identifier.py +74 -7
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_core_requirements.py +1 -1
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +25 -28
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +7 -4
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/_base.py +3 -1
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -8
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +27 -21
- snowflake/ml/model/_model_meta.py +33 -19
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +28 -15
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +79 -43
- snowflake/ml/modeling/cluster/affinity_propagation.py +79 -43
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +79 -43
- snowflake/ml/modeling/cluster/birch.py +79 -43
- snowflake/ml/modeling/cluster/bisecting_k_means.py +79 -43
- snowflake/ml/modeling/cluster/dbscan.py +79 -43
- snowflake/ml/modeling/cluster/feature_agglomeration.py +79 -43
- snowflake/ml/modeling/cluster/k_means.py +79 -43
- snowflake/ml/modeling/cluster/mean_shift.py +79 -43
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +79 -43
- snowflake/ml/modeling/cluster/optics.py +79 -43
- snowflake/ml/modeling/cluster/spectral_biclustering.py +79 -43
- snowflake/ml/modeling/cluster/spectral_clustering.py +79 -43
- snowflake/ml/modeling/cluster/spectral_coclustering.py +79 -43
- snowflake/ml/modeling/compose/column_transformer.py +79 -43
- snowflake/ml/modeling/compose/transformed_target_regressor.py +79 -43
- snowflake/ml/modeling/covariance/elliptic_envelope.py +79 -43
- snowflake/ml/modeling/covariance/empirical_covariance.py +79 -43
- snowflake/ml/modeling/covariance/graphical_lasso.py +79 -43
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +79 -43
- snowflake/ml/modeling/covariance/ledoit_wolf.py +79 -43
- snowflake/ml/modeling/covariance/min_cov_det.py +79 -43
- snowflake/ml/modeling/covariance/oas.py +79 -43
- snowflake/ml/modeling/covariance/shrunk_covariance.py +79 -43
- snowflake/ml/modeling/decomposition/dictionary_learning.py +79 -43
- snowflake/ml/modeling/decomposition/factor_analysis.py +79 -43
- snowflake/ml/modeling/decomposition/fast_ica.py +79 -43
- snowflake/ml/modeling/decomposition/incremental_pca.py +79 -43
- snowflake/ml/modeling/decomposition/kernel_pca.py +79 -43
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +79 -43
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +79 -43
- snowflake/ml/modeling/decomposition/pca.py +79 -43
- snowflake/ml/modeling/decomposition/sparse_pca.py +79 -43
- snowflake/ml/modeling/decomposition/truncated_svd.py +79 -43
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +79 -43
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +79 -43
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/bagging_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/bagging_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/isolation_forest.py +79 -43
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/stacking_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/voting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/voting_regressor.py +79 -43
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fdr.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fpr.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fwe.py +79 -43
- snowflake/ml/modeling/feature_selection/select_k_best.py +79 -43
- snowflake/ml/modeling/feature_selection/select_percentile.py +79 -43
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +79 -43
- snowflake/ml/modeling/feature_selection/variance_threshold.py +79 -43
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +79 -43
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +79 -43
- snowflake/ml/modeling/impute/iterative_imputer.py +79 -43
- snowflake/ml/modeling/impute/knn_imputer.py +79 -43
- snowflake/ml/modeling/impute/missing_indicator.py +79 -43
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +79 -43
- snowflake/ml/modeling/kernel_approximation/nystroem.py +79 -43
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +79 -43
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +79 -43
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +79 -43
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +79 -43
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +79 -43
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ard_regression.py +79 -43
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +79 -43
- snowflake/ml/modeling/linear_model/elastic_net.py +79 -43
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +79 -43
- snowflake/ml/modeling/linear_model/gamma_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/huber_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/lars.py +79 -43
- snowflake/ml/modeling/linear_model/lars_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +79 -43
- snowflake/ml/modeling/linear_model/linear_regression.py +79 -43
- snowflake/ml/modeling/linear_model/logistic_regression.py +79 -43
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +79 -43
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +79 -43
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/perceptron.py +79 -43
- snowflake/ml/modeling/linear_model/poisson_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ransac_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ridge.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_cv.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +79 -43
- snowflake/ml/modeling/manifold/isomap.py +79 -43
- snowflake/ml/modeling/manifold/mds.py +79 -43
- snowflake/ml/modeling/manifold/spectral_embedding.py +79 -43
- snowflake/ml/modeling/manifold/tsne.py +79 -43
- snowflake/ml/modeling/metrics/classification.py +6 -1
- snowflake/ml/modeling/metrics/regression.py +517 -9
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +79 -43
- snowflake/ml/modeling/mixture/gaussian_mixture.py +79 -43
- snowflake/ml/modeling/model_selection/grid_search_cv.py +79 -43
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +79 -43
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +79 -43
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +79 -43
- snowflake/ml/modeling/multiclass/output_code_classifier.py +79 -43
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/complement_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +79 -43
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +79 -43
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +79 -43
- snowflake/ml/modeling/neighbors/kernel_density.py +79 -43
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +79 -43
- snowflake/ml/modeling/neighbors/nearest_centroid.py +79 -43
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +79 -43
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +79 -43
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +79 -43
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +79 -43
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +79 -43
- snowflake/ml/modeling/neural_network/mlp_classifier.py +79 -43
- snowflake/ml/modeling/neural_network/mlp_regressor.py +79 -43
- snowflake/ml/modeling/pipeline/pipeline.py +24 -0
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +79 -43
- snowflake/ml/modeling/semi_supervised/label_propagation.py +79 -43
- snowflake/ml/modeling/semi_supervised/label_spreading.py +79 -43
- snowflake/ml/modeling/svm/linear_svc.py +79 -43
- snowflake/ml/modeling/svm/linear_svr.py +79 -43
- snowflake/ml/modeling/svm/nu_svc.py +79 -43
- snowflake/ml/modeling/svm/nu_svr.py +79 -43
- snowflake/ml/modeling/svm/svc.py +79 -43
- snowflake/ml/modeling/svm/svr.py +79 -43
- snowflake/ml/modeling/tree/decision_tree_classifier.py +79 -43
- snowflake/ml/modeling/tree/decision_tree_regressor.py +79 -43
- snowflake/ml/modeling/tree/extra_tree_classifier.py +79 -43
- snowflake/ml/modeling/tree/extra_tree_regressor.py +79 -43
- snowflake/ml/modeling/xgboost/xgb_classifier.py +79 -43
- snowflake/ml/modeling/xgboost/xgb_regressor.py +79 -43
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +79 -43
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +79 -43
- snowflake/ml/registry/model_registry.py +123 -121
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +50 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
--- snowflake/ml/modeling/lightgbm/lgbm_classifier.py (1.0.1)
+++ snowflake/ml/modeling/lightgbm/lgbm_classifier.py (1.0.3)
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -26,6 +27,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -200,7 +202,6 @@ class LGBMClassifier(BaseTransformer):
         **kwargs,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'lightgbm=={lightgbm.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -240,6 +241,15 @@ class LGBMClassifier(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
        Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
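
The two hunks above replace the single `self.id` minted once in the constructor with a fresh identifier generated per operation. A minimal standalone sketch of the pattern (the helper body and name prefixes come from the diff; the surrounding scaffolding is illustrative):

    # Sketch of the per-operation random-id pattern introduced above.
    from uuid import uuid4

    def get_rand_id() -> str:
        """Random id that is safe inside unquoted SQL identifiers (hex digits and underscores)."""
        return str(uuid4()).replace("-", "_").upper()

    # Each fit/score call now names its own temp stage and sproc, instead of
    # reusing one id minted when the estimator instance was constructed.
    transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=get_rand_id())
    fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=get_rand_id())
    print(transform_stage_name, fit_sproc_name)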
@@ -318,7 +328,7 @@ class LGBMClassifier(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -331,11 +341,12 @@ class LGBMClassifier(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
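
Stage paths in Snowflake are always `/`-separated, while `os.path.join` uses the host separator, so on Windows the old code produced backslashed stage paths. A quick standard-library illustration of the difference:

    import ntpath      # os.path as it behaves on Windows
    import posixpath   # os.path as it behaves on POSIX systems

    stage_name = "SNOWML_TRANSFORM_ABC123"
    base_name = "model.pkl"

    # On Windows, os.path.join is ntpath.join and emits a backslash,
    # which is not a valid separator inside a stage path:
    print(ntpath.join(stage_name, base_name))     # SNOWML_TRANSFORM_ABC123\model.pkl
    # posixpath.join emits '/' on every platform:
    print(posixpath.join(stage_name, base_name))  # SNOWML_TRANSFORM_ABC123/model.pkl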
@@ -361,6 +372,7 @@ class LGBMClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -369,7 +381,8 @@ class LGBMClassifier(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -436,15 +449,15 @@ class LGBMClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -454,7 +467,7 @@ class LGBMClassifier(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
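
With `anonymous=True`, the stored procedure is never registered under a name in the account, so there is nothing for `session.call(...)` to look up; instead the object returned by the `@sproc` decorator is invoked directly, with the session as its first argument. A hedged sketch of that calling convention (assumes a live Snowpark `session`; the `echo` sproc is hypothetical):

    from snowflake.snowpark import Session
    from snowflake.snowpark.functions import sproc

    def register_echo(session: Session):
        # anonymous=True: no named object is created in the database.
        @sproc(session=session, anonymous=True, packages=["snowflake-snowpark-python"])
        def echo(session: Session, msg: str) -> str:
            return msg

        return echo

    # Invocation (needs a real connection, hence commented out):
    # session = Session.builder.configs({...}).create()
    # echo = register_echo(session)
    # print(echo(session, "hello"))  # direct call, mirroring fit_wrapper_sproc(session, ...)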
@@ -500,7 +513,7 @@ class LGBMClassifier(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -592,7 +605,7 @@ class LGBMClassifier(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -648,26 +661,37 @@ class LGBMClassifier(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        [17 lines removed here: the previous input-column selection logic; their content was not captured in this diff view]
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
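
The rewritten block above matches each feature the estimator saw at fit time against the dataset columns in three spellings: as configured, unquoted, and quoted. A self-contained sketch of that matching idea, with deliberately simplified stand-ins for the `identifier` helpers (the real `snowflake.ml._internal.utils.identifier` functions implement the full Snowflake identifier quoting rules):

    from typing import List

    # Simplified stand-ins for identifier.get_unescaped_names / get_escaped_names.
    def get_unescaped_names(cols: List[str]) -> List[str]:
        return [c[1:-1] if c.startswith('"') and c.endswith('"') else c.upper() for c in cols]

    def get_escaped_names(cols: List[str]) -> List[str]:
        return [f'"{c}"' for c in cols]

    input_cols = ['"feat_a"', "FEAT_B"]         # as configured on the transformer
    unquoted = get_unescaped_names(input_cols)  # ['feat_a', 'FEAT_B']
    quoted = get_escaped_names(unquoted)        # ['"feat_a"', '"FEAT_B"']
    features_required = ["feat_a", "FEAT_B"]    # e.g. estimator.feature_names_in_
    dataset_columns = {"feat_a", "FEAT_B"}      # columns present in the dataset

    columns_to_select, missing_features = [], []
    for i, f in enumerate(features_required):
        # A feature counts as present if any of the three spellings lines up,
        # loosely mirroring the per-index checks in the new hunk.
        if i >= len(input_cols) or f not in (input_cols[i], unquoted[i], quoted[i]):
            missing_features.append(f)
        elif input_cols[i] in dataset_columns:
            columns_to_select.append(input_cols[i])
        elif unquoted[i] in dataset_columns:
            columns_to_select.append(unquoted[i])
        else:
            columns_to_select.append(quoted[i])

    print(columns_to_select, missing_features)  # ['feat_a', 'FEAT_B'] []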
@@ -748,11 +772,18 @@ class LGBMClassifier(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
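
`predict` previously passed a fixed expected output type; it now derives the type from the stored model signature via the internal Snowpark helper `convert_sp_to_sf_type`, which maps a Snowpark `DataType` to a Snowflake SQL type name. A sketch with a simplified stand-in for that helper (the type-name strings here are illustrative; the real helper's spellings may differ):

    from snowflake.snowpark.types import DoubleType, LongType, StringType

    def sp_to_sf_type_name(dtype) -> str:
        # Simplified stand-in for snowflake.snowpark._internal.type_utils.convert_sp_to_sf_type.
        return {DoubleType: "DOUBLE", LongType: "BIGINT", StringType: "VARCHAR"}[type(dtype)]

    # For a classifier whose 'predict' signature declares an integer label column,
    # the batch-inference output column can be typed from the signature instead of
    # defaulting to the empty string.
    expected_type_inferred = sp_to_sf_type_name(LongType())
    print(expected_type_inferred)  # BIGINT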
@@ -823,10 +854,10 @@ class LGBMClassifier(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-            Returns an empty list if the estimator is not a classifier.
+            Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1055,7 +1086,7 @@ class LGBMClassifier(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1069,8 +1100,9 @@ class LGBMClassifier(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1096,6 +1128,7 @@ class LGBMClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1103,7 +1136,8 @@ class LGBMClassifier(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1153,14 +1187,14 @@ class LGBMClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1178,18 +1212,20 @@ class LGBMClassifier(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, […]
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, […]
-            […]
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, […]
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
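
The final hunk widens every recorded `ModelSignature` so that the declared outputs include the input features only when `drop_input_cols` is off. A short sketch using the public `model_signature` API (column names are illustrative):

    from snowflake.ml.model.model_signature import DataType, FeatureSpec, ModelSignature

    inputs = [FeatureSpec(dtype=DataType.DOUBLE, name="FEAT_A")]
    outputs = [FeatureSpec(dtype=DataType.DOUBLE, name="PREDICT_OUTPUT")]

    for drop_input_cols in (False, True):
        # With drop_input_cols=False the signature advertises inputs + outputs;
        # with True, only the model's own output columns remain.
        sig = ModelSignature(inputs, ([] if drop_input_cols else inputs) + outputs)
        print(drop_input_cols, [f.name for f in sig.outputs])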
--- snowflake/ml/modeling/lightgbm/lgbm_regressor.py (1.0.1)
+++ snowflake/ml/modeling/lightgbm/lgbm_regressor.py (1.0.3)
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -26,6 +27,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -200,7 +202,6 @@ class LGBMRegressor(BaseTransformer):
         **kwargs,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'lightgbm=={lightgbm.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -240,6 +241,15 @@ class LGBMRegressor(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -318,7 +328,7 @@ class LGBMRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -331,11 +341,12 @@ class LGBMRegressor(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -361,6 +372,7 @@ class LGBMRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -369,7 +381,8 @@ class LGBMRegressor(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -436,15 +449,15 @@ class LGBMRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -454,7 +467,7 @@ class LGBMRegressor(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -500,7 +513,7 @@ class LGBMRegressor(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -592,7 +605,7 @@ class LGBMRegressor(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -648,26 +661,37 @@ class LGBMRegressor(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        [17 lines removed here: the previous input-column selection logic; their content was not captured in this diff view]
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -748,11 +772,18 @@ class LGBMRegressor(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = "float"
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="float",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -823,10 +854,10 @@ class LGBMRegressor(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-            Returns an empty list if the estimator is not a classifier.
+            Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1051,7 +1082,7 @@ class LGBMRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1065,8 +1096,9 @@ class LGBMRegressor(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1092,6 +1124,7 @@ class LGBMRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1099,7 +1132,8 @@ class LGBMRegressor(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1149,14 +1183,14 @@ class LGBMRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1174,18 +1208,20 @@ class LGBMRegressor(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, […]
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, […]
-            […]
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, […]
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
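
For context, a hedged end-to-end sketch of the estimator these two files implement; column names and values are placeholders. The pandas path shown here fits and predicts locally, while passing a Snowpark DataFrame instead would exercise the sproc, stage, and UDF machinery changed in the diffs above (where the regressor's predict output is typed "float"):

    import pandas as pd
    from snowflake.ml.modeling.lightgbm import LGBMRegressor

    df = pd.DataFrame({
        "FEAT_A": [1.0, 2.0, 3.0, 4.0],
        "FEAT_B": [0.1, 0.2, 0.3, 0.4],
        "TARGET": [1.5, 2.5, 3.5, 4.5],
    })

    reg = LGBMRegressor(
        input_cols=["FEAT_A", "FEAT_B"],
        label_cols=["TARGET"],
        output_cols=["PREDICTION"],
    )
    reg.fit(df)            # pandas input: fits the wrapped lightgbm model locally
    out = reg.predict(df)  # a Snowpark DataFrame would take the batch-inference path
    print(out["PREDICTION"])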