snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff shows the changes between publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +35 -40
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/identifier.py +74 -7
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_core_requirements.py +1 -1
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +25 -28
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +7 -4
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/_base.py +3 -1
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -8
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +27 -21
- snowflake/ml/model/_model_meta.py +33 -19
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +28 -15
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +79 -43
- snowflake/ml/modeling/cluster/affinity_propagation.py +79 -43
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +79 -43
- snowflake/ml/modeling/cluster/birch.py +79 -43
- snowflake/ml/modeling/cluster/bisecting_k_means.py +79 -43
- snowflake/ml/modeling/cluster/dbscan.py +79 -43
- snowflake/ml/modeling/cluster/feature_agglomeration.py +79 -43
- snowflake/ml/modeling/cluster/k_means.py +79 -43
- snowflake/ml/modeling/cluster/mean_shift.py +79 -43
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +79 -43
- snowflake/ml/modeling/cluster/optics.py +79 -43
- snowflake/ml/modeling/cluster/spectral_biclustering.py +79 -43
- snowflake/ml/modeling/cluster/spectral_clustering.py +79 -43
- snowflake/ml/modeling/cluster/spectral_coclustering.py +79 -43
- snowflake/ml/modeling/compose/column_transformer.py +79 -43
- snowflake/ml/modeling/compose/transformed_target_regressor.py +79 -43
- snowflake/ml/modeling/covariance/elliptic_envelope.py +79 -43
- snowflake/ml/modeling/covariance/empirical_covariance.py +79 -43
- snowflake/ml/modeling/covariance/graphical_lasso.py +79 -43
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +79 -43
- snowflake/ml/modeling/covariance/ledoit_wolf.py +79 -43
- snowflake/ml/modeling/covariance/min_cov_det.py +79 -43
- snowflake/ml/modeling/covariance/oas.py +79 -43
- snowflake/ml/modeling/covariance/shrunk_covariance.py +79 -43
- snowflake/ml/modeling/decomposition/dictionary_learning.py +79 -43
- snowflake/ml/modeling/decomposition/factor_analysis.py +79 -43
- snowflake/ml/modeling/decomposition/fast_ica.py +79 -43
- snowflake/ml/modeling/decomposition/incremental_pca.py +79 -43
- snowflake/ml/modeling/decomposition/kernel_pca.py +79 -43
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +79 -43
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +79 -43
- snowflake/ml/modeling/decomposition/pca.py +79 -43
- snowflake/ml/modeling/decomposition/sparse_pca.py +79 -43
- snowflake/ml/modeling/decomposition/truncated_svd.py +79 -43
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +79 -43
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +79 -43
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/bagging_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/bagging_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/isolation_forest.py +79 -43
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/stacking_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/voting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/voting_regressor.py +79 -43
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fdr.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fpr.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fwe.py +79 -43
- snowflake/ml/modeling/feature_selection/select_k_best.py +79 -43
- snowflake/ml/modeling/feature_selection/select_percentile.py +79 -43
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +79 -43
- snowflake/ml/modeling/feature_selection/variance_threshold.py +79 -43
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +79 -43
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +79 -43
- snowflake/ml/modeling/impute/iterative_imputer.py +79 -43
- snowflake/ml/modeling/impute/knn_imputer.py +79 -43
- snowflake/ml/modeling/impute/missing_indicator.py +79 -43
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +79 -43
- snowflake/ml/modeling/kernel_approximation/nystroem.py +79 -43
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +79 -43
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +79 -43
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +79 -43
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +79 -43
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +79 -43
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ard_regression.py +79 -43
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +79 -43
- snowflake/ml/modeling/linear_model/elastic_net.py +79 -43
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +79 -43
- snowflake/ml/modeling/linear_model/gamma_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/huber_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/lars.py +79 -43
- snowflake/ml/modeling/linear_model/lars_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +79 -43
- snowflake/ml/modeling/linear_model/linear_regression.py +79 -43
- snowflake/ml/modeling/linear_model/logistic_regression.py +79 -43
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +79 -43
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +79 -43
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/perceptron.py +79 -43
- snowflake/ml/modeling/linear_model/poisson_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ransac_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ridge.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_cv.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +79 -43
- snowflake/ml/modeling/manifold/isomap.py +79 -43
- snowflake/ml/modeling/manifold/mds.py +79 -43
- snowflake/ml/modeling/manifold/spectral_embedding.py +79 -43
- snowflake/ml/modeling/manifold/tsne.py +79 -43
- snowflake/ml/modeling/metrics/classification.py +6 -1
- snowflake/ml/modeling/metrics/regression.py +517 -9
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +79 -43
- snowflake/ml/modeling/mixture/gaussian_mixture.py +79 -43
- snowflake/ml/modeling/model_selection/grid_search_cv.py +79 -43
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +79 -43
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +79 -43
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +79 -43
- snowflake/ml/modeling/multiclass/output_code_classifier.py +79 -43
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/complement_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +79 -43
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +79 -43
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +79 -43
- snowflake/ml/modeling/neighbors/kernel_density.py +79 -43
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +79 -43
- snowflake/ml/modeling/neighbors/nearest_centroid.py +79 -43
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +79 -43
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +79 -43
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +79 -43
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +79 -43
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +79 -43
- snowflake/ml/modeling/neural_network/mlp_classifier.py +79 -43
- snowflake/ml/modeling/neural_network/mlp_regressor.py +79 -43
- snowflake/ml/modeling/pipeline/pipeline.py +24 -0
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +79 -43
- snowflake/ml/modeling/semi_supervised/label_propagation.py +79 -43
- snowflake/ml/modeling/semi_supervised/label_spreading.py +79 -43
- snowflake/ml/modeling/svm/linear_svc.py +79 -43
- snowflake/ml/modeling/svm/linear_svr.py +79 -43
- snowflake/ml/modeling/svm/nu_svc.py +79 -43
- snowflake/ml/modeling/svm/nu_svr.py +79 -43
- snowflake/ml/modeling/svm/svc.py +79 -43
- snowflake/ml/modeling/svm/svr.py +79 -43
- snowflake/ml/modeling/tree/decision_tree_classifier.py +79 -43
- snowflake/ml/modeling/tree/decision_tree_regressor.py +79 -43
- snowflake/ml/modeling/tree/extra_tree_classifier.py +79 -43
- snowflake/ml/modeling/tree/extra_tree_regressor.py +79 -43
- snowflake/ml/modeling/xgboost/xgb_classifier.py +79 -43
- snowflake/ml/modeling/xgboost/xgb_regressor.py +79 -43
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +79 -43
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +79 -43
- snowflake/ml/registry/model_registry.py +123 -121
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +50 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/cluster/spectral_coclustering.py

```diff
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get_temp_file_path
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -223,7 +225,6 @@ class SpectralCoclustering(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -249,6 +250,15 @@ class SpectralCoclustering(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -327,7 +337,7 @@ class SpectralCoclustering(BaseTransformer):
         cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -340,11 +350,12 @@ class SpectralCoclustering(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -370,6 +381,7 @@ class SpectralCoclustering(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -378,7 +390,8 @@ class SpectralCoclustering(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -445,15 +458,15 @@ class SpectralCoclustering(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -463,7 +476,7 @@ class SpectralCoclustering(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -509,7 +522,7 @@ class SpectralCoclustering(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -601,7 +614,7 @@ class SpectralCoclustering(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -657,26 +670,37 @@ class SpectralCoclustering(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        [... 17 removed lines; their content is not preserved in the source diff view ...]
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -755,11 +779,18 @@ class SpectralCoclustering(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
```
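The `predict()` hunk above is where the new `convert_sp_to_sf_type` import is used: when a `predict` model signature is available, the Snowpark type of its first output is translated into a Snowflake SQL type name and passed as `expected_output_cols_type`. A minimal, hypothetical illustration of that translation; note `type_utils` is an internal Snowpark module, so the exact strings returned may vary across Snowpark versions:

```python
# Sketch only: convert_sp_to_sf_type is internal Snowpark API; the example
# type instances below are hypothetical inputs, not taken from the diff.
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
from snowflake.snowpark.types import DoubleType, LongType

# Maps a Snowpark DataType instance to a Snowflake SQL type name string.
print(convert_sp_to_sf_type(LongType()))    # e.g. "BIGINT"
print(convert_sp_to_sf_type(DoubleType()))  # e.g. "DOUBLE"
```

The remaining hunks of spectral_coclustering.py continue below.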
```diff
@@ -830,10 +861,10 @@ class SpectralCoclustering(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-            Returns
+            Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1058,7 +1089,7 @@ class SpectralCoclustering(BaseTransformer):
         cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1072,8 +1103,9 @@ class SpectralCoclustering(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1099,6 +1131,7 @@ class SpectralCoclustering(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1106,7 +1139,8 @@ class SpectralCoclustering(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1156,14 +1190,14 @@ class SpectralCoclustering(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1181,18 +1215,20 @@ class SpectralCoclustering(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
```
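A recurring change in these autogenerated wrappers is building stage paths with `posixpath.join` instead of `os.path.join`, per the added comment "Use posixpath to construct stage paths". A minimal standalone sketch of the difference, using made-up stage and file names: on Windows, `os.path.join` is `ntpath.join` and inserts backslashes, which do not match the forward-slash form that stage paths use, whereas `posixpath.join` emits `/` on every platform.

```python
import ntpath     # the os.path implementation used on Windows
import posixpath  # always joins with "/"

stage_name = "SNOWML_TRANSFORM_ABC123"  # hypothetical stage name
file_name = "model.pkl"                 # hypothetical file name

# What a Windows client would have produced via os.path.join:
print(ntpath.join(stage_name, file_name))     # SNOWML_TRANSFORM_ABC123\model.pkl
# What the 1.0.3 code produces on every platform:
print(posixpath.join(stage_name, file_name))  # SNOWML_TRANSFORM_ABC123/model.pkl
```

The same set of edits is applied to every autogenerated estimator listed above (each showing +79 -43); the compose/column_transformer.py diff below is representative.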
snowflake/ml/modeling/compose/column_transformer.py

```diff
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get_temp_file_path
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -253,7 +255,6 @@ class ColumnTransformer(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
         deps = deps | _gather_dependencies(transformers)
         self._deps = list(deps)
@@ -279,6 +280,15 @@ class ColumnTransformer(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -357,7 +367,7 @@ class ColumnTransformer(BaseTransformer):
         cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -370,11 +380,12 @@ class ColumnTransformer(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -400,6 +411,7 @@ class ColumnTransformer(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -408,7 +420,8 @@ class ColumnTransformer(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -475,15 +488,15 @@ class ColumnTransformer(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name =
-
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -493,7 +506,7 @@ class ColumnTransformer(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -539,7 +552,7 @@ class ColumnTransformer(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -631,7 +644,7 @@ class ColumnTransformer(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -687,26 +700,37 @@ class ColumnTransformer(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        [... 17 removed lines; their content is not preserved in the source diff view ...]
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -785,11 +809,18 @@ class ColumnTransformer(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type=
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -862,10 +893,10 @@ class ColumnTransformer(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-            Returns
+            Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1090,7 +1121,7 @@ class ColumnTransformer(BaseTransformer):
         cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1104,8 +1135,9 @@ class ColumnTransformer(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1131,6 +1163,7 @@ class ColumnTransformer(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1138,7 +1171,8 @@ class ColumnTransformer(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1188,14 +1222,14 @@ class ColumnTransformer(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score =
-
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params
+            statement_params,
        )
 
         cleanup_temp_files([local_score_file_name])
@@ -1213,18 +1247,20 @@ class ColumnTransformer(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs,
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
```
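Both files make the same identifier change: the `self.id` assigned once in `__init__` is removed, and the new `_get_rand_id()` helper mints a fresh identifier every time a sproc, table, or stage name is built. A standalone sketch of the helper as it appears in the diff, with a hypothetical usage:

```python
from uuid import uuid4

def _get_rand_id() -> str:
    """Generate a random id usable in sproc, table, and stage names
    (hyphens replaced with underscores, uppercased)."""
    return str(uuid4()).replace("-", "_").upper()

# Every call yields a new id, so repeated fit()/score() calls on the same
# estimator get unique names instead of reusing one id fixed at construction:
print("SNOWML_FIT_{safe_id}".format(safe_id=_get_rand_id()))
print("SNOWML_FIT_{safe_id}".format(safe_id=_get_rand_id()))
```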