snowflake-ml-python 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +11 -1
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/feature_store/feature_store.py +151 -78
- snowflake/ml/feature_store/feature_view.py +12 -24
- snowflake/ml/fileset/sfcfs.py +56 -50
- snowflake/ml/fileset/stage_fs.py +48 -13
- snowflake/ml/model/_client/model/model_version_impl.py +2 -50
- snowflake/ml/model/_client/ops/model_ops.py +78 -29
- snowflake/ml/model/_client/sql/model.py +23 -2
- snowflake/ml/model/_client/sql/model_version.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +19 -54
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +8 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +36 -6
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -2
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +5 -5
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +195 -123
- snowflake/ml/modeling/cluster/affinity_propagation.py +195 -123
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +195 -123
- snowflake/ml/modeling/cluster/birch.py +195 -123
- snowflake/ml/modeling/cluster/bisecting_k_means.py +195 -123
- snowflake/ml/modeling/cluster/dbscan.py +195 -123
- snowflake/ml/modeling/cluster/feature_agglomeration.py +195 -123
- snowflake/ml/modeling/cluster/k_means.py +195 -123
- snowflake/ml/modeling/cluster/mean_shift.py +195 -123
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +195 -123
- snowflake/ml/modeling/cluster/optics.py +195 -123
- snowflake/ml/modeling/cluster/spectral_biclustering.py +195 -123
- snowflake/ml/modeling/cluster/spectral_clustering.py +195 -123
- snowflake/ml/modeling/cluster/spectral_coclustering.py +195 -123
- snowflake/ml/modeling/compose/column_transformer.py +195 -123
- snowflake/ml/modeling/compose/transformed_target_regressor.py +195 -123
- snowflake/ml/modeling/covariance/elliptic_envelope.py +195 -123
- snowflake/ml/modeling/covariance/empirical_covariance.py +195 -123
- snowflake/ml/modeling/covariance/graphical_lasso.py +195 -123
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +195 -123
- snowflake/ml/modeling/covariance/ledoit_wolf.py +195 -123
- snowflake/ml/modeling/covariance/min_cov_det.py +195 -123
- snowflake/ml/modeling/covariance/oas.py +195 -123
- snowflake/ml/modeling/covariance/shrunk_covariance.py +195 -123
- snowflake/ml/modeling/decomposition/dictionary_learning.py +195 -123
- snowflake/ml/modeling/decomposition/factor_analysis.py +195 -123
- snowflake/ml/modeling/decomposition/fast_ica.py +195 -123
- snowflake/ml/modeling/decomposition/incremental_pca.py +195 -123
- snowflake/ml/modeling/decomposition/kernel_pca.py +195 -123
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +195 -123
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +195 -123
- snowflake/ml/modeling/decomposition/pca.py +195 -123
- snowflake/ml/modeling/decomposition/sparse_pca.py +195 -123
- snowflake/ml/modeling/decomposition/truncated_svd.py +195 -123
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +195 -123
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +195 -123
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/bagging_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/bagging_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/isolation_forest.py +195 -123
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/stacking_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/voting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/voting_regressor.py +195 -123
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fdr.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fpr.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fwe.py +195 -123
- snowflake/ml/modeling/feature_selection/select_k_best.py +195 -123
- snowflake/ml/modeling/feature_selection/select_percentile.py +195 -123
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +195 -123
- snowflake/ml/modeling/feature_selection/variance_threshold.py +195 -123
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +9 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +195 -123
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +195 -123
- snowflake/ml/modeling/impute/iterative_imputer.py +195 -123
- snowflake/ml/modeling/impute/knn_imputer.py +195 -123
- snowflake/ml/modeling/impute/missing_indicator.py +195 -123
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +195 -123
- snowflake/ml/modeling/kernel_approximation/nystroem.py +195 -123
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +195 -123
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +195 -123
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +195 -123
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +195 -123
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +195 -123
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ard_regression.py +195 -123
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +195 -123
- snowflake/ml/modeling/linear_model/elastic_net.py +195 -123
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +195 -123
- snowflake/ml/modeling/linear_model/gamma_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/huber_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/lars.py +195 -123
- snowflake/ml/modeling/linear_model/lars_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +195 -123
- snowflake/ml/modeling/linear_model/linear_regression.py +195 -123
- snowflake/ml/modeling/linear_model/logistic_regression.py +195 -123
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +195 -123
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +195 -123
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/perceptron.py +195 -123
- snowflake/ml/modeling/linear_model/poisson_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ransac_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ridge.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_cv.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +195 -123
- snowflake/ml/modeling/manifold/isomap.py +195 -123
- snowflake/ml/modeling/manifold/mds.py +195 -123
- snowflake/ml/modeling/manifold/spectral_embedding.py +195 -123
- snowflake/ml/modeling/manifold/tsne.py +195 -123
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +195 -123
- snowflake/ml/modeling/mixture/gaussian_mixture.py +195 -123
- snowflake/ml/modeling/model_selection/grid_search_cv.py +42 -18
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +42 -18
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +195 -123
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +195 -123
- snowflake/ml/modeling/multiclass/output_code_classifier.py +195 -123
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/complement_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +195 -123
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +195 -123
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +195 -123
- snowflake/ml/modeling/neighbors/kernel_density.py +195 -123
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +195 -123
- snowflake/ml/modeling/neighbors/nearest_centroid.py +195 -123
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +195 -123
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +195 -123
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +195 -123
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +195 -123
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +195 -123
- snowflake/ml/modeling/neural_network/mlp_classifier.py +195 -123
- snowflake/ml/modeling/neural_network/mlp_regressor.py +195 -123
- snowflake/ml/modeling/pipeline/pipeline.py +4 -4
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +195 -123
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +195 -123
- snowflake/ml/modeling/semi_supervised/label_spreading.py +195 -123
- snowflake/ml/modeling/svm/linear_svc.py +195 -123
- snowflake/ml/modeling/svm/linear_svr.py +195 -123
- snowflake/ml/modeling/svm/nu_svc.py +195 -123
- snowflake/ml/modeling/svm/nu_svr.py +195 -123
- snowflake/ml/modeling/svm/svc.py +195 -123
- snowflake/ml/modeling/svm/svr.py +195 -123
- snowflake/ml/modeling/tree/decision_tree_classifier.py +195 -123
- snowflake/ml/modeling/tree/decision_tree_regressor.py +195 -123
- snowflake/ml/modeling/tree/extra_tree_classifier.py +195 -123
- snowflake/ml/modeling/tree/extra_tree_regressor.py +195 -123
- snowflake/ml/modeling/xgboost/xgb_classifier.py +195 -123
- snowflake/ml/modeling/xgboost/xgb_regressor.py +195 -123
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +195 -123
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +195 -123
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/METADATA +68 -57
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/RECORD +202 -200
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
|
|
33
33
|
BatchInferenceKwargsTypedDict,
|
34
34
|
ScoreKwargsTypedDict
|
35
35
|
)
|
36
|
+
from snowflake.ml.model._signatures import utils as model_signature_utils
|
37
|
+
from snowflake.ml.model.model_signature import (
|
38
|
+
BaseFeatureSpec,
|
39
|
+
DataType,
|
40
|
+
FeatureSpec,
|
41
|
+
ModelSignature,
|
42
|
+
_infer_signature,
|
43
|
+
_rename_signature_with_snowflake_identifiers,
|
44
|
+
)
|
36
45
|
|
37
46
|
from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
|
38
47
|
|
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
|
|
43
52
|
validate_sklearn_args,
|
44
53
|
)
|
45
54
|
|
46
|
-
from snowflake.ml.model.model_signature import (
|
47
|
-
DataType,
|
48
|
-
FeatureSpec,
|
49
|
-
ModelSignature,
|
50
|
-
_infer_signature,
|
51
|
-
_rename_signature_with_snowflake_identifiers,
|
52
|
-
BaseFeatureSpec,
|
53
|
-
)
|
54
|
-
from snowflake.ml.model._signatures import utils as model_signature_utils
|
55
|
-
|
56
55
|
_PROJECT = "ModelDevelopment"
|
57
56
|
# Derive subproject from module name by removing "sklearn"
|
58
57
|
# and converting module name from underscore to CamelCase
|
@@ -241,12 +240,7 @@ class AdaBoostRegressor(BaseTransformer):
|
|
241
240
|
)
|
242
241
|
return selected_cols
|
243
242
|
|
244
|
-
|
245
|
-
project=_PROJECT,
|
246
|
-
subproject=_SUBPROJECT,
|
247
|
-
custom_tags=dict([("autogen", True)]),
|
248
|
-
)
|
249
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "AdaBoostRegressor":
|
243
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "AdaBoostRegressor":
|
250
244
|
"""Build a boosted classifier/regressor from the training set (X, y)
|
251
245
|
For more details on this function, see [sklearn.ensemble.AdaBoostRegressor.fit]
|
252
246
|
(https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.AdaBoostRegressor.html#sklearn.ensemble.AdaBoostRegressor.fit)
|
@@ -273,12 +267,14 @@ class AdaBoostRegressor(BaseTransformer):
|
|
273
267
|
|
274
268
|
self._snowpark_cols = dataset.select(self.input_cols).columns
|
275
269
|
|
276
|
-
|
270
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
277
271
|
if SNOWML_SPROC_ENV in os.environ:
|
278
272
|
statement_params = telemetry.get_function_usage_statement_params(
|
279
273
|
project=_PROJECT,
|
280
274
|
subproject=_SUBPROJECT,
|
281
|
-
function_name=telemetry.get_statement_params_full_func_name(
|
275
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
276
|
+
inspect.currentframe(), AdaBoostRegressor.__class__.__name__
|
277
|
+
),
|
282
278
|
api_calls=[Session.call],
|
283
279
|
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
284
280
|
)
|
@@ -299,7 +295,7 @@ class AdaBoostRegressor(BaseTransformer):
|
|
299
295
|
)
|
300
296
|
self._sklearn_object = model_trainer.train()
|
301
297
|
self._is_fitted = True
|
302
|
-
self.
|
298
|
+
self._generate_model_signatures(dataset)
|
303
299
|
return self
|
304
300
|
|
305
301
|
def _batch_inference_validate_snowpark(
|
@@ -375,7 +371,9 @@ class AdaBoostRegressor(BaseTransformer):
|
|
375
371
|
# when it is classifier, infer the datatype from label columns
|
376
372
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
377
373
|
# Batch inference takes a single expected output column type. Use the first columns type for now.
|
378
|
-
label_cols_signatures = [
|
374
|
+
label_cols_signatures = [
|
375
|
+
row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
|
376
|
+
]
|
379
377
|
if len(label_cols_signatures) == 0:
|
380
378
|
error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
|
381
379
|
raise exceptions.SnowflakeMLException(
|
@@ -383,25 +381,22 @@ class AdaBoostRegressor(BaseTransformer):
|
|
383
381
|
original_exception=ValueError(error_str),
|
384
382
|
)
|
385
383
|
|
386
|
-
expected_type_inferred = convert_sp_to_sf_type(
|
387
|
-
label_cols_signatures[0].as_snowpark_type()
|
388
|
-
)
|
384
|
+
expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
|
389
385
|
|
390
386
|
self._deps = self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
391
|
-
assert isinstance(
|
387
|
+
assert isinstance(
|
388
|
+
dataset._session, Session
|
389
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
392
390
|
|
393
391
|
transform_kwargs = dict(
|
394
|
-
session
|
395
|
-
dependencies
|
396
|
-
drop_input_cols
|
397
|
-
expected_output_cols_type
|
392
|
+
session=dataset._session,
|
393
|
+
dependencies=self._deps,
|
394
|
+
drop_input_cols=self._drop_input_cols,
|
395
|
+
expected_output_cols_type=expected_type_inferred,
|
398
396
|
)
|
399
397
|
|
400
398
|
elif isinstance(dataset, pd.DataFrame):
|
401
|
-
transform_kwargs = dict(
|
402
|
-
snowpark_input_cols = self._snowpark_cols,
|
403
|
-
drop_input_cols = self._drop_input_cols
|
404
|
-
)
|
399
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
405
400
|
|
406
401
|
transform_handlers = ModelTransformerBuilder.build(
|
407
402
|
dataset=dataset,
|
@@ -441,7 +436,7 @@ class AdaBoostRegressor(BaseTransformer):
|
|
441
436
|
Transformed dataset.
|
442
437
|
"""
|
443
438
|
super()._check_dataset_type(dataset)
|
444
|
-
inference_method="transform"
|
439
|
+
inference_method = "transform"
|
445
440
|
|
446
441
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
447
442
|
# are specific to the type of dataset used.
|
@@ -478,17 +473,14 @@ class AdaBoostRegressor(BaseTransformer):
|
|
478
473
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
479
474
|
|
480
475
|
transform_kwargs = dict(
|
481
|
-
session
|
482
|
-
dependencies
|
483
|
-
drop_input_cols
|
484
|
-
expected_output_cols_type
|
476
|
+
session=dataset._session,
|
477
|
+
dependencies=self._deps,
|
478
|
+
drop_input_cols=self._drop_input_cols,
|
479
|
+
expected_output_cols_type=expected_dtype,
|
485
480
|
)
|
486
481
|
|
487
482
|
elif isinstance(dataset, pd.DataFrame):
|
488
|
-
transform_kwargs = dict(
|
489
|
-
snowpark_input_cols = self._snowpark_cols,
|
490
|
-
drop_input_cols = self._drop_input_cols
|
491
|
-
)
|
483
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
492
484
|
|
493
485
|
transform_handlers = ModelTransformerBuilder.build(
|
494
486
|
dataset=dataset,
|
@@ -507,7 +499,11 @@ class AdaBoostRegressor(BaseTransformer):
|
|
507
499
|
return output_df
|
508
500
|
|
509
501
|
@available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
|
510
|
-
def fit_predict(
|
502
|
+
def fit_predict(
|
503
|
+
self,
|
504
|
+
dataset: Union[DataFrame, pd.DataFrame],
|
505
|
+
output_cols_prefix: str = "fit_predict_",
|
506
|
+
) -> Union[DataFrame, pd.DataFrame]:
|
511
507
|
""" Method not supported for this class.
|
512
508
|
|
513
509
|
|
@@ -532,7 +528,9 @@ class AdaBoostRegressor(BaseTransformer):
|
|
532
528
|
)
|
533
529
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
534
530
|
drop_input_cols=self._drop_input_cols,
|
535
|
-
expected_output_cols_list=
|
531
|
+
expected_output_cols_list=(
|
532
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
533
|
+
),
|
536
534
|
)
|
537
535
|
self._sklearn_object = fitted_estimator
|
538
536
|
self._is_fitted = True
|
@@ -549,6 +547,62 @@ class AdaBoostRegressor(BaseTransformer):
|
|
549
547
|
assert self._sklearn_object is not None
|
550
548
|
return self._sklearn_object.embedding_
|
551
549
|
|
550
|
+
|
551
|
+
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
552
|
+
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
553
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
554
|
+
"""
|
555
|
+
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
556
|
+
# The following condition is introduced for kneighbors methods, and not used in other methods
|
557
|
+
if output_cols:
|
558
|
+
output_cols = [
|
559
|
+
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
560
|
+
for c in output_cols
|
561
|
+
]
|
562
|
+
elif getattr(self._sklearn_object, "classes_", None) is None:
|
563
|
+
output_cols = [output_cols_prefix]
|
564
|
+
elif self._sklearn_object is not None:
|
565
|
+
classes = self._sklearn_object.classes_
|
566
|
+
if isinstance(classes, numpy.ndarray):
|
567
|
+
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
568
|
+
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
569
|
+
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
570
|
+
output_cols = []
|
571
|
+
for i, cl in enumerate(classes):
|
572
|
+
# For binary classification, there is only one output column for each class
|
573
|
+
# ndarray as the two classes are complementary.
|
574
|
+
if len(cl) == 2:
|
575
|
+
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
576
|
+
else:
|
577
|
+
output_cols.extend([
|
578
|
+
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
579
|
+
])
|
580
|
+
else:
|
581
|
+
output_cols = []
|
582
|
+
|
583
|
+
# Make sure column names are valid snowflake identifiers.
|
584
|
+
assert output_cols is not None # Make MyPy happy
|
585
|
+
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
586
|
+
|
587
|
+
return rv
|
588
|
+
|
589
|
+
def _align_expected_output_names(
|
590
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
591
|
+
) -> List[str]:
|
592
|
+
# in case the inferred output column names dimension is different
|
593
|
+
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
594
|
+
output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
|
595
|
+
output_df_columns = list(output_df_pd.columns)
|
596
|
+
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
597
|
+
if self.sample_weight_col:
|
598
|
+
output_df_columns_set -= set(self.sample_weight_col)
|
599
|
+
# if the dimension of inferred output column names is correct; use it
|
600
|
+
if len(expected_output_cols_list) == len(output_df_columns_set):
|
601
|
+
return expected_output_cols_list
|
602
|
+
# otherwise, use the sklearn estimator's output
|
603
|
+
else:
|
604
|
+
return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
605
|
+
|
552
606
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
553
607
|
@telemetry.send_api_usage_telemetry(
|
554
608
|
project=_PROJECT,
|
@@ -579,24 +633,28 @@ class AdaBoostRegressor(BaseTransformer):
|
|
579
633
|
# are specific to the type of dataset used.
|
580
634
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
581
635
|
|
636
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
637
|
+
|
582
638
|
if isinstance(dataset, DataFrame):
|
583
639
|
self._deps = self._batch_inference_validate_snowpark(
|
584
640
|
dataset=dataset,
|
585
641
|
inference_method=inference_method,
|
586
642
|
)
|
587
|
-
assert isinstance(
|
643
|
+
assert isinstance(
|
644
|
+
dataset._session, Session
|
645
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
588
646
|
transform_kwargs = dict(
|
589
647
|
session=dataset._session,
|
590
648
|
dependencies=self._deps,
|
591
|
-
drop_input_cols
|
649
|
+
drop_input_cols=self._drop_input_cols,
|
592
650
|
expected_output_cols_type="float",
|
593
651
|
)
|
652
|
+
expected_output_cols = self._align_expected_output_names(
|
653
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
654
|
+
)
|
594
655
|
|
595
656
|
elif isinstance(dataset, pd.DataFrame):
|
596
|
-
transform_kwargs = dict(
|
597
|
-
snowpark_input_cols = self._snowpark_cols,
|
598
|
-
drop_input_cols = self._drop_input_cols
|
599
|
-
)
|
657
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
600
658
|
|
601
659
|
transform_handlers = ModelTransformerBuilder.build(
|
602
660
|
dataset=dataset,
|
@@ -608,7 +666,7 @@ class AdaBoostRegressor(BaseTransformer):
|
|
608
666
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
609
667
|
inference_method=inference_method,
|
610
668
|
input_cols=self.input_cols,
|
611
|
-
expected_output_cols=
|
669
|
+
expected_output_cols=expected_output_cols,
|
612
670
|
**transform_kwargs
|
613
671
|
)
|
614
672
|
return output_df
|
@@ -638,7 +696,8 @@ class AdaBoostRegressor(BaseTransformer):
|
|
638
696
|
Output dataset with log probability of the sample for each class in the model.
|
639
697
|
"""
|
640
698
|
super()._check_dataset_type(dataset)
|
641
|
-
inference_method="predict_log_proba"
|
699
|
+
inference_method = "predict_log_proba"
|
700
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
642
701
|
|
643
702
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
644
703
|
# are specific to the type of dataset used.
|
@@ -649,18 +708,20 @@ class AdaBoostRegressor(BaseTransformer):
|
|
649
708
|
dataset=dataset,
|
650
709
|
inference_method=inference_method,
|
651
710
|
)
|
652
|
-
assert isinstance(
|
711
|
+
assert isinstance(
|
712
|
+
dataset._session, Session
|
713
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
653
714
|
transform_kwargs = dict(
|
654
715
|
session=dataset._session,
|
655
716
|
dependencies=self._deps,
|
656
|
-
drop_input_cols
|
717
|
+
drop_input_cols=self._drop_input_cols,
|
657
718
|
expected_output_cols_type="float",
|
658
719
|
)
|
720
|
+
expected_output_cols = self._align_expected_output_names(
|
721
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
722
|
+
)
|
659
723
|
elif isinstance(dataset, pd.DataFrame):
|
660
|
-
transform_kwargs = dict(
|
661
|
-
snowpark_input_cols = self._snowpark_cols,
|
662
|
-
drop_input_cols = self._drop_input_cols
|
663
|
-
)
|
724
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
664
725
|
|
665
726
|
transform_handlers = ModelTransformerBuilder.build(
|
666
727
|
dataset=dataset,
|
@@ -673,7 +734,7 @@ class AdaBoostRegressor(BaseTransformer):
|
|
673
734
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
674
735
|
inference_method=inference_method,
|
675
736
|
input_cols=self.input_cols,
|
676
|
-
expected_output_cols=
|
737
|
+
expected_output_cols=expected_output_cols,
|
677
738
|
**transform_kwargs
|
678
739
|
)
|
679
740
|
return output_df
|
@@ -699,30 +760,34 @@ class AdaBoostRegressor(BaseTransformer):
|
|
699
760
|
Output dataset with results of the decision function for the samples in input dataset.
|
700
761
|
"""
|
701
762
|
super()._check_dataset_type(dataset)
|
702
|
-
inference_method="decision_function"
|
763
|
+
inference_method = "decision_function"
|
703
764
|
|
704
765
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
705
766
|
# are specific to the type of dataset used.
|
706
767
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
707
768
|
|
769
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
770
|
+
|
708
771
|
if isinstance(dataset, DataFrame):
|
709
772
|
self._deps = self._batch_inference_validate_snowpark(
|
710
773
|
dataset=dataset,
|
711
774
|
inference_method=inference_method,
|
712
775
|
)
|
713
|
-
assert isinstance(
|
776
|
+
assert isinstance(
|
777
|
+
dataset._session, Session
|
778
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
714
779
|
transform_kwargs = dict(
|
715
780
|
session=dataset._session,
|
716
781
|
dependencies=self._deps,
|
717
|
-
drop_input_cols
|
782
|
+
drop_input_cols=self._drop_input_cols,
|
718
783
|
expected_output_cols_type="float",
|
719
784
|
)
|
785
|
+
expected_output_cols = self._align_expected_output_names(
|
786
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
787
|
+
)
|
720
788
|
|
721
789
|
elif isinstance(dataset, pd.DataFrame):
|
722
|
-
transform_kwargs = dict(
|
723
|
-
snowpark_input_cols = self._snowpark_cols,
|
724
|
-
drop_input_cols = self._drop_input_cols
|
725
|
-
)
|
790
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
726
791
|
|
727
792
|
transform_handlers = ModelTransformerBuilder.build(
|
728
793
|
dataset=dataset,
|
@@ -735,7 +800,7 @@ class AdaBoostRegressor(BaseTransformer):
|
|
735
800
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
736
801
|
inference_method=inference_method,
|
737
802
|
input_cols=self.input_cols,
|
738
|
-
expected_output_cols=
|
803
|
+
expected_output_cols=expected_output_cols,
|
739
804
|
**transform_kwargs
|
740
805
|
)
|
741
806
|
return output_df
|
@@ -764,12 +829,14 @@ class AdaBoostRegressor(BaseTransformer):
|
|
764
829
|
Output dataset with probability of the sample for each class in the model.
|
765
830
|
"""
|
766
831
|
super()._check_dataset_type(dataset)
|
767
|
-
inference_method="score_samples"
|
832
|
+
inference_method = "score_samples"
|
768
833
|
|
769
834
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
770
835
|
# are specific to the type of dataset used.
|
771
836
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
772
837
|
|
838
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
839
|
+
|
773
840
|
if isinstance(dataset, DataFrame):
|
774
841
|
self._deps = self._batch_inference_validate_snowpark(
|
775
842
|
dataset=dataset,
|
@@ -782,6 +849,9 @@ class AdaBoostRegressor(BaseTransformer):
|
|
782
849
|
drop_input_cols = self._drop_input_cols,
|
783
850
|
expected_output_cols_type="float",
|
784
851
|
)
|
852
|
+
expected_output_cols = self._align_expected_output_names(
|
853
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
854
|
+
)
|
785
855
|
|
786
856
|
elif isinstance(dataset, pd.DataFrame):
|
787
857
|
transform_kwargs = dict(
|
@@ -800,7 +870,7 @@ class AdaBoostRegressor(BaseTransformer):
|
|
800
870
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
801
871
|
inference_method=inference_method,
|
802
872
|
input_cols=self.input_cols,
|
803
|
-
expected_output_cols=
|
873
|
+
expected_output_cols=expected_output_cols,
|
804
874
|
**transform_kwargs
|
805
875
|
)
|
806
876
|
return output_df
|
@@ -947,50 +1017,84 @@ class AdaBoostRegressor(BaseTransformer):
|
|
947
1017
|
)
|
948
1018
|
return output_df
|
949
1019
|
|
1020
|
+
|
1021
|
+
|
1022
|
+
def to_sklearn(self) -> Any:
|
1023
|
+
"""Get sklearn.ensemble.AdaBoostRegressor object.
|
1024
|
+
"""
|
1025
|
+
if self._sklearn_object is None:
|
1026
|
+
self._sklearn_object = self._create_sklearn_object()
|
1027
|
+
return self._sklearn_object
|
1028
|
+
|
1029
|
+
def to_xgboost(self) -> Any:
|
1030
|
+
raise exceptions.SnowflakeMLException(
|
1031
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1032
|
+
original_exception=AttributeError(
|
1033
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1034
|
+
"to_xgboost()",
|
1035
|
+
"to_sklearn()"
|
1036
|
+
)
|
1037
|
+
),
|
1038
|
+
)
|
1039
|
+
|
1040
|
+
def to_lightgbm(self) -> Any:
|
1041
|
+
raise exceptions.SnowflakeMLException(
|
1042
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1043
|
+
original_exception=AttributeError(
|
1044
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1045
|
+
"to_lightgbm()",
|
1046
|
+
"to_sklearn()"
|
1047
|
+
)
|
1048
|
+
),
|
1049
|
+
)
|
950
1050
|
|
951
|
-
def
|
1051
|
+
def _get_dependencies(self) -> List[str]:
|
1052
|
+
return self._deps
|
1053
|
+
|
1054
|
+
|
1055
|
+
def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
952
1056
|
self._model_signature_dict = dict()
|
953
1057
|
|
954
1058
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
955
1059
|
|
956
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input"))
|
1060
|
+
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
957
1061
|
outputs: List[BaseFeatureSpec] = []
|
958
1062
|
if hasattr(self, "predict"):
|
959
1063
|
# keep mypy happy
|
960
|
-
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1064
|
+
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
961
1065
|
# For classifier, the type of predict is the same as the type of label
|
962
|
-
if self._sklearn_object._estimator_type ==
|
963
|
-
|
1066
|
+
if self._sklearn_object._estimator_type == "classifier":
|
1067
|
+
# label columns is the desired type for output
|
964
1068
|
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
965
1069
|
# rename the output columns
|
966
1070
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
967
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
968
|
-
|
969
|
-
|
1071
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1072
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1073
|
+
)
|
970
1074
|
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
971
1075
|
# For outlier models, returns -1 for outliers and 1 for inliers.
|
972
|
-
# Clusterer returns int64 cluster labels.
|
1076
|
+
# Clusterer returns int64 cluster labels.
|
973
1077
|
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
974
1078
|
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
975
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
976
|
-
|
977
|
-
|
978
|
-
|
1079
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1080
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1081
|
+
)
|
1082
|
+
|
979
1083
|
# For regressor, the type of predict is float64
|
980
|
-
elif self._sklearn_object._estimator_type ==
|
1084
|
+
elif self._sklearn_object._estimator_type == "regressor":
|
981
1085
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
982
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
983
|
-
|
984
|
-
|
985
|
-
|
1086
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1087
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1088
|
+
)
|
1089
|
+
|
986
1090
|
for prob_func in PROB_FUNCTIONS:
|
987
1091
|
if hasattr(self, prob_func):
|
988
1092
|
output_cols_prefix: str = f"{prob_func}_"
|
989
1093
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
990
1094
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
991
|
-
self._model_signature_dict[prob_func] = ModelSignature(
|
992
|
-
|
993
|
-
|
1095
|
+
self._model_signature_dict[prob_func] = ModelSignature(
|
1096
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1097
|
+
)
|
994
1098
|
|
995
1099
|
# Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
|
996
1100
|
items = list(self._model_signature_dict.items())
|
@@ -1003,10 +1107,10 @@ class AdaBoostRegressor(BaseTransformer):
|
|
1003
1107
|
"""Returns model signature of current class.
|
1004
1108
|
|
1005
1109
|
Raises:
|
1006
|
-
|
1110
|
+
SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
|
1007
1111
|
|
1008
1112
|
Returns:
|
1009
|
-
Dict
|
1113
|
+
Dict with each method and its input output signature
|
1010
1114
|
"""
|
1011
1115
|
if self._model_signature_dict is None:
|
1012
1116
|
raise exceptions.SnowflakeMLException(
|
@@ -1014,35 +1118,3 @@ class AdaBoostRegressor(BaseTransformer):
|
|
1014
1118
|
original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
|
1015
1119
|
)
|
1016
1120
|
return self._model_signature_dict
|
1017
|
-
|
1018
|
-
def to_sklearn(self) -> Any:
|
1019
|
-
"""Get sklearn.ensemble.AdaBoostRegressor object.
|
1020
|
-
"""
|
1021
|
-
if self._sklearn_object is None:
|
1022
|
-
self._sklearn_object = self._create_sklearn_object()
|
1023
|
-
return self._sklearn_object
|
1024
|
-
|
1025
|
-
def to_xgboost(self) -> Any:
|
1026
|
-
raise exceptions.SnowflakeMLException(
|
1027
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1028
|
-
original_exception=AttributeError(
|
1029
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1030
|
-
"to_xgboost()",
|
1031
|
-
"to_sklearn()"
|
1032
|
-
)
|
1033
|
-
),
|
1034
|
-
)
|
1035
|
-
|
1036
|
-
def to_lightgbm(self) -> Any:
|
1037
|
-
raise exceptions.SnowflakeMLException(
|
1038
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1039
|
-
original_exception=AttributeError(
|
1040
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1041
|
-
"to_lightgbm()",
|
1042
|
-
"to_sklearn()"
|
1043
|
-
)
|
1044
|
-
),
|
1045
|
-
)
|
1046
|
-
|
1047
|
-
def _get_dependencies(self) -> List[str]:
|
1048
|
-
return self._deps
|