snowflake-ml-python 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +11 -1
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/feature_store/feature_store.py +151 -78
- snowflake/ml/feature_store/feature_view.py +12 -24
- snowflake/ml/fileset/sfcfs.py +56 -50
- snowflake/ml/fileset/stage_fs.py +48 -13
- snowflake/ml/model/_client/model/model_version_impl.py +2 -50
- snowflake/ml/model/_client/ops/model_ops.py +78 -29
- snowflake/ml/model/_client/sql/model.py +23 -2
- snowflake/ml/model/_client/sql/model_version.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +19 -54
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +8 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +36 -6
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -2
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +5 -5
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +195 -123
- snowflake/ml/modeling/cluster/affinity_propagation.py +195 -123
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +195 -123
- snowflake/ml/modeling/cluster/birch.py +195 -123
- snowflake/ml/modeling/cluster/bisecting_k_means.py +195 -123
- snowflake/ml/modeling/cluster/dbscan.py +195 -123
- snowflake/ml/modeling/cluster/feature_agglomeration.py +195 -123
- snowflake/ml/modeling/cluster/k_means.py +195 -123
- snowflake/ml/modeling/cluster/mean_shift.py +195 -123
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +195 -123
- snowflake/ml/modeling/cluster/optics.py +195 -123
- snowflake/ml/modeling/cluster/spectral_biclustering.py +195 -123
- snowflake/ml/modeling/cluster/spectral_clustering.py +195 -123
- snowflake/ml/modeling/cluster/spectral_coclustering.py +195 -123
- snowflake/ml/modeling/compose/column_transformer.py +195 -123
- snowflake/ml/modeling/compose/transformed_target_regressor.py +195 -123
- snowflake/ml/modeling/covariance/elliptic_envelope.py +195 -123
- snowflake/ml/modeling/covariance/empirical_covariance.py +195 -123
- snowflake/ml/modeling/covariance/graphical_lasso.py +195 -123
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +195 -123
- snowflake/ml/modeling/covariance/ledoit_wolf.py +195 -123
- snowflake/ml/modeling/covariance/min_cov_det.py +195 -123
- snowflake/ml/modeling/covariance/oas.py +195 -123
- snowflake/ml/modeling/covariance/shrunk_covariance.py +195 -123
- snowflake/ml/modeling/decomposition/dictionary_learning.py +195 -123
- snowflake/ml/modeling/decomposition/factor_analysis.py +195 -123
- snowflake/ml/modeling/decomposition/fast_ica.py +195 -123
- snowflake/ml/modeling/decomposition/incremental_pca.py +195 -123
- snowflake/ml/modeling/decomposition/kernel_pca.py +195 -123
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +195 -123
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +195 -123
- snowflake/ml/modeling/decomposition/pca.py +195 -123
- snowflake/ml/modeling/decomposition/sparse_pca.py +195 -123
- snowflake/ml/modeling/decomposition/truncated_svd.py +195 -123
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +195 -123
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +195 -123
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/bagging_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/bagging_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/isolation_forest.py +195 -123
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/stacking_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/voting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/voting_regressor.py +195 -123
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fdr.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fpr.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fwe.py +195 -123
- snowflake/ml/modeling/feature_selection/select_k_best.py +195 -123
- snowflake/ml/modeling/feature_selection/select_percentile.py +195 -123
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +195 -123
- snowflake/ml/modeling/feature_selection/variance_threshold.py +195 -123
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +9 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +195 -123
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +195 -123
- snowflake/ml/modeling/impute/iterative_imputer.py +195 -123
- snowflake/ml/modeling/impute/knn_imputer.py +195 -123
- snowflake/ml/modeling/impute/missing_indicator.py +195 -123
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +195 -123
- snowflake/ml/modeling/kernel_approximation/nystroem.py +195 -123
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +195 -123
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +195 -123
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +195 -123
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +195 -123
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +195 -123
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ard_regression.py +195 -123
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +195 -123
- snowflake/ml/modeling/linear_model/elastic_net.py +195 -123
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +195 -123
- snowflake/ml/modeling/linear_model/gamma_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/huber_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/lars.py +195 -123
- snowflake/ml/modeling/linear_model/lars_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +195 -123
- snowflake/ml/modeling/linear_model/linear_regression.py +195 -123
- snowflake/ml/modeling/linear_model/logistic_regression.py +195 -123
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +195 -123
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +195 -123
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/perceptron.py +195 -123
- snowflake/ml/modeling/linear_model/poisson_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ransac_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ridge.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_cv.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +195 -123
- snowflake/ml/modeling/manifold/isomap.py +195 -123
- snowflake/ml/modeling/manifold/mds.py +195 -123
- snowflake/ml/modeling/manifold/spectral_embedding.py +195 -123
- snowflake/ml/modeling/manifold/tsne.py +195 -123
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +195 -123
- snowflake/ml/modeling/mixture/gaussian_mixture.py +195 -123
- snowflake/ml/modeling/model_selection/grid_search_cv.py +42 -18
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +42 -18
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +195 -123
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +195 -123
- snowflake/ml/modeling/multiclass/output_code_classifier.py +195 -123
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/complement_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +195 -123
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +195 -123
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +195 -123
- snowflake/ml/modeling/neighbors/kernel_density.py +195 -123
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +195 -123
- snowflake/ml/modeling/neighbors/nearest_centroid.py +195 -123
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +195 -123
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +195 -123
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +195 -123
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +195 -123
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +195 -123
- snowflake/ml/modeling/neural_network/mlp_classifier.py +195 -123
- snowflake/ml/modeling/neural_network/mlp_regressor.py +195 -123
- snowflake/ml/modeling/pipeline/pipeline.py +4 -4
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +195 -123
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +195 -123
- snowflake/ml/modeling/semi_supervised/label_spreading.py +195 -123
- snowflake/ml/modeling/svm/linear_svc.py +195 -123
- snowflake/ml/modeling/svm/linear_svr.py +195 -123
- snowflake/ml/modeling/svm/nu_svc.py +195 -123
- snowflake/ml/modeling/svm/nu_svr.py +195 -123
- snowflake/ml/modeling/svm/svc.py +195 -123
- snowflake/ml/modeling/svm/svr.py +195 -123
- snowflake/ml/modeling/tree/decision_tree_classifier.py +195 -123
- snowflake/ml/modeling/tree/decision_tree_regressor.py +195 -123
- snowflake/ml/modeling/tree/extra_tree_classifier.py +195 -123
- snowflake/ml/modeling/tree/extra_tree_regressor.py +195 -123
- snowflake/ml/modeling/xgboost/xgb_classifier.py +195 -123
- snowflake/ml/modeling/xgboost/xgb_regressor.py +195 -123
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +195 -123
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +195 -123
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/METADATA +68 -57
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/RECORD +202 -200
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
|
|
33
33
|
BatchInferenceKwargsTypedDict,
|
34
34
|
ScoreKwargsTypedDict
|
35
35
|
)
|
36
|
+
from snowflake.ml.model._signatures import utils as model_signature_utils
|
37
|
+
from snowflake.ml.model.model_signature import (
|
38
|
+
BaseFeatureSpec,
|
39
|
+
DataType,
|
40
|
+
FeatureSpec,
|
41
|
+
ModelSignature,
|
42
|
+
_infer_signature,
|
43
|
+
_rename_signature_with_snowflake_identifiers,
|
44
|
+
)
|
36
45
|
|
37
46
|
from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
|
38
47
|
|
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
|
|
43
52
|
validate_sklearn_args,
|
44
53
|
)
|
45
54
|
|
46
|
-
from snowflake.ml.model.model_signature import (
|
47
|
-
DataType,
|
48
|
-
FeatureSpec,
|
49
|
-
ModelSignature,
|
50
|
-
_infer_signature,
|
51
|
-
_rename_signature_with_snowflake_identifiers,
|
52
|
-
BaseFeatureSpec,
|
53
|
-
)
|
54
|
-
from snowflake.ml.model._signatures import utils as model_signature_utils
|
55
|
-
|
56
55
|
_PROJECT = "ModelDevelopment"
|
57
56
|
# Derive subproject from module name by removing "sklearn"
|
58
57
|
# and converting module name from underscore to CamelCase
|
@@ -293,12 +292,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
|
|
293
292
|
)
|
294
293
|
return selected_cols
|
295
294
|
|
296
|
-
|
297
|
-
project=_PROJECT,
|
298
|
-
subproject=_SUBPROJECT,
|
299
|
-
custom_tags=dict([("autogen", True)]),
|
300
|
-
)
|
301
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "MultiTaskElasticNetCV":
|
295
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "MultiTaskElasticNetCV":
|
302
296
|
"""Fit MultiTaskElasticNet model with coordinate descent
|
303
297
|
For more details on this function, see [sklearn.linear_model.MultiTaskElasticNetCV.fit]
|
304
298
|
(https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.MultiTaskElasticNetCV.html#sklearn.linear_model.MultiTaskElasticNetCV.fit)
|
@@ -325,12 +319,14 @@ class MultiTaskElasticNetCV(BaseTransformer):
|
|
325
319
|
|
326
320
|
self._snowpark_cols = dataset.select(self.input_cols).columns
|
327
321
|
|
328
|
-
|
322
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
329
323
|
if SNOWML_SPROC_ENV in os.environ:
|
330
324
|
statement_params = telemetry.get_function_usage_statement_params(
|
331
325
|
project=_PROJECT,
|
332
326
|
subproject=_SUBPROJECT,
|
333
|
-
function_name=telemetry.get_statement_params_full_func_name(
|
327
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
328
|
+
inspect.currentframe(), MultiTaskElasticNetCV.__class__.__name__
|
329
|
+
),
|
334
330
|
api_calls=[Session.call],
|
335
331
|
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
336
332
|
)
|
@@ -351,7 +347,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
|
|
351
347
|
)
|
352
348
|
self._sklearn_object = model_trainer.train()
|
353
349
|
self._is_fitted = True
|
354
|
-
self.
|
350
|
+
self._generate_model_signatures(dataset)
|
355
351
|
return self
|
356
352
|
|
357
353
|
def _batch_inference_validate_snowpark(
|
@@ -427,7 +423,9 @@ class MultiTaskElasticNetCV(BaseTransformer):
|
|
427
423
|
# when it is classifier, infer the datatype from label columns
|
428
424
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
429
425
|
# Batch inference takes a single expected output column type. Use the first columns type for now.
|
430
|
-
label_cols_signatures = [
|
426
|
+
label_cols_signatures = [
|
427
|
+
row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
|
428
|
+
]
|
431
429
|
if len(label_cols_signatures) == 0:
|
432
430
|
error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
|
433
431
|
raise exceptions.SnowflakeMLException(
|
@@ -435,25 +433,22 @@ class MultiTaskElasticNetCV(BaseTransformer):
|
|
435
433
|
original_exception=ValueError(error_str),
|
436
434
|
)
|
437
435
|
|
438
|
-
expected_type_inferred = convert_sp_to_sf_type(
|
439
|
-
label_cols_signatures[0].as_snowpark_type()
|
440
|
-
)
|
436
|
+
expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
|
441
437
|
|
442
438
|
self._deps = self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
443
|
-
assert isinstance(
|
439
|
+
assert isinstance(
|
440
|
+
dataset._session, Session
|
441
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
444
442
|
|
445
443
|
transform_kwargs = dict(
|
446
|
-
session
|
447
|
-
dependencies
|
448
|
-
drop_input_cols
|
449
|
-
expected_output_cols_type
|
444
|
+
session=dataset._session,
|
445
|
+
dependencies=self._deps,
|
446
|
+
drop_input_cols=self._drop_input_cols,
|
447
|
+
expected_output_cols_type=expected_type_inferred,
|
450
448
|
)
|
451
449
|
|
452
450
|
elif isinstance(dataset, pd.DataFrame):
|
453
|
-
transform_kwargs = dict(
|
454
|
-
snowpark_input_cols = self._snowpark_cols,
|
455
|
-
drop_input_cols = self._drop_input_cols
|
456
|
-
)
|
451
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
457
452
|
|
458
453
|
transform_handlers = ModelTransformerBuilder.build(
|
459
454
|
dataset=dataset,
|
@@ -493,7 +488,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
|
|
493
488
|
Transformed dataset.
|
494
489
|
"""
|
495
490
|
super()._check_dataset_type(dataset)
|
496
|
-
inference_method="transform"
|
491
|
+
inference_method = "transform"
|
497
492
|
|
498
493
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
499
494
|
# are specific to the type of dataset used.
|
@@ -530,17 +525,14 @@ class MultiTaskElasticNetCV(BaseTransformer):
|
|
530
525
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
531
526
|
|
532
527
|
transform_kwargs = dict(
|
533
|
-
session
|
534
|
-
dependencies
|
535
|
-
drop_input_cols
|
536
|
-
expected_output_cols_type
|
528
|
+
session=dataset._session,
|
529
|
+
dependencies=self._deps,
|
530
|
+
drop_input_cols=self._drop_input_cols,
|
531
|
+
expected_output_cols_type=expected_dtype,
|
537
532
|
)
|
538
533
|
|
539
534
|
elif isinstance(dataset, pd.DataFrame):
|
540
|
-
transform_kwargs = dict(
|
541
|
-
snowpark_input_cols = self._snowpark_cols,
|
542
|
-
drop_input_cols = self._drop_input_cols
|
543
|
-
)
|
535
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
544
536
|
|
545
537
|
transform_handlers = ModelTransformerBuilder.build(
|
546
538
|
dataset=dataset,
|
@@ -559,7 +551,11 @@ class MultiTaskElasticNetCV(BaseTransformer):
|
|
559
551
|
return output_df
|
560
552
|
|
561
553
|
@available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
|
562
|
-
def fit_predict(
|
554
|
+
def fit_predict(
|
555
|
+
self,
|
556
|
+
dataset: Union[DataFrame, pd.DataFrame],
|
557
|
+
output_cols_prefix: str = "fit_predict_",
|
558
|
+
) -> Union[DataFrame, pd.DataFrame]:
|
563
559
|
""" Method not supported for this class.
|
564
560
|
|
565
561
|
|
@@ -584,7 +580,9 @@ class MultiTaskElasticNetCV(BaseTransformer):
|
|
584
580
|
)
|
585
581
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
586
582
|
drop_input_cols=self._drop_input_cols,
|
587
|
-
expected_output_cols_list=
|
583
|
+
expected_output_cols_list=(
|
584
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
585
|
+
),
|
588
586
|
)
|
589
587
|
self._sklearn_object = fitted_estimator
|
590
588
|
self._is_fitted = True
|
@@ -601,6 +599,62 @@ class MultiTaskElasticNetCV(BaseTransformer):
|
|
601
599
|
assert self._sklearn_object is not None
|
602
600
|
return self._sklearn_object.embedding_
|
603
601
|
|
602
|
+
|
603
|
+
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
604
|
+
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
605
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
606
|
+
"""
|
607
|
+
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
608
|
+
# The following condition is introduced for kneighbors methods, and not used in other methods
|
609
|
+
if output_cols:
|
610
|
+
output_cols = [
|
611
|
+
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
612
|
+
for c in output_cols
|
613
|
+
]
|
614
|
+
elif getattr(self._sklearn_object, "classes_", None) is None:
|
615
|
+
output_cols = [output_cols_prefix]
|
616
|
+
elif self._sklearn_object is not None:
|
617
|
+
classes = self._sklearn_object.classes_
|
618
|
+
if isinstance(classes, numpy.ndarray):
|
619
|
+
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
620
|
+
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
621
|
+
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
622
|
+
output_cols = []
|
623
|
+
for i, cl in enumerate(classes):
|
624
|
+
# For binary classification, there is only one output column for each class
|
625
|
+
# ndarray as the two classes are complementary.
|
626
|
+
if len(cl) == 2:
|
627
|
+
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
628
|
+
else:
|
629
|
+
output_cols.extend([
|
630
|
+
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
631
|
+
])
|
632
|
+
else:
|
633
|
+
output_cols = []
|
634
|
+
|
635
|
+
# Make sure column names are valid snowflake identifiers.
|
636
|
+
assert output_cols is not None # Make MyPy happy
|
637
|
+
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
638
|
+
|
639
|
+
return rv
|
640
|
+
|
641
|
+
def _align_expected_output_names(
|
642
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
643
|
+
) -> List[str]:
|
644
|
+
# in case the inferred output column names dimension is different
|
645
|
+
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
646
|
+
output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
|
647
|
+
output_df_columns = list(output_df_pd.columns)
|
648
|
+
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
649
|
+
if self.sample_weight_col:
|
650
|
+
output_df_columns_set -= set(self.sample_weight_col)
|
651
|
+
# if the dimension of inferred output column names is correct; use it
|
652
|
+
if len(expected_output_cols_list) == len(output_df_columns_set):
|
653
|
+
return expected_output_cols_list
|
654
|
+
# otherwise, use the sklearn estimator's output
|
655
|
+
else:
|
656
|
+
return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
657
|
+
|
604
658
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
605
659
|
@telemetry.send_api_usage_telemetry(
|
606
660
|
project=_PROJECT,
|
@@ -631,24 +685,28 @@ class MultiTaskElasticNetCV(BaseTransformer):
|
|
631
685
|
# are specific to the type of dataset used.
|
632
686
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
633
687
|
|
688
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
689
|
+
|
634
690
|
if isinstance(dataset, DataFrame):
|
635
691
|
self._deps = self._batch_inference_validate_snowpark(
|
636
692
|
dataset=dataset,
|
637
693
|
inference_method=inference_method,
|
638
694
|
)
|
639
|
-
assert isinstance(
|
695
|
+
assert isinstance(
|
696
|
+
dataset._session, Session
|
697
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
640
698
|
transform_kwargs = dict(
|
641
699
|
session=dataset._session,
|
642
700
|
dependencies=self._deps,
|
643
|
-
drop_input_cols
|
701
|
+
drop_input_cols=self._drop_input_cols,
|
644
702
|
expected_output_cols_type="float",
|
645
703
|
)
|
704
|
+
expected_output_cols = self._align_expected_output_names(
|
705
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
706
|
+
)
|
646
707
|
|
647
708
|
elif isinstance(dataset, pd.DataFrame):
|
648
|
-
transform_kwargs = dict(
|
649
|
-
snowpark_input_cols = self._snowpark_cols,
|
650
|
-
drop_input_cols = self._drop_input_cols
|
651
|
-
)
|
709
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
652
710
|
|
653
711
|
transform_handlers = ModelTransformerBuilder.build(
|
654
712
|
dataset=dataset,
|
@@ -660,7 +718,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
|
|
660
718
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
661
719
|
inference_method=inference_method,
|
662
720
|
input_cols=self.input_cols,
|
663
|
-
expected_output_cols=
|
721
|
+
expected_output_cols=expected_output_cols,
|
664
722
|
**transform_kwargs
|
665
723
|
)
|
666
724
|
return output_df
|
@@ -690,7 +748,8 @@ class MultiTaskElasticNetCV(BaseTransformer):
|
|
690
748
|
Output dataset with log probability of the sample for each class in the model.
|
691
749
|
"""
|
692
750
|
super()._check_dataset_type(dataset)
|
693
|
-
inference_method="predict_log_proba"
|
751
|
+
inference_method = "predict_log_proba"
|
752
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
694
753
|
|
695
754
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
696
755
|
# are specific to the type of dataset used.
|
@@ -701,18 +760,20 @@ class MultiTaskElasticNetCV(BaseTransformer):
|
|
701
760
|
dataset=dataset,
|
702
761
|
inference_method=inference_method,
|
703
762
|
)
|
704
|
-
assert isinstance(
|
763
|
+
assert isinstance(
|
764
|
+
dataset._session, Session
|
765
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
705
766
|
transform_kwargs = dict(
|
706
767
|
session=dataset._session,
|
707
768
|
dependencies=self._deps,
|
708
|
-
drop_input_cols
|
769
|
+
drop_input_cols=self._drop_input_cols,
|
709
770
|
expected_output_cols_type="float",
|
710
771
|
)
|
772
|
+
expected_output_cols = self._align_expected_output_names(
|
773
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
774
|
+
)
|
711
775
|
elif isinstance(dataset, pd.DataFrame):
|
712
|
-
transform_kwargs = dict(
|
713
|
-
snowpark_input_cols = self._snowpark_cols,
|
714
|
-
drop_input_cols = self._drop_input_cols
|
715
|
-
)
|
776
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
716
777
|
|
717
778
|
transform_handlers = ModelTransformerBuilder.build(
|
718
779
|
dataset=dataset,
|
@@ -725,7 +786,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
|
|
725
786
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
726
787
|
inference_method=inference_method,
|
727
788
|
input_cols=self.input_cols,
|
728
|
-
expected_output_cols=
|
789
|
+
expected_output_cols=expected_output_cols,
|
729
790
|
**transform_kwargs
|
730
791
|
)
|
731
792
|
return output_df
|
@@ -751,30 +812,34 @@ class MultiTaskElasticNetCV(BaseTransformer):
|
|
751
812
|
Output dataset with results of the decision function for the samples in input dataset.
|
752
813
|
"""
|
753
814
|
super()._check_dataset_type(dataset)
|
754
|
-
inference_method="decision_function"
|
815
|
+
inference_method = "decision_function"
|
755
816
|
|
756
817
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
757
818
|
# are specific to the type of dataset used.
|
758
819
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
759
820
|
|
821
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
822
|
+
|
760
823
|
if isinstance(dataset, DataFrame):
|
761
824
|
self._deps = self._batch_inference_validate_snowpark(
|
762
825
|
dataset=dataset,
|
763
826
|
inference_method=inference_method,
|
764
827
|
)
|
765
|
-
assert isinstance(
|
828
|
+
assert isinstance(
|
829
|
+
dataset._session, Session
|
830
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
766
831
|
transform_kwargs = dict(
|
767
832
|
session=dataset._session,
|
768
833
|
dependencies=self._deps,
|
769
|
-
drop_input_cols
|
834
|
+
drop_input_cols=self._drop_input_cols,
|
770
835
|
expected_output_cols_type="float",
|
771
836
|
)
|
837
|
+
expected_output_cols = self._align_expected_output_names(
|
838
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
839
|
+
)
|
772
840
|
|
773
841
|
elif isinstance(dataset, pd.DataFrame):
|
774
|
-
transform_kwargs = dict(
|
775
|
-
snowpark_input_cols = self._snowpark_cols,
|
776
|
-
drop_input_cols = self._drop_input_cols
|
777
|
-
)
|
842
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
778
843
|
|
779
844
|
transform_handlers = ModelTransformerBuilder.build(
|
780
845
|
dataset=dataset,
|
@@ -787,7 +852,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
|
|
787
852
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
788
853
|
inference_method=inference_method,
|
789
854
|
input_cols=self.input_cols,
|
790
|
-
expected_output_cols=
|
855
|
+
expected_output_cols=expected_output_cols,
|
791
856
|
**transform_kwargs
|
792
857
|
)
|
793
858
|
return output_df
|
@@ -816,12 +881,14 @@ class MultiTaskElasticNetCV(BaseTransformer):
|
|
816
881
|
Output dataset with probability of the sample for each class in the model.
|
817
882
|
"""
|
818
883
|
super()._check_dataset_type(dataset)
|
819
|
-
inference_method="score_samples"
|
884
|
+
inference_method = "score_samples"
|
820
885
|
|
821
886
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
822
887
|
# are specific to the type of dataset used.
|
823
888
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
824
889
|
|
890
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
891
|
+
|
825
892
|
if isinstance(dataset, DataFrame):
|
826
893
|
self._deps = self._batch_inference_validate_snowpark(
|
827
894
|
dataset=dataset,
|
@@ -834,6 +901,9 @@ class MultiTaskElasticNetCV(BaseTransformer):
|
|
834
901
|
drop_input_cols = self._drop_input_cols,
|
835
902
|
expected_output_cols_type="float",
|
836
903
|
)
|
904
|
+
expected_output_cols = self._align_expected_output_names(
|
905
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
906
|
+
)
|
837
907
|
|
838
908
|
elif isinstance(dataset, pd.DataFrame):
|
839
909
|
transform_kwargs = dict(
|
@@ -852,7 +922,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
|
|
852
922
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
853
923
|
inference_method=inference_method,
|
854
924
|
input_cols=self.input_cols,
|
855
|
-
expected_output_cols=
|
925
|
+
expected_output_cols=expected_output_cols,
|
856
926
|
**transform_kwargs
|
857
927
|
)
|
858
928
|
return output_df
|
@@ -999,50 +1069,84 @@ class MultiTaskElasticNetCV(BaseTransformer):
|
|
999
1069
|
)
|
1000
1070
|
return output_df
|
1001
1071
|
|
1072
|
+
|
1073
|
+
|
1074
|
+
def to_sklearn(self) -> Any:
|
1075
|
+
"""Get sklearn.linear_model.MultiTaskElasticNetCV object.
|
1076
|
+
"""
|
1077
|
+
if self._sklearn_object is None:
|
1078
|
+
self._sklearn_object = self._create_sklearn_object()
|
1079
|
+
return self._sklearn_object
|
1080
|
+
|
1081
|
+
def to_xgboost(self) -> Any:
|
1082
|
+
raise exceptions.SnowflakeMLException(
|
1083
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1084
|
+
original_exception=AttributeError(
|
1085
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1086
|
+
"to_xgboost()",
|
1087
|
+
"to_sklearn()"
|
1088
|
+
)
|
1089
|
+
),
|
1090
|
+
)
|
1091
|
+
|
1092
|
+
def to_lightgbm(self) -> Any:
|
1093
|
+
raise exceptions.SnowflakeMLException(
|
1094
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1095
|
+
original_exception=AttributeError(
|
1096
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1097
|
+
"to_lightgbm()",
|
1098
|
+
"to_sklearn()"
|
1099
|
+
)
|
1100
|
+
),
|
1101
|
+
)
|
1002
1102
|
|
1003
|
-
def
|
1103
|
+
def _get_dependencies(self) -> List[str]:
|
1104
|
+
return self._deps
|
1105
|
+
|
1106
|
+
|
1107
|
+
def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
1004
1108
|
self._model_signature_dict = dict()
|
1005
1109
|
|
1006
1110
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1007
1111
|
|
1008
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input"))
|
1112
|
+
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1009
1113
|
outputs: List[BaseFeatureSpec] = []
|
1010
1114
|
if hasattr(self, "predict"):
|
1011
1115
|
# keep mypy happy
|
1012
|
-
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1116
|
+
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1013
1117
|
# For classifier, the type of predict is the same as the type of label
|
1014
|
-
if self._sklearn_object._estimator_type ==
|
1015
|
-
|
1118
|
+
if self._sklearn_object._estimator_type == "classifier":
|
1119
|
+
# label columns is the desired type for output
|
1016
1120
|
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1017
1121
|
# rename the output columns
|
1018
1122
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1019
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1020
|
-
|
1021
|
-
|
1123
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1124
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1125
|
+
)
|
1022
1126
|
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
1023
1127
|
# For outlier models, returns -1 for outliers and 1 for inliers.
|
1024
|
-
# Clusterer returns int64 cluster labels.
|
1128
|
+
# Clusterer returns int64 cluster labels.
|
1025
1129
|
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
1026
1130
|
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
1027
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1028
|
-
|
1029
|
-
|
1030
|
-
|
1131
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1132
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1133
|
+
)
|
1134
|
+
|
1031
1135
|
# For regressor, the type of predict is float64
|
1032
|
-
elif self._sklearn_object._estimator_type ==
|
1136
|
+
elif self._sklearn_object._estimator_type == "regressor":
|
1033
1137
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1034
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1035
|
-
|
1036
|
-
|
1037
|
-
|
1138
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1139
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1140
|
+
)
|
1141
|
+
|
1038
1142
|
for prob_func in PROB_FUNCTIONS:
|
1039
1143
|
if hasattr(self, prob_func):
|
1040
1144
|
output_cols_prefix: str = f"{prob_func}_"
|
1041
1145
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1042
1146
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1043
|
-
self._model_signature_dict[prob_func] = ModelSignature(
|
1044
|
-
|
1045
|
-
|
1147
|
+
self._model_signature_dict[prob_func] = ModelSignature(
|
1148
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1149
|
+
)
|
1046
1150
|
|
1047
1151
|
# Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
|
1048
1152
|
items = list(self._model_signature_dict.items())
|
@@ -1055,10 +1159,10 @@ class MultiTaskElasticNetCV(BaseTransformer):
|
|
1055
1159
|
"""Returns model signature of current class.
|
1056
1160
|
|
1057
1161
|
Raises:
|
1058
|
-
|
1162
|
+
SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
|
1059
1163
|
|
1060
1164
|
Returns:
|
1061
|
-
Dict
|
1165
|
+
Dict with each method and its input output signature
|
1062
1166
|
"""
|
1063
1167
|
if self._model_signature_dict is None:
|
1064
1168
|
raise exceptions.SnowflakeMLException(
|
@@ -1066,35 +1170,3 @@ class MultiTaskElasticNetCV(BaseTransformer):
|
|
1066
1170
|
original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
|
1067
1171
|
)
|
1068
1172
|
return self._model_signature_dict
|
1069
|
-
|
1070
|
-
def to_sklearn(self) -> Any:
|
1071
|
-
"""Get sklearn.linear_model.MultiTaskElasticNetCV object.
|
1072
|
-
"""
|
1073
|
-
if self._sklearn_object is None:
|
1074
|
-
self._sklearn_object = self._create_sklearn_object()
|
1075
|
-
return self._sklearn_object
|
1076
|
-
|
1077
|
-
def to_xgboost(self) -> Any:
|
1078
|
-
raise exceptions.SnowflakeMLException(
|
1079
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1080
|
-
original_exception=AttributeError(
|
1081
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1082
|
-
"to_xgboost()",
|
1083
|
-
"to_sklearn()"
|
1084
|
-
)
|
1085
|
-
),
|
1086
|
-
)
|
1087
|
-
|
1088
|
-
def to_lightgbm(self) -> Any:
|
1089
|
-
raise exceptions.SnowflakeMLException(
|
1090
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1091
|
-
original_exception=AttributeError(
|
1092
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1093
|
-
"to_lightgbm()",
|
1094
|
-
"to_sklearn()"
|
1095
|
-
)
|
1096
|
-
),
|
1097
|
-
)
|
1098
|
-
|
1099
|
-
def _get_dependencies(self) -> List[str]:
|
1100
|
-
return self._deps
|