snowflake-ml-python 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +77 -32
- snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
- snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
- snowflake/ml/_internal/exceptions/error_codes.py +3 -0
- snowflake/ml/_internal/lineage/data_source.py +10 -0
- snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/dataset/__init__.py +10 -0
- snowflake/ml/dataset/dataset.py +454 -129
- snowflake/ml/dataset/dataset_factory.py +53 -0
- snowflake/ml/dataset/dataset_metadata.py +103 -0
- snowflake/ml/dataset/dataset_reader.py +202 -0
- snowflake/ml/feature_store/feature_store.py +531 -332
- snowflake/ml/feature_store/feature_view.py +40 -23
- snowflake/ml/fileset/embedded_stage_fs.py +146 -0
- snowflake/ml/fileset/sfcfs.py +56 -54
- snowflake/ml/fileset/snowfs.py +159 -0
- snowflake/ml/fileset/stage_fs.py +49 -17
- snowflake/ml/model/__init__.py +2 -2
- snowflake/ml/model/_api.py +16 -1
- snowflake/ml/model/_client/model/model_impl.py +27 -0
- snowflake/ml/model/_client/model/model_version_impl.py +137 -50
- snowflake/ml/model/_client/ops/model_ops.py +159 -40
- snowflake/ml/model/_client/sql/model.py +25 -2
- snowflake/ml/model/_client/sql/model_version.py +131 -2
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
- snowflake/ml/model/_model_composer/model_composer.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +38 -51
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +19 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_env/model_env.py +41 -0
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +37 -11
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -5
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
- snowflake/ml/modeling/_internal/model_trainer.py +7 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +29 -7
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +246 -175
- snowflake/ml/modeling/cluster/affinity_propagation.py +246 -175
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +246 -175
- snowflake/ml/modeling/cluster/birch.py +248 -175
- snowflake/ml/modeling/cluster/bisecting_k_means.py +248 -175
- snowflake/ml/modeling/cluster/dbscan.py +246 -175
- snowflake/ml/modeling/cluster/feature_agglomeration.py +248 -175
- snowflake/ml/modeling/cluster/k_means.py +248 -175
- snowflake/ml/modeling/cluster/mean_shift.py +246 -175
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +248 -175
- snowflake/ml/modeling/cluster/optics.py +246 -175
- snowflake/ml/modeling/cluster/spectral_biclustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_clustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_coclustering.py +246 -175
- snowflake/ml/modeling/compose/column_transformer.py +248 -175
- snowflake/ml/modeling/compose/transformed_target_regressor.py +246 -175
- snowflake/ml/modeling/covariance/elliptic_envelope.py +246 -175
- snowflake/ml/modeling/covariance/empirical_covariance.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +246 -175
- snowflake/ml/modeling/covariance/ledoit_wolf.py +246 -175
- snowflake/ml/modeling/covariance/min_cov_det.py +246 -175
- snowflake/ml/modeling/covariance/oas.py +246 -175
- snowflake/ml/modeling/covariance/shrunk_covariance.py +246 -175
- snowflake/ml/modeling/decomposition/dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/factor_analysis.py +248 -175
- snowflake/ml/modeling/decomposition/fast_ica.py +248 -175
- snowflake/ml/modeling/decomposition/incremental_pca.py +248 -175
- snowflake/ml/modeling/decomposition/kernel_pca.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/pca.py +248 -175
- snowflake/ml/modeling/decomposition/sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/truncated_svd.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/isolation_forest.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/stacking_regressor.py +248 -175
- snowflake/ml/modeling/ensemble/voting_classifier.py +248 -175
- snowflake/ml/modeling/ensemble/voting_regressor.py +248 -175
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fdr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fpr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fwe.py +248 -175
- snowflake/ml/modeling/feature_selection/select_k_best.py +248 -175
- snowflake/ml/modeling/feature_selection/select_percentile.py +248 -175
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +248 -175
- snowflake/ml/modeling/feature_selection/variance_threshold.py +248 -175
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +72 -37
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +246 -175
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +246 -175
- snowflake/ml/modeling/impute/iterative_imputer.py +248 -175
- snowflake/ml/modeling/impute/knn_imputer.py +248 -175
- snowflake/ml/modeling/impute/missing_indicator.py +248 -175
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/nystroem.py +248 -175
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +248 -175
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ard_regression.py +246 -175
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/gamma_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/huber_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/lars.py +246 -175
- snowflake/ml/modeling/linear_model/lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +246 -175
- snowflake/ml/modeling/linear_model/linear_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/perceptron.py +246 -175
- snowflake/ml/modeling/linear_model/poisson_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ransac_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ridge.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_cv.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +246 -175
- snowflake/ml/modeling/manifold/isomap.py +248 -175
- snowflake/ml/modeling/manifold/mds.py +248 -175
- snowflake/ml/modeling/manifold/spectral_embedding.py +248 -175
- snowflake/ml/modeling/manifold/tsne.py +248 -175
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +246 -175
- snowflake/ml/modeling/mixture/gaussian_mixture.py +246 -175
- snowflake/ml/modeling/model_selection/grid_search_cv.py +63 -41
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +80 -38
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/output_code_classifier.py +246 -175
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/complement_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neighbors/kernel_density.py +246 -175
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_centroid.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +246 -175
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +248 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +248 -175
- snowflake/ml/modeling/neural_network/mlp_classifier.py +246 -175
- snowflake/ml/modeling/neural_network/mlp_regressor.py +246 -175
- snowflake/ml/modeling/pipeline/pipeline.py +517 -35
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +13 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +248 -175
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +246 -175
- snowflake/ml/modeling/semi_supervised/label_spreading.py +246 -175
- snowflake/ml/modeling/svm/linear_svc.py +246 -175
- snowflake/ml/modeling/svm/linear_svr.py +246 -175
- snowflake/ml/modeling/svm/nu_svc.py +246 -175
- snowflake/ml/modeling/svm/nu_svr.py +246 -175
- snowflake/ml/modeling/svm/svc.py +246 -175
- snowflake/ml/modeling/svm/svr.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_regressor.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +246 -175
- snowflake/ml/registry/model_registry.py +3 -149
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +129 -57
- snowflake_ml_python-1.5.0.dist-info/RECORD +380 -0
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- snowflake/ml/registry/_artifact_manager.py +0 -156
- snowflake/ml/registry/artifact.py +0 -46
- snowflake_ml_python-1.4.0.dist-info/RECORD +0 -370
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
|
|
33
33
|
BatchInferenceKwargsTypedDict,
|
34
34
|
ScoreKwargsTypedDict
|
35
35
|
)
|
36
|
+
from snowflake.ml.model._signatures import utils as model_signature_utils
|
37
|
+
from snowflake.ml.model.model_signature import (
|
38
|
+
BaseFeatureSpec,
|
39
|
+
DataType,
|
40
|
+
FeatureSpec,
|
41
|
+
ModelSignature,
|
42
|
+
_infer_signature,
|
43
|
+
_rename_signature_with_snowflake_identifiers,
|
44
|
+
)
|
36
45
|
|
37
46
|
from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
|
38
47
|
|
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
|
|
43
52
|
validate_sklearn_args,
|
44
53
|
)
|
45
54
|
|
46
|
-
from snowflake.ml.model.model_signature import (
|
47
|
-
DataType,
|
48
|
-
FeatureSpec,
|
49
|
-
ModelSignature,
|
50
|
-
_infer_signature,
|
51
|
-
_rename_signature_with_snowflake_identifiers,
|
52
|
-
BaseFeatureSpec,
|
53
|
-
)
|
54
|
-
from snowflake.ml.model._signatures import utils as model_signature_utils
|
55
|
-
|
56
55
|
_PROJECT = "ModelDevelopment"
|
57
56
|
# Derive subproject from module name by removing "sklearn"
|
58
57
|
# and converting module name from underscore to CamelCase
|
@@ -61,12 +60,6 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
|
|
61
60
|
|
62
61
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
63
62
|
|
64
|
-
def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
|
65
|
-
def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
|
66
|
-
return False and callable(getattr(self._sklearn_object, "fit_transform", None))
|
67
|
-
return check
|
68
|
-
|
69
|
-
|
70
63
|
class LassoLarsCV(BaseTransformer):
|
71
64
|
r"""Cross-validated Lasso, using the LARS algorithm
|
72
65
|
For more details on this class, see [sklearn.linear_model.LassoLarsCV]
|
@@ -283,12 +276,7 @@ class LassoLarsCV(BaseTransformer):
|
|
283
276
|
)
|
284
277
|
return selected_cols
|
285
278
|
|
286
|
-
|
287
|
-
project=_PROJECT,
|
288
|
-
subproject=_SUBPROJECT,
|
289
|
-
custom_tags=dict([("autogen", True)]),
|
290
|
-
)
|
291
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "LassoLarsCV":
|
279
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "LassoLarsCV":
|
292
280
|
"""Fit the model using X, y as training data
|
293
281
|
For more details on this function, see [sklearn.linear_model.LassoLarsCV.fit]
|
294
282
|
(https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LassoLarsCV.html#sklearn.linear_model.LassoLarsCV.fit)
|
@@ -315,12 +303,14 @@ class LassoLarsCV(BaseTransformer):
|
|
315
303
|
|
316
304
|
self._snowpark_cols = dataset.select(self.input_cols).columns
|
317
305
|
|
318
|
-
|
306
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
319
307
|
if SNOWML_SPROC_ENV in os.environ:
|
320
308
|
statement_params = telemetry.get_function_usage_statement_params(
|
321
309
|
project=_PROJECT,
|
322
310
|
subproject=_SUBPROJECT,
|
323
|
-
function_name=telemetry.get_statement_params_full_func_name(
|
311
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
312
|
+
inspect.currentframe(), LassoLarsCV.__class__.__name__
|
313
|
+
),
|
324
314
|
api_calls=[Session.call],
|
325
315
|
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
326
316
|
)
|
@@ -341,27 +331,24 @@ class LassoLarsCV(BaseTransformer):
|
|
341
331
|
)
|
342
332
|
self._sklearn_object = model_trainer.train()
|
343
333
|
self._is_fitted = True
|
344
|
-
self.
|
334
|
+
self._generate_model_signatures(dataset)
|
345
335
|
return self
|
346
336
|
|
347
337
|
def _batch_inference_validate_snowpark(
|
348
338
|
self,
|
349
339
|
dataset: DataFrame,
|
350
340
|
inference_method: str,
|
351
|
-
) ->
|
352
|
-
"""Util method to run validate that batch inference can be run on a snowpark dataframe
|
353
|
-
return the available package that exists in the snowflake anaconda channel
|
341
|
+
) -> None:
|
342
|
+
"""Util method to run validate that batch inference can be run on a snowpark dataframe.
|
354
343
|
|
355
344
|
Args:
|
356
345
|
dataset: snowpark dataframe
|
357
346
|
inference_method: the inference method such as predict, score...
|
358
|
-
|
347
|
+
|
359
348
|
Raises:
|
360
349
|
SnowflakeMLException: If the estimator is not fitted, raise error
|
361
350
|
SnowflakeMLException: If the session is None, raise error
|
362
351
|
|
363
|
-
Returns:
|
364
|
-
A list of available package that exists in the snowflake anaconda channel
|
365
352
|
"""
|
366
353
|
if not self._is_fitted:
|
367
354
|
raise exceptions.SnowflakeMLException(
|
@@ -379,9 +366,7 @@ class LassoLarsCV(BaseTransformer):
|
|
379
366
|
"Session must not specified for snowpark dataset."
|
380
367
|
),
|
381
368
|
)
|
382
|
-
|
383
|
-
return pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
|
384
|
-
pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
|
369
|
+
|
385
370
|
|
386
371
|
@available_if(original_estimator_has_callable("predict")) # type: ignore[misc]
|
387
372
|
@telemetry.send_api_usage_telemetry(
|
@@ -417,7 +402,9 @@ class LassoLarsCV(BaseTransformer):
|
|
417
402
|
# when it is classifier, infer the datatype from label columns
|
418
403
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
419
404
|
# Batch inference takes a single expected output column type. Use the first columns type for now.
|
420
|
-
label_cols_signatures = [
|
405
|
+
label_cols_signatures = [
|
406
|
+
row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
|
407
|
+
]
|
421
408
|
if len(label_cols_signatures) == 0:
|
422
409
|
error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
|
423
410
|
raise exceptions.SnowflakeMLException(
|
@@ -425,25 +412,23 @@ class LassoLarsCV(BaseTransformer):
|
|
425
412
|
original_exception=ValueError(error_str),
|
426
413
|
)
|
427
414
|
|
428
|
-
expected_type_inferred = convert_sp_to_sf_type(
|
429
|
-
label_cols_signatures[0].as_snowpark_type()
|
430
|
-
)
|
415
|
+
expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
|
431
416
|
|
432
|
-
self.
|
433
|
-
|
417
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
418
|
+
self._deps = self._get_dependencies()
|
419
|
+
assert isinstance(
|
420
|
+
dataset._session, Session
|
421
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
434
422
|
|
435
423
|
transform_kwargs = dict(
|
436
|
-
session
|
437
|
-
dependencies
|
438
|
-
drop_input_cols
|
439
|
-
expected_output_cols_type
|
424
|
+
session=dataset._session,
|
425
|
+
dependencies=self._deps,
|
426
|
+
drop_input_cols=self._drop_input_cols,
|
427
|
+
expected_output_cols_type=expected_type_inferred,
|
440
428
|
)
|
441
429
|
|
442
430
|
elif isinstance(dataset, pd.DataFrame):
|
443
|
-
transform_kwargs = dict(
|
444
|
-
snowpark_input_cols = self._snowpark_cols,
|
445
|
-
drop_input_cols = self._drop_input_cols
|
446
|
-
)
|
431
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
447
432
|
|
448
433
|
transform_handlers = ModelTransformerBuilder.build(
|
449
434
|
dataset=dataset,
|
@@ -483,7 +468,7 @@ class LassoLarsCV(BaseTransformer):
|
|
483
468
|
Transformed dataset.
|
484
469
|
"""
|
485
470
|
super()._check_dataset_type(dataset)
|
486
|
-
inference_method="transform"
|
471
|
+
inference_method = "transform"
|
487
472
|
|
488
473
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
489
474
|
# are specific to the type of dataset used.
|
@@ -513,24 +498,19 @@ class LassoLarsCV(BaseTransformer):
|
|
513
498
|
if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
|
514
499
|
expected_dtype = convert_sp_to_sf_type(output_types[0])
|
515
500
|
|
516
|
-
self.
|
517
|
-
|
518
|
-
inference_method=inference_method,
|
519
|
-
)
|
501
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
502
|
+
self._deps = self._get_dependencies()
|
520
503
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
521
504
|
|
522
505
|
transform_kwargs = dict(
|
523
|
-
session
|
524
|
-
dependencies
|
525
|
-
drop_input_cols
|
526
|
-
expected_output_cols_type
|
506
|
+
session=dataset._session,
|
507
|
+
dependencies=self._deps,
|
508
|
+
drop_input_cols=self._drop_input_cols,
|
509
|
+
expected_output_cols_type=expected_dtype,
|
527
510
|
)
|
528
511
|
|
529
512
|
elif isinstance(dataset, pd.DataFrame):
|
530
|
-
transform_kwargs = dict(
|
531
|
-
snowpark_input_cols = self._snowpark_cols,
|
532
|
-
drop_input_cols = self._drop_input_cols
|
533
|
-
)
|
513
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
534
514
|
|
535
515
|
transform_handlers = ModelTransformerBuilder.build(
|
536
516
|
dataset=dataset,
|
@@ -549,7 +529,11 @@ class LassoLarsCV(BaseTransformer):
|
|
549
529
|
return output_df
|
550
530
|
|
551
531
|
@available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
|
552
|
-
def fit_predict(
|
532
|
+
def fit_predict(
|
533
|
+
self,
|
534
|
+
dataset: Union[DataFrame, pd.DataFrame],
|
535
|
+
output_cols_prefix: str = "fit_predict_",
|
536
|
+
) -> Union[DataFrame, pd.DataFrame]:
|
553
537
|
""" Method not supported for this class.
|
554
538
|
|
555
539
|
|
@@ -574,22 +558,104 @@ class LassoLarsCV(BaseTransformer):
|
|
574
558
|
)
|
575
559
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
576
560
|
drop_input_cols=self._drop_input_cols,
|
577
|
-
expected_output_cols_list=
|
561
|
+
expected_output_cols_list=(
|
562
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
563
|
+
),
|
578
564
|
)
|
579
565
|
self._sklearn_object = fitted_estimator
|
580
566
|
self._is_fitted = True
|
581
567
|
return output_result
|
582
568
|
|
569
|
+
|
570
|
+
@available_if(original_estimator_has_callable("fit_transform")) # type: ignore[misc]
|
571
|
+
def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "fit_transform_",) -> Union[DataFrame, pd.DataFrame]:
|
572
|
+
""" Method not supported for this class.
|
573
|
+
|
583
574
|
|
584
|
-
|
585
|
-
|
586
|
-
|
575
|
+
Raises:
|
576
|
+
TypeError: Supported dataset types: snowpark.DataFrame, pandas.DataFrame.
|
577
|
+
|
578
|
+
Args:
|
579
|
+
dataset: Union[snowflake.snowpark.DataFrame, pandas.DataFrame]
|
580
|
+
Snowpark or Pandas DataFrame.
|
581
|
+
output_cols_prefix: Prefix for the response columns
|
587
582
|
Returns:
|
588
583
|
Transformed dataset.
|
589
584
|
"""
|
590
|
-
self.
|
591
|
-
|
592
|
-
|
585
|
+
self._infer_input_output_cols(dataset)
|
586
|
+
super()._check_dataset_type(dataset)
|
587
|
+
model_trainer = ModelTrainerBuilder.build_fit_transform(
|
588
|
+
estimator=self._sklearn_object,
|
589
|
+
dataset=dataset,
|
590
|
+
input_cols=self.input_cols,
|
591
|
+
label_cols=self.label_cols,
|
592
|
+
sample_weight_col=self.sample_weight_col,
|
593
|
+
autogenerated=self._autogenerated,
|
594
|
+
subproject=_SUBPROJECT,
|
595
|
+
)
|
596
|
+
output_result, fitted_estimator = model_trainer.train_fit_transform(
|
597
|
+
drop_input_cols=self._drop_input_cols,
|
598
|
+
expected_output_cols_list=self.output_cols,
|
599
|
+
)
|
600
|
+
self._sklearn_object = fitted_estimator
|
601
|
+
self._is_fitted = True
|
602
|
+
return output_result
|
603
|
+
|
604
|
+
|
605
|
+
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
606
|
+
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
607
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
608
|
+
"""
|
609
|
+
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
610
|
+
# The following condition is introduced for kneighbors methods, and not used in other methods
|
611
|
+
if output_cols:
|
612
|
+
output_cols = [
|
613
|
+
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
614
|
+
for c in output_cols
|
615
|
+
]
|
616
|
+
elif getattr(self._sklearn_object, "classes_", None) is None:
|
617
|
+
output_cols = [output_cols_prefix]
|
618
|
+
elif self._sklearn_object is not None:
|
619
|
+
classes = self._sklearn_object.classes_
|
620
|
+
if isinstance(classes, numpy.ndarray):
|
621
|
+
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
622
|
+
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
623
|
+
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
624
|
+
output_cols = []
|
625
|
+
for i, cl in enumerate(classes):
|
626
|
+
# For binary classification, there is only one output column for each class
|
627
|
+
# ndarray as the two classes are complementary.
|
628
|
+
if len(cl) == 2:
|
629
|
+
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
630
|
+
else:
|
631
|
+
output_cols.extend([
|
632
|
+
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
633
|
+
])
|
634
|
+
else:
|
635
|
+
output_cols = []
|
636
|
+
|
637
|
+
# Make sure column names are valid snowflake identifiers.
|
638
|
+
assert output_cols is not None # Make MyPy happy
|
639
|
+
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
640
|
+
|
641
|
+
return rv
|
642
|
+
|
643
|
+
def _align_expected_output_names(
|
644
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
645
|
+
) -> List[str]:
|
646
|
+
# in case the inferred output column names dimension is different
|
647
|
+
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
648
|
+
output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
|
649
|
+
output_df_columns = list(output_df_pd.columns)
|
650
|
+
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
651
|
+
if self.sample_weight_col:
|
652
|
+
output_df_columns_set -= set(self.sample_weight_col)
|
653
|
+
# if the dimension of inferred output column names is correct; use it
|
654
|
+
if len(expected_output_cols_list) == len(output_df_columns_set):
|
655
|
+
return expected_output_cols_list
|
656
|
+
# otherwise, use the sklearn estimator's output
|
657
|
+
else:
|
658
|
+
return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
593
659
|
|
594
660
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
595
661
|
@telemetry.send_api_usage_telemetry(
|
@@ -621,24 +687,26 @@ class LassoLarsCV(BaseTransformer):
|
|
621
687
|
# are specific to the type of dataset used.
|
622
688
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
623
689
|
|
690
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
691
|
+
|
624
692
|
if isinstance(dataset, DataFrame):
|
625
|
-
self.
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
693
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
694
|
+
self._deps = self._get_dependencies()
|
695
|
+
assert isinstance(
|
696
|
+
dataset._session, Session
|
697
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
630
698
|
transform_kwargs = dict(
|
631
699
|
session=dataset._session,
|
632
700
|
dependencies=self._deps,
|
633
|
-
drop_input_cols
|
701
|
+
drop_input_cols=self._drop_input_cols,
|
634
702
|
expected_output_cols_type="float",
|
635
703
|
)
|
704
|
+
expected_output_cols = self._align_expected_output_names(
|
705
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
706
|
+
)
|
636
707
|
|
637
708
|
elif isinstance(dataset, pd.DataFrame):
|
638
|
-
transform_kwargs = dict(
|
639
|
-
snowpark_input_cols = self._snowpark_cols,
|
640
|
-
drop_input_cols = self._drop_input_cols
|
641
|
-
)
|
709
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
642
710
|
|
643
711
|
transform_handlers = ModelTransformerBuilder.build(
|
644
712
|
dataset=dataset,
|
@@ -650,7 +718,7 @@ class LassoLarsCV(BaseTransformer):
|
|
650
718
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
651
719
|
inference_method=inference_method,
|
652
720
|
input_cols=self.input_cols,
|
653
|
-
expected_output_cols=
|
721
|
+
expected_output_cols=expected_output_cols,
|
654
722
|
**transform_kwargs
|
655
723
|
)
|
656
724
|
return output_df
|
@@ -680,29 +748,30 @@ class LassoLarsCV(BaseTransformer):
|
|
680
748
|
Output dataset with log probability of the sample for each class in the model.
|
681
749
|
"""
|
682
750
|
super()._check_dataset_type(dataset)
|
683
|
-
inference_method="predict_log_proba"
|
751
|
+
inference_method = "predict_log_proba"
|
752
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
684
753
|
|
685
754
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
686
755
|
# are specific to the type of dataset used.
|
687
756
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
688
757
|
|
689
758
|
if isinstance(dataset, DataFrame):
|
690
|
-
self.
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
759
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
760
|
+
self._deps = self._get_dependencies()
|
761
|
+
assert isinstance(
|
762
|
+
dataset._session, Session
|
763
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
695
764
|
transform_kwargs = dict(
|
696
765
|
session=dataset._session,
|
697
766
|
dependencies=self._deps,
|
698
|
-
drop_input_cols
|
767
|
+
drop_input_cols=self._drop_input_cols,
|
699
768
|
expected_output_cols_type="float",
|
700
769
|
)
|
770
|
+
expected_output_cols = self._align_expected_output_names(
|
771
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
772
|
+
)
|
701
773
|
elif isinstance(dataset, pd.DataFrame):
|
702
|
-
transform_kwargs = dict(
|
703
|
-
snowpark_input_cols = self._snowpark_cols,
|
704
|
-
drop_input_cols = self._drop_input_cols
|
705
|
-
)
|
774
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
706
775
|
|
707
776
|
transform_handlers = ModelTransformerBuilder.build(
|
708
777
|
dataset=dataset,
|
@@ -715,7 +784,7 @@ class LassoLarsCV(BaseTransformer):
|
|
715
784
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
716
785
|
inference_method=inference_method,
|
717
786
|
input_cols=self.input_cols,
|
718
|
-
expected_output_cols=
|
787
|
+
expected_output_cols=expected_output_cols,
|
719
788
|
**transform_kwargs
|
720
789
|
)
|
721
790
|
return output_df
|
@@ -741,30 +810,32 @@ class LassoLarsCV(BaseTransformer):
|
|
741
810
|
Output dataset with results of the decision function for the samples in input dataset.
|
742
811
|
"""
|
743
812
|
super()._check_dataset_type(dataset)
|
744
|
-
inference_method="decision_function"
|
813
|
+
inference_method = "decision_function"
|
745
814
|
|
746
815
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
747
816
|
# are specific to the type of dataset used.
|
748
817
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
749
818
|
|
819
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
820
|
+
|
750
821
|
if isinstance(dataset, DataFrame):
|
751
|
-
self.
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
|
822
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
823
|
+
self._deps = self._get_dependencies()
|
824
|
+
assert isinstance(
|
825
|
+
dataset._session, Session
|
826
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
756
827
|
transform_kwargs = dict(
|
757
828
|
session=dataset._session,
|
758
829
|
dependencies=self._deps,
|
759
|
-
drop_input_cols
|
830
|
+
drop_input_cols=self._drop_input_cols,
|
760
831
|
expected_output_cols_type="float",
|
761
832
|
)
|
833
|
+
expected_output_cols = self._align_expected_output_names(
|
834
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
835
|
+
)
|
762
836
|
|
763
837
|
elif isinstance(dataset, pd.DataFrame):
|
764
|
-
transform_kwargs = dict(
|
765
|
-
snowpark_input_cols = self._snowpark_cols,
|
766
|
-
drop_input_cols = self._drop_input_cols
|
767
|
-
)
|
838
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
768
839
|
|
769
840
|
transform_handlers = ModelTransformerBuilder.build(
|
770
841
|
dataset=dataset,
|
@@ -777,7 +848,7 @@ class LassoLarsCV(BaseTransformer):
|
|
777
848
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
778
849
|
inference_method=inference_method,
|
779
850
|
input_cols=self.input_cols,
|
780
|
-
expected_output_cols=
|
851
|
+
expected_output_cols=expected_output_cols,
|
781
852
|
**transform_kwargs
|
782
853
|
)
|
783
854
|
return output_df
|
@@ -806,17 +877,17 @@ class LassoLarsCV(BaseTransformer):
|
|
806
877
|
Output dataset with probability of the sample for each class in the model.
|
807
878
|
"""
|
808
879
|
super()._check_dataset_type(dataset)
|
809
|
-
inference_method="score_samples"
|
880
|
+
inference_method = "score_samples"
|
810
881
|
|
811
882
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
812
883
|
# are specific to the type of dataset used.
|
813
884
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
814
885
|
|
886
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
887
|
+
|
815
888
|
if isinstance(dataset, DataFrame):
|
816
|
-
self.
|
817
|
-
|
818
|
-
inference_method=inference_method,
|
819
|
-
)
|
889
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
890
|
+
self._deps = self._get_dependencies()
|
820
891
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
821
892
|
transform_kwargs = dict(
|
822
893
|
session=dataset._session,
|
@@ -824,6 +895,9 @@ class LassoLarsCV(BaseTransformer):
|
|
824
895
|
drop_input_cols = self._drop_input_cols,
|
825
896
|
expected_output_cols_type="float",
|
826
897
|
)
|
898
|
+
expected_output_cols = self._align_expected_output_names(
|
899
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
900
|
+
)
|
827
901
|
|
828
902
|
elif isinstance(dataset, pd.DataFrame):
|
829
903
|
transform_kwargs = dict(
|
@@ -842,7 +916,7 @@ class LassoLarsCV(BaseTransformer):
|
|
842
916
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
843
917
|
inference_method=inference_method,
|
844
918
|
input_cols=self.input_cols,
|
845
|
-
expected_output_cols=
|
919
|
+
expected_output_cols=expected_output_cols,
|
846
920
|
**transform_kwargs
|
847
921
|
)
|
848
922
|
return output_df
|
@@ -877,17 +951,15 @@ class LassoLarsCV(BaseTransformer):
|
|
877
951
|
transform_kwargs: ScoreKwargsTypedDict = dict()
|
878
952
|
|
879
953
|
if isinstance(dataset, DataFrame):
|
880
|
-
self.
|
881
|
-
|
882
|
-
inference_method="score",
|
883
|
-
)
|
954
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method="score")
|
955
|
+
self._deps = self._get_dependencies()
|
884
956
|
selected_cols = self._get_active_columns()
|
885
957
|
if len(selected_cols) > 0:
|
886
958
|
dataset = dataset.select(selected_cols)
|
887
959
|
assert isinstance(dataset._session, Session) # keep mypy happy
|
888
960
|
transform_kwargs = dict(
|
889
961
|
session=dataset._session,
|
890
|
-
dependencies=
|
962
|
+
dependencies=self._deps,
|
891
963
|
score_sproc_imports=['sklearn'],
|
892
964
|
)
|
893
965
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -952,11 +1024,8 @@ class LassoLarsCV(BaseTransformer):
|
|
952
1024
|
|
953
1025
|
if isinstance(dataset, DataFrame):
|
954
1026
|
|
955
|
-
self.
|
956
|
-
|
957
|
-
inference_method=inference_method,
|
958
|
-
|
959
|
-
)
|
1027
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
1028
|
+
self._deps = self._get_dependencies()
|
960
1029
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
961
1030
|
transform_kwargs = dict(
|
962
1031
|
session = dataset._session,
|
@@ -989,50 +1058,84 @@ class LassoLarsCV(BaseTransformer):
|
|
989
1058
|
)
|
990
1059
|
return output_df
|
991
1060
|
|
1061
|
+
|
1062
|
+
|
1063
|
+
def to_sklearn(self) -> Any:
|
1064
|
+
"""Get sklearn.linear_model.LassoLarsCV object.
|
1065
|
+
"""
|
1066
|
+
if self._sklearn_object is None:
|
1067
|
+
self._sklearn_object = self._create_sklearn_object()
|
1068
|
+
return self._sklearn_object
|
1069
|
+
|
1070
|
+
def to_xgboost(self) -> Any:
|
1071
|
+
raise exceptions.SnowflakeMLException(
|
1072
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1073
|
+
original_exception=AttributeError(
|
1074
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1075
|
+
"to_xgboost()",
|
1076
|
+
"to_sklearn()"
|
1077
|
+
)
|
1078
|
+
),
|
1079
|
+
)
|
1080
|
+
|
1081
|
+
def to_lightgbm(self) -> Any:
|
1082
|
+
raise exceptions.SnowflakeMLException(
|
1083
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1084
|
+
original_exception=AttributeError(
|
1085
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1086
|
+
"to_lightgbm()",
|
1087
|
+
"to_sklearn()"
|
1088
|
+
)
|
1089
|
+
),
|
1090
|
+
)
|
1091
|
+
|
1092
|
+
def _get_dependencies(self) -> List[str]:
|
1093
|
+
return self._deps
|
1094
|
+
|
992
1095
|
|
993
|
-
def
|
1096
|
+
def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
994
1097
|
self._model_signature_dict = dict()
|
995
1098
|
|
996
1099
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
997
1100
|
|
998
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input"))
|
1101
|
+
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
999
1102
|
outputs: List[BaseFeatureSpec] = []
|
1000
1103
|
if hasattr(self, "predict"):
|
1001
1104
|
# keep mypy happy
|
1002
|
-
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1105
|
+
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1003
1106
|
# For classifier, the type of predict is the same as the type of label
|
1004
|
-
if self._sklearn_object._estimator_type ==
|
1005
|
-
|
1107
|
+
if self._sklearn_object._estimator_type == "classifier":
|
1108
|
+
# label columns is the desired type for output
|
1006
1109
|
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1007
1110
|
# rename the output columns
|
1008
1111
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1009
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1010
|
-
|
1011
|
-
|
1112
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1113
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1114
|
+
)
|
1012
1115
|
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
1013
1116
|
# For outlier models, returns -1 for outliers and 1 for inliers.
|
1014
|
-
# Clusterer returns int64 cluster labels.
|
1117
|
+
# Clusterer returns int64 cluster labels.
|
1015
1118
|
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
1016
1119
|
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
1017
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1018
|
-
|
1019
|
-
|
1020
|
-
|
1120
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1121
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1122
|
+
)
|
1123
|
+
|
1021
1124
|
# For regressor, the type of predict is float64
|
1022
|
-
elif self._sklearn_object._estimator_type ==
|
1125
|
+
elif self._sklearn_object._estimator_type == "regressor":
|
1023
1126
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1024
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1025
|
-
|
1026
|
-
|
1027
|
-
|
1127
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1128
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1129
|
+
)
|
1130
|
+
|
1028
1131
|
for prob_func in PROB_FUNCTIONS:
|
1029
1132
|
if hasattr(self, prob_func):
|
1030
1133
|
output_cols_prefix: str = f"{prob_func}_"
|
1031
1134
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1032
1135
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1033
|
-
self._model_signature_dict[prob_func] = ModelSignature(
|
1034
|
-
|
1035
|
-
|
1136
|
+
self._model_signature_dict[prob_func] = ModelSignature(
|
1137
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1138
|
+
)
|
1036
1139
|
|
1037
1140
|
# Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
|
1038
1141
|
items = list(self._model_signature_dict.items())
|
@@ -1045,10 +1148,10 @@ class LassoLarsCV(BaseTransformer):
|
|
1045
1148
|
"""Returns model signature of current class.
|
1046
1149
|
|
1047
1150
|
Raises:
|
1048
|
-
|
1151
|
+
SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
|
1049
1152
|
|
1050
1153
|
Returns:
|
1051
|
-
Dict
|
1154
|
+
Dict with each method and its input output signature
|
1052
1155
|
"""
|
1053
1156
|
if self._model_signature_dict is None:
|
1054
1157
|
raise exceptions.SnowflakeMLException(
|
@@ -1056,35 +1159,3 @@ class LassoLarsCV(BaseTransformer):
|
|
1056
1159
|
original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
|
1057
1160
|
)
|
1058
1161
|
return self._model_signature_dict
|
1059
|
-
|
1060
|
-
def to_sklearn(self) -> Any:
|
1061
|
-
"""Get sklearn.linear_model.LassoLarsCV object.
|
1062
|
-
"""
|
1063
|
-
if self._sklearn_object is None:
|
1064
|
-
self._sklearn_object = self._create_sklearn_object()
|
1065
|
-
return self._sklearn_object
|
1066
|
-
|
1067
|
-
def to_xgboost(self) -> Any:
|
1068
|
-
raise exceptions.SnowflakeMLException(
|
1069
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1070
|
-
original_exception=AttributeError(
|
1071
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1072
|
-
"to_xgboost()",
|
1073
|
-
"to_sklearn()"
|
1074
|
-
)
|
1075
|
-
),
|
1076
|
-
)
|
1077
|
-
|
1078
|
-
def to_lightgbm(self) -> Any:
|
1079
|
-
raise exceptions.SnowflakeMLException(
|
1080
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1081
|
-
original_exception=AttributeError(
|
1082
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1083
|
-
"to_lightgbm()",
|
1084
|
-
"to_sklearn()"
|
1085
|
-
)
|
1086
|
-
),
|
1087
|
-
)
|
1088
|
-
|
1089
|
-
def _get_dependencies(self) -> List[str]:
|
1090
|
-
return self._deps
|