snowflake-ml-python 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +77 -32
- snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
- snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
- snowflake/ml/_internal/exceptions/error_codes.py +3 -0
- snowflake/ml/_internal/lineage/data_source.py +10 -0
- snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/dataset/__init__.py +10 -0
- snowflake/ml/dataset/dataset.py +454 -129
- snowflake/ml/dataset/dataset_factory.py +53 -0
- snowflake/ml/dataset/dataset_metadata.py +103 -0
- snowflake/ml/dataset/dataset_reader.py +202 -0
- snowflake/ml/feature_store/feature_store.py +531 -332
- snowflake/ml/feature_store/feature_view.py +40 -23
- snowflake/ml/fileset/embedded_stage_fs.py +146 -0
- snowflake/ml/fileset/sfcfs.py +56 -54
- snowflake/ml/fileset/snowfs.py +159 -0
- snowflake/ml/fileset/stage_fs.py +49 -17
- snowflake/ml/model/__init__.py +2 -2
- snowflake/ml/model/_api.py +16 -1
- snowflake/ml/model/_client/model/model_impl.py +27 -0
- snowflake/ml/model/_client/model/model_version_impl.py +137 -50
- snowflake/ml/model/_client/ops/model_ops.py +159 -40
- snowflake/ml/model/_client/sql/model.py +25 -2
- snowflake/ml/model/_client/sql/model_version.py +131 -2
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
- snowflake/ml/model/_model_composer/model_composer.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +38 -51
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +19 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_env/model_env.py +41 -0
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +37 -11
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -5
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
- snowflake/ml/modeling/_internal/model_trainer.py +7 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +29 -7
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +246 -175
- snowflake/ml/modeling/cluster/affinity_propagation.py +246 -175
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +246 -175
- snowflake/ml/modeling/cluster/birch.py +248 -175
- snowflake/ml/modeling/cluster/bisecting_k_means.py +248 -175
- snowflake/ml/modeling/cluster/dbscan.py +246 -175
- snowflake/ml/modeling/cluster/feature_agglomeration.py +248 -175
- snowflake/ml/modeling/cluster/k_means.py +248 -175
- snowflake/ml/modeling/cluster/mean_shift.py +246 -175
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +248 -175
- snowflake/ml/modeling/cluster/optics.py +246 -175
- snowflake/ml/modeling/cluster/spectral_biclustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_clustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_coclustering.py +246 -175
- snowflake/ml/modeling/compose/column_transformer.py +248 -175
- snowflake/ml/modeling/compose/transformed_target_regressor.py +246 -175
- snowflake/ml/modeling/covariance/elliptic_envelope.py +246 -175
- snowflake/ml/modeling/covariance/empirical_covariance.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +246 -175
- snowflake/ml/modeling/covariance/ledoit_wolf.py +246 -175
- snowflake/ml/modeling/covariance/min_cov_det.py +246 -175
- snowflake/ml/modeling/covariance/oas.py +246 -175
- snowflake/ml/modeling/covariance/shrunk_covariance.py +246 -175
- snowflake/ml/modeling/decomposition/dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/factor_analysis.py +248 -175
- snowflake/ml/modeling/decomposition/fast_ica.py +248 -175
- snowflake/ml/modeling/decomposition/incremental_pca.py +248 -175
- snowflake/ml/modeling/decomposition/kernel_pca.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/pca.py +248 -175
- snowflake/ml/modeling/decomposition/sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/truncated_svd.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/isolation_forest.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/stacking_regressor.py +248 -175
- snowflake/ml/modeling/ensemble/voting_classifier.py +248 -175
- snowflake/ml/modeling/ensemble/voting_regressor.py +248 -175
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fdr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fpr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fwe.py +248 -175
- snowflake/ml/modeling/feature_selection/select_k_best.py +248 -175
- snowflake/ml/modeling/feature_selection/select_percentile.py +248 -175
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +248 -175
- snowflake/ml/modeling/feature_selection/variance_threshold.py +248 -175
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +72 -37
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +246 -175
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +246 -175
- snowflake/ml/modeling/impute/iterative_imputer.py +248 -175
- snowflake/ml/modeling/impute/knn_imputer.py +248 -175
- snowflake/ml/modeling/impute/missing_indicator.py +248 -175
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/nystroem.py +248 -175
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +248 -175
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ard_regression.py +246 -175
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/gamma_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/huber_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/lars.py +246 -175
- snowflake/ml/modeling/linear_model/lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +246 -175
- snowflake/ml/modeling/linear_model/linear_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/perceptron.py +246 -175
- snowflake/ml/modeling/linear_model/poisson_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ransac_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ridge.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_cv.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +246 -175
- snowflake/ml/modeling/manifold/isomap.py +248 -175
- snowflake/ml/modeling/manifold/mds.py +248 -175
- snowflake/ml/modeling/manifold/spectral_embedding.py +248 -175
- snowflake/ml/modeling/manifold/tsne.py +248 -175
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +246 -175
- snowflake/ml/modeling/mixture/gaussian_mixture.py +246 -175
- snowflake/ml/modeling/model_selection/grid_search_cv.py +63 -41
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +80 -38
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/output_code_classifier.py +246 -175
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/complement_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neighbors/kernel_density.py +246 -175
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_centroid.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +246 -175
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +248 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +248 -175
- snowflake/ml/modeling/neural_network/mlp_classifier.py +246 -175
- snowflake/ml/modeling/neural_network/mlp_regressor.py +246 -175
- snowflake/ml/modeling/pipeline/pipeline.py +517 -35
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +13 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +248 -175
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +246 -175
- snowflake/ml/modeling/semi_supervised/label_spreading.py +246 -175
- snowflake/ml/modeling/svm/linear_svc.py +246 -175
- snowflake/ml/modeling/svm/linear_svr.py +246 -175
- snowflake/ml/modeling/svm/nu_svc.py +246 -175
- snowflake/ml/modeling/svm/nu_svr.py +246 -175
- snowflake/ml/modeling/svm/svc.py +246 -175
- snowflake/ml/modeling/svm/svr.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_regressor.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +246 -175
- snowflake/ml/registry/model_registry.py +3 -149
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +129 -57
- snowflake_ml_python-1.5.0.dist-info/RECORD +380 -0
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- snowflake/ml/registry/_artifact_manager.py +0 -156
- snowflake/ml/registry/artifact.py +0 -46
- snowflake_ml_python-1.4.0.dist-info/RECORD +0 -370
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
|
|
33
33
|
BatchInferenceKwargsTypedDict,
|
34
34
|
ScoreKwargsTypedDict
|
35
35
|
)
|
36
|
+
from snowflake.ml.model._signatures import utils as model_signature_utils
|
37
|
+
from snowflake.ml.model.model_signature import (
|
38
|
+
BaseFeatureSpec,
|
39
|
+
DataType,
|
40
|
+
FeatureSpec,
|
41
|
+
ModelSignature,
|
42
|
+
_infer_signature,
|
43
|
+
_rename_signature_with_snowflake_identifiers,
|
44
|
+
)
|
36
45
|
|
37
46
|
from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
|
38
47
|
|
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
|
|
43
52
|
validate_sklearn_args,
|
44
53
|
)
|
45
54
|
|
46
|
-
from snowflake.ml.model.model_signature import (
|
47
|
-
DataType,
|
48
|
-
FeatureSpec,
|
49
|
-
ModelSignature,
|
50
|
-
_infer_signature,
|
51
|
-
_rename_signature_with_snowflake_identifiers,
|
52
|
-
BaseFeatureSpec,
|
53
|
-
)
|
54
|
-
from snowflake.ml.model._signatures import utils as model_signature_utils
|
55
|
-
|
56
55
|
_PROJECT = "ModelDevelopment"
|
57
56
|
# Derive subproject from module name by removing "sklearn"
|
58
57
|
# and converting module name from underscore to CamelCase
|
@@ -61,12 +60,6 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
|
|
61
60
|
|
62
61
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
63
62
|
|
64
|
-
def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
|
65
|
-
def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
|
66
|
-
return False and callable(getattr(self._sklearn_object, "fit_transform", None))
|
67
|
-
return check
|
68
|
-
|
69
|
-
|
70
63
|
class LassoLarsIC(BaseTransformer):
|
71
64
|
r"""Lasso model fit with Lars using BIC or AIC for model selection
|
72
65
|
For more details on this class, see [sklearn.linear_model.LassoLarsIC]
|
@@ -266,12 +259,7 @@ class LassoLarsIC(BaseTransformer):
|
|
266
259
|
)
|
267
260
|
return selected_cols
|
268
261
|
|
269
|
-
|
270
|
-
project=_PROJECT,
|
271
|
-
subproject=_SUBPROJECT,
|
272
|
-
custom_tags=dict([("autogen", True)]),
|
273
|
-
)
|
274
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "LassoLarsIC":
|
262
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "LassoLarsIC":
|
275
263
|
"""Fit the model using X, y as training data
|
276
264
|
For more details on this function, see [sklearn.linear_model.LassoLarsIC.fit]
|
277
265
|
(https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LassoLarsIC.html#sklearn.linear_model.LassoLarsIC.fit)
|
@@ -298,12 +286,14 @@ class LassoLarsIC(BaseTransformer):
|
|
298
286
|
|
299
287
|
self._snowpark_cols = dataset.select(self.input_cols).columns
|
300
288
|
|
301
|
-
|
289
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
302
290
|
if SNOWML_SPROC_ENV in os.environ:
|
303
291
|
statement_params = telemetry.get_function_usage_statement_params(
|
304
292
|
project=_PROJECT,
|
305
293
|
subproject=_SUBPROJECT,
|
306
|
-
function_name=telemetry.get_statement_params_full_func_name(
|
294
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
295
|
+
inspect.currentframe(), LassoLarsIC.__class__.__name__
|
296
|
+
),
|
307
297
|
api_calls=[Session.call],
|
308
298
|
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
309
299
|
)
|
@@ -324,27 +314,24 @@ class LassoLarsIC(BaseTransformer):
|
|
324
314
|
)
|
325
315
|
self._sklearn_object = model_trainer.train()
|
326
316
|
self._is_fitted = True
|
327
|
-
self.
|
317
|
+
self._generate_model_signatures(dataset)
|
328
318
|
return self
|
329
319
|
|
330
320
|
def _batch_inference_validate_snowpark(
|
331
321
|
self,
|
332
322
|
dataset: DataFrame,
|
333
323
|
inference_method: str,
|
334
|
-
) ->
|
335
|
-
"""Util method to run validate that batch inference can be run on a snowpark dataframe
|
336
|
-
return the available package that exists in the snowflake anaconda channel
|
324
|
+
) -> None:
|
325
|
+
"""Util method to run validate that batch inference can be run on a snowpark dataframe.
|
337
326
|
|
338
327
|
Args:
|
339
328
|
dataset: snowpark dataframe
|
340
329
|
inference_method: the inference method such as predict, score...
|
341
|
-
|
330
|
+
|
342
331
|
Raises:
|
343
332
|
SnowflakeMLException: If the estimator is not fitted, raise error
|
344
333
|
SnowflakeMLException: If the session is None, raise error
|
345
334
|
|
346
|
-
Returns:
|
347
|
-
A list of available package that exists in the snowflake anaconda channel
|
348
335
|
"""
|
349
336
|
if not self._is_fitted:
|
350
337
|
raise exceptions.SnowflakeMLException(
|
@@ -362,9 +349,7 @@ class LassoLarsIC(BaseTransformer):
|
|
362
349
|
"Session must not specified for snowpark dataset."
|
363
350
|
),
|
364
351
|
)
|
365
|
-
|
366
|
-
return pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
|
367
|
-
pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
|
352
|
+
|
368
353
|
|
369
354
|
@available_if(original_estimator_has_callable("predict")) # type: ignore[misc]
|
370
355
|
@telemetry.send_api_usage_telemetry(
|
@@ -400,7 +385,9 @@ class LassoLarsIC(BaseTransformer):
|
|
400
385
|
# when it is classifier, infer the datatype from label columns
|
401
386
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
402
387
|
# Batch inference takes a single expected output column type. Use the first columns type for now.
|
403
|
-
label_cols_signatures = [
|
388
|
+
label_cols_signatures = [
|
389
|
+
row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
|
390
|
+
]
|
404
391
|
if len(label_cols_signatures) == 0:
|
405
392
|
error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
|
406
393
|
raise exceptions.SnowflakeMLException(
|
@@ -408,25 +395,23 @@ class LassoLarsIC(BaseTransformer):
|
|
408
395
|
original_exception=ValueError(error_str),
|
409
396
|
)
|
410
397
|
|
411
|
-
expected_type_inferred = convert_sp_to_sf_type(
|
412
|
-
label_cols_signatures[0].as_snowpark_type()
|
413
|
-
)
|
398
|
+
expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
|
414
399
|
|
415
|
-
self.
|
416
|
-
|
400
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
401
|
+
self._deps = self._get_dependencies()
|
402
|
+
assert isinstance(
|
403
|
+
dataset._session, Session
|
404
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
417
405
|
|
418
406
|
transform_kwargs = dict(
|
419
|
-
session
|
420
|
-
dependencies
|
421
|
-
drop_input_cols
|
422
|
-
expected_output_cols_type
|
407
|
+
session=dataset._session,
|
408
|
+
dependencies=self._deps,
|
409
|
+
drop_input_cols=self._drop_input_cols,
|
410
|
+
expected_output_cols_type=expected_type_inferred,
|
423
411
|
)
|
424
412
|
|
425
413
|
elif isinstance(dataset, pd.DataFrame):
|
426
|
-
transform_kwargs = dict(
|
427
|
-
snowpark_input_cols = self._snowpark_cols,
|
428
|
-
drop_input_cols = self._drop_input_cols
|
429
|
-
)
|
414
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
430
415
|
|
431
416
|
transform_handlers = ModelTransformerBuilder.build(
|
432
417
|
dataset=dataset,
|
@@ -466,7 +451,7 @@ class LassoLarsIC(BaseTransformer):
|
|
466
451
|
Transformed dataset.
|
467
452
|
"""
|
468
453
|
super()._check_dataset_type(dataset)
|
469
|
-
inference_method="transform"
|
454
|
+
inference_method = "transform"
|
470
455
|
|
471
456
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
472
457
|
# are specific to the type of dataset used.
|
@@ -496,24 +481,19 @@ class LassoLarsIC(BaseTransformer):
|
|
496
481
|
if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
|
497
482
|
expected_dtype = convert_sp_to_sf_type(output_types[0])
|
498
483
|
|
499
|
-
self.
|
500
|
-
|
501
|
-
inference_method=inference_method,
|
502
|
-
)
|
484
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
485
|
+
self._deps = self._get_dependencies()
|
503
486
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
504
487
|
|
505
488
|
transform_kwargs = dict(
|
506
|
-
session
|
507
|
-
dependencies
|
508
|
-
drop_input_cols
|
509
|
-
expected_output_cols_type
|
489
|
+
session=dataset._session,
|
490
|
+
dependencies=self._deps,
|
491
|
+
drop_input_cols=self._drop_input_cols,
|
492
|
+
expected_output_cols_type=expected_dtype,
|
510
493
|
)
|
511
494
|
|
512
495
|
elif isinstance(dataset, pd.DataFrame):
|
513
|
-
transform_kwargs = dict(
|
514
|
-
snowpark_input_cols = self._snowpark_cols,
|
515
|
-
drop_input_cols = self._drop_input_cols
|
516
|
-
)
|
496
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
517
497
|
|
518
498
|
transform_handlers = ModelTransformerBuilder.build(
|
519
499
|
dataset=dataset,
|
@@ -532,7 +512,11 @@ class LassoLarsIC(BaseTransformer):
|
|
532
512
|
return output_df
|
533
513
|
|
534
514
|
@available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
|
535
|
-
def fit_predict(
|
515
|
+
def fit_predict(
|
516
|
+
self,
|
517
|
+
dataset: Union[DataFrame, pd.DataFrame],
|
518
|
+
output_cols_prefix: str = "fit_predict_",
|
519
|
+
) -> Union[DataFrame, pd.DataFrame]:
|
536
520
|
""" Method not supported for this class.
|
537
521
|
|
538
522
|
|
@@ -557,22 +541,104 @@ class LassoLarsIC(BaseTransformer):
|
|
557
541
|
)
|
558
542
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
559
543
|
drop_input_cols=self._drop_input_cols,
|
560
|
-
expected_output_cols_list=
|
544
|
+
expected_output_cols_list=(
|
545
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
546
|
+
),
|
561
547
|
)
|
562
548
|
self._sklearn_object = fitted_estimator
|
563
549
|
self._is_fitted = True
|
564
550
|
return output_result
|
565
551
|
|
552
|
+
|
553
|
+
@available_if(original_estimator_has_callable("fit_transform")) # type: ignore[misc]
|
554
|
+
def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "fit_transform_",) -> Union[DataFrame, pd.DataFrame]:
|
555
|
+
""" Method not supported for this class.
|
556
|
+
|
566
557
|
|
567
|
-
|
568
|
-
|
569
|
-
|
558
|
+
Raises:
|
559
|
+
TypeError: Supported dataset types: snowpark.DataFrame, pandas.DataFrame.
|
560
|
+
|
561
|
+
Args:
|
562
|
+
dataset: Union[snowflake.snowpark.DataFrame, pandas.DataFrame]
|
563
|
+
Snowpark or Pandas DataFrame.
|
564
|
+
output_cols_prefix: Prefix for the response columns
|
570
565
|
Returns:
|
571
566
|
Transformed dataset.
|
572
567
|
"""
|
573
|
-
self.
|
574
|
-
|
575
|
-
|
568
|
+
self._infer_input_output_cols(dataset)
|
569
|
+
super()._check_dataset_type(dataset)
|
570
|
+
model_trainer = ModelTrainerBuilder.build_fit_transform(
|
571
|
+
estimator=self._sklearn_object,
|
572
|
+
dataset=dataset,
|
573
|
+
input_cols=self.input_cols,
|
574
|
+
label_cols=self.label_cols,
|
575
|
+
sample_weight_col=self.sample_weight_col,
|
576
|
+
autogenerated=self._autogenerated,
|
577
|
+
subproject=_SUBPROJECT,
|
578
|
+
)
|
579
|
+
output_result, fitted_estimator = model_trainer.train_fit_transform(
|
580
|
+
drop_input_cols=self._drop_input_cols,
|
581
|
+
expected_output_cols_list=self.output_cols,
|
582
|
+
)
|
583
|
+
self._sklearn_object = fitted_estimator
|
584
|
+
self._is_fitted = True
|
585
|
+
return output_result
|
586
|
+
|
587
|
+
|
588
|
+
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
589
|
+
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
590
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
591
|
+
"""
|
592
|
+
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
593
|
+
# The following condition is introduced for kneighbors methods, and not used in other methods
|
594
|
+
if output_cols:
|
595
|
+
output_cols = [
|
596
|
+
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
597
|
+
for c in output_cols
|
598
|
+
]
|
599
|
+
elif getattr(self._sklearn_object, "classes_", None) is None:
|
600
|
+
output_cols = [output_cols_prefix]
|
601
|
+
elif self._sklearn_object is not None:
|
602
|
+
classes = self._sklearn_object.classes_
|
603
|
+
if isinstance(classes, numpy.ndarray):
|
604
|
+
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
605
|
+
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
606
|
+
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
607
|
+
output_cols = []
|
608
|
+
for i, cl in enumerate(classes):
|
609
|
+
# For binary classification, there is only one output column for each class
|
610
|
+
# ndarray as the two classes are complementary.
|
611
|
+
if len(cl) == 2:
|
612
|
+
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
613
|
+
else:
|
614
|
+
output_cols.extend([
|
615
|
+
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
616
|
+
])
|
617
|
+
else:
|
618
|
+
output_cols = []
|
619
|
+
|
620
|
+
# Make sure column names are valid snowflake identifiers.
|
621
|
+
assert output_cols is not None # Make MyPy happy
|
622
|
+
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
623
|
+
|
624
|
+
return rv
|
625
|
+
|
626
|
+
def _align_expected_output_names(
|
627
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
628
|
+
) -> List[str]:
|
629
|
+
# in case the inferred output column names dimension is different
|
630
|
+
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
631
|
+
output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
|
632
|
+
output_df_columns = list(output_df_pd.columns)
|
633
|
+
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
634
|
+
if self.sample_weight_col:
|
635
|
+
output_df_columns_set -= set(self.sample_weight_col)
|
636
|
+
# if the dimension of inferred output column names is correct; use it
|
637
|
+
if len(expected_output_cols_list) == len(output_df_columns_set):
|
638
|
+
return expected_output_cols_list
|
639
|
+
# otherwise, use the sklearn estimator's output
|
640
|
+
else:
|
641
|
+
return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
576
642
|
|
577
643
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
578
644
|
@telemetry.send_api_usage_telemetry(
|
@@ -604,24 +670,26 @@ class LassoLarsIC(BaseTransformer):
|
|
604
670
|
# are specific to the type of dataset used.
|
605
671
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
606
672
|
|
673
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
674
|
+
|
607
675
|
if isinstance(dataset, DataFrame):
|
608
|
-
self.
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
676
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
677
|
+
self._deps = self._get_dependencies()
|
678
|
+
assert isinstance(
|
679
|
+
dataset._session, Session
|
680
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
613
681
|
transform_kwargs = dict(
|
614
682
|
session=dataset._session,
|
615
683
|
dependencies=self._deps,
|
616
|
-
drop_input_cols
|
684
|
+
drop_input_cols=self._drop_input_cols,
|
617
685
|
expected_output_cols_type="float",
|
618
686
|
)
|
687
|
+
expected_output_cols = self._align_expected_output_names(
|
688
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
689
|
+
)
|
619
690
|
|
620
691
|
elif isinstance(dataset, pd.DataFrame):
|
621
|
-
transform_kwargs = dict(
|
622
|
-
snowpark_input_cols = self._snowpark_cols,
|
623
|
-
drop_input_cols = self._drop_input_cols
|
624
|
-
)
|
692
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
625
693
|
|
626
694
|
transform_handlers = ModelTransformerBuilder.build(
|
627
695
|
dataset=dataset,
|
@@ -633,7 +701,7 @@ class LassoLarsIC(BaseTransformer):
|
|
633
701
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
634
702
|
inference_method=inference_method,
|
635
703
|
input_cols=self.input_cols,
|
636
|
-
expected_output_cols=
|
704
|
+
expected_output_cols=expected_output_cols,
|
637
705
|
**transform_kwargs
|
638
706
|
)
|
639
707
|
return output_df
|
@@ -663,29 +731,30 @@ class LassoLarsIC(BaseTransformer):
|
|
663
731
|
Output dataset with log probability of the sample for each class in the model.
|
664
732
|
"""
|
665
733
|
super()._check_dataset_type(dataset)
|
666
|
-
inference_method="predict_log_proba"
|
734
|
+
inference_method = "predict_log_proba"
|
735
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
667
736
|
|
668
737
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
669
738
|
# are specific to the type of dataset used.
|
670
739
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
671
740
|
|
672
741
|
if isinstance(dataset, DataFrame):
|
673
|
-
self.
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
742
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
743
|
+
self._deps = self._get_dependencies()
|
744
|
+
assert isinstance(
|
745
|
+
dataset._session, Session
|
746
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
678
747
|
transform_kwargs = dict(
|
679
748
|
session=dataset._session,
|
680
749
|
dependencies=self._deps,
|
681
|
-
drop_input_cols
|
750
|
+
drop_input_cols=self._drop_input_cols,
|
682
751
|
expected_output_cols_type="float",
|
683
752
|
)
|
753
|
+
expected_output_cols = self._align_expected_output_names(
|
754
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
755
|
+
)
|
684
756
|
elif isinstance(dataset, pd.DataFrame):
|
685
|
-
transform_kwargs = dict(
|
686
|
-
snowpark_input_cols = self._snowpark_cols,
|
687
|
-
drop_input_cols = self._drop_input_cols
|
688
|
-
)
|
757
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
689
758
|
|
690
759
|
transform_handlers = ModelTransformerBuilder.build(
|
691
760
|
dataset=dataset,
|
@@ -698,7 +767,7 @@ class LassoLarsIC(BaseTransformer):
|
|
698
767
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
699
768
|
inference_method=inference_method,
|
700
769
|
input_cols=self.input_cols,
|
701
|
-
expected_output_cols=
|
770
|
+
expected_output_cols=expected_output_cols,
|
702
771
|
**transform_kwargs
|
703
772
|
)
|
704
773
|
return output_df
|
@@ -724,30 +793,32 @@ class LassoLarsIC(BaseTransformer):
|
|
724
793
|
Output dataset with results of the decision function for the samples in input dataset.
|
725
794
|
"""
|
726
795
|
super()._check_dataset_type(dataset)
|
727
|
-
inference_method="decision_function"
|
796
|
+
inference_method = "decision_function"
|
728
797
|
|
729
798
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
730
799
|
# are specific to the type of dataset used.
|
731
800
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
732
801
|
|
802
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
803
|
+
|
733
804
|
if isinstance(dataset, DataFrame):
|
734
|
-
self.
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
|
805
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
806
|
+
self._deps = self._get_dependencies()
|
807
|
+
assert isinstance(
|
808
|
+
dataset._session, Session
|
809
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
739
810
|
transform_kwargs = dict(
|
740
811
|
session=dataset._session,
|
741
812
|
dependencies=self._deps,
|
742
|
-
drop_input_cols
|
813
|
+
drop_input_cols=self._drop_input_cols,
|
743
814
|
expected_output_cols_type="float",
|
744
815
|
)
|
816
|
+
expected_output_cols = self._align_expected_output_names(
|
817
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
818
|
+
)
|
745
819
|
|
746
820
|
elif isinstance(dataset, pd.DataFrame):
|
747
|
-
transform_kwargs = dict(
|
748
|
-
snowpark_input_cols = self._snowpark_cols,
|
749
|
-
drop_input_cols = self._drop_input_cols
|
750
|
-
)
|
821
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
751
822
|
|
752
823
|
transform_handlers = ModelTransformerBuilder.build(
|
753
824
|
dataset=dataset,
|
@@ -760,7 +831,7 @@ class LassoLarsIC(BaseTransformer):
|
|
760
831
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
761
832
|
inference_method=inference_method,
|
762
833
|
input_cols=self.input_cols,
|
763
|
-
expected_output_cols=
|
834
|
+
expected_output_cols=expected_output_cols,
|
764
835
|
**transform_kwargs
|
765
836
|
)
|
766
837
|
return output_df
|
@@ -789,17 +860,17 @@ class LassoLarsIC(BaseTransformer):
|
|
789
860
|
Output dataset with probability of the sample for each class in the model.
|
790
861
|
"""
|
791
862
|
super()._check_dataset_type(dataset)
|
792
|
-
inference_method="score_samples"
|
863
|
+
inference_method = "score_samples"
|
793
864
|
|
794
865
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
795
866
|
# are specific to the type of dataset used.
|
796
867
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
797
868
|
|
869
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
870
|
+
|
798
871
|
if isinstance(dataset, DataFrame):
|
799
|
-
self.
|
800
|
-
|
801
|
-
inference_method=inference_method,
|
802
|
-
)
|
872
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
873
|
+
self._deps = self._get_dependencies()
|
803
874
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
804
875
|
transform_kwargs = dict(
|
805
876
|
session=dataset._session,
|
@@ -807,6 +878,9 @@ class LassoLarsIC(BaseTransformer):
|
|
807
878
|
drop_input_cols = self._drop_input_cols,
|
808
879
|
expected_output_cols_type="float",
|
809
880
|
)
|
881
|
+
expected_output_cols = self._align_expected_output_names(
|
882
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
883
|
+
)
|
810
884
|
|
811
885
|
elif isinstance(dataset, pd.DataFrame):
|
812
886
|
transform_kwargs = dict(
|
@@ -825,7 +899,7 @@ class LassoLarsIC(BaseTransformer):
|
|
825
899
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
826
900
|
inference_method=inference_method,
|
827
901
|
input_cols=self.input_cols,
|
828
|
-
expected_output_cols=
|
902
|
+
expected_output_cols=expected_output_cols,
|
829
903
|
**transform_kwargs
|
830
904
|
)
|
831
905
|
return output_df
|
@@ -860,17 +934,15 @@ class LassoLarsIC(BaseTransformer):
|
|
860
934
|
transform_kwargs: ScoreKwargsTypedDict = dict()
|
861
935
|
|
862
936
|
if isinstance(dataset, DataFrame):
|
863
|
-
self.
|
864
|
-
|
865
|
-
inference_method="score",
|
866
|
-
)
|
937
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method="score")
|
938
|
+
self._deps = self._get_dependencies()
|
867
939
|
selected_cols = self._get_active_columns()
|
868
940
|
if len(selected_cols) > 0:
|
869
941
|
dataset = dataset.select(selected_cols)
|
870
942
|
assert isinstance(dataset._session, Session) # keep mypy happy
|
871
943
|
transform_kwargs = dict(
|
872
944
|
session=dataset._session,
|
873
|
-
dependencies=
|
945
|
+
dependencies=self._deps,
|
874
946
|
score_sproc_imports=['sklearn'],
|
875
947
|
)
|
876
948
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -935,11 +1007,8 @@ class LassoLarsIC(BaseTransformer):
|
|
935
1007
|
|
936
1008
|
if isinstance(dataset, DataFrame):
|
937
1009
|
|
938
|
-
self.
|
939
|
-
|
940
|
-
inference_method=inference_method,
|
941
|
-
|
942
|
-
)
|
1010
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
1011
|
+
self._deps = self._get_dependencies()
|
943
1012
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
944
1013
|
transform_kwargs = dict(
|
945
1014
|
session = dataset._session,
|
@@ -972,50 +1041,84 @@ class LassoLarsIC(BaseTransformer):
|
|
972
1041
|
)
|
973
1042
|
return output_df
|
974
1043
|
|
1044
|
+
|
1045
|
+
|
1046
|
+
def to_sklearn(self) -> Any:
|
1047
|
+
"""Get sklearn.linear_model.LassoLarsIC object.
|
1048
|
+
"""
|
1049
|
+
if self._sklearn_object is None:
|
1050
|
+
self._sklearn_object = self._create_sklearn_object()
|
1051
|
+
return self._sklearn_object
|
1052
|
+
|
1053
|
+
def to_xgboost(self) -> Any:
|
1054
|
+
raise exceptions.SnowflakeMLException(
|
1055
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1056
|
+
original_exception=AttributeError(
|
1057
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1058
|
+
"to_xgboost()",
|
1059
|
+
"to_sklearn()"
|
1060
|
+
)
|
1061
|
+
),
|
1062
|
+
)
|
1063
|
+
|
1064
|
+
def to_lightgbm(self) -> Any:
|
1065
|
+
raise exceptions.SnowflakeMLException(
|
1066
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1067
|
+
original_exception=AttributeError(
|
1068
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1069
|
+
"to_lightgbm()",
|
1070
|
+
"to_sklearn()"
|
1071
|
+
)
|
1072
|
+
),
|
1073
|
+
)
|
1074
|
+
|
1075
|
+
def _get_dependencies(self) -> List[str]:
|
1076
|
+
return self._deps
|
1077
|
+
|
975
1078
|
|
976
|
-
def
|
1079
|
+
def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
977
1080
|
self._model_signature_dict = dict()
|
978
1081
|
|
979
1082
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
980
1083
|
|
981
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input"))
|
1084
|
+
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
982
1085
|
outputs: List[BaseFeatureSpec] = []
|
983
1086
|
if hasattr(self, "predict"):
|
984
1087
|
# keep mypy happy
|
985
|
-
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1088
|
+
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
986
1089
|
# For classifier, the type of predict is the same as the type of label
|
987
|
-
if self._sklearn_object._estimator_type ==
|
988
|
-
|
1090
|
+
if self._sklearn_object._estimator_type == "classifier":
|
1091
|
+
# label columns is the desired type for output
|
989
1092
|
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
990
1093
|
# rename the output columns
|
991
1094
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
992
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
993
|
-
|
994
|
-
|
1095
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1096
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1097
|
+
)
|
995
1098
|
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
996
1099
|
# For outlier models, returns -1 for outliers and 1 for inliers.
|
997
|
-
# Clusterer returns int64 cluster labels.
|
1100
|
+
# Clusterer returns int64 cluster labels.
|
998
1101
|
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
999
1102
|
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
1000
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1001
|
-
|
1002
|
-
|
1003
|
-
|
1103
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1104
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1105
|
+
)
|
1106
|
+
|
1004
1107
|
# For regressor, the type of predict is float64
|
1005
|
-
elif self._sklearn_object._estimator_type ==
|
1108
|
+
elif self._sklearn_object._estimator_type == "regressor":
|
1006
1109
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1007
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1008
|
-
|
1009
|
-
|
1010
|
-
|
1110
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1111
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1112
|
+
)
|
1113
|
+
|
1011
1114
|
for prob_func in PROB_FUNCTIONS:
|
1012
1115
|
if hasattr(self, prob_func):
|
1013
1116
|
output_cols_prefix: str = f"{prob_func}_"
|
1014
1117
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1015
1118
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1016
|
-
self._model_signature_dict[prob_func] = ModelSignature(
|
1017
|
-
|
1018
|
-
|
1119
|
+
self._model_signature_dict[prob_func] = ModelSignature(
|
1120
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1121
|
+
)
|
1019
1122
|
|
1020
1123
|
# Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
|
1021
1124
|
items = list(self._model_signature_dict.items())
|
@@ -1028,10 +1131,10 @@ class LassoLarsIC(BaseTransformer):
|
|
1028
1131
|
"""Returns model signature of current class.
|
1029
1132
|
|
1030
1133
|
Raises:
|
1031
|
-
|
1134
|
+
SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
|
1032
1135
|
|
1033
1136
|
Returns:
|
1034
|
-
Dict
|
1137
|
+
Dict with each method and its input output signature
|
1035
1138
|
"""
|
1036
1139
|
if self._model_signature_dict is None:
|
1037
1140
|
raise exceptions.SnowflakeMLException(
|
@@ -1039,35 +1142,3 @@ class LassoLarsIC(BaseTransformer):
|
|
1039
1142
|
original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
|
1040
1143
|
)
|
1041
1144
|
return self._model_signature_dict
|
1042
|
-
|
1043
|
-
def to_sklearn(self) -> Any:
|
1044
|
-
"""Get sklearn.linear_model.LassoLarsIC object.
|
1045
|
-
"""
|
1046
|
-
if self._sklearn_object is None:
|
1047
|
-
self._sklearn_object = self._create_sklearn_object()
|
1048
|
-
return self._sklearn_object
|
1049
|
-
|
1050
|
-
def to_xgboost(self) -> Any:
|
1051
|
-
raise exceptions.SnowflakeMLException(
|
1052
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1053
|
-
original_exception=AttributeError(
|
1054
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1055
|
-
"to_xgboost()",
|
1056
|
-
"to_sklearn()"
|
1057
|
-
)
|
1058
|
-
),
|
1059
|
-
)
|
1060
|
-
|
1061
|
-
def to_lightgbm(self) -> Any:
|
1062
|
-
raise exceptions.SnowflakeMLException(
|
1063
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1064
|
-
original_exception=AttributeError(
|
1065
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1066
|
-
"to_lightgbm()",
|
1067
|
-
"to_sklearn()"
|
1068
|
-
)
|
1069
|
-
),
|
1070
|
-
)
|
1071
|
-
|
1072
|
-
def _get_dependencies(self) -> List[str]:
|
1073
|
-
return self._deps
|