snowflake-ml-python 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +77 -32
- snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
- snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
- snowflake/ml/_internal/exceptions/error_codes.py +3 -0
- snowflake/ml/_internal/lineage/data_source.py +10 -0
- snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/dataset/__init__.py +10 -0
- snowflake/ml/dataset/dataset.py +454 -129
- snowflake/ml/dataset/dataset_factory.py +53 -0
- snowflake/ml/dataset/dataset_metadata.py +103 -0
- snowflake/ml/dataset/dataset_reader.py +202 -0
- snowflake/ml/feature_store/feature_store.py +531 -332
- snowflake/ml/feature_store/feature_view.py +40 -23
- snowflake/ml/fileset/embedded_stage_fs.py +146 -0
- snowflake/ml/fileset/sfcfs.py +56 -54
- snowflake/ml/fileset/snowfs.py +159 -0
- snowflake/ml/fileset/stage_fs.py +49 -17
- snowflake/ml/model/__init__.py +2 -2
- snowflake/ml/model/_api.py +16 -1
- snowflake/ml/model/_client/model/model_impl.py +27 -0
- snowflake/ml/model/_client/model/model_version_impl.py +137 -50
- snowflake/ml/model/_client/ops/model_ops.py +159 -40
- snowflake/ml/model/_client/sql/model.py +25 -2
- snowflake/ml/model/_client/sql/model_version.py +131 -2
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
- snowflake/ml/model/_model_composer/model_composer.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +38 -51
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +19 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_env/model_env.py +41 -0
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +37 -11
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -5
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
- snowflake/ml/modeling/_internal/model_trainer.py +7 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +29 -7
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +246 -175
- snowflake/ml/modeling/cluster/affinity_propagation.py +246 -175
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +246 -175
- snowflake/ml/modeling/cluster/birch.py +248 -175
- snowflake/ml/modeling/cluster/bisecting_k_means.py +248 -175
- snowflake/ml/modeling/cluster/dbscan.py +246 -175
- snowflake/ml/modeling/cluster/feature_agglomeration.py +248 -175
- snowflake/ml/modeling/cluster/k_means.py +248 -175
- snowflake/ml/modeling/cluster/mean_shift.py +246 -175
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +248 -175
- snowflake/ml/modeling/cluster/optics.py +246 -175
- snowflake/ml/modeling/cluster/spectral_biclustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_clustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_coclustering.py +246 -175
- snowflake/ml/modeling/compose/column_transformer.py +248 -175
- snowflake/ml/modeling/compose/transformed_target_regressor.py +246 -175
- snowflake/ml/modeling/covariance/elliptic_envelope.py +246 -175
- snowflake/ml/modeling/covariance/empirical_covariance.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +246 -175
- snowflake/ml/modeling/covariance/ledoit_wolf.py +246 -175
- snowflake/ml/modeling/covariance/min_cov_det.py +246 -175
- snowflake/ml/modeling/covariance/oas.py +246 -175
- snowflake/ml/modeling/covariance/shrunk_covariance.py +246 -175
- snowflake/ml/modeling/decomposition/dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/factor_analysis.py +248 -175
- snowflake/ml/modeling/decomposition/fast_ica.py +248 -175
- snowflake/ml/modeling/decomposition/incremental_pca.py +248 -175
- snowflake/ml/modeling/decomposition/kernel_pca.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/pca.py +248 -175
- snowflake/ml/modeling/decomposition/sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/truncated_svd.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/isolation_forest.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/stacking_regressor.py +248 -175
- snowflake/ml/modeling/ensemble/voting_classifier.py +248 -175
- snowflake/ml/modeling/ensemble/voting_regressor.py +248 -175
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fdr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fpr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fwe.py +248 -175
- snowflake/ml/modeling/feature_selection/select_k_best.py +248 -175
- snowflake/ml/modeling/feature_selection/select_percentile.py +248 -175
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +248 -175
- snowflake/ml/modeling/feature_selection/variance_threshold.py +248 -175
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +72 -37
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +246 -175
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +246 -175
- snowflake/ml/modeling/impute/iterative_imputer.py +248 -175
- snowflake/ml/modeling/impute/knn_imputer.py +248 -175
- snowflake/ml/modeling/impute/missing_indicator.py +248 -175
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/nystroem.py +248 -175
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +248 -175
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ard_regression.py +246 -175
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/gamma_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/huber_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/lars.py +246 -175
- snowflake/ml/modeling/linear_model/lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +246 -175
- snowflake/ml/modeling/linear_model/linear_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/perceptron.py +246 -175
- snowflake/ml/modeling/linear_model/poisson_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ransac_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ridge.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_cv.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +246 -175
- snowflake/ml/modeling/manifold/isomap.py +248 -175
- snowflake/ml/modeling/manifold/mds.py +248 -175
- snowflake/ml/modeling/manifold/spectral_embedding.py +248 -175
- snowflake/ml/modeling/manifold/tsne.py +248 -175
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +246 -175
- snowflake/ml/modeling/mixture/gaussian_mixture.py +246 -175
- snowflake/ml/modeling/model_selection/grid_search_cv.py +63 -41
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +80 -38
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/output_code_classifier.py +246 -175
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/complement_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neighbors/kernel_density.py +246 -175
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_centroid.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +246 -175
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +248 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +248 -175
- snowflake/ml/modeling/neural_network/mlp_classifier.py +246 -175
- snowflake/ml/modeling/neural_network/mlp_regressor.py +246 -175
- snowflake/ml/modeling/pipeline/pipeline.py +517 -35
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +13 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +248 -175
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +246 -175
- snowflake/ml/modeling/semi_supervised/label_spreading.py +246 -175
- snowflake/ml/modeling/svm/linear_svc.py +246 -175
- snowflake/ml/modeling/svm/linear_svr.py +246 -175
- snowflake/ml/modeling/svm/nu_svc.py +246 -175
- snowflake/ml/modeling/svm/nu_svr.py +246 -175
- snowflake/ml/modeling/svm/svc.py +246 -175
- snowflake/ml/modeling/svm/svr.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_regressor.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +246 -175
- snowflake/ml/registry/model_registry.py +3 -149
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +129 -57
- snowflake_ml_python-1.5.0.dist-info/RECORD +380 -0
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- snowflake/ml/registry/_artifact_manager.py +0 -156
- snowflake/ml/registry/artifact.py +0 -46
- snowflake_ml_python-1.4.0.dist-info/RECORD +0 -370
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0
snowflake/ml/modeling/covariance/graphical_lasso_cv.py (diff expanded below)

@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
     BatchInferenceKwargsTypedDict,
     ScoreKwargsTypedDict
 )
+from snowflake.ml.model._signatures import utils as model_signature_utils
+from snowflake.ml.model.model_signature import (
+    BaseFeatureSpec,
+    DataType,
+    FeatureSpec,
+    ModelSignature,
+    _infer_signature,
+    _rename_signature_with_snowflake_identifiers,
+)
 
 from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
 
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     validate_sklearn_args,
 )
 
-from snowflake.ml.model.model_signature import (
-    DataType,
-    FeatureSpec,
-    ModelSignature,
-    _infer_signature,
-    _rename_signature_with_snowflake_identifiers,
-    BaseFeatureSpec,
-)
-from snowflake.ml.model._signatures import utils as model_signature_utils
-
 _PROJECT = "ModelDevelopment"
 # Derive subproject from module name by removing "sklearn"
 # and converting module name from underscore to CamelCase
@@ -61,12 +60,6 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.covariance".replace("skl
 
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
-def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
-    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
-        return False and callable(getattr(self._sklearn_object, "fit_transform", None))
-    return check
-
-
 class GraphicalLassoCV(BaseTransformer):
     r"""Sparse inverse covariance w/ cross-validated choice of the l1 penalty
     For more details on this class, see [sklearn.covariance.GraphicalLassoCV]
@@ -276,12 +269,7 @@ class GraphicalLassoCV(BaseTransformer):
         )
         return selected_cols
 
-
-        project=_PROJECT,
-        subproject=_SUBPROJECT,
-        custom_tags=dict([("autogen", True)]),
-    )
-    def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "GraphicalLassoCV":
+    def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "GraphicalLassoCV":
         """Fit the GraphicalLasso covariance model to X
         For more details on this function, see [sklearn.covariance.GraphicalLassoCV.fit]
         (https://scikit-learn.org/stable/modules/generated/sklearn.covariance.GraphicalLassoCV.html#sklearn.covariance.GraphicalLassoCV.fit)
@@ -308,12 +296,14 @@ class GraphicalLassoCV(BaseTransformer):
 
         self._snowpark_cols = dataset.select(self.input_cols).columns
 
-
+        # If we are already in a stored procedure, no need to kick off another one.
         if SNOWML_SPROC_ENV in os.environ:
             statement_params = telemetry.get_function_usage_statement_params(
                 project=_PROJECT,
                 subproject=_SUBPROJECT,
-                function_name=telemetry.get_statement_params_full_func_name(
+                function_name=telemetry.get_statement_params_full_func_name(
+                    inspect.currentframe(), GraphicalLassoCV.__class__.__name__
+                ),
                 api_calls=[Session.call],
                 custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
             )
@@ -334,27 +324,24 @@ class GraphicalLassoCV(BaseTransformer):
         )
         self._sklearn_object = model_trainer.train()
         self._is_fitted = True
-        self.
+        self._generate_model_signatures(dataset)
         return self
 
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
-    ) ->
-        """Util method to run validate that batch inference can be run on a snowpark dataframe
-        return the available package that exists in the snowflake anaconda channel
+    ) -> None:
+        """Util method to run validate that batch inference can be run on a snowpark dataframe.
 
         Args:
             dataset: snowpark dataframe
             inference_method: the inference method such as predict, score...
-
+
         Raises:
             SnowflakeMLException: If the estimator is not fitted, raise error
             SnowflakeMLException: If the session is None, raise error
 
-        Returns:
-            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -372,9 +359,7 @@ class GraphicalLassoCV(BaseTransformer):
                     "Session must not specified for snowpark dataset."
                 ),
             )
-
-        return pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-            pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
+
 
     @available_if(original_estimator_has_callable("predict")) # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
@@ -408,7 +393,9 @@ class GraphicalLassoCV(BaseTransformer):
             # when it is classifier, infer the datatype from label columns
             if expected_type_inferred == "" and 'predict' in self.model_signatures:
                 # Batch inference takes a single expected output column type. Use the first columns type for now.
-                label_cols_signatures = [
+                label_cols_signatures = [
+                    row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
+                ]
                 if len(label_cols_signatures) == 0:
                     error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
                     raise exceptions.SnowflakeMLException(
@@ -416,25 +403,23 @@ class GraphicalLassoCV(BaseTransformer):
                         original_exception=ValueError(error_str),
                     )
 
-                expected_type_inferred = convert_sp_to_sf_type(
-                    label_cols_signatures[0].as_snowpark_type()
-                )
+                expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
 
-            self.
-
+            self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
+            self._deps = self._get_dependencies()
+            assert isinstance(
+                dataset._session, Session
+            ) # mypy does not recognize the check in _batch_inference_validate_snowpark()
 
             transform_kwargs = dict(
-                session
-                dependencies
-                drop_input_cols
-                expected_output_cols_type
+                session=dataset._session,
+                dependencies=self._deps,
+                drop_input_cols=self._drop_input_cols,
+                expected_output_cols_type=expected_type_inferred,
             )
 
         elif isinstance(dataset, pd.DataFrame):
-            transform_kwargs = dict(
-                snowpark_input_cols = self._snowpark_cols,
-                drop_input_cols = self._drop_input_cols
-            )
+            transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
 
         transform_handlers = ModelTransformerBuilder.build(
             dataset=dataset,
@@ -474,7 +459,7 @@ class GraphicalLassoCV(BaseTransformer):
             Transformed dataset.
         """
         super()._check_dataset_type(dataset)
-        inference_method="transform"
+        inference_method = "transform"
 
         # This dictionary contains optional kwargs for batch inference. These kwargs
         # are specific to the type of dataset used.
@@ -504,24 +489,19 @@ class GraphicalLassoCV(BaseTransformer):
             if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
                 expected_dtype = convert_sp_to_sf_type(output_types[0])
 
-            self.
-
-                inference_method=inference_method,
-            )
+            self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
+            self._deps = self._get_dependencies()
             assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
 
             transform_kwargs = dict(
-                session
-                dependencies
-                drop_input_cols
-                expected_output_cols_type
+                session=dataset._session,
+                dependencies=self._deps,
+                drop_input_cols=self._drop_input_cols,
+                expected_output_cols_type=expected_dtype,
             )
 
         elif isinstance(dataset, pd.DataFrame):
-            transform_kwargs = dict(
-                snowpark_input_cols = self._snowpark_cols,
-                drop_input_cols = self._drop_input_cols
-            )
+            transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
 
         transform_handlers = ModelTransformerBuilder.build(
             dataset=dataset,
@@ -540,7 +520,11 @@ class GraphicalLassoCV(BaseTransformer):
         return output_df
 
     @available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
-    def fit_predict(
+    def fit_predict(
+        self,
+        dataset: Union[DataFrame, pd.DataFrame],
+        output_cols_prefix: str = "fit_predict_",
+    ) -> Union[DataFrame, pd.DataFrame]:
         """ Method not supported for this class.
 
 
@@ -565,22 +549,104 @@ class GraphicalLassoCV(BaseTransformer):
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
             drop_input_cols=self._drop_input_cols,
-            expected_output_cols_list=
+            expected_output_cols_list=(
+                self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
+            ),
         )
         self._sklearn_object = fitted_estimator
         self._is_fitted = True
         return output_result
 
+
+    @available_if(original_estimator_has_callable("fit_transform")) # type: ignore[misc]
+    def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "fit_transform_",) -> Union[DataFrame, pd.DataFrame]:
+        """ Method not supported for this class.
+
 
-
-
-
+        Raises:
+            TypeError: Supported dataset types: snowpark.DataFrame, pandas.DataFrame.
+
+        Args:
+            dataset: Union[snowflake.snowpark.DataFrame, pandas.DataFrame]
+                Snowpark or Pandas DataFrame.
+            output_cols_prefix: Prefix for the response columns
         Returns:
             Transformed dataset.
         """
-        self.
-
-
+        self._infer_input_output_cols(dataset)
+        super()._check_dataset_type(dataset)
+        model_trainer = ModelTrainerBuilder.build_fit_transform(
+            estimator=self._sklearn_object,
+            dataset=dataset,
+            input_cols=self.input_cols,
+            label_cols=self.label_cols,
+            sample_weight_col=self.sample_weight_col,
+            autogenerated=self._autogenerated,
+            subproject=_SUBPROJECT,
+        )
+        output_result, fitted_estimator = model_trainer.train_fit_transform(
+            drop_input_cols=self._drop_input_cols,
+            expected_output_cols_list=self.output_cols,
+        )
+        self._sklearn_object = fitted_estimator
+        self._is_fitted = True
+        return output_result
+
+
+    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
+        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
+        """
+        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
+        # The following condition is introduced for kneighbors methods, and not used in other methods
+        if output_cols:
+            output_cols = [
+                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
+                for c in output_cols
+            ]
+        elif getattr(self._sklearn_object, "classes_", None) is None:
+            output_cols = [output_cols_prefix]
+        elif self._sklearn_object is not None:
+            classes = self._sklearn_object.classes_
+            if isinstance(classes, numpy.ndarray):
+                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
+            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
+                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
+                output_cols = []
+                for i, cl in enumerate(classes):
+                    # For binary classification, there is only one output column for each class
+                    # ndarray as the two classes are complementary.
+                    if len(cl) == 2:
+                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
+                    else:
+                        output_cols.extend([
+                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
+                        ])
+        else:
+            output_cols = []
+
+        # Make sure column names are valid snowflake identifiers.
+        assert output_cols is not None # Make MyPy happy
+        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
+
+        return rv
+
+    def _align_expected_output_names(
+        self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
+    ) -> List[str]:
+        # in case the inferred output column names dimension is different
+        # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
+        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        output_df_columns = list(output_df_pd.columns)
+        output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
+        if self.sample_weight_col:
+            output_df_columns_set -= set(self.sample_weight_col)
+        # if the dimension of inferred output column names is correct; use it
+        if len(expected_output_cols_list) == len(output_df_columns_set):
+            return expected_output_cols_list
+        # otherwise, use the sklearn estimator's output
+        else:
+            return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
 
     @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
@@ -612,24 +678,26 @@ class GraphicalLassoCV(BaseTransformer):
         # are specific to the type of dataset used.
         transform_kwargs: BatchInferenceKwargsTypedDict = dict()
 
+        expected_output_cols = self._get_output_column_names(output_cols_prefix)
+
         if isinstance(dataset, DataFrame):
-            self.
-
-
-
-
+            self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
+            self._deps = self._get_dependencies()
+            assert isinstance(
+                dataset._session, Session
+            ) # mypy does not recognize the check in _batch_inference_validate_snowpark()
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                drop_input_cols
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_type="float",
             )
+            expected_output_cols = self._align_expected_output_names(
+                inference_method, dataset, expected_output_cols, output_cols_prefix
+            )
 
         elif isinstance(dataset, pd.DataFrame):
-            transform_kwargs = dict(
-                snowpark_input_cols = self._snowpark_cols,
-                drop_input_cols = self._drop_input_cols
-            )
+            transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
 
         transform_handlers = ModelTransformerBuilder.build(
             dataset=dataset,
@@ -641,7 +709,7 @@ class GraphicalLassoCV(BaseTransformer):
         output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
             inference_method=inference_method,
             input_cols=self.input_cols,
-            expected_output_cols=
+            expected_output_cols=expected_output_cols,
             **transform_kwargs
         )
         return output_df
@@ -671,29 +739,30 @@ class GraphicalLassoCV(BaseTransformer):
             Output dataset with log probability of the sample for each class in the model.
         """
         super()._check_dataset_type(dataset)
-        inference_method="predict_log_proba"
+        inference_method = "predict_log_proba"
+        expected_output_cols = self._get_output_column_names(output_cols_prefix)
 
         # This dictionary contains optional kwargs for batch inference. These kwargs
         # are specific to the type of dataset used.
         transform_kwargs: BatchInferenceKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
-            self.
-
-
-
-
+            self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
+            self._deps = self._get_dependencies()
+            assert isinstance(
+                dataset._session, Session
+            ) # mypy does not recognize the check in _batch_inference_validate_snowpark()
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                drop_input_cols
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_type="float",
             )
+            expected_output_cols = self._align_expected_output_names(
+                inference_method, dataset, expected_output_cols, output_cols_prefix
+            )
         elif isinstance(dataset, pd.DataFrame):
-            transform_kwargs = dict(
-                snowpark_input_cols = self._snowpark_cols,
-                drop_input_cols = self._drop_input_cols
-            )
+            transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
 
         transform_handlers = ModelTransformerBuilder.build(
             dataset=dataset,
@@ -706,7 +775,7 @@ class GraphicalLassoCV(BaseTransformer):
         output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
             inference_method=inference_method,
             input_cols=self.input_cols,
-            expected_output_cols=
+            expected_output_cols=expected_output_cols,
             **transform_kwargs
         )
         return output_df
@@ -732,30 +801,32 @@ class GraphicalLassoCV(BaseTransformer):
             Output dataset with results of the decision function for the samples in input dataset.
         """
         super()._check_dataset_type(dataset)
-        inference_method="decision_function"
+        inference_method = "decision_function"
 
         # This dictionary contains optional kwargs for batch inference. These kwargs
         # are specific to the type of dataset used.
         transform_kwargs: BatchInferenceKwargsTypedDict = dict()
 
+        expected_output_cols = self._get_output_column_names(output_cols_prefix)
+
         if isinstance(dataset, DataFrame):
-            self.
-
-
-
-
+            self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
+            self._deps = self._get_dependencies()
+            assert isinstance(
+                dataset._session, Session
+            ) # mypy does not recognize the check in _batch_inference_validate_snowpark()
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                drop_input_cols
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_type="float",
             )
+            expected_output_cols = self._align_expected_output_names(
+                inference_method, dataset, expected_output_cols, output_cols_prefix
+            )
 
         elif isinstance(dataset, pd.DataFrame):
-            transform_kwargs = dict(
-                snowpark_input_cols = self._snowpark_cols,
-                drop_input_cols = self._drop_input_cols
-            )
+            transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
 
         transform_handlers = ModelTransformerBuilder.build(
             dataset=dataset,
@@ -768,7 +839,7 @@ class GraphicalLassoCV(BaseTransformer):
         output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
             inference_method=inference_method,
             input_cols=self.input_cols,
-            expected_output_cols=
+            expected_output_cols=expected_output_cols,
             **transform_kwargs
         )
         return output_df
@@ -797,17 +868,17 @@ class GraphicalLassoCV(BaseTransformer):
             Output dataset with probability of the sample for each class in the model.
         """
         super()._check_dataset_type(dataset)
-        inference_method="score_samples"
+        inference_method = "score_samples"
 
         # This dictionary contains optional kwargs for batch inference. These kwargs
         # are specific to the type of dataset used.
         transform_kwargs: BatchInferenceKwargsTypedDict = dict()
 
+        expected_output_cols = self._get_output_column_names(output_cols_prefix)
+
         if isinstance(dataset, DataFrame):
-            self.
-
-                inference_method=inference_method,
-            )
+            self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
+            self._deps = self._get_dependencies()
             assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
             transform_kwargs = dict(
                 session=dataset._session,
@@ -815,6 +886,9 @@ class GraphicalLassoCV(BaseTransformer):
                 drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
+            expected_output_cols = self._align_expected_output_names(
+                inference_method, dataset, expected_output_cols, output_cols_prefix
+            )
 
         elif isinstance(dataset, pd.DataFrame):
             transform_kwargs = dict(
@@ -833,7 +907,7 @@ class GraphicalLassoCV(BaseTransformer):
         output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
             inference_method=inference_method,
             input_cols=self.input_cols,
-            expected_output_cols=
+            expected_output_cols=expected_output_cols,
             **transform_kwargs
         )
         return output_df
@@ -868,17 +942,15 @@ class GraphicalLassoCV(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
-            self.
-
-                inference_method="score",
-            )
+            self._batch_inference_validate_snowpark(dataset=dataset, inference_method="score")
+            self._deps = self._get_dependencies()
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session) # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=
+                dependencies=self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -943,11 +1015,8 @@ class GraphicalLassoCV(BaseTransformer):
 
         if isinstance(dataset, DataFrame):
 
-            self.
-
-                inference_method=inference_method,
-
-            )
+            self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
+            self._deps = self._get_dependencies()
             assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
             transform_kwargs = dict(
                 session = dataset._session,
@@ -980,50 +1049,84 @@ class GraphicalLassoCV(BaseTransformer):
             )
         return output_df
 
+
+
+    def to_sklearn(self) -> Any:
+        """Get sklearn.covariance.GraphicalLassoCV object.
+        """
+        if self._sklearn_object is None:
+            self._sklearn_object = self._create_sklearn_object()
+        return self._sklearn_object
+
+    def to_xgboost(self) -> Any:
+        raise exceptions.SnowflakeMLException(
+            error_code=error_codes.METHOD_NOT_ALLOWED,
+            original_exception=AttributeError(
+                modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
+                    "to_xgboost()",
+                    "to_sklearn()"
+                )
+            ),
+        )
+
+    def to_lightgbm(self) -> Any:
+        raise exceptions.SnowflakeMLException(
+            error_code=error_codes.METHOD_NOT_ALLOWED,
+            original_exception=AttributeError(
+                modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
+                    "to_lightgbm()",
+                    "to_sklearn()"
+                )
+            ),
+        )
+
+    def _get_dependencies(self) -> List[str]:
+        return self._deps
+
 
-    def
+    def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         self._model_signature_dict = dict()
 
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
 
-        inputs = list(_infer_signature(dataset[self.input_cols], "input"))
+        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
-            assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
+            assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
             # For classifier, the type of predict is the same as the type of label
-            if self._sklearn_object._estimator_type ==
-
+            if self._sklearn_object._estimator_type == "classifier":
+                # label columns is the desired type for output
                 outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
-                self._model_signature_dict["predict"] = ModelSignature(
-
-
+                self._model_signature_dict["predict"] = ModelSignature(
+                    inputs, ([] if self._drop_input_cols else inputs) + outputs
+                )
             # For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
             # For outlier models, returns -1 for outliers and 1 for inliers.
-            # Clusterer returns int64 cluster labels.
+            # Clusterer returns int64 cluster labels.
             elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
                 outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
-                self._model_signature_dict["predict"] = ModelSignature(
-
-
-
+                self._model_signature_dict["predict"] = ModelSignature(
+                    inputs, ([] if self._drop_input_cols else inputs) + outputs
+                )
+
             # For regressor, the type of predict is float64
-            elif self._sklearn_object._estimator_type ==
+            elif self._sklearn_object._estimator_type == "regressor":
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-                self._model_signature_dict["predict"] = ModelSignature(
-
-
-
+                self._model_signature_dict["predict"] = ModelSignature(
+                    inputs, ([] if self._drop_input_cols else inputs) + outputs
+                )
+
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(
-
-
+                self._model_signature_dict[prob_func] = ModelSignature(
+                    inputs, ([] if self._drop_input_cols else inputs) + outputs
+                )
 
         # Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
         items = list(self._model_signature_dict.items())
@@ -1036,10 +1139,10 @@ class GraphicalLassoCV(BaseTransformer):
         """Returns model signature of current class.
 
         Raises:
-
+            SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
 
         Returns:
-            Dict
+            Dict with each method and its input output signature
         """
         if self._model_signature_dict is None:
             raise exceptions.SnowflakeMLException(
@@ -1047,35 +1150,3 @@ class GraphicalLassoCV(BaseTransformer):
                 original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
             )
         return self._model_signature_dict
-
-    def to_sklearn(self) -> Any:
-        """Get sklearn.covariance.GraphicalLassoCV object.
-        """
-        if self._sklearn_object is None:
-            self._sklearn_object = self._create_sklearn_object()
-        return self._sklearn_object
-
-    def to_xgboost(self) -> Any:
-        raise exceptions.SnowflakeMLException(
-            error_code=error_codes.METHOD_NOT_ALLOWED,
-            original_exception=AttributeError(
-                modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
-                    "to_xgboost()",
-                    "to_sklearn()"
-                )
-            ),
-        )
-
-    def to_lightgbm(self) -> Any:
-        raise exceptions.SnowflakeMLException(
-            error_code=error_codes.METHOD_NOT_ALLOWED,
-            original_exception=AttributeError(
-                modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
-                    "to_lightgbm()",
-                    "to_sklearn()"
-                )
-            ),
-        )
-
-    def _get_dependencies(self) -> List[str]:
-        return self._deps