snowflake-ml-python 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +77 -32
- snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
- snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
- snowflake/ml/_internal/exceptions/error_codes.py +3 -0
- snowflake/ml/_internal/lineage/data_source.py +10 -0
- snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/dataset/__init__.py +10 -0
- snowflake/ml/dataset/dataset.py +454 -129
- snowflake/ml/dataset/dataset_factory.py +53 -0
- snowflake/ml/dataset/dataset_metadata.py +103 -0
- snowflake/ml/dataset/dataset_reader.py +202 -0
- snowflake/ml/feature_store/feature_store.py +531 -332
- snowflake/ml/feature_store/feature_view.py +40 -23
- snowflake/ml/fileset/embedded_stage_fs.py +146 -0
- snowflake/ml/fileset/sfcfs.py +56 -54
- snowflake/ml/fileset/snowfs.py +159 -0
- snowflake/ml/fileset/stage_fs.py +49 -17
- snowflake/ml/model/__init__.py +2 -2
- snowflake/ml/model/_api.py +16 -1
- snowflake/ml/model/_client/model/model_impl.py +27 -0
- snowflake/ml/model/_client/model/model_version_impl.py +137 -50
- snowflake/ml/model/_client/ops/model_ops.py +159 -40
- snowflake/ml/model/_client/sql/model.py +25 -2
- snowflake/ml/model/_client/sql/model_version.py +131 -2
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
- snowflake/ml/model/_model_composer/model_composer.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +38 -51
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +19 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_env/model_env.py +41 -0
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +37 -11
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -5
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
- snowflake/ml/modeling/_internal/model_trainer.py +7 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +29 -7
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +246 -175
- snowflake/ml/modeling/cluster/affinity_propagation.py +246 -175
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +246 -175
- snowflake/ml/modeling/cluster/birch.py +248 -175
- snowflake/ml/modeling/cluster/bisecting_k_means.py +248 -175
- snowflake/ml/modeling/cluster/dbscan.py +246 -175
- snowflake/ml/modeling/cluster/feature_agglomeration.py +248 -175
- snowflake/ml/modeling/cluster/k_means.py +248 -175
- snowflake/ml/modeling/cluster/mean_shift.py +246 -175
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +248 -175
- snowflake/ml/modeling/cluster/optics.py +246 -175
- snowflake/ml/modeling/cluster/spectral_biclustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_clustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_coclustering.py +246 -175
- snowflake/ml/modeling/compose/column_transformer.py +248 -175
- snowflake/ml/modeling/compose/transformed_target_regressor.py +246 -175
- snowflake/ml/modeling/covariance/elliptic_envelope.py +246 -175
- snowflake/ml/modeling/covariance/empirical_covariance.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +246 -175
- snowflake/ml/modeling/covariance/ledoit_wolf.py +246 -175
- snowflake/ml/modeling/covariance/min_cov_det.py +246 -175
- snowflake/ml/modeling/covariance/oas.py +246 -175
- snowflake/ml/modeling/covariance/shrunk_covariance.py +246 -175
- snowflake/ml/modeling/decomposition/dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/factor_analysis.py +248 -175
- snowflake/ml/modeling/decomposition/fast_ica.py +248 -175
- snowflake/ml/modeling/decomposition/incremental_pca.py +248 -175
- snowflake/ml/modeling/decomposition/kernel_pca.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/pca.py +248 -175
- snowflake/ml/modeling/decomposition/sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/truncated_svd.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/isolation_forest.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/stacking_regressor.py +248 -175
- snowflake/ml/modeling/ensemble/voting_classifier.py +248 -175
- snowflake/ml/modeling/ensemble/voting_regressor.py +248 -175
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fdr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fpr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fwe.py +248 -175
- snowflake/ml/modeling/feature_selection/select_k_best.py +248 -175
- snowflake/ml/modeling/feature_selection/select_percentile.py +248 -175
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +248 -175
- snowflake/ml/modeling/feature_selection/variance_threshold.py +248 -175
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +72 -37
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +246 -175
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +246 -175
- snowflake/ml/modeling/impute/iterative_imputer.py +248 -175
- snowflake/ml/modeling/impute/knn_imputer.py +248 -175
- snowflake/ml/modeling/impute/missing_indicator.py +248 -175
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/nystroem.py +248 -175
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +248 -175
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ard_regression.py +246 -175
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/gamma_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/huber_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/lars.py +246 -175
- snowflake/ml/modeling/linear_model/lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +246 -175
- snowflake/ml/modeling/linear_model/linear_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/perceptron.py +246 -175
- snowflake/ml/modeling/linear_model/poisson_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ransac_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ridge.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_cv.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +246 -175
- snowflake/ml/modeling/manifold/isomap.py +248 -175
- snowflake/ml/modeling/manifold/mds.py +248 -175
- snowflake/ml/modeling/manifold/spectral_embedding.py +248 -175
- snowflake/ml/modeling/manifold/tsne.py +248 -175
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +246 -175
- snowflake/ml/modeling/mixture/gaussian_mixture.py +246 -175
- snowflake/ml/modeling/model_selection/grid_search_cv.py +63 -41
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +80 -38
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/output_code_classifier.py +246 -175
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/complement_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neighbors/kernel_density.py +246 -175
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_centroid.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +246 -175
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +248 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +248 -175
- snowflake/ml/modeling/neural_network/mlp_classifier.py +246 -175
- snowflake/ml/modeling/neural_network/mlp_regressor.py +246 -175
- snowflake/ml/modeling/pipeline/pipeline.py +517 -35
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +13 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +248 -175
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +246 -175
- snowflake/ml/modeling/semi_supervised/label_spreading.py +246 -175
- snowflake/ml/modeling/svm/linear_svc.py +246 -175
- snowflake/ml/modeling/svm/linear_svr.py +246 -175
- snowflake/ml/modeling/svm/nu_svc.py +246 -175
- snowflake/ml/modeling/svm/nu_svr.py +246 -175
- snowflake/ml/modeling/svm/svc.py +246 -175
- snowflake/ml/modeling/svm/svr.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_regressor.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +246 -175
- snowflake/ml/registry/model_registry.py +3 -149
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +129 -57
- snowflake_ml_python-1.5.0.dist-info/RECORD +380 -0
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- snowflake/ml/registry/_artifact_manager.py +0 -156
- snowflake/ml/registry/artifact.py +0 -46
- snowflake_ml_python-1.4.0.dist-info/RECORD +0 -370
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
|
|
33
33
|
BatchInferenceKwargsTypedDict,
|
34
34
|
ScoreKwargsTypedDict
|
35
35
|
)
|
36
|
+
from snowflake.ml.model._signatures import utils as model_signature_utils
|
37
|
+
from snowflake.ml.model.model_signature import (
|
38
|
+
BaseFeatureSpec,
|
39
|
+
DataType,
|
40
|
+
FeatureSpec,
|
41
|
+
ModelSignature,
|
42
|
+
_infer_signature,
|
43
|
+
_rename_signature_with_snowflake_identifiers,
|
44
|
+
)
|
36
45
|
|
37
46
|
from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
|
38
47
|
|
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
|
|
43
52
|
validate_sklearn_args,
|
44
53
|
)
|
45
54
|
|
46
|
-
from snowflake.ml.model.model_signature import (
|
47
|
-
DataType,
|
48
|
-
FeatureSpec,
|
49
|
-
ModelSignature,
|
50
|
-
_infer_signature,
|
51
|
-
_rename_signature_with_snowflake_identifiers,
|
52
|
-
BaseFeatureSpec,
|
53
|
-
)
|
54
|
-
from snowflake.ml.model._signatures import utils as model_signature_utils
|
55
|
-
|
56
55
|
_PROJECT = "ModelDevelopment"
|
57
56
|
# Derive subproject from module name by removing "sklearn"
|
58
57
|
# and converting module name from underscore to CamelCase
|
@@ -61,12 +60,6 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.covariance".replace("skl
|
|
61
60
|
|
62
61
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
63
62
|
|
64
|
-
def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
|
65
|
-
def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
|
66
|
-
return False and callable(getattr(self._sklearn_object, "fit_transform", None))
|
67
|
-
return check
|
68
|
-
|
69
|
-
|
70
63
|
class MinCovDet(BaseTransformer):
|
71
64
|
r"""Minimum Covariance Determinant (MCD): robust estimator of covariance
|
72
65
|
For more details on this class, see [sklearn.covariance.MinCovDet]
|
@@ -221,12 +214,7 @@ class MinCovDet(BaseTransformer):
|
|
221
214
|
)
|
222
215
|
return selected_cols
|
223
216
|
|
224
|
-
|
225
|
-
project=_PROJECT,
|
226
|
-
subproject=_SUBPROJECT,
|
227
|
-
custom_tags=dict([("autogen", True)]),
|
228
|
-
)
|
229
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "MinCovDet":
|
217
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "MinCovDet":
|
230
218
|
"""Fit a Minimum Covariance Determinant with the FastMCD algorithm
|
231
219
|
For more details on this function, see [sklearn.covariance.MinCovDet.fit]
|
232
220
|
(https://scikit-learn.org/stable/modules/generated/sklearn.covariance.MinCovDet.html#sklearn.covariance.MinCovDet.fit)
|
@@ -253,12 +241,14 @@ class MinCovDet(BaseTransformer):
|
|
253
241
|
|
254
242
|
self._snowpark_cols = dataset.select(self.input_cols).columns
|
255
243
|
|
256
|
-
|
244
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
257
245
|
if SNOWML_SPROC_ENV in os.environ:
|
258
246
|
statement_params = telemetry.get_function_usage_statement_params(
|
259
247
|
project=_PROJECT,
|
260
248
|
subproject=_SUBPROJECT,
|
261
|
-
function_name=telemetry.get_statement_params_full_func_name(
|
249
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
250
|
+
inspect.currentframe(), MinCovDet.__class__.__name__
|
251
|
+
),
|
262
252
|
api_calls=[Session.call],
|
263
253
|
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
264
254
|
)
|
@@ -279,27 +269,24 @@ class MinCovDet(BaseTransformer):
|
|
279
269
|
)
|
280
270
|
self._sklearn_object = model_trainer.train()
|
281
271
|
self._is_fitted = True
|
282
|
-
self.
|
272
|
+
self._generate_model_signatures(dataset)
|
283
273
|
return self
|
284
274
|
|
285
275
|
def _batch_inference_validate_snowpark(
|
286
276
|
self,
|
287
277
|
dataset: DataFrame,
|
288
278
|
inference_method: str,
|
289
|
-
) ->
|
290
|
-
"""Util method to run validate that batch inference can be run on a snowpark dataframe
|
291
|
-
return the available package that exists in the snowflake anaconda channel
|
279
|
+
) -> None:
|
280
|
+
"""Util method to run validate that batch inference can be run on a snowpark dataframe.
|
292
281
|
|
293
282
|
Args:
|
294
283
|
dataset: snowpark dataframe
|
295
284
|
inference_method: the inference method such as predict, score...
|
296
|
-
|
285
|
+
|
297
286
|
Raises:
|
298
287
|
SnowflakeMLException: If the estimator is not fitted, raise error
|
299
288
|
SnowflakeMLException: If the session is None, raise error
|
300
289
|
|
301
|
-
Returns:
|
302
|
-
A list of available package that exists in the snowflake anaconda channel
|
303
290
|
"""
|
304
291
|
if not self._is_fitted:
|
305
292
|
raise exceptions.SnowflakeMLException(
|
@@ -317,9 +304,7 @@ class MinCovDet(BaseTransformer):
|
|
317
304
|
"Session must not specified for snowpark dataset."
|
318
305
|
),
|
319
306
|
)
|
320
|
-
|
321
|
-
return pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
|
322
|
-
pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
|
307
|
+
|
323
308
|
|
324
309
|
@available_if(original_estimator_has_callable("predict")) # type: ignore[misc]
|
325
310
|
@telemetry.send_api_usage_telemetry(
|
@@ -353,7 +338,9 @@ class MinCovDet(BaseTransformer):
|
|
353
338
|
# when it is classifier, infer the datatype from label columns
|
354
339
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
355
340
|
# Batch inference takes a single expected output column type. Use the first columns type for now.
|
356
|
-
label_cols_signatures = [
|
341
|
+
label_cols_signatures = [
|
342
|
+
row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
|
343
|
+
]
|
357
344
|
if len(label_cols_signatures) == 0:
|
358
345
|
error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
|
359
346
|
raise exceptions.SnowflakeMLException(
|
@@ -361,25 +348,23 @@ class MinCovDet(BaseTransformer):
|
|
361
348
|
original_exception=ValueError(error_str),
|
362
349
|
)
|
363
350
|
|
364
|
-
expected_type_inferred = convert_sp_to_sf_type(
|
365
|
-
label_cols_signatures[0].as_snowpark_type()
|
366
|
-
)
|
351
|
+
expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
|
367
352
|
|
368
|
-
self.
|
369
|
-
|
353
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
354
|
+
self._deps = self._get_dependencies()
|
355
|
+
assert isinstance(
|
356
|
+
dataset._session, Session
|
357
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
370
358
|
|
371
359
|
transform_kwargs = dict(
|
372
|
-
session
|
373
|
-
dependencies
|
374
|
-
drop_input_cols
|
375
|
-
expected_output_cols_type
|
360
|
+
session=dataset._session,
|
361
|
+
dependencies=self._deps,
|
362
|
+
drop_input_cols=self._drop_input_cols,
|
363
|
+
expected_output_cols_type=expected_type_inferred,
|
376
364
|
)
|
377
365
|
|
378
366
|
elif isinstance(dataset, pd.DataFrame):
|
379
|
-
transform_kwargs = dict(
|
380
|
-
snowpark_input_cols = self._snowpark_cols,
|
381
|
-
drop_input_cols = self._drop_input_cols
|
382
|
-
)
|
367
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
383
368
|
|
384
369
|
transform_handlers = ModelTransformerBuilder.build(
|
385
370
|
dataset=dataset,
|
@@ -419,7 +404,7 @@ class MinCovDet(BaseTransformer):
|
|
419
404
|
Transformed dataset.
|
420
405
|
"""
|
421
406
|
super()._check_dataset_type(dataset)
|
422
|
-
inference_method="transform"
|
407
|
+
inference_method = "transform"
|
423
408
|
|
424
409
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
425
410
|
# are specific to the type of dataset used.
|
@@ -449,24 +434,19 @@ class MinCovDet(BaseTransformer):
|
|
449
434
|
if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
|
450
435
|
expected_dtype = convert_sp_to_sf_type(output_types[0])
|
451
436
|
|
452
|
-
self.
|
453
|
-
|
454
|
-
inference_method=inference_method,
|
455
|
-
)
|
437
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
438
|
+
self._deps = self._get_dependencies()
|
456
439
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
457
440
|
|
458
441
|
transform_kwargs = dict(
|
459
|
-
session
|
460
|
-
dependencies
|
461
|
-
drop_input_cols
|
462
|
-
expected_output_cols_type
|
442
|
+
session=dataset._session,
|
443
|
+
dependencies=self._deps,
|
444
|
+
drop_input_cols=self._drop_input_cols,
|
445
|
+
expected_output_cols_type=expected_dtype,
|
463
446
|
)
|
464
447
|
|
465
448
|
elif isinstance(dataset, pd.DataFrame):
|
466
|
-
transform_kwargs = dict(
|
467
|
-
snowpark_input_cols = self._snowpark_cols,
|
468
|
-
drop_input_cols = self._drop_input_cols
|
469
|
-
)
|
449
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
470
450
|
|
471
451
|
transform_handlers = ModelTransformerBuilder.build(
|
472
452
|
dataset=dataset,
|
@@ -485,7 +465,11 @@ class MinCovDet(BaseTransformer):
|
|
485
465
|
return output_df
|
486
466
|
|
487
467
|
@available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
|
488
|
-
def fit_predict(
|
468
|
+
def fit_predict(
|
469
|
+
self,
|
470
|
+
dataset: Union[DataFrame, pd.DataFrame],
|
471
|
+
output_cols_prefix: str = "fit_predict_",
|
472
|
+
) -> Union[DataFrame, pd.DataFrame]:
|
489
473
|
""" Method not supported for this class.
|
490
474
|
|
491
475
|
|
@@ -510,22 +494,104 @@ class MinCovDet(BaseTransformer):
|
|
510
494
|
)
|
511
495
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
512
496
|
drop_input_cols=self._drop_input_cols,
|
513
|
-
expected_output_cols_list=
|
497
|
+
expected_output_cols_list=(
|
498
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
499
|
+
),
|
514
500
|
)
|
515
501
|
self._sklearn_object = fitted_estimator
|
516
502
|
self._is_fitted = True
|
517
503
|
return output_result
|
518
504
|
|
505
|
+
|
506
|
+
@available_if(original_estimator_has_callable("fit_transform")) # type: ignore[misc]
|
507
|
+
def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "fit_transform_",) -> Union[DataFrame, pd.DataFrame]:
|
508
|
+
""" Method not supported for this class.
|
509
|
+
|
519
510
|
|
520
|
-
|
521
|
-
|
522
|
-
|
511
|
+
Raises:
|
512
|
+
TypeError: Supported dataset types: snowpark.DataFrame, pandas.DataFrame.
|
513
|
+
|
514
|
+
Args:
|
515
|
+
dataset: Union[snowflake.snowpark.DataFrame, pandas.DataFrame]
|
516
|
+
Snowpark or Pandas DataFrame.
|
517
|
+
output_cols_prefix: Prefix for the response columns
|
523
518
|
Returns:
|
524
519
|
Transformed dataset.
|
525
520
|
"""
|
526
|
-
self.
|
527
|
-
|
528
|
-
|
521
|
+
self._infer_input_output_cols(dataset)
|
522
|
+
super()._check_dataset_type(dataset)
|
523
|
+
model_trainer = ModelTrainerBuilder.build_fit_transform(
|
524
|
+
estimator=self._sklearn_object,
|
525
|
+
dataset=dataset,
|
526
|
+
input_cols=self.input_cols,
|
527
|
+
label_cols=self.label_cols,
|
528
|
+
sample_weight_col=self.sample_weight_col,
|
529
|
+
autogenerated=self._autogenerated,
|
530
|
+
subproject=_SUBPROJECT,
|
531
|
+
)
|
532
|
+
output_result, fitted_estimator = model_trainer.train_fit_transform(
|
533
|
+
drop_input_cols=self._drop_input_cols,
|
534
|
+
expected_output_cols_list=self.output_cols,
|
535
|
+
)
|
536
|
+
self._sklearn_object = fitted_estimator
|
537
|
+
self._is_fitted = True
|
538
|
+
return output_result
|
539
|
+
|
540
|
+
|
541
|
+
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
542
|
+
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
543
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
544
|
+
"""
|
545
|
+
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
546
|
+
# The following condition is introduced for kneighbors methods, and not used in other methods
|
547
|
+
if output_cols:
|
548
|
+
output_cols = [
|
549
|
+
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
550
|
+
for c in output_cols
|
551
|
+
]
|
552
|
+
elif getattr(self._sklearn_object, "classes_", None) is None:
|
553
|
+
output_cols = [output_cols_prefix]
|
554
|
+
elif self._sklearn_object is not None:
|
555
|
+
classes = self._sklearn_object.classes_
|
556
|
+
if isinstance(classes, numpy.ndarray):
|
557
|
+
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
558
|
+
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
559
|
+
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
560
|
+
output_cols = []
|
561
|
+
for i, cl in enumerate(classes):
|
562
|
+
# For binary classification, there is only one output column for each class
|
563
|
+
# ndarray as the two classes are complementary.
|
564
|
+
if len(cl) == 2:
|
565
|
+
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
566
|
+
else:
|
567
|
+
output_cols.extend([
|
568
|
+
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
569
|
+
])
|
570
|
+
else:
|
571
|
+
output_cols = []
|
572
|
+
|
573
|
+
# Make sure column names are valid snowflake identifiers.
|
574
|
+
assert output_cols is not None # Make MyPy happy
|
575
|
+
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
576
|
+
|
577
|
+
return rv
|
578
|
+
|
579
|
+
def _align_expected_output_names(
|
580
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
581
|
+
) -> List[str]:
|
582
|
+
# in case the inferred output column names dimension is different
|
583
|
+
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
584
|
+
output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
|
585
|
+
output_df_columns = list(output_df_pd.columns)
|
586
|
+
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
587
|
+
if self.sample_weight_col:
|
588
|
+
output_df_columns_set -= set(self.sample_weight_col)
|
589
|
+
# if the dimension of inferred output column names is correct; use it
|
590
|
+
if len(expected_output_cols_list) == len(output_df_columns_set):
|
591
|
+
return expected_output_cols_list
|
592
|
+
# otherwise, use the sklearn estimator's output
|
593
|
+
else:
|
594
|
+
return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
529
595
|
|
530
596
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
531
597
|
@telemetry.send_api_usage_telemetry(
|
@@ -557,24 +623,26 @@ class MinCovDet(BaseTransformer):
|
|
557
623
|
# are specific to the type of dataset used.
|
558
624
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
559
625
|
|
626
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
627
|
+
|
560
628
|
if isinstance(dataset, DataFrame):
|
561
|
-
self.
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
|
629
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
630
|
+
self._deps = self._get_dependencies()
|
631
|
+
assert isinstance(
|
632
|
+
dataset._session, Session
|
633
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
566
634
|
transform_kwargs = dict(
|
567
635
|
session=dataset._session,
|
568
636
|
dependencies=self._deps,
|
569
|
-
drop_input_cols
|
637
|
+
drop_input_cols=self._drop_input_cols,
|
570
638
|
expected_output_cols_type="float",
|
571
639
|
)
|
640
|
+
expected_output_cols = self._align_expected_output_names(
|
641
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
642
|
+
)
|
572
643
|
|
573
644
|
elif isinstance(dataset, pd.DataFrame):
|
574
|
-
transform_kwargs = dict(
|
575
|
-
snowpark_input_cols = self._snowpark_cols,
|
576
|
-
drop_input_cols = self._drop_input_cols
|
577
|
-
)
|
645
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
578
646
|
|
579
647
|
transform_handlers = ModelTransformerBuilder.build(
|
580
648
|
dataset=dataset,
|
@@ -586,7 +654,7 @@ class MinCovDet(BaseTransformer):
|
|
586
654
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
587
655
|
inference_method=inference_method,
|
588
656
|
input_cols=self.input_cols,
|
589
|
-
expected_output_cols=
|
657
|
+
expected_output_cols=expected_output_cols,
|
590
658
|
**transform_kwargs
|
591
659
|
)
|
592
660
|
return output_df
|
@@ -616,29 +684,30 @@ class MinCovDet(BaseTransformer):
|
|
616
684
|
Output dataset with log probability of the sample for each class in the model.
|
617
685
|
"""
|
618
686
|
super()._check_dataset_type(dataset)
|
619
|
-
inference_method="predict_log_proba"
|
687
|
+
inference_method = "predict_log_proba"
|
688
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
620
689
|
|
621
690
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
622
691
|
# are specific to the type of dataset used.
|
623
692
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
624
693
|
|
625
694
|
if isinstance(dataset, DataFrame):
|
626
|
-
self.
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
695
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
696
|
+
self._deps = self._get_dependencies()
|
697
|
+
assert isinstance(
|
698
|
+
dataset._session, Session
|
699
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
631
700
|
transform_kwargs = dict(
|
632
701
|
session=dataset._session,
|
633
702
|
dependencies=self._deps,
|
634
|
-
drop_input_cols
|
703
|
+
drop_input_cols=self._drop_input_cols,
|
635
704
|
expected_output_cols_type="float",
|
636
705
|
)
|
706
|
+
expected_output_cols = self._align_expected_output_names(
|
707
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
708
|
+
)
|
637
709
|
elif isinstance(dataset, pd.DataFrame):
|
638
|
-
transform_kwargs = dict(
|
639
|
-
snowpark_input_cols = self._snowpark_cols,
|
640
|
-
drop_input_cols = self._drop_input_cols
|
641
|
-
)
|
710
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
642
711
|
|
643
712
|
transform_handlers = ModelTransformerBuilder.build(
|
644
713
|
dataset=dataset,
|
@@ -651,7 +720,7 @@ class MinCovDet(BaseTransformer):
|
|
651
720
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
652
721
|
inference_method=inference_method,
|
653
722
|
input_cols=self.input_cols,
|
654
|
-
expected_output_cols=
|
723
|
+
expected_output_cols=expected_output_cols,
|
655
724
|
**transform_kwargs
|
656
725
|
)
|
657
726
|
return output_df
|
@@ -677,30 +746,32 @@ class MinCovDet(BaseTransformer):
|
|
677
746
|
Output dataset with results of the decision function for the samples in input dataset.
|
678
747
|
"""
|
679
748
|
super()._check_dataset_type(dataset)
|
680
|
-
inference_method="decision_function"
|
749
|
+
inference_method = "decision_function"
|
681
750
|
|
682
751
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
683
752
|
# are specific to the type of dataset used.
|
684
753
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
685
754
|
|
755
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
756
|
+
|
686
757
|
if isinstance(dataset, DataFrame):
|
687
|
-
self.
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
758
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
759
|
+
self._deps = self._get_dependencies()
|
760
|
+
assert isinstance(
|
761
|
+
dataset._session, Session
|
762
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
692
763
|
transform_kwargs = dict(
|
693
764
|
session=dataset._session,
|
694
765
|
dependencies=self._deps,
|
695
|
-
drop_input_cols
|
766
|
+
drop_input_cols=self._drop_input_cols,
|
696
767
|
expected_output_cols_type="float",
|
697
768
|
)
|
769
|
+
expected_output_cols = self._align_expected_output_names(
|
770
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
771
|
+
)
|
698
772
|
|
699
773
|
elif isinstance(dataset, pd.DataFrame):
|
700
|
-
transform_kwargs = dict(
|
701
|
-
snowpark_input_cols = self._snowpark_cols,
|
702
|
-
drop_input_cols = self._drop_input_cols
|
703
|
-
)
|
774
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
704
775
|
|
705
776
|
transform_handlers = ModelTransformerBuilder.build(
|
706
777
|
dataset=dataset,
|
@@ -713,7 +784,7 @@ class MinCovDet(BaseTransformer):
|
|
713
784
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
714
785
|
inference_method=inference_method,
|
715
786
|
input_cols=self.input_cols,
|
716
|
-
expected_output_cols=
|
787
|
+
expected_output_cols=expected_output_cols,
|
717
788
|
**transform_kwargs
|
718
789
|
)
|
719
790
|
return output_df
|
@@ -742,17 +813,17 @@ class MinCovDet(BaseTransformer):
|
|
742
813
|
Output dataset with probability of the sample for each class in the model.
|
743
814
|
"""
|
744
815
|
super()._check_dataset_type(dataset)
|
745
|
-
inference_method="score_samples"
|
816
|
+
inference_method = "score_samples"
|
746
817
|
|
747
818
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
748
819
|
# are specific to the type of dataset used.
|
749
820
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
750
821
|
|
822
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
823
|
+
|
751
824
|
if isinstance(dataset, DataFrame):
|
752
|
-
self.
|
753
|
-
|
754
|
-
inference_method=inference_method,
|
755
|
-
)
|
825
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
826
|
+
self._deps = self._get_dependencies()
|
756
827
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
757
828
|
transform_kwargs = dict(
|
758
829
|
session=dataset._session,
|
@@ -760,6 +831,9 @@ class MinCovDet(BaseTransformer):
|
|
760
831
|
drop_input_cols = self._drop_input_cols,
|
761
832
|
expected_output_cols_type="float",
|
762
833
|
)
|
834
|
+
expected_output_cols = self._align_expected_output_names(
|
835
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
836
|
+
)
|
763
837
|
|
764
838
|
elif isinstance(dataset, pd.DataFrame):
|
765
839
|
transform_kwargs = dict(
|
@@ -778,7 +852,7 @@ class MinCovDet(BaseTransformer):
|
|
778
852
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
779
853
|
inference_method=inference_method,
|
780
854
|
input_cols=self.input_cols,
|
781
|
-
expected_output_cols=
|
855
|
+
expected_output_cols=expected_output_cols,
|
782
856
|
**transform_kwargs
|
783
857
|
)
|
784
858
|
return output_df
|
@@ -813,17 +887,15 @@ class MinCovDet(BaseTransformer):
|
|
813
887
|
transform_kwargs: ScoreKwargsTypedDict = dict()
|
814
888
|
|
815
889
|
if isinstance(dataset, DataFrame):
|
816
|
-
self.
|
817
|
-
|
818
|
-
inference_method="score",
|
819
|
-
)
|
890
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method="score")
|
891
|
+
self._deps = self._get_dependencies()
|
820
892
|
selected_cols = self._get_active_columns()
|
821
893
|
if len(selected_cols) > 0:
|
822
894
|
dataset = dataset.select(selected_cols)
|
823
895
|
assert isinstance(dataset._session, Session) # keep mypy happy
|
824
896
|
transform_kwargs = dict(
|
825
897
|
session=dataset._session,
|
826
|
-
dependencies=
|
898
|
+
dependencies=self._deps,
|
827
899
|
score_sproc_imports=['sklearn'],
|
828
900
|
)
|
829
901
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -888,11 +960,8 @@ class MinCovDet(BaseTransformer):
|
|
888
960
|
|
889
961
|
if isinstance(dataset, DataFrame):
|
890
962
|
|
891
|
-
self.
|
892
|
-
|
893
|
-
inference_method=inference_method,
|
894
|
-
|
895
|
-
)
|
963
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
964
|
+
self._deps = self._get_dependencies()
|
896
965
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
897
966
|
transform_kwargs = dict(
|
898
967
|
session = dataset._session,
|
@@ -925,50 +994,84 @@ class MinCovDet(BaseTransformer):
|
|
925
994
|
)
|
926
995
|
return output_df
|
927
996
|
|
997
|
+
|
998
|
+
|
999
|
+
def to_sklearn(self) -> Any:
|
1000
|
+
"""Get sklearn.covariance.MinCovDet object.
|
1001
|
+
"""
|
1002
|
+
if self._sklearn_object is None:
|
1003
|
+
self._sklearn_object = self._create_sklearn_object()
|
1004
|
+
return self._sklearn_object
|
1005
|
+
|
1006
|
+
def to_xgboost(self) -> Any:
|
1007
|
+
raise exceptions.SnowflakeMLException(
|
1008
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1009
|
+
original_exception=AttributeError(
|
1010
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1011
|
+
"to_xgboost()",
|
1012
|
+
"to_sklearn()"
|
1013
|
+
)
|
1014
|
+
),
|
1015
|
+
)
|
1016
|
+
|
1017
|
+
def to_lightgbm(self) -> Any:
|
1018
|
+
raise exceptions.SnowflakeMLException(
|
1019
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1020
|
+
original_exception=AttributeError(
|
1021
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1022
|
+
"to_lightgbm()",
|
1023
|
+
"to_sklearn()"
|
1024
|
+
)
|
1025
|
+
),
|
1026
|
+
)
|
1027
|
+
|
1028
|
+
def _get_dependencies(self) -> List[str]:
|
1029
|
+
return self._deps
|
1030
|
+
|
928
1031
|
|
929
|
-
def
|
1032
|
+
def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
930
1033
|
self._model_signature_dict = dict()
|
931
1034
|
|
932
1035
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
933
1036
|
|
934
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input"))
|
1037
|
+
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
935
1038
|
outputs: List[BaseFeatureSpec] = []
|
936
1039
|
if hasattr(self, "predict"):
|
937
1040
|
# keep mypy happy
|
938
|
-
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1041
|
+
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
939
1042
|
# For classifier, the type of predict is the same as the type of label
|
940
|
-
if self._sklearn_object._estimator_type ==
|
941
|
-
|
1043
|
+
if self._sklearn_object._estimator_type == "classifier":
|
1044
|
+
# label columns is the desired type for output
|
942
1045
|
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
943
1046
|
# rename the output columns
|
944
1047
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
945
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
946
|
-
|
947
|
-
|
1048
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1049
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1050
|
+
)
|
948
1051
|
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
949
1052
|
# For outlier models, returns -1 for outliers and 1 for inliers.
|
950
|
-
# Clusterer returns int64 cluster labels.
|
1053
|
+
# Clusterer returns int64 cluster labels.
|
951
1054
|
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
952
1055
|
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
953
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
954
|
-
|
955
|
-
|
956
|
-
|
1056
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1057
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1058
|
+
)
|
1059
|
+
|
957
1060
|
# For regressor, the type of predict is float64
|
958
|
-
elif self._sklearn_object._estimator_type ==
|
1061
|
+
elif self._sklearn_object._estimator_type == "regressor":
|
959
1062
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
960
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
961
|
-
|
962
|
-
|
963
|
-
|
1063
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1064
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1065
|
+
)
|
1066
|
+
|
964
1067
|
for prob_func in PROB_FUNCTIONS:
|
965
1068
|
if hasattr(self, prob_func):
|
966
1069
|
output_cols_prefix: str = f"{prob_func}_"
|
967
1070
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
968
1071
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
969
|
-
self._model_signature_dict[prob_func] = ModelSignature(
|
970
|
-
|
971
|
-
|
1072
|
+
self._model_signature_dict[prob_func] = ModelSignature(
|
1073
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1074
|
+
)
|
972
1075
|
|
973
1076
|
# Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
|
974
1077
|
items = list(self._model_signature_dict.items())
|
@@ -981,10 +1084,10 @@ class MinCovDet(BaseTransformer):
|
|
981
1084
|
"""Returns model signature of current class.
|
982
1085
|
|
983
1086
|
Raises:
|
984
|
-
|
1087
|
+
SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
|
985
1088
|
|
986
1089
|
Returns:
|
987
|
-
Dict
|
1090
|
+
Dict with each method and its input output signature
|
988
1091
|
"""
|
989
1092
|
if self._model_signature_dict is None:
|
990
1093
|
raise exceptions.SnowflakeMLException(
|
@@ -992,35 +1095,3 @@ class MinCovDet(BaseTransformer):
|
|
992
1095
|
original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
|
993
1096
|
)
|
994
1097
|
return self._model_signature_dict
|
995
|
-
|
996
|
-
def to_sklearn(self) -> Any:
|
997
|
-
"""Get sklearn.covariance.MinCovDet object.
|
998
|
-
"""
|
999
|
-
if self._sklearn_object is None:
|
1000
|
-
self._sklearn_object = self._create_sklearn_object()
|
1001
|
-
return self._sklearn_object
|
1002
|
-
|
1003
|
-
def to_xgboost(self) -> Any:
|
1004
|
-
raise exceptions.SnowflakeMLException(
|
1005
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1006
|
-
original_exception=AttributeError(
|
1007
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1008
|
-
"to_xgboost()",
|
1009
|
-
"to_sklearn()"
|
1010
|
-
)
|
1011
|
-
),
|
1012
|
-
)
|
1013
|
-
|
1014
|
-
def to_lightgbm(self) -> Any:
|
1015
|
-
raise exceptions.SnowflakeMLException(
|
1016
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1017
|
-
original_exception=AttributeError(
|
1018
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1019
|
-
"to_lightgbm()",
|
1020
|
-
"to_sklearn()"
|
1021
|
-
)
|
1022
|
-
),
|
1023
|
-
)
|
1024
|
-
|
1025
|
-
def _get_dependencies(self) -> List[str]:
|
1026
|
-
return self._deps
|