snowflake-ml-python 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +77 -32
- snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
- snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
- snowflake/ml/_internal/exceptions/error_codes.py +3 -0
- snowflake/ml/_internal/lineage/data_source.py +10 -0
- snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/dataset/__init__.py +10 -0
- snowflake/ml/dataset/dataset.py +454 -129
- snowflake/ml/dataset/dataset_factory.py +53 -0
- snowflake/ml/dataset/dataset_metadata.py +103 -0
- snowflake/ml/dataset/dataset_reader.py +202 -0
- snowflake/ml/feature_store/feature_store.py +531 -332
- snowflake/ml/feature_store/feature_view.py +40 -23
- snowflake/ml/fileset/embedded_stage_fs.py +146 -0
- snowflake/ml/fileset/sfcfs.py +56 -54
- snowflake/ml/fileset/snowfs.py +159 -0
- snowflake/ml/fileset/stage_fs.py +49 -17
- snowflake/ml/model/__init__.py +2 -2
- snowflake/ml/model/_api.py +16 -1
- snowflake/ml/model/_client/model/model_impl.py +27 -0
- snowflake/ml/model/_client/model/model_version_impl.py +137 -50
- snowflake/ml/model/_client/ops/model_ops.py +159 -40
- snowflake/ml/model/_client/sql/model.py +25 -2
- snowflake/ml/model/_client/sql/model_version.py +131 -2
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
- snowflake/ml/model/_model_composer/model_composer.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +38 -51
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +19 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_env/model_env.py +41 -0
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +37 -11
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -5
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
- snowflake/ml/modeling/_internal/model_trainer.py +7 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +29 -7
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +246 -175
- snowflake/ml/modeling/cluster/affinity_propagation.py +246 -175
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +246 -175
- snowflake/ml/modeling/cluster/birch.py +248 -175
- snowflake/ml/modeling/cluster/bisecting_k_means.py +248 -175
- snowflake/ml/modeling/cluster/dbscan.py +246 -175
- snowflake/ml/modeling/cluster/feature_agglomeration.py +248 -175
- snowflake/ml/modeling/cluster/k_means.py +248 -175
- snowflake/ml/modeling/cluster/mean_shift.py +246 -175
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +248 -175
- snowflake/ml/modeling/cluster/optics.py +246 -175
- snowflake/ml/modeling/cluster/spectral_biclustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_clustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_coclustering.py +246 -175
- snowflake/ml/modeling/compose/column_transformer.py +248 -175
- snowflake/ml/modeling/compose/transformed_target_regressor.py +246 -175
- snowflake/ml/modeling/covariance/elliptic_envelope.py +246 -175
- snowflake/ml/modeling/covariance/empirical_covariance.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +246 -175
- snowflake/ml/modeling/covariance/ledoit_wolf.py +246 -175
- snowflake/ml/modeling/covariance/min_cov_det.py +246 -175
- snowflake/ml/modeling/covariance/oas.py +246 -175
- snowflake/ml/modeling/covariance/shrunk_covariance.py +246 -175
- snowflake/ml/modeling/decomposition/dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/factor_analysis.py +248 -175
- snowflake/ml/modeling/decomposition/fast_ica.py +248 -175
- snowflake/ml/modeling/decomposition/incremental_pca.py +248 -175
- snowflake/ml/modeling/decomposition/kernel_pca.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/pca.py +248 -175
- snowflake/ml/modeling/decomposition/sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/truncated_svd.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/isolation_forest.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/stacking_regressor.py +248 -175
- snowflake/ml/modeling/ensemble/voting_classifier.py +248 -175
- snowflake/ml/modeling/ensemble/voting_regressor.py +248 -175
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fdr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fpr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fwe.py +248 -175
- snowflake/ml/modeling/feature_selection/select_k_best.py +248 -175
- snowflake/ml/modeling/feature_selection/select_percentile.py +248 -175
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +248 -175
- snowflake/ml/modeling/feature_selection/variance_threshold.py +248 -175
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +72 -37
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +246 -175
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +246 -175
- snowflake/ml/modeling/impute/iterative_imputer.py +248 -175
- snowflake/ml/modeling/impute/knn_imputer.py +248 -175
- snowflake/ml/modeling/impute/missing_indicator.py +248 -175
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/nystroem.py +248 -175
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +248 -175
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ard_regression.py +246 -175
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/gamma_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/huber_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/lars.py +246 -175
- snowflake/ml/modeling/linear_model/lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +246 -175
- snowflake/ml/modeling/linear_model/linear_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/perceptron.py +246 -175
- snowflake/ml/modeling/linear_model/poisson_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ransac_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ridge.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_cv.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +246 -175
- snowflake/ml/modeling/manifold/isomap.py +248 -175
- snowflake/ml/modeling/manifold/mds.py +248 -175
- snowflake/ml/modeling/manifold/spectral_embedding.py +248 -175
- snowflake/ml/modeling/manifold/tsne.py +248 -175
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +246 -175
- snowflake/ml/modeling/mixture/gaussian_mixture.py +246 -175
- snowflake/ml/modeling/model_selection/grid_search_cv.py +63 -41
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +80 -38
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/output_code_classifier.py +246 -175
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/complement_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neighbors/kernel_density.py +246 -175
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_centroid.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +246 -175
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +248 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +248 -175
- snowflake/ml/modeling/neural_network/mlp_classifier.py +246 -175
- snowflake/ml/modeling/neural_network/mlp_regressor.py +246 -175
- snowflake/ml/modeling/pipeline/pipeline.py +517 -35
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +13 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +248 -175
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +246 -175
- snowflake/ml/modeling/semi_supervised/label_spreading.py +246 -175
- snowflake/ml/modeling/svm/linear_svc.py +246 -175
- snowflake/ml/modeling/svm/linear_svr.py +246 -175
- snowflake/ml/modeling/svm/nu_svc.py +246 -175
- snowflake/ml/modeling/svm/nu_svr.py +246 -175
- snowflake/ml/modeling/svm/svc.py +246 -175
- snowflake/ml/modeling/svm/svr.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_regressor.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +246 -175
- snowflake/ml/registry/model_registry.py +3 -149
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +129 -57
- snowflake_ml_python-1.5.0.dist-info/RECORD +380 -0
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- snowflake/ml/registry/_artifact_manager.py +0 -156
- snowflake/ml/registry/artifact.py +0 -46
- snowflake_ml_python-1.4.0.dist-info/RECORD +0 -370
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
|
|
33
33
|
BatchInferenceKwargsTypedDict,
|
34
34
|
ScoreKwargsTypedDict
|
35
35
|
)
|
36
|
+
from snowflake.ml.model._signatures import utils as model_signature_utils
|
37
|
+
from snowflake.ml.model.model_signature import (
|
38
|
+
BaseFeatureSpec,
|
39
|
+
DataType,
|
40
|
+
FeatureSpec,
|
41
|
+
ModelSignature,
|
42
|
+
_infer_signature,
|
43
|
+
_rename_signature_with_snowflake_identifiers,
|
44
|
+
)
|
36
45
|
|
37
46
|
from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
|
38
47
|
|
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
|
|
43
52
|
validate_sklearn_args,
|
44
53
|
)
|
45
54
|
|
46
|
-
from snowflake.ml.model.model_signature import (
|
47
|
-
DataType,
|
48
|
-
FeatureSpec,
|
49
|
-
ModelSignature,
|
50
|
-
_infer_signature,
|
51
|
-
_rename_signature_with_snowflake_identifiers,
|
52
|
-
BaseFeatureSpec,
|
53
|
-
)
|
54
|
-
from snowflake.ml.model._signatures import utils as model_signature_utils
|
55
|
-
|
56
55
|
_PROJECT = "ModelDevelopment"
|
57
56
|
# Derive subproject from module name by removing "sklearn"
|
58
57
|
# and converting module name from underscore to CamelCase
|
@@ -61,12 +60,6 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.manifold".replace("sklea
|
|
61
60
|
|
62
61
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
63
62
|
|
64
|
-
def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
|
65
|
-
def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
|
66
|
-
return True and callable(getattr(self._sklearn_object, "fit_transform", None))
|
67
|
-
return check
|
68
|
-
|
69
|
-
|
70
63
|
class Isomap(BaseTransformer):
|
71
64
|
r"""Isomap Embedding
|
72
65
|
For more details on this class, see [sklearn.manifold.Isomap]
|
@@ -278,12 +271,7 @@ class Isomap(BaseTransformer):
|
|
278
271
|
)
|
279
272
|
return selected_cols
|
280
273
|
|
281
|
-
|
282
|
-
project=_PROJECT,
|
283
|
-
subproject=_SUBPROJECT,
|
284
|
-
custom_tags=dict([("autogen", True)]),
|
285
|
-
)
|
286
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "Isomap":
|
274
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "Isomap":
|
287
275
|
"""Compute the embedding vectors for data X
|
288
276
|
For more details on this function, see [sklearn.manifold.Isomap.fit]
|
289
277
|
(https://scikit-learn.org/stable/modules/generated/sklearn.manifold.Isomap.html#sklearn.manifold.Isomap.fit)
|
@@ -310,12 +298,14 @@ class Isomap(BaseTransformer):
|
|
310
298
|
|
311
299
|
self._snowpark_cols = dataset.select(self.input_cols).columns
|
312
300
|
|
313
|
-
|
301
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
314
302
|
if SNOWML_SPROC_ENV in os.environ:
|
315
303
|
statement_params = telemetry.get_function_usage_statement_params(
|
316
304
|
project=_PROJECT,
|
317
305
|
subproject=_SUBPROJECT,
|
318
|
-
function_name=telemetry.get_statement_params_full_func_name(
|
306
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
307
|
+
inspect.currentframe(), Isomap.__class__.__name__
|
308
|
+
),
|
319
309
|
api_calls=[Session.call],
|
320
310
|
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
321
311
|
)
|
@@ -336,27 +326,24 @@ class Isomap(BaseTransformer):
|
|
336
326
|
)
|
337
327
|
self._sklearn_object = model_trainer.train()
|
338
328
|
self._is_fitted = True
|
339
|
-
self.
|
329
|
+
self._generate_model_signatures(dataset)
|
340
330
|
return self
|
341
331
|
|
342
332
|
def _batch_inference_validate_snowpark(
|
343
333
|
self,
|
344
334
|
dataset: DataFrame,
|
345
335
|
inference_method: str,
|
346
|
-
) ->
|
347
|
-
"""Util method to run validate that batch inference can be run on a snowpark dataframe
|
348
|
-
return the available package that exists in the snowflake anaconda channel
|
336
|
+
) -> None:
|
337
|
+
"""Util method to run validate that batch inference can be run on a snowpark dataframe.
|
349
338
|
|
350
339
|
Args:
|
351
340
|
dataset: snowpark dataframe
|
352
341
|
inference_method: the inference method such as predict, score...
|
353
|
-
|
342
|
+
|
354
343
|
Raises:
|
355
344
|
SnowflakeMLException: If the estimator is not fitted, raise error
|
356
345
|
SnowflakeMLException: If the session is None, raise error
|
357
346
|
|
358
|
-
Returns:
|
359
|
-
A list of available package that exists in the snowflake anaconda channel
|
360
347
|
"""
|
361
348
|
if not self._is_fitted:
|
362
349
|
raise exceptions.SnowflakeMLException(
|
@@ -374,9 +361,7 @@ class Isomap(BaseTransformer):
|
|
374
361
|
"Session must not specified for snowpark dataset."
|
375
362
|
),
|
376
363
|
)
|
377
|
-
|
378
|
-
return pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
|
379
|
-
pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
|
364
|
+
|
380
365
|
|
381
366
|
@available_if(original_estimator_has_callable("predict")) # type: ignore[misc]
|
382
367
|
@telemetry.send_api_usage_telemetry(
|
@@ -410,7 +395,9 @@ class Isomap(BaseTransformer):
|
|
410
395
|
# when it is classifier, infer the datatype from label columns
|
411
396
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
412
397
|
# Batch inference takes a single expected output column type. Use the first columns type for now.
|
413
|
-
label_cols_signatures = [
|
398
|
+
label_cols_signatures = [
|
399
|
+
row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
|
400
|
+
]
|
414
401
|
if len(label_cols_signatures) == 0:
|
415
402
|
error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
|
416
403
|
raise exceptions.SnowflakeMLException(
|
@@ -418,25 +405,23 @@ class Isomap(BaseTransformer):
|
|
418
405
|
original_exception=ValueError(error_str),
|
419
406
|
)
|
420
407
|
|
421
|
-
expected_type_inferred = convert_sp_to_sf_type(
|
422
|
-
label_cols_signatures[0].as_snowpark_type()
|
423
|
-
)
|
408
|
+
expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
|
424
409
|
|
425
|
-
self.
|
426
|
-
|
410
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
411
|
+
self._deps = self._get_dependencies()
|
412
|
+
assert isinstance(
|
413
|
+
dataset._session, Session
|
414
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
427
415
|
|
428
416
|
transform_kwargs = dict(
|
429
|
-
session
|
430
|
-
dependencies
|
431
|
-
drop_input_cols
|
432
|
-
expected_output_cols_type
|
417
|
+
session=dataset._session,
|
418
|
+
dependencies=self._deps,
|
419
|
+
drop_input_cols=self._drop_input_cols,
|
420
|
+
expected_output_cols_type=expected_type_inferred,
|
433
421
|
)
|
434
422
|
|
435
423
|
elif isinstance(dataset, pd.DataFrame):
|
436
|
-
transform_kwargs = dict(
|
437
|
-
snowpark_input_cols = self._snowpark_cols,
|
438
|
-
drop_input_cols = self._drop_input_cols
|
439
|
-
)
|
424
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
440
425
|
|
441
426
|
transform_handlers = ModelTransformerBuilder.build(
|
442
427
|
dataset=dataset,
|
@@ -478,7 +463,7 @@ class Isomap(BaseTransformer):
|
|
478
463
|
Transformed dataset.
|
479
464
|
"""
|
480
465
|
super()._check_dataset_type(dataset)
|
481
|
-
inference_method="transform"
|
466
|
+
inference_method = "transform"
|
482
467
|
|
483
468
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
484
469
|
# are specific to the type of dataset used.
|
@@ -508,24 +493,19 @@ class Isomap(BaseTransformer):
|
|
508
493
|
if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
|
509
494
|
expected_dtype = convert_sp_to_sf_type(output_types[0])
|
510
495
|
|
511
|
-
self.
|
512
|
-
|
513
|
-
inference_method=inference_method,
|
514
|
-
)
|
496
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
497
|
+
self._deps = self._get_dependencies()
|
515
498
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
516
499
|
|
517
500
|
transform_kwargs = dict(
|
518
|
-
session
|
519
|
-
dependencies
|
520
|
-
drop_input_cols
|
521
|
-
expected_output_cols_type
|
501
|
+
session=dataset._session,
|
502
|
+
dependencies=self._deps,
|
503
|
+
drop_input_cols=self._drop_input_cols,
|
504
|
+
expected_output_cols_type=expected_dtype,
|
522
505
|
)
|
523
506
|
|
524
507
|
elif isinstance(dataset, pd.DataFrame):
|
525
|
-
transform_kwargs = dict(
|
526
|
-
snowpark_input_cols = self._snowpark_cols,
|
527
|
-
drop_input_cols = self._drop_input_cols
|
528
|
-
)
|
508
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
529
509
|
|
530
510
|
transform_handlers = ModelTransformerBuilder.build(
|
531
511
|
dataset=dataset,
|
@@ -544,7 +524,11 @@ class Isomap(BaseTransformer):
|
|
544
524
|
return output_df
|
545
525
|
|
546
526
|
@available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
|
547
|
-
def fit_predict(
|
527
|
+
def fit_predict(
|
528
|
+
self,
|
529
|
+
dataset: Union[DataFrame, pd.DataFrame],
|
530
|
+
output_cols_prefix: str = "fit_predict_",
|
531
|
+
) -> Union[DataFrame, pd.DataFrame]:
|
548
532
|
""" Method not supported for this class.
|
549
533
|
|
550
534
|
|
@@ -569,22 +553,106 @@ class Isomap(BaseTransformer):
|
|
569
553
|
)
|
570
554
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
571
555
|
drop_input_cols=self._drop_input_cols,
|
572
|
-
expected_output_cols_list=
|
556
|
+
expected_output_cols_list=(
|
557
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
558
|
+
),
|
573
559
|
)
|
574
560
|
self._sklearn_object = fitted_estimator
|
575
561
|
self._is_fitted = True
|
576
562
|
return output_result
|
577
563
|
|
564
|
+
|
565
|
+
@available_if(original_estimator_has_callable("fit_transform")) # type: ignore[misc]
|
566
|
+
def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "fit_transform_",) -> Union[DataFrame, pd.DataFrame]:
|
567
|
+
""" Fit the model from data in X and transform X
|
568
|
+
For more details on this function, see [sklearn.manifold.Isomap.fit_transform]
|
569
|
+
(https://scikit-learn.org/stable/modules/generated/sklearn.manifold.Isomap.html#sklearn.manifold.Isomap.fit_transform)
|
570
|
+
|
571
|
+
|
572
|
+
Raises:
|
573
|
+
TypeError: Supported dataset types: snowpark.DataFrame, pandas.DataFrame.
|
578
574
|
|
579
|
-
|
580
|
-
|
581
|
-
|
575
|
+
Args:
|
576
|
+
dataset: Union[snowflake.snowpark.DataFrame, pandas.DataFrame]
|
577
|
+
Snowpark or Pandas DataFrame.
|
578
|
+
output_cols_prefix: Prefix for the response columns
|
582
579
|
Returns:
|
583
580
|
Transformed dataset.
|
584
581
|
"""
|
585
|
-
self.
|
586
|
-
|
587
|
-
|
582
|
+
self._infer_input_output_cols(dataset)
|
583
|
+
super()._check_dataset_type(dataset)
|
584
|
+
model_trainer = ModelTrainerBuilder.build_fit_transform(
|
585
|
+
estimator=self._sklearn_object,
|
586
|
+
dataset=dataset,
|
587
|
+
input_cols=self.input_cols,
|
588
|
+
label_cols=self.label_cols,
|
589
|
+
sample_weight_col=self.sample_weight_col,
|
590
|
+
autogenerated=self._autogenerated,
|
591
|
+
subproject=_SUBPROJECT,
|
592
|
+
)
|
593
|
+
output_result, fitted_estimator = model_trainer.train_fit_transform(
|
594
|
+
drop_input_cols=self._drop_input_cols,
|
595
|
+
expected_output_cols_list=self.output_cols,
|
596
|
+
)
|
597
|
+
self._sklearn_object = fitted_estimator
|
598
|
+
self._is_fitted = True
|
599
|
+
return output_result
|
600
|
+
|
601
|
+
|
602
|
+
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
603
|
+
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
604
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
605
|
+
"""
|
606
|
+
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
607
|
+
# The following condition is introduced for kneighbors methods, and not used in other methods
|
608
|
+
if output_cols:
|
609
|
+
output_cols = [
|
610
|
+
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
611
|
+
for c in output_cols
|
612
|
+
]
|
613
|
+
elif getattr(self._sklearn_object, "classes_", None) is None:
|
614
|
+
output_cols = [output_cols_prefix]
|
615
|
+
elif self._sklearn_object is not None:
|
616
|
+
classes = self._sklearn_object.classes_
|
617
|
+
if isinstance(classes, numpy.ndarray):
|
618
|
+
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
619
|
+
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
620
|
+
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
621
|
+
output_cols = []
|
622
|
+
for i, cl in enumerate(classes):
|
623
|
+
# For binary classification, there is only one output column for each class
|
624
|
+
# ndarray as the two classes are complementary.
|
625
|
+
if len(cl) == 2:
|
626
|
+
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
627
|
+
else:
|
628
|
+
output_cols.extend([
|
629
|
+
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
630
|
+
])
|
631
|
+
else:
|
632
|
+
output_cols = []
|
633
|
+
|
634
|
+
# Make sure column names are valid snowflake identifiers.
|
635
|
+
assert output_cols is not None # Make MyPy happy
|
636
|
+
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
637
|
+
|
638
|
+
return rv
|
639
|
+
|
640
|
+
def _align_expected_output_names(
|
641
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
642
|
+
) -> List[str]:
|
643
|
+
# in case the inferred output column names dimension is different
|
644
|
+
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
645
|
+
output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
|
646
|
+
output_df_columns = list(output_df_pd.columns)
|
647
|
+
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
648
|
+
if self.sample_weight_col:
|
649
|
+
output_df_columns_set -= set(self.sample_weight_col)
|
650
|
+
# if the dimension of inferred output column names is correct; use it
|
651
|
+
if len(expected_output_cols_list) == len(output_df_columns_set):
|
652
|
+
return expected_output_cols_list
|
653
|
+
# otherwise, use the sklearn estimator's output
|
654
|
+
else:
|
655
|
+
return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
588
656
|
|
589
657
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
590
658
|
@telemetry.send_api_usage_telemetry(
|
@@ -616,24 +684,26 @@ class Isomap(BaseTransformer):
|
|
616
684
|
# are specific to the type of dataset used.
|
617
685
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
618
686
|
|
687
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
688
|
+
|
619
689
|
if isinstance(dataset, DataFrame):
|
620
|
-
self.
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
|
690
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
691
|
+
self._deps = self._get_dependencies()
|
692
|
+
assert isinstance(
|
693
|
+
dataset._session, Session
|
694
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
625
695
|
transform_kwargs = dict(
|
626
696
|
session=dataset._session,
|
627
697
|
dependencies=self._deps,
|
628
|
-
drop_input_cols
|
698
|
+
drop_input_cols=self._drop_input_cols,
|
629
699
|
expected_output_cols_type="float",
|
630
700
|
)
|
701
|
+
expected_output_cols = self._align_expected_output_names(
|
702
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
703
|
+
)
|
631
704
|
|
632
705
|
elif isinstance(dataset, pd.DataFrame):
|
633
|
-
transform_kwargs = dict(
|
634
|
-
snowpark_input_cols = self._snowpark_cols,
|
635
|
-
drop_input_cols = self._drop_input_cols
|
636
|
-
)
|
706
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
637
707
|
|
638
708
|
transform_handlers = ModelTransformerBuilder.build(
|
639
709
|
dataset=dataset,
|
@@ -645,7 +715,7 @@ class Isomap(BaseTransformer):
|
|
645
715
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
646
716
|
inference_method=inference_method,
|
647
717
|
input_cols=self.input_cols,
|
648
|
-
expected_output_cols=
|
718
|
+
expected_output_cols=expected_output_cols,
|
649
719
|
**transform_kwargs
|
650
720
|
)
|
651
721
|
return output_df
|
@@ -675,29 +745,30 @@ class Isomap(BaseTransformer):
|
|
675
745
|
Output dataset with log probability of the sample for each class in the model.
|
676
746
|
"""
|
677
747
|
super()._check_dataset_type(dataset)
|
678
|
-
inference_method="predict_log_proba"
|
748
|
+
inference_method = "predict_log_proba"
|
749
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
679
750
|
|
680
751
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
681
752
|
# are specific to the type of dataset used.
|
682
753
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
683
754
|
|
684
755
|
if isinstance(dataset, DataFrame):
|
685
|
-
self.
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
756
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
757
|
+
self._deps = self._get_dependencies()
|
758
|
+
assert isinstance(
|
759
|
+
dataset._session, Session
|
760
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
690
761
|
transform_kwargs = dict(
|
691
762
|
session=dataset._session,
|
692
763
|
dependencies=self._deps,
|
693
|
-
drop_input_cols
|
764
|
+
drop_input_cols=self._drop_input_cols,
|
694
765
|
expected_output_cols_type="float",
|
695
766
|
)
|
767
|
+
expected_output_cols = self._align_expected_output_names(
|
768
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
769
|
+
)
|
696
770
|
elif isinstance(dataset, pd.DataFrame):
|
697
|
-
transform_kwargs = dict(
|
698
|
-
snowpark_input_cols = self._snowpark_cols,
|
699
|
-
drop_input_cols = self._drop_input_cols
|
700
|
-
)
|
771
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
701
772
|
|
702
773
|
transform_handlers = ModelTransformerBuilder.build(
|
703
774
|
dataset=dataset,
|
@@ -710,7 +781,7 @@ class Isomap(BaseTransformer):
|
|
710
781
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
711
782
|
inference_method=inference_method,
|
712
783
|
input_cols=self.input_cols,
|
713
|
-
expected_output_cols=
|
784
|
+
expected_output_cols=expected_output_cols,
|
714
785
|
**transform_kwargs
|
715
786
|
)
|
716
787
|
return output_df
|
@@ -736,30 +807,32 @@ class Isomap(BaseTransformer):
|
|
736
807
|
Output dataset with results of the decision function for the samples in input dataset.
|
737
808
|
"""
|
738
809
|
super()._check_dataset_type(dataset)
|
739
|
-
inference_method="decision_function"
|
810
|
+
inference_method = "decision_function"
|
740
811
|
|
741
812
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
742
813
|
# are specific to the type of dataset used.
|
743
814
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
744
815
|
|
816
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
817
|
+
|
745
818
|
if isinstance(dataset, DataFrame):
|
746
|
-
self.
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
|
819
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
820
|
+
self._deps = self._get_dependencies()
|
821
|
+
assert isinstance(
|
822
|
+
dataset._session, Session
|
823
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
751
824
|
transform_kwargs = dict(
|
752
825
|
session=dataset._session,
|
753
826
|
dependencies=self._deps,
|
754
|
-
drop_input_cols
|
827
|
+
drop_input_cols=self._drop_input_cols,
|
755
828
|
expected_output_cols_type="float",
|
756
829
|
)
|
830
|
+
expected_output_cols = self._align_expected_output_names(
|
831
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
832
|
+
)
|
757
833
|
|
758
834
|
elif isinstance(dataset, pd.DataFrame):
|
759
|
-
transform_kwargs = dict(
|
760
|
-
snowpark_input_cols = self._snowpark_cols,
|
761
|
-
drop_input_cols = self._drop_input_cols
|
762
|
-
)
|
835
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
763
836
|
|
764
837
|
transform_handlers = ModelTransformerBuilder.build(
|
765
838
|
dataset=dataset,
|
@@ -772,7 +845,7 @@ class Isomap(BaseTransformer):
|
|
772
845
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
773
846
|
inference_method=inference_method,
|
774
847
|
input_cols=self.input_cols,
|
775
|
-
expected_output_cols=
|
848
|
+
expected_output_cols=expected_output_cols,
|
776
849
|
**transform_kwargs
|
777
850
|
)
|
778
851
|
return output_df
|
@@ -801,17 +874,17 @@ class Isomap(BaseTransformer):
|
|
801
874
|
Output dataset with probability of the sample for each class in the model.
|
802
875
|
"""
|
803
876
|
super()._check_dataset_type(dataset)
|
804
|
-
inference_method="score_samples"
|
877
|
+
inference_method = "score_samples"
|
805
878
|
|
806
879
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
807
880
|
# are specific to the type of dataset used.
|
808
881
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
809
882
|
|
883
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
884
|
+
|
810
885
|
if isinstance(dataset, DataFrame):
|
811
|
-
self.
|
812
|
-
|
813
|
-
inference_method=inference_method,
|
814
|
-
)
|
886
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
887
|
+
self._deps = self._get_dependencies()
|
815
888
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
816
889
|
transform_kwargs = dict(
|
817
890
|
session=dataset._session,
|
@@ -819,6 +892,9 @@ class Isomap(BaseTransformer):
|
|
819
892
|
drop_input_cols = self._drop_input_cols,
|
820
893
|
expected_output_cols_type="float",
|
821
894
|
)
|
895
|
+
expected_output_cols = self._align_expected_output_names(
|
896
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
897
|
+
)
|
822
898
|
|
823
899
|
elif isinstance(dataset, pd.DataFrame):
|
824
900
|
transform_kwargs = dict(
|
@@ -837,7 +913,7 @@ class Isomap(BaseTransformer):
|
|
837
913
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
838
914
|
inference_method=inference_method,
|
839
915
|
input_cols=self.input_cols,
|
840
|
-
expected_output_cols=
|
916
|
+
expected_output_cols=expected_output_cols,
|
841
917
|
**transform_kwargs
|
842
918
|
)
|
843
919
|
return output_df
|
@@ -870,17 +946,15 @@ class Isomap(BaseTransformer):
|
|
870
946
|
transform_kwargs: ScoreKwargsTypedDict = dict()
|
871
947
|
|
872
948
|
if isinstance(dataset, DataFrame):
|
873
|
-
self.
|
874
|
-
|
875
|
-
inference_method="score",
|
876
|
-
)
|
949
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method="score")
|
950
|
+
self._deps = self._get_dependencies()
|
877
951
|
selected_cols = self._get_active_columns()
|
878
952
|
if len(selected_cols) > 0:
|
879
953
|
dataset = dataset.select(selected_cols)
|
880
954
|
assert isinstance(dataset._session, Session) # keep mypy happy
|
881
955
|
transform_kwargs = dict(
|
882
956
|
session=dataset._session,
|
883
|
-
dependencies=
|
957
|
+
dependencies=self._deps,
|
884
958
|
score_sproc_imports=['sklearn'],
|
885
959
|
)
|
886
960
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -945,11 +1019,8 @@ class Isomap(BaseTransformer):
|
|
945
1019
|
|
946
1020
|
if isinstance(dataset, DataFrame):
|
947
1021
|
|
948
|
-
self.
|
949
|
-
|
950
|
-
inference_method=inference_method,
|
951
|
-
|
952
|
-
)
|
1022
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
1023
|
+
self._deps = self._get_dependencies()
|
953
1024
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
954
1025
|
transform_kwargs = dict(
|
955
1026
|
session = dataset._session,
|
@@ -982,50 +1053,84 @@ class Isomap(BaseTransformer):
|
|
982
1053
|
)
|
983
1054
|
return output_df
|
984
1055
|
|
1056
|
+
|
1057
|
+
|
1058
|
+
def to_sklearn(self) -> Any:
|
1059
|
+
"""Get sklearn.manifold.Isomap object.
|
1060
|
+
"""
|
1061
|
+
if self._sklearn_object is None:
|
1062
|
+
self._sklearn_object = self._create_sklearn_object()
|
1063
|
+
return self._sklearn_object
|
1064
|
+
|
1065
|
+
def to_xgboost(self) -> Any:
|
1066
|
+
raise exceptions.SnowflakeMLException(
|
1067
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1068
|
+
original_exception=AttributeError(
|
1069
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1070
|
+
"to_xgboost()",
|
1071
|
+
"to_sklearn()"
|
1072
|
+
)
|
1073
|
+
),
|
1074
|
+
)
|
985
1075
|
|
986
|
-
def
|
1076
|
+
def to_lightgbm(self) -> Any:
|
1077
|
+
raise exceptions.SnowflakeMLException(
|
1078
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1079
|
+
original_exception=AttributeError(
|
1080
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1081
|
+
"to_lightgbm()",
|
1082
|
+
"to_sklearn()"
|
1083
|
+
)
|
1084
|
+
),
|
1085
|
+
)
|
1086
|
+
|
1087
|
+
def _get_dependencies(self) -> List[str]:
|
1088
|
+
return self._deps
|
1089
|
+
|
1090
|
+
|
1091
|
+
def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
987
1092
|
self._model_signature_dict = dict()
|
988
1093
|
|
989
1094
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
990
1095
|
|
991
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input"))
|
1096
|
+
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
992
1097
|
outputs: List[BaseFeatureSpec] = []
|
993
1098
|
if hasattr(self, "predict"):
|
994
1099
|
# keep mypy happy
|
995
|
-
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1100
|
+
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
996
1101
|
# For classifier, the type of predict is the same as the type of label
|
997
|
-
if self._sklearn_object._estimator_type ==
|
998
|
-
|
1102
|
+
if self._sklearn_object._estimator_type == "classifier":
|
1103
|
+
# label columns is the desired type for output
|
999
1104
|
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1000
1105
|
# rename the output columns
|
1001
1106
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1002
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1003
|
-
|
1004
|
-
|
1107
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1108
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1109
|
+
)
|
1005
1110
|
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
1006
1111
|
# For outlier models, returns -1 for outliers and 1 for inliers.
|
1007
|
-
# Clusterer returns int64 cluster labels.
|
1112
|
+
# Clusterer returns int64 cluster labels.
|
1008
1113
|
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
1009
1114
|
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
1010
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1011
|
-
|
1012
|
-
|
1013
|
-
|
1115
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1116
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1117
|
+
)
|
1118
|
+
|
1014
1119
|
# For regressor, the type of predict is float64
|
1015
|
-
elif self._sklearn_object._estimator_type ==
|
1120
|
+
elif self._sklearn_object._estimator_type == "regressor":
|
1016
1121
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1017
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1018
|
-
|
1019
|
-
|
1020
|
-
|
1122
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1123
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1124
|
+
)
|
1125
|
+
|
1021
1126
|
for prob_func in PROB_FUNCTIONS:
|
1022
1127
|
if hasattr(self, prob_func):
|
1023
1128
|
output_cols_prefix: str = f"{prob_func}_"
|
1024
1129
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1025
1130
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1026
|
-
self._model_signature_dict[prob_func] = ModelSignature(
|
1027
|
-
|
1028
|
-
|
1131
|
+
self._model_signature_dict[prob_func] = ModelSignature(
|
1132
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1133
|
+
)
|
1029
1134
|
|
1030
1135
|
# Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
|
1031
1136
|
items = list(self._model_signature_dict.items())
|
@@ -1038,10 +1143,10 @@ class Isomap(BaseTransformer):
|
|
1038
1143
|
"""Returns model signature of current class.
|
1039
1144
|
|
1040
1145
|
Raises:
|
1041
|
-
|
1146
|
+
SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
|
1042
1147
|
|
1043
1148
|
Returns:
|
1044
|
-
Dict
|
1149
|
+
Dict with each method and its input output signature
|
1045
1150
|
"""
|
1046
1151
|
if self._model_signature_dict is None:
|
1047
1152
|
raise exceptions.SnowflakeMLException(
|
@@ -1049,35 +1154,3 @@ class Isomap(BaseTransformer):
|
|
1049
1154
|
original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
|
1050
1155
|
)
|
1051
1156
|
return self._model_signature_dict
|
1052
|
-
|
1053
|
-
def to_sklearn(self) -> Any:
|
1054
|
-
"""Get sklearn.manifold.Isomap object.
|
1055
|
-
"""
|
1056
|
-
if self._sklearn_object is None:
|
1057
|
-
self._sklearn_object = self._create_sklearn_object()
|
1058
|
-
return self._sklearn_object
|
1059
|
-
|
1060
|
-
def to_xgboost(self) -> Any:
|
1061
|
-
raise exceptions.SnowflakeMLException(
|
1062
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1063
|
-
original_exception=AttributeError(
|
1064
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1065
|
-
"to_xgboost()",
|
1066
|
-
"to_sklearn()"
|
1067
|
-
)
|
1068
|
-
),
|
1069
|
-
)
|
1070
|
-
|
1071
|
-
def to_lightgbm(self) -> Any:
|
1072
|
-
raise exceptions.SnowflakeMLException(
|
1073
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1074
|
-
original_exception=AttributeError(
|
1075
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1076
|
-
"to_lightgbm()",
|
1077
|
-
"to_sklearn()"
|
1078
|
-
)
|
1079
|
-
),
|
1080
|
-
)
|
1081
|
-
|
1082
|
-
def _get_dependencies(self) -> List[str]:
|
1083
|
-
return self._deps
|