snowflake-ml-python 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +77 -32
- snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
- snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
- snowflake/ml/_internal/exceptions/error_codes.py +3 -0
- snowflake/ml/_internal/lineage/data_source.py +10 -0
- snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/dataset/__init__.py +10 -0
- snowflake/ml/dataset/dataset.py +454 -129
- snowflake/ml/dataset/dataset_factory.py +53 -0
- snowflake/ml/dataset/dataset_metadata.py +103 -0
- snowflake/ml/dataset/dataset_reader.py +202 -0
- snowflake/ml/feature_store/feature_store.py +531 -332
- snowflake/ml/feature_store/feature_view.py +40 -23
- snowflake/ml/fileset/embedded_stage_fs.py +146 -0
- snowflake/ml/fileset/sfcfs.py +56 -54
- snowflake/ml/fileset/snowfs.py +159 -0
- snowflake/ml/fileset/stage_fs.py +49 -17
- snowflake/ml/model/__init__.py +2 -2
- snowflake/ml/model/_api.py +16 -1
- snowflake/ml/model/_client/model/model_impl.py +27 -0
- snowflake/ml/model/_client/model/model_version_impl.py +137 -50
- snowflake/ml/model/_client/ops/model_ops.py +159 -40
- snowflake/ml/model/_client/sql/model.py +25 -2
- snowflake/ml/model/_client/sql/model_version.py +131 -2
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
- snowflake/ml/model/_model_composer/model_composer.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +38 -51
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +19 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_env/model_env.py +41 -0
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +37 -11
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -5
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
- snowflake/ml/modeling/_internal/model_trainer.py +7 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +29 -7
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +246 -175
- snowflake/ml/modeling/cluster/affinity_propagation.py +246 -175
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +246 -175
- snowflake/ml/modeling/cluster/birch.py +248 -175
- snowflake/ml/modeling/cluster/bisecting_k_means.py +248 -175
- snowflake/ml/modeling/cluster/dbscan.py +246 -175
- snowflake/ml/modeling/cluster/feature_agglomeration.py +248 -175
- snowflake/ml/modeling/cluster/k_means.py +248 -175
- snowflake/ml/modeling/cluster/mean_shift.py +246 -175
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +248 -175
- snowflake/ml/modeling/cluster/optics.py +246 -175
- snowflake/ml/modeling/cluster/spectral_biclustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_clustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_coclustering.py +246 -175
- snowflake/ml/modeling/compose/column_transformer.py +248 -175
- snowflake/ml/modeling/compose/transformed_target_regressor.py +246 -175
- snowflake/ml/modeling/covariance/elliptic_envelope.py +246 -175
- snowflake/ml/modeling/covariance/empirical_covariance.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +246 -175
- snowflake/ml/modeling/covariance/ledoit_wolf.py +246 -175
- snowflake/ml/modeling/covariance/min_cov_det.py +246 -175
- snowflake/ml/modeling/covariance/oas.py +246 -175
- snowflake/ml/modeling/covariance/shrunk_covariance.py +246 -175
- snowflake/ml/modeling/decomposition/dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/factor_analysis.py +248 -175
- snowflake/ml/modeling/decomposition/fast_ica.py +248 -175
- snowflake/ml/modeling/decomposition/incremental_pca.py +248 -175
- snowflake/ml/modeling/decomposition/kernel_pca.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/pca.py +248 -175
- snowflake/ml/modeling/decomposition/sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/truncated_svd.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/isolation_forest.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/stacking_regressor.py +248 -175
- snowflake/ml/modeling/ensemble/voting_classifier.py +248 -175
- snowflake/ml/modeling/ensemble/voting_regressor.py +248 -175
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fdr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fpr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fwe.py +248 -175
- snowflake/ml/modeling/feature_selection/select_k_best.py +248 -175
- snowflake/ml/modeling/feature_selection/select_percentile.py +248 -175
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +248 -175
- snowflake/ml/modeling/feature_selection/variance_threshold.py +248 -175
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +72 -37
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +246 -175
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +246 -175
- snowflake/ml/modeling/impute/iterative_imputer.py +248 -175
- snowflake/ml/modeling/impute/knn_imputer.py +248 -175
- snowflake/ml/modeling/impute/missing_indicator.py +248 -175
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/nystroem.py +248 -175
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +248 -175
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ard_regression.py +246 -175
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/gamma_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/huber_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/lars.py +246 -175
- snowflake/ml/modeling/linear_model/lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +246 -175
- snowflake/ml/modeling/linear_model/linear_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/perceptron.py +246 -175
- snowflake/ml/modeling/linear_model/poisson_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ransac_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ridge.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_cv.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +246 -175
- snowflake/ml/modeling/manifold/isomap.py +248 -175
- snowflake/ml/modeling/manifold/mds.py +248 -175
- snowflake/ml/modeling/manifold/spectral_embedding.py +248 -175
- snowflake/ml/modeling/manifold/tsne.py +248 -175
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +246 -175
- snowflake/ml/modeling/mixture/gaussian_mixture.py +246 -175
- snowflake/ml/modeling/model_selection/grid_search_cv.py +63 -41
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +80 -38
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/output_code_classifier.py +246 -175
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/complement_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neighbors/kernel_density.py +246 -175
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_centroid.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +246 -175
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +248 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +248 -175
- snowflake/ml/modeling/neural_network/mlp_classifier.py +246 -175
- snowflake/ml/modeling/neural_network/mlp_regressor.py +246 -175
- snowflake/ml/modeling/pipeline/pipeline.py +517 -35
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +13 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +248 -175
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +246 -175
- snowflake/ml/modeling/semi_supervised/label_spreading.py +246 -175
- snowflake/ml/modeling/svm/linear_svc.py +246 -175
- snowflake/ml/modeling/svm/linear_svr.py +246 -175
- snowflake/ml/modeling/svm/nu_svc.py +246 -175
- snowflake/ml/modeling/svm/nu_svr.py +246 -175
- snowflake/ml/modeling/svm/svc.py +246 -175
- snowflake/ml/modeling/svm/svr.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_regressor.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +246 -175
- snowflake/ml/registry/model_registry.py +3 -149
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +129 -57
- snowflake_ml_python-1.5.0.dist-info/RECORD +380 -0
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- snowflake/ml/registry/_artifact_manager.py +0 -156
- snowflake/ml/registry/artifact.py +0 -46
- snowflake_ml_python-1.4.0.dist-info/RECORD +0 -370
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
|
|
33
33
|
BatchInferenceKwargsTypedDict,
|
34
34
|
ScoreKwargsTypedDict
|
35
35
|
)
|
36
|
+
from snowflake.ml.model._signatures import utils as model_signature_utils
|
37
|
+
from snowflake.ml.model.model_signature import (
|
38
|
+
BaseFeatureSpec,
|
39
|
+
DataType,
|
40
|
+
FeatureSpec,
|
41
|
+
ModelSignature,
|
42
|
+
_infer_signature,
|
43
|
+
_rename_signature_with_snowflake_identifiers,
|
44
|
+
)
|
36
45
|
|
37
46
|
from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
|
38
47
|
|
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
|
|
43
52
|
validate_sklearn_args,
|
44
53
|
)
|
45
54
|
|
46
|
-
from snowflake.ml.model.model_signature import (
|
47
|
-
DataType,
|
48
|
-
FeatureSpec,
|
49
|
-
ModelSignature,
|
50
|
-
_infer_signature,
|
51
|
-
_rename_signature_with_snowflake_identifiers,
|
52
|
-
BaseFeatureSpec,
|
53
|
-
)
|
54
|
-
from snowflake.ml.model._signatures import utils as model_signature_utils
|
55
|
-
|
56
55
|
_PROJECT = "ModelDevelopment"
|
57
56
|
# Derive subproject from module name by removing "sklearn"
|
58
57
|
# and converting module name from underscore to CamelCase
|
@@ -61,12 +60,6 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.cluster".replace("sklear
|
|
61
60
|
|
62
61
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
63
62
|
|
64
|
-
def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
|
65
|
-
def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
|
66
|
-
return False and callable(getattr(self._sklearn_object, "fit_transform", None))
|
67
|
-
return check
|
68
|
-
|
69
|
-
|
70
63
|
class SpectralCoclustering(BaseTransformer):
|
71
64
|
r"""Spectral Co-Clustering algorithm (Dhillon, 2001)
|
72
65
|
For more details on this class, see [sklearn.cluster.SpectralCoclustering]
|
@@ -240,12 +233,7 @@ class SpectralCoclustering(BaseTransformer):
|
|
240
233
|
)
|
241
234
|
return selected_cols
|
242
235
|
|
243
|
-
|
244
|
-
project=_PROJECT,
|
245
|
-
subproject=_SUBPROJECT,
|
246
|
-
custom_tags=dict([("autogen", True)]),
|
247
|
-
)
|
248
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "SpectralCoclustering":
|
236
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "SpectralCoclustering":
|
249
237
|
"""Create a biclustering for X
|
250
238
|
For more details on this function, see [sklearn.cluster.SpectralCoclustering.fit]
|
251
239
|
(https://scikit-learn.org/stable/modules/generated/sklearn.cluster.SpectralCoclustering.html#sklearn.cluster.SpectralCoclustering.fit)
|
@@ -272,12 +260,14 @@ class SpectralCoclustering(BaseTransformer):
|
|
272
260
|
|
273
261
|
self._snowpark_cols = dataset.select(self.input_cols).columns
|
274
262
|
|
275
|
-
|
263
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
276
264
|
if SNOWML_SPROC_ENV in os.environ:
|
277
265
|
statement_params = telemetry.get_function_usage_statement_params(
|
278
266
|
project=_PROJECT,
|
279
267
|
subproject=_SUBPROJECT,
|
280
|
-
function_name=telemetry.get_statement_params_full_func_name(
|
268
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
269
|
+
inspect.currentframe(), SpectralCoclustering.__class__.__name__
|
270
|
+
),
|
281
271
|
api_calls=[Session.call],
|
282
272
|
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
283
273
|
)
|
@@ -298,27 +288,24 @@ class SpectralCoclustering(BaseTransformer):
|
|
298
288
|
)
|
299
289
|
self._sklearn_object = model_trainer.train()
|
300
290
|
self._is_fitted = True
|
301
|
-
self.
|
291
|
+
self._generate_model_signatures(dataset)
|
302
292
|
return self
|
303
293
|
|
304
294
|
def _batch_inference_validate_snowpark(
|
305
295
|
self,
|
306
296
|
dataset: DataFrame,
|
307
297
|
inference_method: str,
|
308
|
-
) ->
|
309
|
-
"""Util method to run validate that batch inference can be run on a snowpark dataframe
|
310
|
-
return the available package that exists in the snowflake anaconda channel
|
298
|
+
) -> None:
|
299
|
+
"""Util method to run validate that batch inference can be run on a snowpark dataframe.
|
311
300
|
|
312
301
|
Args:
|
313
302
|
dataset: snowpark dataframe
|
314
303
|
inference_method: the inference method such as predict, score...
|
315
|
-
|
304
|
+
|
316
305
|
Raises:
|
317
306
|
SnowflakeMLException: If the estimator is not fitted, raise error
|
318
307
|
SnowflakeMLException: If the session is None, raise error
|
319
308
|
|
320
|
-
Returns:
|
321
|
-
A list of available package that exists in the snowflake anaconda channel
|
322
309
|
"""
|
323
310
|
if not self._is_fitted:
|
324
311
|
raise exceptions.SnowflakeMLException(
|
@@ -336,9 +323,7 @@ class SpectralCoclustering(BaseTransformer):
|
|
336
323
|
"Session must not specified for snowpark dataset."
|
337
324
|
),
|
338
325
|
)
|
339
|
-
|
340
|
-
return pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
|
341
|
-
pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
|
326
|
+
|
342
327
|
|
343
328
|
@available_if(original_estimator_has_callable("predict")) # type: ignore[misc]
|
344
329
|
@telemetry.send_api_usage_telemetry(
|
@@ -372,7 +357,9 @@ class SpectralCoclustering(BaseTransformer):
|
|
372
357
|
# when it is classifier, infer the datatype from label columns
|
373
358
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
374
359
|
# Batch inference takes a single expected output column type. Use the first columns type for now.
|
375
|
-
label_cols_signatures = [
|
360
|
+
label_cols_signatures = [
|
361
|
+
row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
|
362
|
+
]
|
376
363
|
if len(label_cols_signatures) == 0:
|
377
364
|
error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
|
378
365
|
raise exceptions.SnowflakeMLException(
|
@@ -380,25 +367,23 @@ class SpectralCoclustering(BaseTransformer):
|
|
380
367
|
original_exception=ValueError(error_str),
|
381
368
|
)
|
382
369
|
|
383
|
-
expected_type_inferred = convert_sp_to_sf_type(
|
384
|
-
label_cols_signatures[0].as_snowpark_type()
|
385
|
-
)
|
370
|
+
expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
|
386
371
|
|
387
|
-
self.
|
388
|
-
|
372
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
373
|
+
self._deps = self._get_dependencies()
|
374
|
+
assert isinstance(
|
375
|
+
dataset._session, Session
|
376
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
389
377
|
|
390
378
|
transform_kwargs = dict(
|
391
|
-
session
|
392
|
-
dependencies
|
393
|
-
drop_input_cols
|
394
|
-
expected_output_cols_type
|
379
|
+
session=dataset._session,
|
380
|
+
dependencies=self._deps,
|
381
|
+
drop_input_cols=self._drop_input_cols,
|
382
|
+
expected_output_cols_type=expected_type_inferred,
|
395
383
|
)
|
396
384
|
|
397
385
|
elif isinstance(dataset, pd.DataFrame):
|
398
|
-
transform_kwargs = dict(
|
399
|
-
snowpark_input_cols = self._snowpark_cols,
|
400
|
-
drop_input_cols = self._drop_input_cols
|
401
|
-
)
|
386
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
402
387
|
|
403
388
|
transform_handlers = ModelTransformerBuilder.build(
|
404
389
|
dataset=dataset,
|
@@ -438,7 +423,7 @@ class SpectralCoclustering(BaseTransformer):
|
|
438
423
|
Transformed dataset.
|
439
424
|
"""
|
440
425
|
super()._check_dataset_type(dataset)
|
441
|
-
inference_method="transform"
|
426
|
+
inference_method = "transform"
|
442
427
|
|
443
428
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
444
429
|
# are specific to the type of dataset used.
|
@@ -468,24 +453,19 @@ class SpectralCoclustering(BaseTransformer):
|
|
468
453
|
if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
|
469
454
|
expected_dtype = convert_sp_to_sf_type(output_types[0])
|
470
455
|
|
471
|
-
self.
|
472
|
-
|
473
|
-
inference_method=inference_method,
|
474
|
-
)
|
456
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
457
|
+
self._deps = self._get_dependencies()
|
475
458
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
476
459
|
|
477
460
|
transform_kwargs = dict(
|
478
|
-
session
|
479
|
-
dependencies
|
480
|
-
drop_input_cols
|
481
|
-
expected_output_cols_type
|
461
|
+
session=dataset._session,
|
462
|
+
dependencies=self._deps,
|
463
|
+
drop_input_cols=self._drop_input_cols,
|
464
|
+
expected_output_cols_type=expected_dtype,
|
482
465
|
)
|
483
466
|
|
484
467
|
elif isinstance(dataset, pd.DataFrame):
|
485
|
-
transform_kwargs = dict(
|
486
|
-
snowpark_input_cols = self._snowpark_cols,
|
487
|
-
drop_input_cols = self._drop_input_cols
|
488
|
-
)
|
468
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
489
469
|
|
490
470
|
transform_handlers = ModelTransformerBuilder.build(
|
491
471
|
dataset=dataset,
|
@@ -504,7 +484,11 @@ class SpectralCoclustering(BaseTransformer):
|
|
504
484
|
return output_df
|
505
485
|
|
506
486
|
@available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
|
507
|
-
def fit_predict(
|
487
|
+
def fit_predict(
|
488
|
+
self,
|
489
|
+
dataset: Union[DataFrame, pd.DataFrame],
|
490
|
+
output_cols_prefix: str = "fit_predict_",
|
491
|
+
) -> Union[DataFrame, pd.DataFrame]:
|
508
492
|
""" Method not supported for this class.
|
509
493
|
|
510
494
|
|
@@ -529,22 +513,104 @@ class SpectralCoclustering(BaseTransformer):
|
|
529
513
|
)
|
530
514
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
531
515
|
drop_input_cols=self._drop_input_cols,
|
532
|
-
expected_output_cols_list=
|
516
|
+
expected_output_cols_list=(
|
517
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
518
|
+
),
|
533
519
|
)
|
534
520
|
self._sklearn_object = fitted_estimator
|
535
521
|
self._is_fitted = True
|
536
522
|
return output_result
|
537
523
|
|
524
|
+
|
525
|
+
@available_if(original_estimator_has_callable("fit_transform")) # type: ignore[misc]
|
526
|
+
def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "fit_transform_",) -> Union[DataFrame, pd.DataFrame]:
|
527
|
+
""" Method not supported for this class.
|
528
|
+
|
538
529
|
|
539
|
-
|
540
|
-
|
541
|
-
|
530
|
+
Raises:
|
531
|
+
TypeError: Supported dataset types: snowpark.DataFrame, pandas.DataFrame.
|
532
|
+
|
533
|
+
Args:
|
534
|
+
dataset: Union[snowflake.snowpark.DataFrame, pandas.DataFrame]
|
535
|
+
Snowpark or Pandas DataFrame.
|
536
|
+
output_cols_prefix: Prefix for the response columns
|
542
537
|
Returns:
|
543
538
|
Transformed dataset.
|
544
539
|
"""
|
545
|
-
self.
|
546
|
-
|
547
|
-
|
540
|
+
self._infer_input_output_cols(dataset)
|
541
|
+
super()._check_dataset_type(dataset)
|
542
|
+
model_trainer = ModelTrainerBuilder.build_fit_transform(
|
543
|
+
estimator=self._sklearn_object,
|
544
|
+
dataset=dataset,
|
545
|
+
input_cols=self.input_cols,
|
546
|
+
label_cols=self.label_cols,
|
547
|
+
sample_weight_col=self.sample_weight_col,
|
548
|
+
autogenerated=self._autogenerated,
|
549
|
+
subproject=_SUBPROJECT,
|
550
|
+
)
|
551
|
+
output_result, fitted_estimator = model_trainer.train_fit_transform(
|
552
|
+
drop_input_cols=self._drop_input_cols,
|
553
|
+
expected_output_cols_list=self.output_cols,
|
554
|
+
)
|
555
|
+
self._sklearn_object = fitted_estimator
|
556
|
+
self._is_fitted = True
|
557
|
+
return output_result
|
558
|
+
|
559
|
+
|
560
|
+
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
561
|
+
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
562
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
563
|
+
"""
|
564
|
+
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
565
|
+
# The following condition is introduced for kneighbors methods, and not used in other methods
|
566
|
+
if output_cols:
|
567
|
+
output_cols = [
|
568
|
+
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
569
|
+
for c in output_cols
|
570
|
+
]
|
571
|
+
elif getattr(self._sklearn_object, "classes_", None) is None:
|
572
|
+
output_cols = [output_cols_prefix]
|
573
|
+
elif self._sklearn_object is not None:
|
574
|
+
classes = self._sklearn_object.classes_
|
575
|
+
if isinstance(classes, numpy.ndarray):
|
576
|
+
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
577
|
+
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
578
|
+
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
579
|
+
output_cols = []
|
580
|
+
for i, cl in enumerate(classes):
|
581
|
+
# For binary classification, there is only one output column for each class
|
582
|
+
# ndarray as the two classes are complementary.
|
583
|
+
if len(cl) == 2:
|
584
|
+
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
585
|
+
else:
|
586
|
+
output_cols.extend([
|
587
|
+
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
588
|
+
])
|
589
|
+
else:
|
590
|
+
output_cols = []
|
591
|
+
|
592
|
+
# Make sure column names are valid snowflake identifiers.
|
593
|
+
assert output_cols is not None # Make MyPy happy
|
594
|
+
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
595
|
+
|
596
|
+
return rv
|
597
|
+
|
598
|
+
def _align_expected_output_names(
|
599
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
600
|
+
) -> List[str]:
|
601
|
+
# in case the inferred output column names dimension is different
|
602
|
+
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
603
|
+
output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
|
604
|
+
output_df_columns = list(output_df_pd.columns)
|
605
|
+
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
606
|
+
if self.sample_weight_col:
|
607
|
+
output_df_columns_set -= set(self.sample_weight_col)
|
608
|
+
# if the dimension of inferred output column names is correct; use it
|
609
|
+
if len(expected_output_cols_list) == len(output_df_columns_set):
|
610
|
+
return expected_output_cols_list
|
611
|
+
# otherwise, use the sklearn estimator's output
|
612
|
+
else:
|
613
|
+
return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
548
614
|
|
549
615
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
550
616
|
@telemetry.send_api_usage_telemetry(
|
@@ -576,24 +642,26 @@ class SpectralCoclustering(BaseTransformer):
|
|
576
642
|
# are specific to the type of dataset used.
|
577
643
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
578
644
|
|
645
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
646
|
+
|
579
647
|
if isinstance(dataset, DataFrame):
|
580
|
-
self.
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
648
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
649
|
+
self._deps = self._get_dependencies()
|
650
|
+
assert isinstance(
|
651
|
+
dataset._session, Session
|
652
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
585
653
|
transform_kwargs = dict(
|
586
654
|
session=dataset._session,
|
587
655
|
dependencies=self._deps,
|
588
|
-
drop_input_cols
|
656
|
+
drop_input_cols=self._drop_input_cols,
|
589
657
|
expected_output_cols_type="float",
|
590
658
|
)
|
659
|
+
expected_output_cols = self._align_expected_output_names(
|
660
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
661
|
+
)
|
591
662
|
|
592
663
|
elif isinstance(dataset, pd.DataFrame):
|
593
|
-
transform_kwargs = dict(
|
594
|
-
snowpark_input_cols = self._snowpark_cols,
|
595
|
-
drop_input_cols = self._drop_input_cols
|
596
|
-
)
|
664
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
597
665
|
|
598
666
|
transform_handlers = ModelTransformerBuilder.build(
|
599
667
|
dataset=dataset,
|
@@ -605,7 +673,7 @@ class SpectralCoclustering(BaseTransformer):
|
|
605
673
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
606
674
|
inference_method=inference_method,
|
607
675
|
input_cols=self.input_cols,
|
608
|
-
expected_output_cols=
|
676
|
+
expected_output_cols=expected_output_cols,
|
609
677
|
**transform_kwargs
|
610
678
|
)
|
611
679
|
return output_df
|
@@ -635,29 +703,30 @@ class SpectralCoclustering(BaseTransformer):
|
|
635
703
|
Output dataset with log probability of the sample for each class in the model.
|
636
704
|
"""
|
637
705
|
super()._check_dataset_type(dataset)
|
638
|
-
inference_method="predict_log_proba"
|
706
|
+
inference_method = "predict_log_proba"
|
707
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
639
708
|
|
640
709
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
641
710
|
# are specific to the type of dataset used.
|
642
711
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
643
712
|
|
644
713
|
if isinstance(dataset, DataFrame):
|
645
|
-
self.
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
714
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
715
|
+
self._deps = self._get_dependencies()
|
716
|
+
assert isinstance(
|
717
|
+
dataset._session, Session
|
718
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
650
719
|
transform_kwargs = dict(
|
651
720
|
session=dataset._session,
|
652
721
|
dependencies=self._deps,
|
653
|
-
drop_input_cols
|
722
|
+
drop_input_cols=self._drop_input_cols,
|
654
723
|
expected_output_cols_type="float",
|
655
724
|
)
|
725
|
+
expected_output_cols = self._align_expected_output_names(
|
726
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
727
|
+
)
|
656
728
|
elif isinstance(dataset, pd.DataFrame):
|
657
|
-
transform_kwargs = dict(
|
658
|
-
snowpark_input_cols = self._snowpark_cols,
|
659
|
-
drop_input_cols = self._drop_input_cols
|
660
|
-
)
|
729
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
661
730
|
|
662
731
|
transform_handlers = ModelTransformerBuilder.build(
|
663
732
|
dataset=dataset,
|
@@ -670,7 +739,7 @@ class SpectralCoclustering(BaseTransformer):
|
|
670
739
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
671
740
|
inference_method=inference_method,
|
672
741
|
input_cols=self.input_cols,
|
673
|
-
expected_output_cols=
|
742
|
+
expected_output_cols=expected_output_cols,
|
674
743
|
**transform_kwargs
|
675
744
|
)
|
676
745
|
return output_df
|
@@ -696,30 +765,32 @@ class SpectralCoclustering(BaseTransformer):
|
|
696
765
|
Output dataset with results of the decision function for the samples in input dataset.
|
697
766
|
"""
|
698
767
|
super()._check_dataset_type(dataset)
|
699
|
-
inference_method="decision_function"
|
768
|
+
inference_method = "decision_function"
|
700
769
|
|
701
770
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
702
771
|
# are specific to the type of dataset used.
|
703
772
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
704
773
|
|
774
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
775
|
+
|
705
776
|
if isinstance(dataset, DataFrame):
|
706
|
-
self.
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
777
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
778
|
+
self._deps = self._get_dependencies()
|
779
|
+
assert isinstance(
|
780
|
+
dataset._session, Session
|
781
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
711
782
|
transform_kwargs = dict(
|
712
783
|
session=dataset._session,
|
713
784
|
dependencies=self._deps,
|
714
|
-
drop_input_cols
|
785
|
+
drop_input_cols=self._drop_input_cols,
|
715
786
|
expected_output_cols_type="float",
|
716
787
|
)
|
788
|
+
expected_output_cols = self._align_expected_output_names(
|
789
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
790
|
+
)
|
717
791
|
|
718
792
|
elif isinstance(dataset, pd.DataFrame):
|
719
|
-
transform_kwargs = dict(
|
720
|
-
snowpark_input_cols = self._snowpark_cols,
|
721
|
-
drop_input_cols = self._drop_input_cols
|
722
|
-
)
|
793
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
723
794
|
|
724
795
|
transform_handlers = ModelTransformerBuilder.build(
|
725
796
|
dataset=dataset,
|
@@ -732,7 +803,7 @@ class SpectralCoclustering(BaseTransformer):
|
|
732
803
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
733
804
|
inference_method=inference_method,
|
734
805
|
input_cols=self.input_cols,
|
735
|
-
expected_output_cols=
|
806
|
+
expected_output_cols=expected_output_cols,
|
736
807
|
**transform_kwargs
|
737
808
|
)
|
738
809
|
return output_df
|
@@ -761,17 +832,17 @@ class SpectralCoclustering(BaseTransformer):
|
|
761
832
|
Output dataset with probability of the sample for each class in the model.
|
762
833
|
"""
|
763
834
|
super()._check_dataset_type(dataset)
|
764
|
-
inference_method="score_samples"
|
835
|
+
inference_method = "score_samples"
|
765
836
|
|
766
837
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
767
838
|
# are specific to the type of dataset used.
|
768
839
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
769
840
|
|
841
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
842
|
+
|
770
843
|
if isinstance(dataset, DataFrame):
|
771
|
-
self.
|
772
|
-
|
773
|
-
inference_method=inference_method,
|
774
|
-
)
|
844
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
845
|
+
self._deps = self._get_dependencies()
|
775
846
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
776
847
|
transform_kwargs = dict(
|
777
848
|
session=dataset._session,
|
@@ -779,6 +850,9 @@ class SpectralCoclustering(BaseTransformer):
|
|
779
850
|
drop_input_cols = self._drop_input_cols,
|
780
851
|
expected_output_cols_type="float",
|
781
852
|
)
|
853
|
+
expected_output_cols = self._align_expected_output_names(
|
854
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
855
|
+
)
|
782
856
|
|
783
857
|
elif isinstance(dataset, pd.DataFrame):
|
784
858
|
transform_kwargs = dict(
|
@@ -797,7 +871,7 @@ class SpectralCoclustering(BaseTransformer):
|
|
797
871
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
798
872
|
inference_method=inference_method,
|
799
873
|
input_cols=self.input_cols,
|
800
|
-
expected_output_cols=
|
874
|
+
expected_output_cols=expected_output_cols,
|
801
875
|
**transform_kwargs
|
802
876
|
)
|
803
877
|
return output_df
|
@@ -830,17 +904,15 @@ class SpectralCoclustering(BaseTransformer):
|
|
830
904
|
transform_kwargs: ScoreKwargsTypedDict = dict()
|
831
905
|
|
832
906
|
if isinstance(dataset, DataFrame):
|
833
|
-
self.
|
834
|
-
|
835
|
-
inference_method="score",
|
836
|
-
)
|
907
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method="score")
|
908
|
+
self._deps = self._get_dependencies()
|
837
909
|
selected_cols = self._get_active_columns()
|
838
910
|
if len(selected_cols) > 0:
|
839
911
|
dataset = dataset.select(selected_cols)
|
840
912
|
assert isinstance(dataset._session, Session) # keep mypy happy
|
841
913
|
transform_kwargs = dict(
|
842
914
|
session=dataset._session,
|
843
|
-
dependencies=
|
915
|
+
dependencies=self._deps,
|
844
916
|
score_sproc_imports=['sklearn'],
|
845
917
|
)
|
846
918
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -905,11 +977,8 @@ class SpectralCoclustering(BaseTransformer):
|
|
905
977
|
|
906
978
|
if isinstance(dataset, DataFrame):
|
907
979
|
|
908
|
-
self.
|
909
|
-
|
910
|
-
inference_method=inference_method,
|
911
|
-
|
912
|
-
)
|
980
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
981
|
+
self._deps = self._get_dependencies()
|
913
982
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
914
983
|
transform_kwargs = dict(
|
915
984
|
session = dataset._session,
|
@@ -942,50 +1011,84 @@ class SpectralCoclustering(BaseTransformer):
|
|
942
1011
|
)
|
943
1012
|
return output_df
|
944
1013
|
|
1014
|
+
|
1015
|
+
|
1016
|
+
def to_sklearn(self) -> Any:
|
1017
|
+
"""Get sklearn.cluster.SpectralCoclustering object.
|
1018
|
+
"""
|
1019
|
+
if self._sklearn_object is None:
|
1020
|
+
self._sklearn_object = self._create_sklearn_object()
|
1021
|
+
return self._sklearn_object
|
1022
|
+
|
1023
|
+
def to_xgboost(self) -> Any:
|
1024
|
+
raise exceptions.SnowflakeMLException(
|
1025
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1026
|
+
original_exception=AttributeError(
|
1027
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1028
|
+
"to_xgboost()",
|
1029
|
+
"to_sklearn()"
|
1030
|
+
)
|
1031
|
+
),
|
1032
|
+
)
|
1033
|
+
|
1034
|
+
def to_lightgbm(self) -> Any:
|
1035
|
+
raise exceptions.SnowflakeMLException(
|
1036
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1037
|
+
original_exception=AttributeError(
|
1038
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1039
|
+
"to_lightgbm()",
|
1040
|
+
"to_sklearn()"
|
1041
|
+
)
|
1042
|
+
),
|
1043
|
+
)
|
1044
|
+
|
1045
|
+
def _get_dependencies(self) -> List[str]:
|
1046
|
+
return self._deps
|
1047
|
+
|
945
1048
|
|
946
|
-
def
|
1049
|
+
def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
947
1050
|
self._model_signature_dict = dict()
|
948
1051
|
|
949
1052
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
950
1053
|
|
951
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input"))
|
1054
|
+
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
952
1055
|
outputs: List[BaseFeatureSpec] = []
|
953
1056
|
if hasattr(self, "predict"):
|
954
1057
|
# keep mypy happy
|
955
|
-
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1058
|
+
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
956
1059
|
# For classifier, the type of predict is the same as the type of label
|
957
|
-
if self._sklearn_object._estimator_type ==
|
958
|
-
|
1060
|
+
if self._sklearn_object._estimator_type == "classifier":
|
1061
|
+
# label columns is the desired type for output
|
959
1062
|
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
960
1063
|
# rename the output columns
|
961
1064
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
962
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
963
|
-
|
964
|
-
|
1065
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1066
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1067
|
+
)
|
965
1068
|
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
966
1069
|
# For outlier models, returns -1 for outliers and 1 for inliers.
|
967
|
-
# Clusterer returns int64 cluster labels.
|
1070
|
+
# Clusterer returns int64 cluster labels.
|
968
1071
|
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
969
1072
|
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
970
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
971
|
-
|
972
|
-
|
973
|
-
|
1073
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1074
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1075
|
+
)
|
1076
|
+
|
974
1077
|
# For regressor, the type of predict is float64
|
975
|
-
elif self._sklearn_object._estimator_type ==
|
1078
|
+
elif self._sklearn_object._estimator_type == "regressor":
|
976
1079
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
977
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
978
|
-
|
979
|
-
|
980
|
-
|
1080
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1081
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1082
|
+
)
|
1083
|
+
|
981
1084
|
for prob_func in PROB_FUNCTIONS:
|
982
1085
|
if hasattr(self, prob_func):
|
983
1086
|
output_cols_prefix: str = f"{prob_func}_"
|
984
1087
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
985
1088
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
986
|
-
self._model_signature_dict[prob_func] = ModelSignature(
|
987
|
-
|
988
|
-
|
1089
|
+
self._model_signature_dict[prob_func] = ModelSignature(
|
1090
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1091
|
+
)
|
989
1092
|
|
990
1093
|
# Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
|
991
1094
|
items = list(self._model_signature_dict.items())
|
@@ -998,10 +1101,10 @@ class SpectralCoclustering(BaseTransformer):
|
|
998
1101
|
"""Returns model signature of current class.
|
999
1102
|
|
1000
1103
|
Raises:
|
1001
|
-
|
1104
|
+
SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
|
1002
1105
|
|
1003
1106
|
Returns:
|
1004
|
-
Dict
|
1107
|
+
Dict with each method and its input output signature
|
1005
1108
|
"""
|
1006
1109
|
if self._model_signature_dict is None:
|
1007
1110
|
raise exceptions.SnowflakeMLException(
|
@@ -1009,35 +1112,3 @@ class SpectralCoclustering(BaseTransformer):
|
|
1009
1112
|
original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
|
1010
1113
|
)
|
1011
1114
|
return self._model_signature_dict
|
1012
|
-
|
1013
|
-
def to_sklearn(self) -> Any:
|
1014
|
-
"""Get sklearn.cluster.SpectralCoclustering object.
|
1015
|
-
"""
|
1016
|
-
if self._sklearn_object is None:
|
1017
|
-
self._sklearn_object = self._create_sklearn_object()
|
1018
|
-
return self._sklearn_object
|
1019
|
-
|
1020
|
-
def to_xgboost(self) -> Any:
|
1021
|
-
raise exceptions.SnowflakeMLException(
|
1022
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1023
|
-
original_exception=AttributeError(
|
1024
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1025
|
-
"to_xgboost()",
|
1026
|
-
"to_sklearn()"
|
1027
|
-
)
|
1028
|
-
),
|
1029
|
-
)
|
1030
|
-
|
1031
|
-
def to_lightgbm(self) -> Any:
|
1032
|
-
raise exceptions.SnowflakeMLException(
|
1033
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1034
|
-
original_exception=AttributeError(
|
1035
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1036
|
-
"to_lightgbm()",
|
1037
|
-
"to_sklearn()"
|
1038
|
-
)
|
1039
|
-
),
|
1040
|
-
)
|
1041
|
-
|
1042
|
-
def _get_dependencies(self) -> List[str]:
|
1043
|
-
return self._deps
|