snowflake-ml-python 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +77 -32
- snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
- snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
- snowflake/ml/_internal/exceptions/error_codes.py +3 -0
- snowflake/ml/_internal/lineage/data_source.py +10 -0
- snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/dataset/__init__.py +10 -0
- snowflake/ml/dataset/dataset.py +454 -129
- snowflake/ml/dataset/dataset_factory.py +53 -0
- snowflake/ml/dataset/dataset_metadata.py +103 -0
- snowflake/ml/dataset/dataset_reader.py +202 -0
- snowflake/ml/feature_store/feature_store.py +531 -332
- snowflake/ml/feature_store/feature_view.py +40 -23
- snowflake/ml/fileset/embedded_stage_fs.py +146 -0
- snowflake/ml/fileset/sfcfs.py +56 -54
- snowflake/ml/fileset/snowfs.py +159 -0
- snowflake/ml/fileset/stage_fs.py +49 -17
- snowflake/ml/model/__init__.py +2 -2
- snowflake/ml/model/_api.py +16 -1
- snowflake/ml/model/_client/model/model_impl.py +27 -0
- snowflake/ml/model/_client/model/model_version_impl.py +137 -50
- snowflake/ml/model/_client/ops/model_ops.py +159 -40
- snowflake/ml/model/_client/sql/model.py +25 -2
- snowflake/ml/model/_client/sql/model_version.py +131 -2
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
- snowflake/ml/model/_model_composer/model_composer.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +38 -51
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +19 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_env/model_env.py +41 -0
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +37 -11
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -5
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
- snowflake/ml/modeling/_internal/model_trainer.py +7 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +29 -7
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +246 -175
- snowflake/ml/modeling/cluster/affinity_propagation.py +246 -175
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +246 -175
- snowflake/ml/modeling/cluster/birch.py +248 -175
- snowflake/ml/modeling/cluster/bisecting_k_means.py +248 -175
- snowflake/ml/modeling/cluster/dbscan.py +246 -175
- snowflake/ml/modeling/cluster/feature_agglomeration.py +248 -175
- snowflake/ml/modeling/cluster/k_means.py +248 -175
- snowflake/ml/modeling/cluster/mean_shift.py +246 -175
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +248 -175
- snowflake/ml/modeling/cluster/optics.py +246 -175
- snowflake/ml/modeling/cluster/spectral_biclustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_clustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_coclustering.py +246 -175
- snowflake/ml/modeling/compose/column_transformer.py +248 -175
- snowflake/ml/modeling/compose/transformed_target_regressor.py +246 -175
- snowflake/ml/modeling/covariance/elliptic_envelope.py +246 -175
- snowflake/ml/modeling/covariance/empirical_covariance.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +246 -175
- snowflake/ml/modeling/covariance/ledoit_wolf.py +246 -175
- snowflake/ml/modeling/covariance/min_cov_det.py +246 -175
- snowflake/ml/modeling/covariance/oas.py +246 -175
- snowflake/ml/modeling/covariance/shrunk_covariance.py +246 -175
- snowflake/ml/modeling/decomposition/dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/factor_analysis.py +248 -175
- snowflake/ml/modeling/decomposition/fast_ica.py +248 -175
- snowflake/ml/modeling/decomposition/incremental_pca.py +248 -175
- snowflake/ml/modeling/decomposition/kernel_pca.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/pca.py +248 -175
- snowflake/ml/modeling/decomposition/sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/truncated_svd.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/isolation_forest.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/stacking_regressor.py +248 -175
- snowflake/ml/modeling/ensemble/voting_classifier.py +248 -175
- snowflake/ml/modeling/ensemble/voting_regressor.py +248 -175
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fdr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fpr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fwe.py +248 -175
- snowflake/ml/modeling/feature_selection/select_k_best.py +248 -175
- snowflake/ml/modeling/feature_selection/select_percentile.py +248 -175
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +248 -175
- snowflake/ml/modeling/feature_selection/variance_threshold.py +248 -175
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +72 -37
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +246 -175
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +246 -175
- snowflake/ml/modeling/impute/iterative_imputer.py +248 -175
- snowflake/ml/modeling/impute/knn_imputer.py +248 -175
- snowflake/ml/modeling/impute/missing_indicator.py +248 -175
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/nystroem.py +248 -175
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +248 -175
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ard_regression.py +246 -175
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/gamma_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/huber_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/lars.py +246 -175
- snowflake/ml/modeling/linear_model/lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +246 -175
- snowflake/ml/modeling/linear_model/linear_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/perceptron.py +246 -175
- snowflake/ml/modeling/linear_model/poisson_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ransac_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ridge.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_cv.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +246 -175
- snowflake/ml/modeling/manifold/isomap.py +248 -175
- snowflake/ml/modeling/manifold/mds.py +248 -175
- snowflake/ml/modeling/manifold/spectral_embedding.py +248 -175
- snowflake/ml/modeling/manifold/tsne.py +248 -175
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +246 -175
- snowflake/ml/modeling/mixture/gaussian_mixture.py +246 -175
- snowflake/ml/modeling/model_selection/grid_search_cv.py +63 -41
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +80 -38
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/output_code_classifier.py +246 -175
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/complement_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neighbors/kernel_density.py +246 -175
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_centroid.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +246 -175
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +248 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +248 -175
- snowflake/ml/modeling/neural_network/mlp_classifier.py +246 -175
- snowflake/ml/modeling/neural_network/mlp_regressor.py +246 -175
- snowflake/ml/modeling/pipeline/pipeline.py +517 -35
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +13 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +248 -175
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +246 -175
- snowflake/ml/modeling/semi_supervised/label_spreading.py +246 -175
- snowflake/ml/modeling/svm/linear_svc.py +246 -175
- snowflake/ml/modeling/svm/linear_svr.py +246 -175
- snowflake/ml/modeling/svm/nu_svc.py +246 -175
- snowflake/ml/modeling/svm/nu_svr.py +246 -175
- snowflake/ml/modeling/svm/svc.py +246 -175
- snowflake/ml/modeling/svm/svr.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_regressor.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +246 -175
- snowflake/ml/registry/model_registry.py +3 -149
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +129 -57
- snowflake_ml_python-1.5.0.dist-info/RECORD +380 -0
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- snowflake/ml/registry/_artifact_manager.py +0 -156
- snowflake/ml/registry/artifact.py +0 -46
- snowflake_ml_python-1.4.0.dist-info/RECORD +0 -370
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
|
|
33
33
|
BatchInferenceKwargsTypedDict,
|
34
34
|
ScoreKwargsTypedDict
|
35
35
|
)
|
36
|
+
from snowflake.ml.model._signatures import utils as model_signature_utils
|
37
|
+
from snowflake.ml.model.model_signature import (
|
38
|
+
BaseFeatureSpec,
|
39
|
+
DataType,
|
40
|
+
FeatureSpec,
|
41
|
+
ModelSignature,
|
42
|
+
_infer_signature,
|
43
|
+
_rename_signature_with_snowflake_identifiers,
|
44
|
+
)
|
36
45
|
|
37
46
|
from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
|
38
47
|
|
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
|
|
43
52
|
validate_sklearn_args,
|
44
53
|
)
|
45
54
|
|
46
|
-
from snowflake.ml.model.model_signature import (
|
47
|
-
DataType,
|
48
|
-
FeatureSpec,
|
49
|
-
ModelSignature,
|
50
|
-
_infer_signature,
|
51
|
-
_rename_signature_with_snowflake_identifiers,
|
52
|
-
BaseFeatureSpec,
|
53
|
-
)
|
54
|
-
from snowflake.ml.model._signatures import utils as model_signature_utils
|
55
|
-
|
56
55
|
_PROJECT = "ModelDevelopment"
|
57
56
|
# Derive subproject from module name by removing "sklearn"
|
58
57
|
# and converting module name from underscore to CamelCase
|
@@ -61,12 +60,6 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.decomposition".replace("
|
|
61
60
|
|
62
61
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
63
62
|
|
64
|
-
def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
|
65
|
-
def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
|
66
|
-
return False and callable(getattr(self._sklearn_object, "fit_transform", None))
|
67
|
-
return check
|
68
|
-
|
69
|
-
|
70
63
|
class FastICA(BaseTransformer):
|
71
64
|
r"""FastICA: a fast algorithm for Independent Component Analysis
|
72
65
|
For more details on this class, see [sklearn.decomposition.FastICA]
|
@@ -269,12 +262,7 @@ class FastICA(BaseTransformer):
|
|
269
262
|
)
|
270
263
|
return selected_cols
|
271
264
|
|
272
|
-
|
273
|
-
project=_PROJECT,
|
274
|
-
subproject=_SUBPROJECT,
|
275
|
-
custom_tags=dict([("autogen", True)]),
|
276
|
-
)
|
277
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "FastICA":
|
265
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "FastICA":
|
278
266
|
"""Fit the model to X
|
279
267
|
For more details on this function, see [sklearn.decomposition.FastICA.fit]
|
280
268
|
(https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.FastICA.html#sklearn.decomposition.FastICA.fit)
|
@@ -301,12 +289,14 @@ class FastICA(BaseTransformer):
|
|
301
289
|
|
302
290
|
self._snowpark_cols = dataset.select(self.input_cols).columns
|
303
291
|
|
304
|
-
|
292
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
305
293
|
if SNOWML_SPROC_ENV in os.environ:
|
306
294
|
statement_params = telemetry.get_function_usage_statement_params(
|
307
295
|
project=_PROJECT,
|
308
296
|
subproject=_SUBPROJECT,
|
309
|
-
function_name=telemetry.get_statement_params_full_func_name(
|
297
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
298
|
+
inspect.currentframe(), FastICA.__class__.__name__
|
299
|
+
),
|
310
300
|
api_calls=[Session.call],
|
311
301
|
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
312
302
|
)
|
@@ -327,27 +317,24 @@ class FastICA(BaseTransformer):
|
|
327
317
|
)
|
328
318
|
self._sklearn_object = model_trainer.train()
|
329
319
|
self._is_fitted = True
|
330
|
-
self.
|
320
|
+
self._generate_model_signatures(dataset)
|
331
321
|
return self
|
332
322
|
|
333
323
|
def _batch_inference_validate_snowpark(
|
334
324
|
self,
|
335
325
|
dataset: DataFrame,
|
336
326
|
inference_method: str,
|
337
|
-
) ->
|
338
|
-
"""Util method to run validate that batch inference can be run on a snowpark dataframe
|
339
|
-
return the available package that exists in the snowflake anaconda channel
|
327
|
+
) -> None:
|
328
|
+
"""Util method to run validate that batch inference can be run on a snowpark dataframe.
|
340
329
|
|
341
330
|
Args:
|
342
331
|
dataset: snowpark dataframe
|
343
332
|
inference_method: the inference method such as predict, score...
|
344
|
-
|
333
|
+
|
345
334
|
Raises:
|
346
335
|
SnowflakeMLException: If the estimator is not fitted, raise error
|
347
336
|
SnowflakeMLException: If the session is None, raise error
|
348
337
|
|
349
|
-
Returns:
|
350
|
-
A list of available package that exists in the snowflake anaconda channel
|
351
338
|
"""
|
352
339
|
if not self._is_fitted:
|
353
340
|
raise exceptions.SnowflakeMLException(
|
@@ -365,9 +352,7 @@ class FastICA(BaseTransformer):
|
|
365
352
|
"Session must not specified for snowpark dataset."
|
366
353
|
),
|
367
354
|
)
|
368
|
-
|
369
|
-
return pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
|
370
|
-
pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
|
355
|
+
|
371
356
|
|
372
357
|
@available_if(original_estimator_has_callable("predict")) # type: ignore[misc]
|
373
358
|
@telemetry.send_api_usage_telemetry(
|
@@ -401,7 +386,9 @@ class FastICA(BaseTransformer):
|
|
401
386
|
# when it is classifier, infer the datatype from label columns
|
402
387
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
403
388
|
# Batch inference takes a single expected output column type. Use the first columns type for now.
|
404
|
-
label_cols_signatures = [
|
389
|
+
label_cols_signatures = [
|
390
|
+
row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
|
391
|
+
]
|
405
392
|
if len(label_cols_signatures) == 0:
|
406
393
|
error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
|
407
394
|
raise exceptions.SnowflakeMLException(
|
@@ -409,25 +396,23 @@ class FastICA(BaseTransformer):
|
|
409
396
|
original_exception=ValueError(error_str),
|
410
397
|
)
|
411
398
|
|
412
|
-
expected_type_inferred = convert_sp_to_sf_type(
|
413
|
-
label_cols_signatures[0].as_snowpark_type()
|
414
|
-
)
|
399
|
+
expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
|
415
400
|
|
416
|
-
self.
|
417
|
-
|
401
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
402
|
+
self._deps = self._get_dependencies()
|
403
|
+
assert isinstance(
|
404
|
+
dataset._session, Session
|
405
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
418
406
|
|
419
407
|
transform_kwargs = dict(
|
420
|
-
session
|
421
|
-
dependencies
|
422
|
-
drop_input_cols
|
423
|
-
expected_output_cols_type
|
408
|
+
session=dataset._session,
|
409
|
+
dependencies=self._deps,
|
410
|
+
drop_input_cols=self._drop_input_cols,
|
411
|
+
expected_output_cols_type=expected_type_inferred,
|
424
412
|
)
|
425
413
|
|
426
414
|
elif isinstance(dataset, pd.DataFrame):
|
427
|
-
transform_kwargs = dict(
|
428
|
-
snowpark_input_cols = self._snowpark_cols,
|
429
|
-
drop_input_cols = self._drop_input_cols
|
430
|
-
)
|
415
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
431
416
|
|
432
417
|
transform_handlers = ModelTransformerBuilder.build(
|
433
418
|
dataset=dataset,
|
@@ -469,7 +454,7 @@ class FastICA(BaseTransformer):
|
|
469
454
|
Transformed dataset.
|
470
455
|
"""
|
471
456
|
super()._check_dataset_type(dataset)
|
472
|
-
inference_method="transform"
|
457
|
+
inference_method = "transform"
|
473
458
|
|
474
459
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
475
460
|
# are specific to the type of dataset used.
|
@@ -499,24 +484,19 @@ class FastICA(BaseTransformer):
|
|
499
484
|
if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
|
500
485
|
expected_dtype = convert_sp_to_sf_type(output_types[0])
|
501
486
|
|
502
|
-
self.
|
503
|
-
|
504
|
-
inference_method=inference_method,
|
505
|
-
)
|
487
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
488
|
+
self._deps = self._get_dependencies()
|
506
489
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
507
490
|
|
508
491
|
transform_kwargs = dict(
|
509
|
-
session
|
510
|
-
dependencies
|
511
|
-
drop_input_cols
|
512
|
-
expected_output_cols_type
|
492
|
+
session=dataset._session,
|
493
|
+
dependencies=self._deps,
|
494
|
+
drop_input_cols=self._drop_input_cols,
|
495
|
+
expected_output_cols_type=expected_dtype,
|
513
496
|
)
|
514
497
|
|
515
498
|
elif isinstance(dataset, pd.DataFrame):
|
516
|
-
transform_kwargs = dict(
|
517
|
-
snowpark_input_cols = self._snowpark_cols,
|
518
|
-
drop_input_cols = self._drop_input_cols
|
519
|
-
)
|
499
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
520
500
|
|
521
501
|
transform_handlers = ModelTransformerBuilder.build(
|
522
502
|
dataset=dataset,
|
@@ -535,7 +515,11 @@ class FastICA(BaseTransformer):
|
|
535
515
|
return output_df
|
536
516
|
|
537
517
|
@available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
|
538
|
-
def fit_predict(
|
518
|
+
def fit_predict(
|
519
|
+
self,
|
520
|
+
dataset: Union[DataFrame, pd.DataFrame],
|
521
|
+
output_cols_prefix: str = "fit_predict_",
|
522
|
+
) -> Union[DataFrame, pd.DataFrame]:
|
539
523
|
""" Method not supported for this class.
|
540
524
|
|
541
525
|
|
@@ -560,22 +544,106 @@ class FastICA(BaseTransformer):
|
|
560
544
|
)
|
561
545
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
562
546
|
drop_input_cols=self._drop_input_cols,
|
563
|
-
expected_output_cols_list=
|
547
|
+
expected_output_cols_list=(
|
548
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
549
|
+
),
|
564
550
|
)
|
565
551
|
self._sklearn_object = fitted_estimator
|
566
552
|
self._is_fitted = True
|
567
553
|
return output_result
|
568
554
|
|
555
|
+
|
556
|
+
@available_if(original_estimator_has_callable("fit_transform")) # type: ignore[misc]
|
557
|
+
def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "fit_transform_",) -> Union[DataFrame, pd.DataFrame]:
|
558
|
+
""" Fit the model and recover the sources from X
|
559
|
+
For more details on this function, see [sklearn.decomposition.FastICA.fit_transform]
|
560
|
+
(https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.FastICA.html#sklearn.decomposition.FastICA.fit_transform)
|
561
|
+
|
562
|
+
|
563
|
+
Raises:
|
564
|
+
TypeError: Supported dataset types: snowpark.DataFrame, pandas.DataFrame.
|
569
565
|
|
570
|
-
|
571
|
-
|
572
|
-
|
566
|
+
Args:
|
567
|
+
dataset: Union[snowflake.snowpark.DataFrame, pandas.DataFrame]
|
568
|
+
Snowpark or Pandas DataFrame.
|
569
|
+
output_cols_prefix: Prefix for the response columns
|
573
570
|
Returns:
|
574
571
|
Transformed dataset.
|
575
572
|
"""
|
576
|
-
self.
|
577
|
-
|
578
|
-
|
573
|
+
self._infer_input_output_cols(dataset)
|
574
|
+
super()._check_dataset_type(dataset)
|
575
|
+
model_trainer = ModelTrainerBuilder.build_fit_transform(
|
576
|
+
estimator=self._sklearn_object,
|
577
|
+
dataset=dataset,
|
578
|
+
input_cols=self.input_cols,
|
579
|
+
label_cols=self.label_cols,
|
580
|
+
sample_weight_col=self.sample_weight_col,
|
581
|
+
autogenerated=self._autogenerated,
|
582
|
+
subproject=_SUBPROJECT,
|
583
|
+
)
|
584
|
+
output_result, fitted_estimator = model_trainer.train_fit_transform(
|
585
|
+
drop_input_cols=self._drop_input_cols,
|
586
|
+
expected_output_cols_list=self.output_cols,
|
587
|
+
)
|
588
|
+
self._sklearn_object = fitted_estimator
|
589
|
+
self._is_fitted = True
|
590
|
+
return output_result
|
591
|
+
|
592
|
+
|
593
|
+
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
594
|
+
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
595
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
596
|
+
"""
|
597
|
+
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
598
|
+
# The following condition is introduced for kneighbors methods, and not used in other methods
|
599
|
+
if output_cols:
|
600
|
+
output_cols = [
|
601
|
+
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
602
|
+
for c in output_cols
|
603
|
+
]
|
604
|
+
elif getattr(self._sklearn_object, "classes_", None) is None:
|
605
|
+
output_cols = [output_cols_prefix]
|
606
|
+
elif self._sklearn_object is not None:
|
607
|
+
classes = self._sklearn_object.classes_
|
608
|
+
if isinstance(classes, numpy.ndarray):
|
609
|
+
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
610
|
+
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
611
|
+
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
612
|
+
output_cols = []
|
613
|
+
for i, cl in enumerate(classes):
|
614
|
+
# For binary classification, there is only one output column for each class
|
615
|
+
# ndarray as the two classes are complementary.
|
616
|
+
if len(cl) == 2:
|
617
|
+
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
618
|
+
else:
|
619
|
+
output_cols.extend([
|
620
|
+
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
621
|
+
])
|
622
|
+
else:
|
623
|
+
output_cols = []
|
624
|
+
|
625
|
+
# Make sure column names are valid snowflake identifiers.
|
626
|
+
assert output_cols is not None # Make MyPy happy
|
627
|
+
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
628
|
+
|
629
|
+
return rv
|
630
|
+
|
631
|
+
def _align_expected_output_names(
|
632
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
633
|
+
) -> List[str]:
|
634
|
+
# in case the inferred output column names dimension is different
|
635
|
+
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
636
|
+
output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
|
637
|
+
output_df_columns = list(output_df_pd.columns)
|
638
|
+
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
639
|
+
if self.sample_weight_col:
|
640
|
+
output_df_columns_set -= set(self.sample_weight_col)
|
641
|
+
# if the dimension of inferred output column names is correct; use it
|
642
|
+
if len(expected_output_cols_list) == len(output_df_columns_set):
|
643
|
+
return expected_output_cols_list
|
644
|
+
# otherwise, use the sklearn estimator's output
|
645
|
+
else:
|
646
|
+
return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
579
647
|
|
580
648
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
581
649
|
@telemetry.send_api_usage_telemetry(
|
@@ -607,24 +675,26 @@ class FastICA(BaseTransformer):
|
|
607
675
|
# are specific to the type of dataset used.
|
608
676
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
609
677
|
|
678
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
679
|
+
|
610
680
|
if isinstance(dataset, DataFrame):
|
611
|
-
self.
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
681
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
682
|
+
self._deps = self._get_dependencies()
|
683
|
+
assert isinstance(
|
684
|
+
dataset._session, Session
|
685
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
616
686
|
transform_kwargs = dict(
|
617
687
|
session=dataset._session,
|
618
688
|
dependencies=self._deps,
|
619
|
-
drop_input_cols
|
689
|
+
drop_input_cols=self._drop_input_cols,
|
620
690
|
expected_output_cols_type="float",
|
621
691
|
)
|
692
|
+
expected_output_cols = self._align_expected_output_names(
|
693
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
694
|
+
)
|
622
695
|
|
623
696
|
elif isinstance(dataset, pd.DataFrame):
|
624
|
-
transform_kwargs = dict(
|
625
|
-
snowpark_input_cols = self._snowpark_cols,
|
626
|
-
drop_input_cols = self._drop_input_cols
|
627
|
-
)
|
697
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
628
698
|
|
629
699
|
transform_handlers = ModelTransformerBuilder.build(
|
630
700
|
dataset=dataset,
|
@@ -636,7 +706,7 @@ class FastICA(BaseTransformer):
|
|
636
706
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
637
707
|
inference_method=inference_method,
|
638
708
|
input_cols=self.input_cols,
|
639
|
-
expected_output_cols=
|
709
|
+
expected_output_cols=expected_output_cols,
|
640
710
|
**transform_kwargs
|
641
711
|
)
|
642
712
|
return output_df
|
@@ -666,29 +736,30 @@ class FastICA(BaseTransformer):
|
|
666
736
|
Output dataset with log probability of the sample for each class in the model.
|
667
737
|
"""
|
668
738
|
super()._check_dataset_type(dataset)
|
669
|
-
inference_method="predict_log_proba"
|
739
|
+
inference_method = "predict_log_proba"
|
740
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
670
741
|
|
671
742
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
672
743
|
# are specific to the type of dataset used.
|
673
744
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
674
745
|
|
675
746
|
if isinstance(dataset, DataFrame):
|
676
|
-
self.
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
747
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
748
|
+
self._deps = self._get_dependencies()
|
749
|
+
assert isinstance(
|
750
|
+
dataset._session, Session
|
751
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
681
752
|
transform_kwargs = dict(
|
682
753
|
session=dataset._session,
|
683
754
|
dependencies=self._deps,
|
684
|
-
drop_input_cols
|
755
|
+
drop_input_cols=self._drop_input_cols,
|
685
756
|
expected_output_cols_type="float",
|
686
757
|
)
|
758
|
+
expected_output_cols = self._align_expected_output_names(
|
759
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
760
|
+
)
|
687
761
|
elif isinstance(dataset, pd.DataFrame):
|
688
|
-
transform_kwargs = dict(
|
689
|
-
snowpark_input_cols = self._snowpark_cols,
|
690
|
-
drop_input_cols = self._drop_input_cols
|
691
|
-
)
|
762
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
692
763
|
|
693
764
|
transform_handlers = ModelTransformerBuilder.build(
|
694
765
|
dataset=dataset,
|
@@ -701,7 +772,7 @@ class FastICA(BaseTransformer):
|
|
701
772
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
702
773
|
inference_method=inference_method,
|
703
774
|
input_cols=self.input_cols,
|
704
|
-
expected_output_cols=
|
775
|
+
expected_output_cols=expected_output_cols,
|
705
776
|
**transform_kwargs
|
706
777
|
)
|
707
778
|
return output_df
|
@@ -727,30 +798,32 @@ class FastICA(BaseTransformer):
|
|
727
798
|
Output dataset with results of the decision function for the samples in input dataset.
|
728
799
|
"""
|
729
800
|
super()._check_dataset_type(dataset)
|
730
|
-
inference_method="decision_function"
|
801
|
+
inference_method = "decision_function"
|
731
802
|
|
732
803
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
733
804
|
# are specific to the type of dataset used.
|
734
805
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
735
806
|
|
807
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
808
|
+
|
736
809
|
if isinstance(dataset, DataFrame):
|
737
|
-
self.
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
|
810
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
811
|
+
self._deps = self._get_dependencies()
|
812
|
+
assert isinstance(
|
813
|
+
dataset._session, Session
|
814
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
742
815
|
transform_kwargs = dict(
|
743
816
|
session=dataset._session,
|
744
817
|
dependencies=self._deps,
|
745
|
-
drop_input_cols
|
818
|
+
drop_input_cols=self._drop_input_cols,
|
746
819
|
expected_output_cols_type="float",
|
747
820
|
)
|
821
|
+
expected_output_cols = self._align_expected_output_names(
|
822
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
823
|
+
)
|
748
824
|
|
749
825
|
elif isinstance(dataset, pd.DataFrame):
|
750
|
-
transform_kwargs = dict(
|
751
|
-
snowpark_input_cols = self._snowpark_cols,
|
752
|
-
drop_input_cols = self._drop_input_cols
|
753
|
-
)
|
826
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
754
827
|
|
755
828
|
transform_handlers = ModelTransformerBuilder.build(
|
756
829
|
dataset=dataset,
|
@@ -763,7 +836,7 @@ class FastICA(BaseTransformer):
|
|
763
836
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
764
837
|
inference_method=inference_method,
|
765
838
|
input_cols=self.input_cols,
|
766
|
-
expected_output_cols=
|
839
|
+
expected_output_cols=expected_output_cols,
|
767
840
|
**transform_kwargs
|
768
841
|
)
|
769
842
|
return output_df
|
@@ -792,17 +865,17 @@ class FastICA(BaseTransformer):
|
|
792
865
|
Output dataset with probability of the sample for each class in the model.
|
793
866
|
"""
|
794
867
|
super()._check_dataset_type(dataset)
|
795
|
-
inference_method="score_samples"
|
868
|
+
inference_method = "score_samples"
|
796
869
|
|
797
870
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
798
871
|
# are specific to the type of dataset used.
|
799
872
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
800
873
|
|
874
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
875
|
+
|
801
876
|
if isinstance(dataset, DataFrame):
|
802
|
-
self.
|
803
|
-
|
804
|
-
inference_method=inference_method,
|
805
|
-
)
|
877
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
878
|
+
self._deps = self._get_dependencies()
|
806
879
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
807
880
|
transform_kwargs = dict(
|
808
881
|
session=dataset._session,
|
@@ -810,6 +883,9 @@ class FastICA(BaseTransformer):
|
|
810
883
|
drop_input_cols = self._drop_input_cols,
|
811
884
|
expected_output_cols_type="float",
|
812
885
|
)
|
886
|
+
expected_output_cols = self._align_expected_output_names(
|
887
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
888
|
+
)
|
813
889
|
|
814
890
|
elif isinstance(dataset, pd.DataFrame):
|
815
891
|
transform_kwargs = dict(
|
@@ -828,7 +904,7 @@ class FastICA(BaseTransformer):
|
|
828
904
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
829
905
|
inference_method=inference_method,
|
830
906
|
input_cols=self.input_cols,
|
831
|
-
expected_output_cols=
|
907
|
+
expected_output_cols=expected_output_cols,
|
832
908
|
**transform_kwargs
|
833
909
|
)
|
834
910
|
return output_df
|
@@ -861,17 +937,15 @@ class FastICA(BaseTransformer):
|
|
861
937
|
transform_kwargs: ScoreKwargsTypedDict = dict()
|
862
938
|
|
863
939
|
if isinstance(dataset, DataFrame):
|
864
|
-
self.
|
865
|
-
|
866
|
-
inference_method="score",
|
867
|
-
)
|
940
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method="score")
|
941
|
+
self._deps = self._get_dependencies()
|
868
942
|
selected_cols = self._get_active_columns()
|
869
943
|
if len(selected_cols) > 0:
|
870
944
|
dataset = dataset.select(selected_cols)
|
871
945
|
assert isinstance(dataset._session, Session) # keep mypy happy
|
872
946
|
transform_kwargs = dict(
|
873
947
|
session=dataset._session,
|
874
|
-
dependencies=
|
948
|
+
dependencies=self._deps,
|
875
949
|
score_sproc_imports=['sklearn'],
|
876
950
|
)
|
877
951
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -936,11 +1010,8 @@ class FastICA(BaseTransformer):
|
|
936
1010
|
|
937
1011
|
if isinstance(dataset, DataFrame):
|
938
1012
|
|
939
|
-
self.
|
940
|
-
|
941
|
-
inference_method=inference_method,
|
942
|
-
|
943
|
-
)
|
1013
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
1014
|
+
self._deps = self._get_dependencies()
|
944
1015
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
945
1016
|
transform_kwargs = dict(
|
946
1017
|
session = dataset._session,
|
@@ -973,50 +1044,84 @@ class FastICA(BaseTransformer):
|
|
973
1044
|
)
|
974
1045
|
return output_df
|
975
1046
|
|
1047
|
+
|
1048
|
+
|
1049
|
+
def to_sklearn(self) -> Any:
|
1050
|
+
"""Get sklearn.decomposition.FastICA object.
|
1051
|
+
"""
|
1052
|
+
if self._sklearn_object is None:
|
1053
|
+
self._sklearn_object = self._create_sklearn_object()
|
1054
|
+
return self._sklearn_object
|
1055
|
+
|
1056
|
+
def to_xgboost(self) -> Any:
|
1057
|
+
raise exceptions.SnowflakeMLException(
|
1058
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1059
|
+
original_exception=AttributeError(
|
1060
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1061
|
+
"to_xgboost()",
|
1062
|
+
"to_sklearn()"
|
1063
|
+
)
|
1064
|
+
),
|
1065
|
+
)
|
976
1066
|
|
977
|
-
def
|
1067
|
+
def to_lightgbm(self) -> Any:
|
1068
|
+
raise exceptions.SnowflakeMLException(
|
1069
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1070
|
+
original_exception=AttributeError(
|
1071
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1072
|
+
"to_lightgbm()",
|
1073
|
+
"to_sklearn()"
|
1074
|
+
)
|
1075
|
+
),
|
1076
|
+
)
|
1077
|
+
|
1078
|
+
def _get_dependencies(self) -> List[str]:
|
1079
|
+
return self._deps
|
1080
|
+
|
1081
|
+
|
1082
|
+
def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
978
1083
|
self._model_signature_dict = dict()
|
979
1084
|
|
980
1085
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
981
1086
|
|
982
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input"))
|
1087
|
+
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
983
1088
|
outputs: List[BaseFeatureSpec] = []
|
984
1089
|
if hasattr(self, "predict"):
|
985
1090
|
# keep mypy happy
|
986
|
-
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1091
|
+
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
987
1092
|
# For classifier, the type of predict is the same as the type of label
|
988
|
-
if self._sklearn_object._estimator_type ==
|
989
|
-
|
1093
|
+
if self._sklearn_object._estimator_type == "classifier":
|
1094
|
+
# label columns is the desired type for output
|
990
1095
|
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
991
1096
|
# rename the output columns
|
992
1097
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
993
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
994
|
-
|
995
|
-
|
1098
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1099
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1100
|
+
)
|
996
1101
|
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
997
1102
|
# For outlier models, returns -1 for outliers and 1 for inliers.
|
998
|
-
# Clusterer returns int64 cluster labels.
|
1103
|
+
# Clusterer returns int64 cluster labels.
|
999
1104
|
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
1000
1105
|
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
1001
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1002
|
-
|
1003
|
-
|
1004
|
-
|
1106
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1107
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1108
|
+
)
|
1109
|
+
|
1005
1110
|
# For regressor, the type of predict is float64
|
1006
|
-
elif self._sklearn_object._estimator_type ==
|
1111
|
+
elif self._sklearn_object._estimator_type == "regressor":
|
1007
1112
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1008
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1009
|
-
|
1010
|
-
|
1011
|
-
|
1113
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1114
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1115
|
+
)
|
1116
|
+
|
1012
1117
|
for prob_func in PROB_FUNCTIONS:
|
1013
1118
|
if hasattr(self, prob_func):
|
1014
1119
|
output_cols_prefix: str = f"{prob_func}_"
|
1015
1120
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1016
1121
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1017
|
-
self._model_signature_dict[prob_func] = ModelSignature(
|
1018
|
-
|
1019
|
-
|
1122
|
+
self._model_signature_dict[prob_func] = ModelSignature(
|
1123
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1124
|
+
)
|
1020
1125
|
|
1021
1126
|
# Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
|
1022
1127
|
items = list(self._model_signature_dict.items())
|
@@ -1029,10 +1134,10 @@ class FastICA(BaseTransformer):
|
|
1029
1134
|
"""Returns model signature of current class.
|
1030
1135
|
|
1031
1136
|
Raises:
|
1032
|
-
|
1137
|
+
SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
|
1033
1138
|
|
1034
1139
|
Returns:
|
1035
|
-
Dict
|
1140
|
+
Dict with each method and its input output signature
|
1036
1141
|
"""
|
1037
1142
|
if self._model_signature_dict is None:
|
1038
1143
|
raise exceptions.SnowflakeMLException(
|
@@ -1040,35 +1145,3 @@ class FastICA(BaseTransformer):
|
|
1040
1145
|
original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
|
1041
1146
|
)
|
1042
1147
|
return self._model_signature_dict
|
1043
|
-
|
1044
|
-
def to_sklearn(self) -> Any:
|
1045
|
-
"""Get sklearn.decomposition.FastICA object.
|
1046
|
-
"""
|
1047
|
-
if self._sklearn_object is None:
|
1048
|
-
self._sklearn_object = self._create_sklearn_object()
|
1049
|
-
return self._sklearn_object
|
1050
|
-
|
1051
|
-
def to_xgboost(self) -> Any:
|
1052
|
-
raise exceptions.SnowflakeMLException(
|
1053
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1054
|
-
original_exception=AttributeError(
|
1055
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1056
|
-
"to_xgboost()",
|
1057
|
-
"to_sklearn()"
|
1058
|
-
)
|
1059
|
-
),
|
1060
|
-
)
|
1061
|
-
|
1062
|
-
def to_lightgbm(self) -> Any:
|
1063
|
-
raise exceptions.SnowflakeMLException(
|
1064
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1065
|
-
original_exception=AttributeError(
|
1066
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1067
|
-
"to_lightgbm()",
|
1068
|
-
"to_sklearn()"
|
1069
|
-
)
|
1070
|
-
),
|
1071
|
-
)
|
1072
|
-
|
1073
|
-
def _get_dependencies(self) -> List[str]:
|
1074
|
-
return self._deps
|