snowflake-ml-python 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +11 -1
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/feature_store/feature_store.py +151 -78
- snowflake/ml/feature_store/feature_view.py +12 -24
- snowflake/ml/fileset/sfcfs.py +56 -50
- snowflake/ml/fileset/stage_fs.py +48 -13
- snowflake/ml/model/_client/model/model_version_impl.py +2 -50
- snowflake/ml/model/_client/ops/model_ops.py +78 -29
- snowflake/ml/model/_client/sql/model.py +23 -2
- snowflake/ml/model/_client/sql/model_version.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +19 -54
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +8 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +36 -6
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -2
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +5 -5
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +195 -123
- snowflake/ml/modeling/cluster/affinity_propagation.py +195 -123
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +195 -123
- snowflake/ml/modeling/cluster/birch.py +195 -123
- snowflake/ml/modeling/cluster/bisecting_k_means.py +195 -123
- snowflake/ml/modeling/cluster/dbscan.py +195 -123
- snowflake/ml/modeling/cluster/feature_agglomeration.py +195 -123
- snowflake/ml/modeling/cluster/k_means.py +195 -123
- snowflake/ml/modeling/cluster/mean_shift.py +195 -123
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +195 -123
- snowflake/ml/modeling/cluster/optics.py +195 -123
- snowflake/ml/modeling/cluster/spectral_biclustering.py +195 -123
- snowflake/ml/modeling/cluster/spectral_clustering.py +195 -123
- snowflake/ml/modeling/cluster/spectral_coclustering.py +195 -123
- snowflake/ml/modeling/compose/column_transformer.py +195 -123
- snowflake/ml/modeling/compose/transformed_target_regressor.py +195 -123
- snowflake/ml/modeling/covariance/elliptic_envelope.py +195 -123
- snowflake/ml/modeling/covariance/empirical_covariance.py +195 -123
- snowflake/ml/modeling/covariance/graphical_lasso.py +195 -123
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +195 -123
- snowflake/ml/modeling/covariance/ledoit_wolf.py +195 -123
- snowflake/ml/modeling/covariance/min_cov_det.py +195 -123
- snowflake/ml/modeling/covariance/oas.py +195 -123
- snowflake/ml/modeling/covariance/shrunk_covariance.py +195 -123
- snowflake/ml/modeling/decomposition/dictionary_learning.py +195 -123
- snowflake/ml/modeling/decomposition/factor_analysis.py +195 -123
- snowflake/ml/modeling/decomposition/fast_ica.py +195 -123
- snowflake/ml/modeling/decomposition/incremental_pca.py +195 -123
- snowflake/ml/modeling/decomposition/kernel_pca.py +195 -123
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +195 -123
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +195 -123
- snowflake/ml/modeling/decomposition/pca.py +195 -123
- snowflake/ml/modeling/decomposition/sparse_pca.py +195 -123
- snowflake/ml/modeling/decomposition/truncated_svd.py +195 -123
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +195 -123
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +195 -123
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/bagging_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/bagging_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/isolation_forest.py +195 -123
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/stacking_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/voting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/voting_regressor.py +195 -123
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fdr.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fpr.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fwe.py +195 -123
- snowflake/ml/modeling/feature_selection/select_k_best.py +195 -123
- snowflake/ml/modeling/feature_selection/select_percentile.py +195 -123
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +195 -123
- snowflake/ml/modeling/feature_selection/variance_threshold.py +195 -123
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +9 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +195 -123
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +195 -123
- snowflake/ml/modeling/impute/iterative_imputer.py +195 -123
- snowflake/ml/modeling/impute/knn_imputer.py +195 -123
- snowflake/ml/modeling/impute/missing_indicator.py +195 -123
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +195 -123
- snowflake/ml/modeling/kernel_approximation/nystroem.py +195 -123
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +195 -123
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +195 -123
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +195 -123
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +195 -123
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +195 -123
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ard_regression.py +195 -123
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +195 -123
- snowflake/ml/modeling/linear_model/elastic_net.py +195 -123
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +195 -123
- snowflake/ml/modeling/linear_model/gamma_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/huber_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/lars.py +195 -123
- snowflake/ml/modeling/linear_model/lars_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +195 -123
- snowflake/ml/modeling/linear_model/linear_regression.py +195 -123
- snowflake/ml/modeling/linear_model/logistic_regression.py +195 -123
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +195 -123
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +195 -123
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/perceptron.py +195 -123
- snowflake/ml/modeling/linear_model/poisson_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ransac_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ridge.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_cv.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +195 -123
- snowflake/ml/modeling/manifold/isomap.py +195 -123
- snowflake/ml/modeling/manifold/mds.py +195 -123
- snowflake/ml/modeling/manifold/spectral_embedding.py +195 -123
- snowflake/ml/modeling/manifold/tsne.py +195 -123
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +195 -123
- snowflake/ml/modeling/mixture/gaussian_mixture.py +195 -123
- snowflake/ml/modeling/model_selection/grid_search_cv.py +42 -18
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +42 -18
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +195 -123
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +195 -123
- snowflake/ml/modeling/multiclass/output_code_classifier.py +195 -123
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/complement_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +195 -123
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +195 -123
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +195 -123
- snowflake/ml/modeling/neighbors/kernel_density.py +195 -123
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +195 -123
- snowflake/ml/modeling/neighbors/nearest_centroid.py +195 -123
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +195 -123
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +195 -123
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +195 -123
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +195 -123
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +195 -123
- snowflake/ml/modeling/neural_network/mlp_classifier.py +195 -123
- snowflake/ml/modeling/neural_network/mlp_regressor.py +195 -123
- snowflake/ml/modeling/pipeline/pipeline.py +4 -4
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +195 -123
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +195 -123
- snowflake/ml/modeling/semi_supervised/label_spreading.py +195 -123
- snowflake/ml/modeling/svm/linear_svc.py +195 -123
- snowflake/ml/modeling/svm/linear_svr.py +195 -123
- snowflake/ml/modeling/svm/nu_svc.py +195 -123
- snowflake/ml/modeling/svm/nu_svr.py +195 -123
- snowflake/ml/modeling/svm/svc.py +195 -123
- snowflake/ml/modeling/svm/svr.py +195 -123
- snowflake/ml/modeling/tree/decision_tree_classifier.py +195 -123
- snowflake/ml/modeling/tree/decision_tree_regressor.py +195 -123
- snowflake/ml/modeling/tree/extra_tree_classifier.py +195 -123
- snowflake/ml/modeling/tree/extra_tree_regressor.py +195 -123
- snowflake/ml/modeling/xgboost/xgb_classifier.py +195 -123
- snowflake/ml/modeling/xgboost/xgb_regressor.py +195 -123
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +195 -123
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +195 -123
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/METADATA +68 -57
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/RECORD +202 -200
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
|
|
33
33
|
BatchInferenceKwargsTypedDict,
|
34
34
|
ScoreKwargsTypedDict
|
35
35
|
)
|
36
|
+
from snowflake.ml.model._signatures import utils as model_signature_utils
|
37
|
+
from snowflake.ml.model.model_signature import (
|
38
|
+
BaseFeatureSpec,
|
39
|
+
DataType,
|
40
|
+
FeatureSpec,
|
41
|
+
ModelSignature,
|
42
|
+
_infer_signature,
|
43
|
+
_rename_signature_with_snowflake_identifiers,
|
44
|
+
)
|
36
45
|
|
37
46
|
from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
|
38
47
|
|
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
|
|
43
52
|
validate_sklearn_args,
|
44
53
|
)
|
45
54
|
|
46
|
-
from snowflake.ml.model.model_signature import (
|
47
|
-
DataType,
|
48
|
-
FeatureSpec,
|
49
|
-
ModelSignature,
|
50
|
-
_infer_signature,
|
51
|
-
_rename_signature_with_snowflake_identifiers,
|
52
|
-
BaseFeatureSpec,
|
53
|
-
)
|
54
|
-
from snowflake.ml.model._signatures import utils as model_signature_utils
|
55
|
-
|
56
55
|
_PROJECT = "ModelDevelopment"
|
57
56
|
# Derive subproject from module name by removing "sklearn"
|
58
57
|
# and converting module name from underscore to CamelCase
|
@@ -269,12 +268,7 @@ class FastICA(BaseTransformer):
|
|
269
268
|
)
|
270
269
|
return selected_cols
|
271
270
|
|
272
|
-
|
273
|
-
project=_PROJECT,
|
274
|
-
subproject=_SUBPROJECT,
|
275
|
-
custom_tags=dict([("autogen", True)]),
|
276
|
-
)
|
277
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "FastICA":
|
271
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "FastICA":
|
278
272
|
"""Fit the model to X
|
279
273
|
For more details on this function, see [sklearn.decomposition.FastICA.fit]
|
280
274
|
(https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.FastICA.html#sklearn.decomposition.FastICA.fit)
|
@@ -301,12 +295,14 @@ class FastICA(BaseTransformer):
|
|
301
295
|
|
302
296
|
self._snowpark_cols = dataset.select(self.input_cols).columns
|
303
297
|
|
304
|
-
|
298
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
305
299
|
if SNOWML_SPROC_ENV in os.environ:
|
306
300
|
statement_params = telemetry.get_function_usage_statement_params(
|
307
301
|
project=_PROJECT,
|
308
302
|
subproject=_SUBPROJECT,
|
309
|
-
function_name=telemetry.get_statement_params_full_func_name(
|
303
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
304
|
+
inspect.currentframe(), FastICA.__class__.__name__
|
305
|
+
),
|
310
306
|
api_calls=[Session.call],
|
311
307
|
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
312
308
|
)
|
@@ -327,7 +323,7 @@ class FastICA(BaseTransformer):
|
|
327
323
|
)
|
328
324
|
self._sklearn_object = model_trainer.train()
|
329
325
|
self._is_fitted = True
|
330
|
-
self.
|
326
|
+
self._generate_model_signatures(dataset)
|
331
327
|
return self
|
332
328
|
|
333
329
|
def _batch_inference_validate_snowpark(
|
@@ -401,7 +397,9 @@ class FastICA(BaseTransformer):
|
|
401
397
|
# when it is classifier, infer the datatype from label columns
|
402
398
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
403
399
|
# Batch inference takes a single expected output column type. Use the first columns type for now.
|
404
|
-
label_cols_signatures = [
|
400
|
+
label_cols_signatures = [
|
401
|
+
row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
|
402
|
+
]
|
405
403
|
if len(label_cols_signatures) == 0:
|
406
404
|
error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
|
407
405
|
raise exceptions.SnowflakeMLException(
|
@@ -409,25 +407,22 @@ class FastICA(BaseTransformer):
|
|
409
407
|
original_exception=ValueError(error_str),
|
410
408
|
)
|
411
409
|
|
412
|
-
expected_type_inferred = convert_sp_to_sf_type(
|
413
|
-
label_cols_signatures[0].as_snowpark_type()
|
414
|
-
)
|
410
|
+
expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
|
415
411
|
|
416
412
|
self._deps = self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
417
|
-
assert isinstance(
|
413
|
+
assert isinstance(
|
414
|
+
dataset._session, Session
|
415
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
418
416
|
|
419
417
|
transform_kwargs = dict(
|
420
|
-
session
|
421
|
-
dependencies
|
422
|
-
drop_input_cols
|
423
|
-
expected_output_cols_type
|
418
|
+
session=dataset._session,
|
419
|
+
dependencies=self._deps,
|
420
|
+
drop_input_cols=self._drop_input_cols,
|
421
|
+
expected_output_cols_type=expected_type_inferred,
|
424
422
|
)
|
425
423
|
|
426
424
|
elif isinstance(dataset, pd.DataFrame):
|
427
|
-
transform_kwargs = dict(
|
428
|
-
snowpark_input_cols = self._snowpark_cols,
|
429
|
-
drop_input_cols = self._drop_input_cols
|
430
|
-
)
|
425
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
431
426
|
|
432
427
|
transform_handlers = ModelTransformerBuilder.build(
|
433
428
|
dataset=dataset,
|
@@ -469,7 +464,7 @@ class FastICA(BaseTransformer):
|
|
469
464
|
Transformed dataset.
|
470
465
|
"""
|
471
466
|
super()._check_dataset_type(dataset)
|
472
|
-
inference_method="transform"
|
467
|
+
inference_method = "transform"
|
473
468
|
|
474
469
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
475
470
|
# are specific to the type of dataset used.
|
@@ -506,17 +501,14 @@ class FastICA(BaseTransformer):
|
|
506
501
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
507
502
|
|
508
503
|
transform_kwargs = dict(
|
509
|
-
session
|
510
|
-
dependencies
|
511
|
-
drop_input_cols
|
512
|
-
expected_output_cols_type
|
504
|
+
session=dataset._session,
|
505
|
+
dependencies=self._deps,
|
506
|
+
drop_input_cols=self._drop_input_cols,
|
507
|
+
expected_output_cols_type=expected_dtype,
|
513
508
|
)
|
514
509
|
|
515
510
|
elif isinstance(dataset, pd.DataFrame):
|
516
|
-
transform_kwargs = dict(
|
517
|
-
snowpark_input_cols = self._snowpark_cols,
|
518
|
-
drop_input_cols = self._drop_input_cols
|
519
|
-
)
|
511
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
520
512
|
|
521
513
|
transform_handlers = ModelTransformerBuilder.build(
|
522
514
|
dataset=dataset,
|
@@ -535,7 +527,11 @@ class FastICA(BaseTransformer):
|
|
535
527
|
return output_df
|
536
528
|
|
537
529
|
@available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
|
538
|
-
def fit_predict(
|
530
|
+
def fit_predict(
|
531
|
+
self,
|
532
|
+
dataset: Union[DataFrame, pd.DataFrame],
|
533
|
+
output_cols_prefix: str = "fit_predict_",
|
534
|
+
) -> Union[DataFrame, pd.DataFrame]:
|
539
535
|
""" Method not supported for this class.
|
540
536
|
|
541
537
|
|
@@ -560,7 +556,9 @@ class FastICA(BaseTransformer):
|
|
560
556
|
)
|
561
557
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
562
558
|
drop_input_cols=self._drop_input_cols,
|
563
|
-
expected_output_cols_list=
|
559
|
+
expected_output_cols_list=(
|
560
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
561
|
+
),
|
564
562
|
)
|
565
563
|
self._sklearn_object = fitted_estimator
|
566
564
|
self._is_fitted = True
|
@@ -577,6 +575,62 @@ class FastICA(BaseTransformer):
|
|
577
575
|
assert self._sklearn_object is not None
|
578
576
|
return self._sklearn_object.embedding_
|
579
577
|
|
578
|
+
|
579
|
+
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
580
|
+
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
581
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
582
|
+
"""
|
583
|
+
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
584
|
+
# The following condition is introduced for kneighbors methods, and not used in other methods
|
585
|
+
if output_cols:
|
586
|
+
output_cols = [
|
587
|
+
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
588
|
+
for c in output_cols
|
589
|
+
]
|
590
|
+
elif getattr(self._sklearn_object, "classes_", None) is None:
|
591
|
+
output_cols = [output_cols_prefix]
|
592
|
+
elif self._sklearn_object is not None:
|
593
|
+
classes = self._sklearn_object.classes_
|
594
|
+
if isinstance(classes, numpy.ndarray):
|
595
|
+
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
596
|
+
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
597
|
+
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
598
|
+
output_cols = []
|
599
|
+
for i, cl in enumerate(classes):
|
600
|
+
# For binary classification, there is only one output column for each class
|
601
|
+
# ndarray as the two classes are complementary.
|
602
|
+
if len(cl) == 2:
|
603
|
+
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
604
|
+
else:
|
605
|
+
output_cols.extend([
|
606
|
+
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
607
|
+
])
|
608
|
+
else:
|
609
|
+
output_cols = []
|
610
|
+
|
611
|
+
# Make sure column names are valid snowflake identifiers.
|
612
|
+
assert output_cols is not None # Make MyPy happy
|
613
|
+
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
614
|
+
|
615
|
+
return rv
|
616
|
+
|
617
|
+
def _align_expected_output_names(
|
618
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
619
|
+
) -> List[str]:
|
620
|
+
# in case the inferred output column names dimension is different
|
621
|
+
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
622
|
+
output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
|
623
|
+
output_df_columns = list(output_df_pd.columns)
|
624
|
+
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
625
|
+
if self.sample_weight_col:
|
626
|
+
output_df_columns_set -= set(self.sample_weight_col)
|
627
|
+
# if the dimension of inferred output column names is correct; use it
|
628
|
+
if len(expected_output_cols_list) == len(output_df_columns_set):
|
629
|
+
return expected_output_cols_list
|
630
|
+
# otherwise, use the sklearn estimator's output
|
631
|
+
else:
|
632
|
+
return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
633
|
+
|
580
634
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
581
635
|
@telemetry.send_api_usage_telemetry(
|
582
636
|
project=_PROJECT,
|
@@ -607,24 +661,28 @@ class FastICA(BaseTransformer):
|
|
607
661
|
# are specific to the type of dataset used.
|
608
662
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
609
663
|
|
664
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
665
|
+
|
610
666
|
if isinstance(dataset, DataFrame):
|
611
667
|
self._deps = self._batch_inference_validate_snowpark(
|
612
668
|
dataset=dataset,
|
613
669
|
inference_method=inference_method,
|
614
670
|
)
|
615
|
-
assert isinstance(
|
671
|
+
assert isinstance(
|
672
|
+
dataset._session, Session
|
673
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
616
674
|
transform_kwargs = dict(
|
617
675
|
session=dataset._session,
|
618
676
|
dependencies=self._deps,
|
619
|
-
drop_input_cols
|
677
|
+
drop_input_cols=self._drop_input_cols,
|
620
678
|
expected_output_cols_type="float",
|
621
679
|
)
|
680
|
+
expected_output_cols = self._align_expected_output_names(
|
681
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
682
|
+
)
|
622
683
|
|
623
684
|
elif isinstance(dataset, pd.DataFrame):
|
624
|
-
transform_kwargs = dict(
|
625
|
-
snowpark_input_cols = self._snowpark_cols,
|
626
|
-
drop_input_cols = self._drop_input_cols
|
627
|
-
)
|
685
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
628
686
|
|
629
687
|
transform_handlers = ModelTransformerBuilder.build(
|
630
688
|
dataset=dataset,
|
@@ -636,7 +694,7 @@ class FastICA(BaseTransformer):
|
|
636
694
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
637
695
|
inference_method=inference_method,
|
638
696
|
input_cols=self.input_cols,
|
639
|
-
expected_output_cols=
|
697
|
+
expected_output_cols=expected_output_cols,
|
640
698
|
**transform_kwargs
|
641
699
|
)
|
642
700
|
return output_df
|
@@ -666,7 +724,8 @@ class FastICA(BaseTransformer):
|
|
666
724
|
Output dataset with log probability of the sample for each class in the model.
|
667
725
|
"""
|
668
726
|
super()._check_dataset_type(dataset)
|
669
|
-
inference_method="predict_log_proba"
|
727
|
+
inference_method = "predict_log_proba"
|
728
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
670
729
|
|
671
730
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
672
731
|
# are specific to the type of dataset used.
|
@@ -677,18 +736,20 @@ class FastICA(BaseTransformer):
|
|
677
736
|
dataset=dataset,
|
678
737
|
inference_method=inference_method,
|
679
738
|
)
|
680
|
-
assert isinstance(
|
739
|
+
assert isinstance(
|
740
|
+
dataset._session, Session
|
741
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
681
742
|
transform_kwargs = dict(
|
682
743
|
session=dataset._session,
|
683
744
|
dependencies=self._deps,
|
684
|
-
drop_input_cols
|
745
|
+
drop_input_cols=self._drop_input_cols,
|
685
746
|
expected_output_cols_type="float",
|
686
747
|
)
|
748
|
+
expected_output_cols = self._align_expected_output_names(
|
749
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
750
|
+
)
|
687
751
|
elif isinstance(dataset, pd.DataFrame):
|
688
|
-
transform_kwargs = dict(
|
689
|
-
snowpark_input_cols = self._snowpark_cols,
|
690
|
-
drop_input_cols = self._drop_input_cols
|
691
|
-
)
|
752
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
692
753
|
|
693
754
|
transform_handlers = ModelTransformerBuilder.build(
|
694
755
|
dataset=dataset,
|
@@ -701,7 +762,7 @@ class FastICA(BaseTransformer):
|
|
701
762
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
702
763
|
inference_method=inference_method,
|
703
764
|
input_cols=self.input_cols,
|
704
|
-
expected_output_cols=
|
765
|
+
expected_output_cols=expected_output_cols,
|
705
766
|
**transform_kwargs
|
706
767
|
)
|
707
768
|
return output_df
|
@@ -727,30 +788,34 @@ class FastICA(BaseTransformer):
|
|
727
788
|
Output dataset with results of the decision function for the samples in input dataset.
|
728
789
|
"""
|
729
790
|
super()._check_dataset_type(dataset)
|
730
|
-
inference_method="decision_function"
|
791
|
+
inference_method = "decision_function"
|
731
792
|
|
732
793
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
733
794
|
# are specific to the type of dataset used.
|
734
795
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
735
796
|
|
797
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
798
|
+
|
736
799
|
if isinstance(dataset, DataFrame):
|
737
800
|
self._deps = self._batch_inference_validate_snowpark(
|
738
801
|
dataset=dataset,
|
739
802
|
inference_method=inference_method,
|
740
803
|
)
|
741
|
-
assert isinstance(
|
804
|
+
assert isinstance(
|
805
|
+
dataset._session, Session
|
806
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
742
807
|
transform_kwargs = dict(
|
743
808
|
session=dataset._session,
|
744
809
|
dependencies=self._deps,
|
745
|
-
drop_input_cols
|
810
|
+
drop_input_cols=self._drop_input_cols,
|
746
811
|
expected_output_cols_type="float",
|
747
812
|
)
|
813
|
+
expected_output_cols = self._align_expected_output_names(
|
814
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
815
|
+
)
|
748
816
|
|
749
817
|
elif isinstance(dataset, pd.DataFrame):
|
750
|
-
transform_kwargs = dict(
|
751
|
-
snowpark_input_cols = self._snowpark_cols,
|
752
|
-
drop_input_cols = self._drop_input_cols
|
753
|
-
)
|
818
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
754
819
|
|
755
820
|
transform_handlers = ModelTransformerBuilder.build(
|
756
821
|
dataset=dataset,
|
@@ -763,7 +828,7 @@ class FastICA(BaseTransformer):
|
|
763
828
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
764
829
|
inference_method=inference_method,
|
765
830
|
input_cols=self.input_cols,
|
766
|
-
expected_output_cols=
|
831
|
+
expected_output_cols=expected_output_cols,
|
767
832
|
**transform_kwargs
|
768
833
|
)
|
769
834
|
return output_df
|
@@ -792,12 +857,14 @@ class FastICA(BaseTransformer):
|
|
792
857
|
Output dataset with probability of the sample for each class in the model.
|
793
858
|
"""
|
794
859
|
super()._check_dataset_type(dataset)
|
795
|
-
inference_method="score_samples"
|
860
|
+
inference_method = "score_samples"
|
796
861
|
|
797
862
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
798
863
|
# are specific to the type of dataset used.
|
799
864
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
800
865
|
|
866
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
867
|
+
|
801
868
|
if isinstance(dataset, DataFrame):
|
802
869
|
self._deps = self._batch_inference_validate_snowpark(
|
803
870
|
dataset=dataset,
|
@@ -810,6 +877,9 @@ class FastICA(BaseTransformer):
|
|
810
877
|
drop_input_cols = self._drop_input_cols,
|
811
878
|
expected_output_cols_type="float",
|
812
879
|
)
|
880
|
+
expected_output_cols = self._align_expected_output_names(
|
881
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
882
|
+
)
|
813
883
|
|
814
884
|
elif isinstance(dataset, pd.DataFrame):
|
815
885
|
transform_kwargs = dict(
|
@@ -828,7 +898,7 @@ class FastICA(BaseTransformer):
|
|
828
898
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
829
899
|
inference_method=inference_method,
|
830
900
|
input_cols=self.input_cols,
|
831
|
-
expected_output_cols=
|
901
|
+
expected_output_cols=expected_output_cols,
|
832
902
|
**transform_kwargs
|
833
903
|
)
|
834
904
|
return output_df
|
@@ -973,50 +1043,84 @@ class FastICA(BaseTransformer):
|
|
973
1043
|
)
|
974
1044
|
return output_df
|
975
1045
|
|
1046
|
+
|
1047
|
+
|
1048
|
+
def to_sklearn(self) -> Any:
|
1049
|
+
"""Get sklearn.decomposition.FastICA object.
|
1050
|
+
"""
|
1051
|
+
if self._sklearn_object is None:
|
1052
|
+
self._sklearn_object = self._create_sklearn_object()
|
1053
|
+
return self._sklearn_object
|
1054
|
+
|
1055
|
+
def to_xgboost(self) -> Any:
|
1056
|
+
raise exceptions.SnowflakeMLException(
|
1057
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1058
|
+
original_exception=AttributeError(
|
1059
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1060
|
+
"to_xgboost()",
|
1061
|
+
"to_sklearn()"
|
1062
|
+
)
|
1063
|
+
),
|
1064
|
+
)
|
1065
|
+
|
1066
|
+
def to_lightgbm(self) -> Any:
|
1067
|
+
raise exceptions.SnowflakeMLException(
|
1068
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1069
|
+
original_exception=AttributeError(
|
1070
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1071
|
+
"to_lightgbm()",
|
1072
|
+
"to_sklearn()"
|
1073
|
+
)
|
1074
|
+
),
|
1075
|
+
)
|
976
1076
|
|
977
|
-
def
|
1077
|
+
def _get_dependencies(self) -> List[str]:
|
1078
|
+
return self._deps
|
1079
|
+
|
1080
|
+
|
1081
|
+
def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
978
1082
|
self._model_signature_dict = dict()
|
979
1083
|
|
980
1084
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
981
1085
|
|
982
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input"))
|
1086
|
+
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
983
1087
|
outputs: List[BaseFeatureSpec] = []
|
984
1088
|
if hasattr(self, "predict"):
|
985
1089
|
# keep mypy happy
|
986
|
-
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1090
|
+
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
987
1091
|
# For classifier, the type of predict is the same as the type of label
|
988
|
-
if self._sklearn_object._estimator_type ==
|
989
|
-
|
1092
|
+
if self._sklearn_object._estimator_type == "classifier":
|
1093
|
+
# label columns is the desired type for output
|
990
1094
|
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
991
1095
|
# rename the output columns
|
992
1096
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
993
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
994
|
-
|
995
|
-
|
1097
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1098
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1099
|
+
)
|
996
1100
|
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
997
1101
|
# For outlier models, returns -1 for outliers and 1 for inliers.
|
998
|
-
# Clusterer returns int64 cluster labels.
|
1102
|
+
# Clusterer returns int64 cluster labels.
|
999
1103
|
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
1000
1104
|
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
1001
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1002
|
-
|
1003
|
-
|
1004
|
-
|
1105
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1106
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1107
|
+
)
|
1108
|
+
|
1005
1109
|
# For regressor, the type of predict is float64
|
1006
|
-
elif self._sklearn_object._estimator_type ==
|
1110
|
+
elif self._sklearn_object._estimator_type == "regressor":
|
1007
1111
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1008
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1009
|
-
|
1010
|
-
|
1011
|
-
|
1112
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1113
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1114
|
+
)
|
1115
|
+
|
1012
1116
|
for prob_func in PROB_FUNCTIONS:
|
1013
1117
|
if hasattr(self, prob_func):
|
1014
1118
|
output_cols_prefix: str = f"{prob_func}_"
|
1015
1119
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1016
1120
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1017
|
-
self._model_signature_dict[prob_func] = ModelSignature(
|
1018
|
-
|
1019
|
-
|
1121
|
+
self._model_signature_dict[prob_func] = ModelSignature(
|
1122
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1123
|
+
)
|
1020
1124
|
|
1021
1125
|
# Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
|
1022
1126
|
items = list(self._model_signature_dict.items())
|
@@ -1029,10 +1133,10 @@ class FastICA(BaseTransformer):
|
|
1029
1133
|
"""Returns model signature of current class.
|
1030
1134
|
|
1031
1135
|
Raises:
|
1032
|
-
|
1136
|
+
SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
|
1033
1137
|
|
1034
1138
|
Returns:
|
1035
|
-
Dict
|
1139
|
+
Dict with each method and its input output signature
|
1036
1140
|
"""
|
1037
1141
|
if self._model_signature_dict is None:
|
1038
1142
|
raise exceptions.SnowflakeMLException(
|
@@ -1040,35 +1144,3 @@ class FastICA(BaseTransformer):
|
|
1040
1144
|
original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
|
1041
1145
|
)
|
1042
1146
|
return self._model_signature_dict
|
1043
|
-
|
1044
|
-
def to_sklearn(self) -> Any:
|
1045
|
-
"""Get sklearn.decomposition.FastICA object.
|
1046
|
-
"""
|
1047
|
-
if self._sklearn_object is None:
|
1048
|
-
self._sklearn_object = self._create_sklearn_object()
|
1049
|
-
return self._sklearn_object
|
1050
|
-
|
1051
|
-
def to_xgboost(self) -> Any:
|
1052
|
-
raise exceptions.SnowflakeMLException(
|
1053
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1054
|
-
original_exception=AttributeError(
|
1055
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1056
|
-
"to_xgboost()",
|
1057
|
-
"to_sklearn()"
|
1058
|
-
)
|
1059
|
-
),
|
1060
|
-
)
|
1061
|
-
|
1062
|
-
def to_lightgbm(self) -> Any:
|
1063
|
-
raise exceptions.SnowflakeMLException(
|
1064
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1065
|
-
original_exception=AttributeError(
|
1066
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1067
|
-
"to_lightgbm()",
|
1068
|
-
"to_sklearn()"
|
1069
|
-
)
|
1070
|
-
),
|
1071
|
-
)
|
1072
|
-
|
1073
|
-
def _get_dependencies(self) -> List[str]:
|
1074
|
-
return self._deps
|