snowflake-ml-python 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +11 -1
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/feature_store/feature_store.py +151 -78
- snowflake/ml/feature_store/feature_view.py +12 -24
- snowflake/ml/fileset/sfcfs.py +56 -50
- snowflake/ml/fileset/stage_fs.py +48 -13
- snowflake/ml/model/_client/model/model_version_impl.py +2 -50
- snowflake/ml/model/_client/ops/model_ops.py +78 -29
- snowflake/ml/model/_client/sql/model.py +23 -2
- snowflake/ml/model/_client/sql/model_version.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +19 -54
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +8 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +36 -6
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -2
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +5 -5
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +195 -123
- snowflake/ml/modeling/cluster/affinity_propagation.py +195 -123
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +195 -123
- snowflake/ml/modeling/cluster/birch.py +195 -123
- snowflake/ml/modeling/cluster/bisecting_k_means.py +195 -123
- snowflake/ml/modeling/cluster/dbscan.py +195 -123
- snowflake/ml/modeling/cluster/feature_agglomeration.py +195 -123
- snowflake/ml/modeling/cluster/k_means.py +195 -123
- snowflake/ml/modeling/cluster/mean_shift.py +195 -123
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +195 -123
- snowflake/ml/modeling/cluster/optics.py +195 -123
- snowflake/ml/modeling/cluster/spectral_biclustering.py +195 -123
- snowflake/ml/modeling/cluster/spectral_clustering.py +195 -123
- snowflake/ml/modeling/cluster/spectral_coclustering.py +195 -123
- snowflake/ml/modeling/compose/column_transformer.py +195 -123
- snowflake/ml/modeling/compose/transformed_target_regressor.py +195 -123
- snowflake/ml/modeling/covariance/elliptic_envelope.py +195 -123
- snowflake/ml/modeling/covariance/empirical_covariance.py +195 -123
- snowflake/ml/modeling/covariance/graphical_lasso.py +195 -123
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +195 -123
- snowflake/ml/modeling/covariance/ledoit_wolf.py +195 -123
- snowflake/ml/modeling/covariance/min_cov_det.py +195 -123
- snowflake/ml/modeling/covariance/oas.py +195 -123
- snowflake/ml/modeling/covariance/shrunk_covariance.py +195 -123
- snowflake/ml/modeling/decomposition/dictionary_learning.py +195 -123
- snowflake/ml/modeling/decomposition/factor_analysis.py +195 -123
- snowflake/ml/modeling/decomposition/fast_ica.py +195 -123
- snowflake/ml/modeling/decomposition/incremental_pca.py +195 -123
- snowflake/ml/modeling/decomposition/kernel_pca.py +195 -123
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +195 -123
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +195 -123
- snowflake/ml/modeling/decomposition/pca.py +195 -123
- snowflake/ml/modeling/decomposition/sparse_pca.py +195 -123
- snowflake/ml/modeling/decomposition/truncated_svd.py +195 -123
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +195 -123
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +195 -123
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/bagging_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/bagging_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/isolation_forest.py +195 -123
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/stacking_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/voting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/voting_regressor.py +195 -123
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fdr.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fpr.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fwe.py +195 -123
- snowflake/ml/modeling/feature_selection/select_k_best.py +195 -123
- snowflake/ml/modeling/feature_selection/select_percentile.py +195 -123
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +195 -123
- snowflake/ml/modeling/feature_selection/variance_threshold.py +195 -123
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +9 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +195 -123
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +195 -123
- snowflake/ml/modeling/impute/iterative_imputer.py +195 -123
- snowflake/ml/modeling/impute/knn_imputer.py +195 -123
- snowflake/ml/modeling/impute/missing_indicator.py +195 -123
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +195 -123
- snowflake/ml/modeling/kernel_approximation/nystroem.py +195 -123
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +195 -123
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +195 -123
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +195 -123
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +195 -123
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +195 -123
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ard_regression.py +195 -123
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +195 -123
- snowflake/ml/modeling/linear_model/elastic_net.py +195 -123
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +195 -123
- snowflake/ml/modeling/linear_model/gamma_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/huber_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/lars.py +195 -123
- snowflake/ml/modeling/linear_model/lars_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +195 -123
- snowflake/ml/modeling/linear_model/linear_regression.py +195 -123
- snowflake/ml/modeling/linear_model/logistic_regression.py +195 -123
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +195 -123
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +195 -123
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/perceptron.py +195 -123
- snowflake/ml/modeling/linear_model/poisson_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ransac_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ridge.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_cv.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +195 -123
- snowflake/ml/modeling/manifold/isomap.py +195 -123
- snowflake/ml/modeling/manifold/mds.py +195 -123
- snowflake/ml/modeling/manifold/spectral_embedding.py +195 -123
- snowflake/ml/modeling/manifold/tsne.py +195 -123
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +195 -123
- snowflake/ml/modeling/mixture/gaussian_mixture.py +195 -123
- snowflake/ml/modeling/model_selection/grid_search_cv.py +42 -18
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +42 -18
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +195 -123
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +195 -123
- snowflake/ml/modeling/multiclass/output_code_classifier.py +195 -123
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/complement_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +195 -123
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +195 -123
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +195 -123
- snowflake/ml/modeling/neighbors/kernel_density.py +195 -123
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +195 -123
- snowflake/ml/modeling/neighbors/nearest_centroid.py +195 -123
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +195 -123
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +195 -123
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +195 -123
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +195 -123
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +195 -123
- snowflake/ml/modeling/neural_network/mlp_classifier.py +195 -123
- snowflake/ml/modeling/neural_network/mlp_regressor.py +195 -123
- snowflake/ml/modeling/pipeline/pipeline.py +4 -4
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +195 -123
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +195 -123
- snowflake/ml/modeling/semi_supervised/label_spreading.py +195 -123
- snowflake/ml/modeling/svm/linear_svc.py +195 -123
- snowflake/ml/modeling/svm/linear_svr.py +195 -123
- snowflake/ml/modeling/svm/nu_svc.py +195 -123
- snowflake/ml/modeling/svm/nu_svr.py +195 -123
- snowflake/ml/modeling/svm/svc.py +195 -123
- snowflake/ml/modeling/svm/svr.py +195 -123
- snowflake/ml/modeling/tree/decision_tree_classifier.py +195 -123
- snowflake/ml/modeling/tree/decision_tree_regressor.py +195 -123
- snowflake/ml/modeling/tree/extra_tree_classifier.py +195 -123
- snowflake/ml/modeling/tree/extra_tree_regressor.py +195 -123
- snowflake/ml/modeling/xgboost/xgb_classifier.py +195 -123
- snowflake/ml/modeling/xgboost/xgb_regressor.py +195 -123
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +195 -123
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +195 -123
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/METADATA +68 -57
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/RECORD +202 -200
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
|
|
33
33
|
BatchInferenceKwargsTypedDict,
|
34
34
|
ScoreKwargsTypedDict
|
35
35
|
)
|
36
|
+
from snowflake.ml.model._signatures import utils as model_signature_utils
|
37
|
+
from snowflake.ml.model.model_signature import (
|
38
|
+
BaseFeatureSpec,
|
39
|
+
DataType,
|
40
|
+
FeatureSpec,
|
41
|
+
ModelSignature,
|
42
|
+
_infer_signature,
|
43
|
+
_rename_signature_with_snowflake_identifiers,
|
44
|
+
)
|
36
45
|
|
37
46
|
from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
|
38
47
|
|
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
|
|
43
52
|
validate_sklearn_args,
|
44
53
|
)
|
45
54
|
|
46
|
-
from snowflake.ml.model.model_signature import (
|
47
|
-
DataType,
|
48
|
-
FeatureSpec,
|
49
|
-
ModelSignature,
|
50
|
-
_infer_signature,
|
51
|
-
_rename_signature_with_snowflake_identifiers,
|
52
|
-
BaseFeatureSpec,
|
53
|
-
)
|
54
|
-
from snowflake.ml.model._signatures import utils as model_signature_utils
|
55
|
-
|
56
55
|
_PROJECT = "ModelDevelopment"
|
57
56
|
# Derive subproject from module name by removing "sklearn"
|
58
57
|
# and converting module name from underscore to CamelCase
|
@@ -242,12 +241,7 @@ class AffinityPropagation(BaseTransformer):
|
|
242
241
|
)
|
243
242
|
return selected_cols
|
244
243
|
|
245
|
-
|
246
|
-
project=_PROJECT,
|
247
|
-
subproject=_SUBPROJECT,
|
248
|
-
custom_tags=dict([("autogen", True)]),
|
249
|
-
)
|
250
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "AffinityPropagation":
|
244
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "AffinityPropagation":
|
251
245
|
"""Fit the clustering from features, or affinity matrix
|
252
246
|
For more details on this function, see [sklearn.cluster.AffinityPropagation.fit]
|
253
247
|
(https://scikit-learn.org/stable/modules/generated/sklearn.cluster.AffinityPropagation.html#sklearn.cluster.AffinityPropagation.fit)
|
@@ -274,12 +268,14 @@ class AffinityPropagation(BaseTransformer):
|
|
274
268
|
|
275
269
|
self._snowpark_cols = dataset.select(self.input_cols).columns
|
276
270
|
|
277
|
-
|
271
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
278
272
|
if SNOWML_SPROC_ENV in os.environ:
|
279
273
|
statement_params = telemetry.get_function_usage_statement_params(
|
280
274
|
project=_PROJECT,
|
281
275
|
subproject=_SUBPROJECT,
|
282
|
-
function_name=telemetry.get_statement_params_full_func_name(
|
276
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
277
|
+
inspect.currentframe(), AffinityPropagation.__class__.__name__
|
278
|
+
),
|
283
279
|
api_calls=[Session.call],
|
284
280
|
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
285
281
|
)
|
@@ -300,7 +296,7 @@ class AffinityPropagation(BaseTransformer):
|
|
300
296
|
)
|
301
297
|
self._sklearn_object = model_trainer.train()
|
302
298
|
self._is_fitted = True
|
303
|
-
self.
|
299
|
+
self._generate_model_signatures(dataset)
|
304
300
|
return self
|
305
301
|
|
306
302
|
def _batch_inference_validate_snowpark(
|
@@ -376,7 +372,9 @@ class AffinityPropagation(BaseTransformer):
|
|
376
372
|
# when it is classifier, infer the datatype from label columns
|
377
373
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
378
374
|
# Batch inference takes a single expected output column type. Use the first columns type for now.
|
379
|
-
label_cols_signatures = [
|
375
|
+
label_cols_signatures = [
|
376
|
+
row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
|
377
|
+
]
|
380
378
|
if len(label_cols_signatures) == 0:
|
381
379
|
error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
|
382
380
|
raise exceptions.SnowflakeMLException(
|
@@ -384,25 +382,22 @@ class AffinityPropagation(BaseTransformer):
|
|
384
382
|
original_exception=ValueError(error_str),
|
385
383
|
)
|
386
384
|
|
387
|
-
expected_type_inferred = convert_sp_to_sf_type(
|
388
|
-
label_cols_signatures[0].as_snowpark_type()
|
389
|
-
)
|
385
|
+
expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
|
390
386
|
|
391
387
|
self._deps = self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
392
|
-
assert isinstance(
|
388
|
+
assert isinstance(
|
389
|
+
dataset._session, Session
|
390
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
393
391
|
|
394
392
|
transform_kwargs = dict(
|
395
|
-
session
|
396
|
-
dependencies
|
397
|
-
drop_input_cols
|
398
|
-
expected_output_cols_type
|
393
|
+
session=dataset._session,
|
394
|
+
dependencies=self._deps,
|
395
|
+
drop_input_cols=self._drop_input_cols,
|
396
|
+
expected_output_cols_type=expected_type_inferred,
|
399
397
|
)
|
400
398
|
|
401
399
|
elif isinstance(dataset, pd.DataFrame):
|
402
|
-
transform_kwargs = dict(
|
403
|
-
snowpark_input_cols = self._snowpark_cols,
|
404
|
-
drop_input_cols = self._drop_input_cols
|
405
|
-
)
|
400
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
406
401
|
|
407
402
|
transform_handlers = ModelTransformerBuilder.build(
|
408
403
|
dataset=dataset,
|
@@ -442,7 +437,7 @@ class AffinityPropagation(BaseTransformer):
|
|
442
437
|
Transformed dataset.
|
443
438
|
"""
|
444
439
|
super()._check_dataset_type(dataset)
|
445
|
-
inference_method="transform"
|
440
|
+
inference_method = "transform"
|
446
441
|
|
447
442
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
448
443
|
# are specific to the type of dataset used.
|
@@ -479,17 +474,14 @@ class AffinityPropagation(BaseTransformer):
|
|
479
474
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
480
475
|
|
481
476
|
transform_kwargs = dict(
|
482
|
-
session
|
483
|
-
dependencies
|
484
|
-
drop_input_cols
|
485
|
-
expected_output_cols_type
|
477
|
+
session=dataset._session,
|
478
|
+
dependencies=self._deps,
|
479
|
+
drop_input_cols=self._drop_input_cols,
|
480
|
+
expected_output_cols_type=expected_dtype,
|
486
481
|
)
|
487
482
|
|
488
483
|
elif isinstance(dataset, pd.DataFrame):
|
489
|
-
transform_kwargs = dict(
|
490
|
-
snowpark_input_cols = self._snowpark_cols,
|
491
|
-
drop_input_cols = self._drop_input_cols
|
492
|
-
)
|
484
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
493
485
|
|
494
486
|
transform_handlers = ModelTransformerBuilder.build(
|
495
487
|
dataset=dataset,
|
@@ -508,7 +500,11 @@ class AffinityPropagation(BaseTransformer):
|
|
508
500
|
return output_df
|
509
501
|
|
510
502
|
@available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
|
511
|
-
def fit_predict(
|
503
|
+
def fit_predict(
|
504
|
+
self,
|
505
|
+
dataset: Union[DataFrame, pd.DataFrame],
|
506
|
+
output_cols_prefix: str = "fit_predict_",
|
507
|
+
) -> Union[DataFrame, pd.DataFrame]:
|
512
508
|
""" Fit clustering from features/affinity matrix; return cluster labels
|
513
509
|
For more details on this function, see [sklearn.cluster.AffinityPropagation.fit_predict]
|
514
510
|
(https://scikit-learn.org/stable/modules/generated/sklearn.cluster.AffinityPropagation.html#sklearn.cluster.AffinityPropagation.fit_predict)
|
@@ -535,7 +531,9 @@ class AffinityPropagation(BaseTransformer):
|
|
535
531
|
)
|
536
532
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
537
533
|
drop_input_cols=self._drop_input_cols,
|
538
|
-
expected_output_cols_list=
|
534
|
+
expected_output_cols_list=(
|
535
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
536
|
+
),
|
539
537
|
)
|
540
538
|
self._sklearn_object = fitted_estimator
|
541
539
|
self._is_fitted = True
|
@@ -552,6 +550,62 @@ class AffinityPropagation(BaseTransformer):
|
|
552
550
|
assert self._sklearn_object is not None
|
553
551
|
return self._sklearn_object.embedding_
|
554
552
|
|
553
|
+
|
554
|
+
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
555
|
+
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
556
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
557
|
+
"""
|
558
|
+
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
559
|
+
# The following condition is introduced for kneighbors methods, and not used in other methods
|
560
|
+
if output_cols:
|
561
|
+
output_cols = [
|
562
|
+
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
563
|
+
for c in output_cols
|
564
|
+
]
|
565
|
+
elif getattr(self._sklearn_object, "classes_", None) is None:
|
566
|
+
output_cols = [output_cols_prefix]
|
567
|
+
elif self._sklearn_object is not None:
|
568
|
+
classes = self._sklearn_object.classes_
|
569
|
+
if isinstance(classes, numpy.ndarray):
|
570
|
+
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
571
|
+
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
572
|
+
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
573
|
+
output_cols = []
|
574
|
+
for i, cl in enumerate(classes):
|
575
|
+
# For binary classification, there is only one output column for each class
|
576
|
+
# ndarray as the two classes are complementary.
|
577
|
+
if len(cl) == 2:
|
578
|
+
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
579
|
+
else:
|
580
|
+
output_cols.extend([
|
581
|
+
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
582
|
+
])
|
583
|
+
else:
|
584
|
+
output_cols = []
|
585
|
+
|
586
|
+
# Make sure column names are valid snowflake identifiers.
|
587
|
+
assert output_cols is not None # Make MyPy happy
|
588
|
+
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
589
|
+
|
590
|
+
return rv
|
591
|
+
|
592
|
+
def _align_expected_output_names(
|
593
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
594
|
+
) -> List[str]:
|
595
|
+
# in case the inferred output column names dimension is different
|
596
|
+
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
597
|
+
output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
|
598
|
+
output_df_columns = list(output_df_pd.columns)
|
599
|
+
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
600
|
+
if self.sample_weight_col:
|
601
|
+
output_df_columns_set -= set(self.sample_weight_col)
|
602
|
+
# if the dimension of inferred output column names is correct; use it
|
603
|
+
if len(expected_output_cols_list) == len(output_df_columns_set):
|
604
|
+
return expected_output_cols_list
|
605
|
+
# otherwise, use the sklearn estimator's output
|
606
|
+
else:
|
607
|
+
return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
608
|
+
|
555
609
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
556
610
|
@telemetry.send_api_usage_telemetry(
|
557
611
|
project=_PROJECT,
|
@@ -582,24 +636,28 @@ class AffinityPropagation(BaseTransformer):
|
|
582
636
|
# are specific to the type of dataset used.
|
583
637
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
584
638
|
|
639
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
640
|
+
|
585
641
|
if isinstance(dataset, DataFrame):
|
586
642
|
self._deps = self._batch_inference_validate_snowpark(
|
587
643
|
dataset=dataset,
|
588
644
|
inference_method=inference_method,
|
589
645
|
)
|
590
|
-
assert isinstance(
|
646
|
+
assert isinstance(
|
647
|
+
dataset._session, Session
|
648
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
591
649
|
transform_kwargs = dict(
|
592
650
|
session=dataset._session,
|
593
651
|
dependencies=self._deps,
|
594
|
-
drop_input_cols
|
652
|
+
drop_input_cols=self._drop_input_cols,
|
595
653
|
expected_output_cols_type="float",
|
596
654
|
)
|
655
|
+
expected_output_cols = self._align_expected_output_names(
|
656
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
657
|
+
)
|
597
658
|
|
598
659
|
elif isinstance(dataset, pd.DataFrame):
|
599
|
-
transform_kwargs = dict(
|
600
|
-
snowpark_input_cols = self._snowpark_cols,
|
601
|
-
drop_input_cols = self._drop_input_cols
|
602
|
-
)
|
660
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
603
661
|
|
604
662
|
transform_handlers = ModelTransformerBuilder.build(
|
605
663
|
dataset=dataset,
|
@@ -611,7 +669,7 @@ class AffinityPropagation(BaseTransformer):
|
|
611
669
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
612
670
|
inference_method=inference_method,
|
613
671
|
input_cols=self.input_cols,
|
614
|
-
expected_output_cols=
|
672
|
+
expected_output_cols=expected_output_cols,
|
615
673
|
**transform_kwargs
|
616
674
|
)
|
617
675
|
return output_df
|
@@ -641,7 +699,8 @@ class AffinityPropagation(BaseTransformer):
|
|
641
699
|
Output dataset with log probability of the sample for each class in the model.
|
642
700
|
"""
|
643
701
|
super()._check_dataset_type(dataset)
|
644
|
-
inference_method="predict_log_proba"
|
702
|
+
inference_method = "predict_log_proba"
|
703
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
645
704
|
|
646
705
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
647
706
|
# are specific to the type of dataset used.
|
@@ -652,18 +711,20 @@ class AffinityPropagation(BaseTransformer):
|
|
652
711
|
dataset=dataset,
|
653
712
|
inference_method=inference_method,
|
654
713
|
)
|
655
|
-
assert isinstance(
|
714
|
+
assert isinstance(
|
715
|
+
dataset._session, Session
|
716
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
656
717
|
transform_kwargs = dict(
|
657
718
|
session=dataset._session,
|
658
719
|
dependencies=self._deps,
|
659
|
-
drop_input_cols
|
720
|
+
drop_input_cols=self._drop_input_cols,
|
660
721
|
expected_output_cols_type="float",
|
661
722
|
)
|
723
|
+
expected_output_cols = self._align_expected_output_names(
|
724
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
725
|
+
)
|
662
726
|
elif isinstance(dataset, pd.DataFrame):
|
663
|
-
transform_kwargs = dict(
|
664
|
-
snowpark_input_cols = self._snowpark_cols,
|
665
|
-
drop_input_cols = self._drop_input_cols
|
666
|
-
)
|
727
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
667
728
|
|
668
729
|
transform_handlers = ModelTransformerBuilder.build(
|
669
730
|
dataset=dataset,
|
@@ -676,7 +737,7 @@ class AffinityPropagation(BaseTransformer):
|
|
676
737
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
677
738
|
inference_method=inference_method,
|
678
739
|
input_cols=self.input_cols,
|
679
|
-
expected_output_cols=
|
740
|
+
expected_output_cols=expected_output_cols,
|
680
741
|
**transform_kwargs
|
681
742
|
)
|
682
743
|
return output_df
|
@@ -702,30 +763,34 @@ class AffinityPropagation(BaseTransformer):
|
|
702
763
|
Output dataset with results of the decision function for the samples in input dataset.
|
703
764
|
"""
|
704
765
|
super()._check_dataset_type(dataset)
|
705
|
-
inference_method="decision_function"
|
766
|
+
inference_method = "decision_function"
|
706
767
|
|
707
768
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
708
769
|
# are specific to the type of dataset used.
|
709
770
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
710
771
|
|
772
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
773
|
+
|
711
774
|
if isinstance(dataset, DataFrame):
|
712
775
|
self._deps = self._batch_inference_validate_snowpark(
|
713
776
|
dataset=dataset,
|
714
777
|
inference_method=inference_method,
|
715
778
|
)
|
716
|
-
assert isinstance(
|
779
|
+
assert isinstance(
|
780
|
+
dataset._session, Session
|
781
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
717
782
|
transform_kwargs = dict(
|
718
783
|
session=dataset._session,
|
719
784
|
dependencies=self._deps,
|
720
|
-
drop_input_cols
|
785
|
+
drop_input_cols=self._drop_input_cols,
|
721
786
|
expected_output_cols_type="float",
|
722
787
|
)
|
788
|
+
expected_output_cols = self._align_expected_output_names(
|
789
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
790
|
+
)
|
723
791
|
|
724
792
|
elif isinstance(dataset, pd.DataFrame):
|
725
|
-
transform_kwargs = dict(
|
726
|
-
snowpark_input_cols = self._snowpark_cols,
|
727
|
-
drop_input_cols = self._drop_input_cols
|
728
|
-
)
|
793
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
729
794
|
|
730
795
|
transform_handlers = ModelTransformerBuilder.build(
|
731
796
|
dataset=dataset,
|
@@ -738,7 +803,7 @@ class AffinityPropagation(BaseTransformer):
|
|
738
803
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
739
804
|
inference_method=inference_method,
|
740
805
|
input_cols=self.input_cols,
|
741
|
-
expected_output_cols=
|
806
|
+
expected_output_cols=expected_output_cols,
|
742
807
|
**transform_kwargs
|
743
808
|
)
|
744
809
|
return output_df
|
@@ -767,12 +832,14 @@ class AffinityPropagation(BaseTransformer):
|
|
767
832
|
Output dataset with probability of the sample for each class in the model.
|
768
833
|
"""
|
769
834
|
super()._check_dataset_type(dataset)
|
770
|
-
inference_method="score_samples"
|
835
|
+
inference_method = "score_samples"
|
771
836
|
|
772
837
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
773
838
|
# are specific to the type of dataset used.
|
774
839
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
775
840
|
|
841
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
842
|
+
|
776
843
|
if isinstance(dataset, DataFrame):
|
777
844
|
self._deps = self._batch_inference_validate_snowpark(
|
778
845
|
dataset=dataset,
|
@@ -785,6 +852,9 @@ class AffinityPropagation(BaseTransformer):
|
|
785
852
|
drop_input_cols = self._drop_input_cols,
|
786
853
|
expected_output_cols_type="float",
|
787
854
|
)
|
855
|
+
expected_output_cols = self._align_expected_output_names(
|
856
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
857
|
+
)
|
788
858
|
|
789
859
|
elif isinstance(dataset, pd.DataFrame):
|
790
860
|
transform_kwargs = dict(
|
@@ -803,7 +873,7 @@ class AffinityPropagation(BaseTransformer):
|
|
803
873
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
804
874
|
inference_method=inference_method,
|
805
875
|
input_cols=self.input_cols,
|
806
|
-
expected_output_cols=
|
876
|
+
expected_output_cols=expected_output_cols,
|
807
877
|
**transform_kwargs
|
808
878
|
)
|
809
879
|
return output_df
|
@@ -948,50 +1018,84 @@ class AffinityPropagation(BaseTransformer):
|
|
948
1018
|
)
|
949
1019
|
return output_df
|
950
1020
|
|
1021
|
+
|
1022
|
+
|
1023
|
+
def to_sklearn(self) -> Any:
|
1024
|
+
"""Get sklearn.cluster.AffinityPropagation object.
|
1025
|
+
"""
|
1026
|
+
if self._sklearn_object is None:
|
1027
|
+
self._sklearn_object = self._create_sklearn_object()
|
1028
|
+
return self._sklearn_object
|
1029
|
+
|
1030
|
+
def to_xgboost(self) -> Any:
|
1031
|
+
raise exceptions.SnowflakeMLException(
|
1032
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1033
|
+
original_exception=AttributeError(
|
1034
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1035
|
+
"to_xgboost()",
|
1036
|
+
"to_sklearn()"
|
1037
|
+
)
|
1038
|
+
),
|
1039
|
+
)
|
1040
|
+
|
1041
|
+
def to_lightgbm(self) -> Any:
|
1042
|
+
raise exceptions.SnowflakeMLException(
|
1043
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1044
|
+
original_exception=AttributeError(
|
1045
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1046
|
+
"to_lightgbm()",
|
1047
|
+
"to_sklearn()"
|
1048
|
+
)
|
1049
|
+
),
|
1050
|
+
)
|
951
1051
|
|
952
|
-
def
|
1052
|
+
def _get_dependencies(self) -> List[str]:
|
1053
|
+
return self._deps
|
1054
|
+
|
1055
|
+
|
1056
|
+
def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
953
1057
|
self._model_signature_dict = dict()
|
954
1058
|
|
955
1059
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
956
1060
|
|
957
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input"))
|
1061
|
+
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
958
1062
|
outputs: List[BaseFeatureSpec] = []
|
959
1063
|
if hasattr(self, "predict"):
|
960
1064
|
# keep mypy happy
|
961
|
-
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1065
|
+
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
962
1066
|
# For classifier, the type of predict is the same as the type of label
|
963
|
-
if self._sklearn_object._estimator_type ==
|
964
|
-
|
1067
|
+
if self._sklearn_object._estimator_type == "classifier":
|
1068
|
+
# label columns is the desired type for output
|
965
1069
|
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
966
1070
|
# rename the output columns
|
967
1071
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
968
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
969
|
-
|
970
|
-
|
1072
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1073
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1074
|
+
)
|
971
1075
|
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
972
1076
|
# For outlier models, returns -1 for outliers and 1 for inliers.
|
973
|
-
# Clusterer returns int64 cluster labels.
|
1077
|
+
# Clusterer returns int64 cluster labels.
|
974
1078
|
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
975
1079
|
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
976
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
977
|
-
|
978
|
-
|
979
|
-
|
1080
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1081
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1082
|
+
)
|
1083
|
+
|
980
1084
|
# For regressor, the type of predict is float64
|
981
|
-
elif self._sklearn_object._estimator_type ==
|
1085
|
+
elif self._sklearn_object._estimator_type == "regressor":
|
982
1086
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
983
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
984
|
-
|
985
|
-
|
986
|
-
|
1087
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1088
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1089
|
+
)
|
1090
|
+
|
987
1091
|
for prob_func in PROB_FUNCTIONS:
|
988
1092
|
if hasattr(self, prob_func):
|
989
1093
|
output_cols_prefix: str = f"{prob_func}_"
|
990
1094
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
991
1095
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
992
|
-
self._model_signature_dict[prob_func] = ModelSignature(
|
993
|
-
|
994
|
-
|
1096
|
+
self._model_signature_dict[prob_func] = ModelSignature(
|
1097
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1098
|
+
)
|
995
1099
|
|
996
1100
|
# Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
|
997
1101
|
items = list(self._model_signature_dict.items())
|
@@ -1004,10 +1108,10 @@ class AffinityPropagation(BaseTransformer):
|
|
1004
1108
|
"""Returns model signature of current class.
|
1005
1109
|
|
1006
1110
|
Raises:
|
1007
|
-
|
1111
|
+
SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
|
1008
1112
|
|
1009
1113
|
Returns:
|
1010
|
-
Dict
|
1114
|
+
Dict with each method and its input output signature
|
1011
1115
|
"""
|
1012
1116
|
if self._model_signature_dict is None:
|
1013
1117
|
raise exceptions.SnowflakeMLException(
|
@@ -1015,35 +1119,3 @@ class AffinityPropagation(BaseTransformer):
|
|
1015
1119
|
original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
|
1016
1120
|
)
|
1017
1121
|
return self._model_signature_dict
|
1018
|
-
|
1019
|
-
def to_sklearn(self) -> Any:
|
1020
|
-
"""Get sklearn.cluster.AffinityPropagation object.
|
1021
|
-
"""
|
1022
|
-
if self._sklearn_object is None:
|
1023
|
-
self._sklearn_object = self._create_sklearn_object()
|
1024
|
-
return self._sklearn_object
|
1025
|
-
|
1026
|
-
def to_xgboost(self) -> Any:
|
1027
|
-
raise exceptions.SnowflakeMLException(
|
1028
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1029
|
-
original_exception=AttributeError(
|
1030
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1031
|
-
"to_xgboost()",
|
1032
|
-
"to_sklearn()"
|
1033
|
-
)
|
1034
|
-
),
|
1035
|
-
)
|
1036
|
-
|
1037
|
-
def to_lightgbm(self) -> Any:
|
1038
|
-
raise exceptions.SnowflakeMLException(
|
1039
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1040
|
-
original_exception=AttributeError(
|
1041
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1042
|
-
"to_lightgbm()",
|
1043
|
-
"to_sklearn()"
|
1044
|
-
)
|
1045
|
-
),
|
1046
|
-
)
|
1047
|
-
|
1048
|
-
def _get_dependencies(self) -> List[str]:
|
1049
|
-
return self._deps
|