snowflake-ml-python 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +11 -1
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/feature_store/feature_store.py +151 -78
- snowflake/ml/feature_store/feature_view.py +12 -24
- snowflake/ml/fileset/sfcfs.py +56 -50
- snowflake/ml/fileset/stage_fs.py +48 -13
- snowflake/ml/model/_client/model/model_version_impl.py +2 -50
- snowflake/ml/model/_client/ops/model_ops.py +78 -29
- snowflake/ml/model/_client/sql/model.py +23 -2
- snowflake/ml/model/_client/sql/model_version.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +19 -54
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +8 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +36 -6
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -2
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +5 -5
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +195 -123
- snowflake/ml/modeling/cluster/affinity_propagation.py +195 -123
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +195 -123
- snowflake/ml/modeling/cluster/birch.py +195 -123
- snowflake/ml/modeling/cluster/bisecting_k_means.py +195 -123
- snowflake/ml/modeling/cluster/dbscan.py +195 -123
- snowflake/ml/modeling/cluster/feature_agglomeration.py +195 -123
- snowflake/ml/modeling/cluster/k_means.py +195 -123
- snowflake/ml/modeling/cluster/mean_shift.py +195 -123
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +195 -123
- snowflake/ml/modeling/cluster/optics.py +195 -123
- snowflake/ml/modeling/cluster/spectral_biclustering.py +195 -123
- snowflake/ml/modeling/cluster/spectral_clustering.py +195 -123
- snowflake/ml/modeling/cluster/spectral_coclustering.py +195 -123
- snowflake/ml/modeling/compose/column_transformer.py +195 -123
- snowflake/ml/modeling/compose/transformed_target_regressor.py +195 -123
- snowflake/ml/modeling/covariance/elliptic_envelope.py +195 -123
- snowflake/ml/modeling/covariance/empirical_covariance.py +195 -123
- snowflake/ml/modeling/covariance/graphical_lasso.py +195 -123
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +195 -123
- snowflake/ml/modeling/covariance/ledoit_wolf.py +195 -123
- snowflake/ml/modeling/covariance/min_cov_det.py +195 -123
- snowflake/ml/modeling/covariance/oas.py +195 -123
- snowflake/ml/modeling/covariance/shrunk_covariance.py +195 -123
- snowflake/ml/modeling/decomposition/dictionary_learning.py +195 -123
- snowflake/ml/modeling/decomposition/factor_analysis.py +195 -123
- snowflake/ml/modeling/decomposition/fast_ica.py +195 -123
- snowflake/ml/modeling/decomposition/incremental_pca.py +195 -123
- snowflake/ml/modeling/decomposition/kernel_pca.py +195 -123
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +195 -123
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +195 -123
- snowflake/ml/modeling/decomposition/pca.py +195 -123
- snowflake/ml/modeling/decomposition/sparse_pca.py +195 -123
- snowflake/ml/modeling/decomposition/truncated_svd.py +195 -123
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +195 -123
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +195 -123
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/bagging_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/bagging_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/isolation_forest.py +195 -123
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/stacking_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/voting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/voting_regressor.py +195 -123
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fdr.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fpr.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fwe.py +195 -123
- snowflake/ml/modeling/feature_selection/select_k_best.py +195 -123
- snowflake/ml/modeling/feature_selection/select_percentile.py +195 -123
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +195 -123
- snowflake/ml/modeling/feature_selection/variance_threshold.py +195 -123
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +9 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +195 -123
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +195 -123
- snowflake/ml/modeling/impute/iterative_imputer.py +195 -123
- snowflake/ml/modeling/impute/knn_imputer.py +195 -123
- snowflake/ml/modeling/impute/missing_indicator.py +195 -123
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +195 -123
- snowflake/ml/modeling/kernel_approximation/nystroem.py +195 -123
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +195 -123
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +195 -123
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +195 -123
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +195 -123
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +195 -123
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ard_regression.py +195 -123
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +195 -123
- snowflake/ml/modeling/linear_model/elastic_net.py +195 -123
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +195 -123
- snowflake/ml/modeling/linear_model/gamma_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/huber_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/lars.py +195 -123
- snowflake/ml/modeling/linear_model/lars_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +195 -123
- snowflake/ml/modeling/linear_model/linear_regression.py +195 -123
- snowflake/ml/modeling/linear_model/logistic_regression.py +195 -123
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +195 -123
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +195 -123
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/perceptron.py +195 -123
- snowflake/ml/modeling/linear_model/poisson_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ransac_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ridge.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_cv.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +195 -123
- snowflake/ml/modeling/manifold/isomap.py +195 -123
- snowflake/ml/modeling/manifold/mds.py +195 -123
- snowflake/ml/modeling/manifold/spectral_embedding.py +195 -123
- snowflake/ml/modeling/manifold/tsne.py +195 -123
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +195 -123
- snowflake/ml/modeling/mixture/gaussian_mixture.py +195 -123
- snowflake/ml/modeling/model_selection/grid_search_cv.py +42 -18
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +42 -18
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +195 -123
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +195 -123
- snowflake/ml/modeling/multiclass/output_code_classifier.py +195 -123
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/complement_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +195 -123
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +195 -123
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +195 -123
- snowflake/ml/modeling/neighbors/kernel_density.py +195 -123
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +195 -123
- snowflake/ml/modeling/neighbors/nearest_centroid.py +195 -123
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +195 -123
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +195 -123
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +195 -123
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +195 -123
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +195 -123
- snowflake/ml/modeling/neural_network/mlp_classifier.py +195 -123
- snowflake/ml/modeling/neural_network/mlp_regressor.py +195 -123
- snowflake/ml/modeling/pipeline/pipeline.py +4 -4
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +195 -123
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +195 -123
- snowflake/ml/modeling/semi_supervised/label_spreading.py +195 -123
- snowflake/ml/modeling/svm/linear_svc.py +195 -123
- snowflake/ml/modeling/svm/linear_svr.py +195 -123
- snowflake/ml/modeling/svm/nu_svc.py +195 -123
- snowflake/ml/modeling/svm/nu_svr.py +195 -123
- snowflake/ml/modeling/svm/svc.py +195 -123
- snowflake/ml/modeling/svm/svr.py +195 -123
- snowflake/ml/modeling/tree/decision_tree_classifier.py +195 -123
- snowflake/ml/modeling/tree/decision_tree_regressor.py +195 -123
- snowflake/ml/modeling/tree/extra_tree_classifier.py +195 -123
- snowflake/ml/modeling/tree/extra_tree_regressor.py +195 -123
- snowflake/ml/modeling/xgboost/xgb_classifier.py +195 -123
- snowflake/ml/modeling/xgboost/xgb_regressor.py +195 -123
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +195 -123
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +195 -123
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/METADATA +68 -57
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/RECORD +202 -200
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
|
|
33
33
|
BatchInferenceKwargsTypedDict,
|
34
34
|
ScoreKwargsTypedDict
|
35
35
|
)
|
36
|
+
from snowflake.ml.model._signatures import utils as model_signature_utils
|
37
|
+
from snowflake.ml.model.model_signature import (
|
38
|
+
BaseFeatureSpec,
|
39
|
+
DataType,
|
40
|
+
FeatureSpec,
|
41
|
+
ModelSignature,
|
42
|
+
_infer_signature,
|
43
|
+
_rename_signature_with_snowflake_identifiers,
|
44
|
+
)
|
36
45
|
|
37
46
|
from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
|
38
47
|
|
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
|
|
43
52
|
validate_sklearn_args,
|
44
53
|
)
|
45
54
|
|
46
|
-
from snowflake.ml.model.model_signature import (
|
47
|
-
DataType,
|
48
|
-
FeatureSpec,
|
49
|
-
ModelSignature,
|
50
|
-
_infer_signature,
|
51
|
-
_rename_signature_with_snowflake_identifiers,
|
52
|
-
BaseFeatureSpec,
|
53
|
-
)
|
54
|
-
from snowflake.ml.model._signatures import utils as model_signature_utils
|
55
|
-
|
56
55
|
_PROJECT = "ModelDevelopment"
|
57
56
|
# Derive subproject from module name by removing "sklearn"
|
58
57
|
# and converting module name from underscore to CamelCase
|
@@ -301,12 +300,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
301
300
|
)
|
302
301
|
return selected_cols
|
303
302
|
|
304
|
-
|
305
|
-
project=_PROJECT,
|
306
|
-
subproject=_SUBPROJECT,
|
307
|
-
custom_tags=dict([("autogen", True)]),
|
308
|
-
)
|
309
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "PassiveAggressiveClassifier":
|
303
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "PassiveAggressiveClassifier":
|
310
304
|
"""Fit linear model with Passive Aggressive algorithm
|
311
305
|
For more details on this function, see [sklearn.linear_model.PassiveAggressiveClassifier.fit]
|
312
306
|
(https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.PassiveAggressiveClassifier.html#sklearn.linear_model.PassiveAggressiveClassifier.fit)
|
@@ -333,12 +327,14 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
333
327
|
|
334
328
|
self._snowpark_cols = dataset.select(self.input_cols).columns
|
335
329
|
|
336
|
-
|
330
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
337
331
|
if SNOWML_SPROC_ENV in os.environ:
|
338
332
|
statement_params = telemetry.get_function_usage_statement_params(
|
339
333
|
project=_PROJECT,
|
340
334
|
subproject=_SUBPROJECT,
|
341
|
-
function_name=telemetry.get_statement_params_full_func_name(
|
335
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
336
|
+
inspect.currentframe(), PassiveAggressiveClassifier.__class__.__name__
|
337
|
+
),
|
342
338
|
api_calls=[Session.call],
|
343
339
|
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
344
340
|
)
|
@@ -359,7 +355,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
359
355
|
)
|
360
356
|
self._sklearn_object = model_trainer.train()
|
361
357
|
self._is_fitted = True
|
362
|
-
self.
|
358
|
+
self._generate_model_signatures(dataset)
|
363
359
|
return self
|
364
360
|
|
365
361
|
def _batch_inference_validate_snowpark(
|
@@ -435,7 +431,9 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
435
431
|
# when it is classifier, infer the datatype from label columns
|
436
432
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
437
433
|
# Batch inference takes a single expected output column type. Use the first columns type for now.
|
438
|
-
label_cols_signatures = [
|
434
|
+
label_cols_signatures = [
|
435
|
+
row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
|
436
|
+
]
|
439
437
|
if len(label_cols_signatures) == 0:
|
440
438
|
error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
|
441
439
|
raise exceptions.SnowflakeMLException(
|
@@ -443,25 +441,22 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
443
441
|
original_exception=ValueError(error_str),
|
444
442
|
)
|
445
443
|
|
446
|
-
expected_type_inferred = convert_sp_to_sf_type(
|
447
|
-
label_cols_signatures[0].as_snowpark_type()
|
448
|
-
)
|
444
|
+
expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
|
449
445
|
|
450
446
|
self._deps = self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
451
|
-
assert isinstance(
|
447
|
+
assert isinstance(
|
448
|
+
dataset._session, Session
|
449
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
452
450
|
|
453
451
|
transform_kwargs = dict(
|
454
|
-
session
|
455
|
-
dependencies
|
456
|
-
drop_input_cols
|
457
|
-
expected_output_cols_type
|
452
|
+
session=dataset._session,
|
453
|
+
dependencies=self._deps,
|
454
|
+
drop_input_cols=self._drop_input_cols,
|
455
|
+
expected_output_cols_type=expected_type_inferred,
|
458
456
|
)
|
459
457
|
|
460
458
|
elif isinstance(dataset, pd.DataFrame):
|
461
|
-
transform_kwargs = dict(
|
462
|
-
snowpark_input_cols = self._snowpark_cols,
|
463
|
-
drop_input_cols = self._drop_input_cols
|
464
|
-
)
|
459
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
465
460
|
|
466
461
|
transform_handlers = ModelTransformerBuilder.build(
|
467
462
|
dataset=dataset,
|
@@ -501,7 +496,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
501
496
|
Transformed dataset.
|
502
497
|
"""
|
503
498
|
super()._check_dataset_type(dataset)
|
504
|
-
inference_method="transform"
|
499
|
+
inference_method = "transform"
|
505
500
|
|
506
501
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
507
502
|
# are specific to the type of dataset used.
|
@@ -538,17 +533,14 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
538
533
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
539
534
|
|
540
535
|
transform_kwargs = dict(
|
541
|
-
session
|
542
|
-
dependencies
|
543
|
-
drop_input_cols
|
544
|
-
expected_output_cols_type
|
536
|
+
session=dataset._session,
|
537
|
+
dependencies=self._deps,
|
538
|
+
drop_input_cols=self._drop_input_cols,
|
539
|
+
expected_output_cols_type=expected_dtype,
|
545
540
|
)
|
546
541
|
|
547
542
|
elif isinstance(dataset, pd.DataFrame):
|
548
|
-
transform_kwargs = dict(
|
549
|
-
snowpark_input_cols = self._snowpark_cols,
|
550
|
-
drop_input_cols = self._drop_input_cols
|
551
|
-
)
|
543
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
552
544
|
|
553
545
|
transform_handlers = ModelTransformerBuilder.build(
|
554
546
|
dataset=dataset,
|
@@ -567,7 +559,11 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
567
559
|
return output_df
|
568
560
|
|
569
561
|
@available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
|
570
|
-
def fit_predict(
|
562
|
+
def fit_predict(
|
563
|
+
self,
|
564
|
+
dataset: Union[DataFrame, pd.DataFrame],
|
565
|
+
output_cols_prefix: str = "fit_predict_",
|
566
|
+
) -> Union[DataFrame, pd.DataFrame]:
|
571
567
|
""" Method not supported for this class.
|
572
568
|
|
573
569
|
|
@@ -592,7 +588,9 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
592
588
|
)
|
593
589
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
594
590
|
drop_input_cols=self._drop_input_cols,
|
595
|
-
expected_output_cols_list=
|
591
|
+
expected_output_cols_list=(
|
592
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
593
|
+
),
|
596
594
|
)
|
597
595
|
self._sklearn_object = fitted_estimator
|
598
596
|
self._is_fitted = True
|
@@ -609,6 +607,62 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
609
607
|
assert self._sklearn_object is not None
|
610
608
|
return self._sklearn_object.embedding_
|
611
609
|
|
610
|
+
|
611
|
+
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
612
|
+
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
613
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
614
|
+
"""
|
615
|
+
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
616
|
+
# The following condition is introduced for kneighbors methods, and not used in other methods
|
617
|
+
if output_cols:
|
618
|
+
output_cols = [
|
619
|
+
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
620
|
+
for c in output_cols
|
621
|
+
]
|
622
|
+
elif getattr(self._sklearn_object, "classes_", None) is None:
|
623
|
+
output_cols = [output_cols_prefix]
|
624
|
+
elif self._sklearn_object is not None:
|
625
|
+
classes = self._sklearn_object.classes_
|
626
|
+
if isinstance(classes, numpy.ndarray):
|
627
|
+
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
628
|
+
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
629
|
+
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
630
|
+
output_cols = []
|
631
|
+
for i, cl in enumerate(classes):
|
632
|
+
# For binary classification, there is only one output column for each class
|
633
|
+
# ndarray as the two classes are complementary.
|
634
|
+
if len(cl) == 2:
|
635
|
+
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
636
|
+
else:
|
637
|
+
output_cols.extend([
|
638
|
+
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
639
|
+
])
|
640
|
+
else:
|
641
|
+
output_cols = []
|
642
|
+
|
643
|
+
# Make sure column names are valid snowflake identifiers.
|
644
|
+
assert output_cols is not None # Make MyPy happy
|
645
|
+
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
646
|
+
|
647
|
+
return rv
|
648
|
+
|
649
|
+
def _align_expected_output_names(
|
650
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
651
|
+
) -> List[str]:
|
652
|
+
# in case the inferred output column names dimension is different
|
653
|
+
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
654
|
+
output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
|
655
|
+
output_df_columns = list(output_df_pd.columns)
|
656
|
+
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
657
|
+
if self.sample_weight_col:
|
658
|
+
output_df_columns_set -= set(self.sample_weight_col)
|
659
|
+
# if the dimension of inferred output column names is correct; use it
|
660
|
+
if len(expected_output_cols_list) == len(output_df_columns_set):
|
661
|
+
return expected_output_cols_list
|
662
|
+
# otherwise, use the sklearn estimator's output
|
663
|
+
else:
|
664
|
+
return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
665
|
+
|
612
666
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
613
667
|
@telemetry.send_api_usage_telemetry(
|
614
668
|
project=_PROJECT,
|
@@ -639,24 +693,28 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
639
693
|
# are specific to the type of dataset used.
|
640
694
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
641
695
|
|
696
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
697
|
+
|
642
698
|
if isinstance(dataset, DataFrame):
|
643
699
|
self._deps = self._batch_inference_validate_snowpark(
|
644
700
|
dataset=dataset,
|
645
701
|
inference_method=inference_method,
|
646
702
|
)
|
647
|
-
assert isinstance(
|
703
|
+
assert isinstance(
|
704
|
+
dataset._session, Session
|
705
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
648
706
|
transform_kwargs = dict(
|
649
707
|
session=dataset._session,
|
650
708
|
dependencies=self._deps,
|
651
|
-
drop_input_cols
|
709
|
+
drop_input_cols=self._drop_input_cols,
|
652
710
|
expected_output_cols_type="float",
|
653
711
|
)
|
712
|
+
expected_output_cols = self._align_expected_output_names(
|
713
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
714
|
+
)
|
654
715
|
|
655
716
|
elif isinstance(dataset, pd.DataFrame):
|
656
|
-
transform_kwargs = dict(
|
657
|
-
snowpark_input_cols = self._snowpark_cols,
|
658
|
-
drop_input_cols = self._drop_input_cols
|
659
|
-
)
|
717
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
660
718
|
|
661
719
|
transform_handlers = ModelTransformerBuilder.build(
|
662
720
|
dataset=dataset,
|
@@ -668,7 +726,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
668
726
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
669
727
|
inference_method=inference_method,
|
670
728
|
input_cols=self.input_cols,
|
671
|
-
expected_output_cols=
|
729
|
+
expected_output_cols=expected_output_cols,
|
672
730
|
**transform_kwargs
|
673
731
|
)
|
674
732
|
return output_df
|
@@ -698,7 +756,8 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
698
756
|
Output dataset with log probability of the sample for each class in the model.
|
699
757
|
"""
|
700
758
|
super()._check_dataset_type(dataset)
|
701
|
-
inference_method="predict_log_proba"
|
759
|
+
inference_method = "predict_log_proba"
|
760
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
702
761
|
|
703
762
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
704
763
|
# are specific to the type of dataset used.
|
@@ -709,18 +768,20 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
709
768
|
dataset=dataset,
|
710
769
|
inference_method=inference_method,
|
711
770
|
)
|
712
|
-
assert isinstance(
|
771
|
+
assert isinstance(
|
772
|
+
dataset._session, Session
|
773
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
713
774
|
transform_kwargs = dict(
|
714
775
|
session=dataset._session,
|
715
776
|
dependencies=self._deps,
|
716
|
-
drop_input_cols
|
777
|
+
drop_input_cols=self._drop_input_cols,
|
717
778
|
expected_output_cols_type="float",
|
718
779
|
)
|
780
|
+
expected_output_cols = self._align_expected_output_names(
|
781
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
782
|
+
)
|
719
783
|
elif isinstance(dataset, pd.DataFrame):
|
720
|
-
transform_kwargs = dict(
|
721
|
-
snowpark_input_cols = self._snowpark_cols,
|
722
|
-
drop_input_cols = self._drop_input_cols
|
723
|
-
)
|
784
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
724
785
|
|
725
786
|
transform_handlers = ModelTransformerBuilder.build(
|
726
787
|
dataset=dataset,
|
@@ -733,7 +794,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
733
794
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
734
795
|
inference_method=inference_method,
|
735
796
|
input_cols=self.input_cols,
|
736
|
-
expected_output_cols=
|
797
|
+
expected_output_cols=expected_output_cols,
|
737
798
|
**transform_kwargs
|
738
799
|
)
|
739
800
|
return output_df
|
@@ -761,30 +822,34 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
761
822
|
Output dataset with results of the decision function for the samples in input dataset.
|
762
823
|
"""
|
763
824
|
super()._check_dataset_type(dataset)
|
764
|
-
inference_method="decision_function"
|
825
|
+
inference_method = "decision_function"
|
765
826
|
|
766
827
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
767
828
|
# are specific to the type of dataset used.
|
768
829
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
769
830
|
|
831
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
832
|
+
|
770
833
|
if isinstance(dataset, DataFrame):
|
771
834
|
self._deps = self._batch_inference_validate_snowpark(
|
772
835
|
dataset=dataset,
|
773
836
|
inference_method=inference_method,
|
774
837
|
)
|
775
|
-
assert isinstance(
|
838
|
+
assert isinstance(
|
839
|
+
dataset._session, Session
|
840
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
776
841
|
transform_kwargs = dict(
|
777
842
|
session=dataset._session,
|
778
843
|
dependencies=self._deps,
|
779
|
-
drop_input_cols
|
844
|
+
drop_input_cols=self._drop_input_cols,
|
780
845
|
expected_output_cols_type="float",
|
781
846
|
)
|
847
|
+
expected_output_cols = self._align_expected_output_names(
|
848
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
849
|
+
)
|
782
850
|
|
783
851
|
elif isinstance(dataset, pd.DataFrame):
|
784
|
-
transform_kwargs = dict(
|
785
|
-
snowpark_input_cols = self._snowpark_cols,
|
786
|
-
drop_input_cols = self._drop_input_cols
|
787
|
-
)
|
852
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
788
853
|
|
789
854
|
transform_handlers = ModelTransformerBuilder.build(
|
790
855
|
dataset=dataset,
|
@@ -797,7 +862,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
797
862
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
798
863
|
inference_method=inference_method,
|
799
864
|
input_cols=self.input_cols,
|
800
|
-
expected_output_cols=
|
865
|
+
expected_output_cols=expected_output_cols,
|
801
866
|
**transform_kwargs
|
802
867
|
)
|
803
868
|
return output_df
|
@@ -826,12 +891,14 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
826
891
|
Output dataset with probability of the sample for each class in the model.
|
827
892
|
"""
|
828
893
|
super()._check_dataset_type(dataset)
|
829
|
-
inference_method="score_samples"
|
894
|
+
inference_method = "score_samples"
|
830
895
|
|
831
896
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
832
897
|
# are specific to the type of dataset used.
|
833
898
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
834
899
|
|
900
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
901
|
+
|
835
902
|
if isinstance(dataset, DataFrame):
|
836
903
|
self._deps = self._batch_inference_validate_snowpark(
|
837
904
|
dataset=dataset,
|
@@ -844,6 +911,9 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
844
911
|
drop_input_cols = self._drop_input_cols,
|
845
912
|
expected_output_cols_type="float",
|
846
913
|
)
|
914
|
+
expected_output_cols = self._align_expected_output_names(
|
915
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
916
|
+
)
|
847
917
|
|
848
918
|
elif isinstance(dataset, pd.DataFrame):
|
849
919
|
transform_kwargs = dict(
|
@@ -862,7 +932,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
862
932
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
863
933
|
inference_method=inference_method,
|
864
934
|
input_cols=self.input_cols,
|
865
|
-
expected_output_cols=
|
935
|
+
expected_output_cols=expected_output_cols,
|
866
936
|
**transform_kwargs
|
867
937
|
)
|
868
938
|
return output_df
|
@@ -1009,50 +1079,84 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
1009
1079
|
)
|
1010
1080
|
return output_df
|
1011
1081
|
|
1082
|
+
|
1083
|
+
|
1084
|
+
def to_sklearn(self) -> Any:
|
1085
|
+
"""Get sklearn.linear_model.PassiveAggressiveClassifier object.
|
1086
|
+
"""
|
1087
|
+
if self._sklearn_object is None:
|
1088
|
+
self._sklearn_object = self._create_sklearn_object()
|
1089
|
+
return self._sklearn_object
|
1090
|
+
|
1091
|
+
def to_xgboost(self) -> Any:
|
1092
|
+
raise exceptions.SnowflakeMLException(
|
1093
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1094
|
+
original_exception=AttributeError(
|
1095
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1096
|
+
"to_xgboost()",
|
1097
|
+
"to_sklearn()"
|
1098
|
+
)
|
1099
|
+
),
|
1100
|
+
)
|
1101
|
+
|
1102
|
+
def to_lightgbm(self) -> Any:
|
1103
|
+
raise exceptions.SnowflakeMLException(
|
1104
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1105
|
+
original_exception=AttributeError(
|
1106
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1107
|
+
"to_lightgbm()",
|
1108
|
+
"to_sklearn()"
|
1109
|
+
)
|
1110
|
+
),
|
1111
|
+
)
|
1012
1112
|
|
1013
|
-
def
|
1113
|
+
def _get_dependencies(self) -> List[str]:
|
1114
|
+
return self._deps
|
1115
|
+
|
1116
|
+
|
1117
|
+
def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
1014
1118
|
self._model_signature_dict = dict()
|
1015
1119
|
|
1016
1120
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1017
1121
|
|
1018
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input"))
|
1122
|
+
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1019
1123
|
outputs: List[BaseFeatureSpec] = []
|
1020
1124
|
if hasattr(self, "predict"):
|
1021
1125
|
# keep mypy happy
|
1022
|
-
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1126
|
+
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1023
1127
|
# For classifier, the type of predict is the same as the type of label
|
1024
|
-
if self._sklearn_object._estimator_type ==
|
1025
|
-
|
1128
|
+
if self._sklearn_object._estimator_type == "classifier":
|
1129
|
+
# label columns is the desired type for output
|
1026
1130
|
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1027
1131
|
# rename the output columns
|
1028
1132
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1029
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1030
|
-
|
1031
|
-
|
1133
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1134
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1135
|
+
)
|
1032
1136
|
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
1033
1137
|
# For outlier models, returns -1 for outliers and 1 for inliers.
|
1034
|
-
# Clusterer returns int64 cluster labels.
|
1138
|
+
# Clusterer returns int64 cluster labels.
|
1035
1139
|
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
1036
1140
|
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
1037
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1038
|
-
|
1039
|
-
|
1040
|
-
|
1141
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1142
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1143
|
+
)
|
1144
|
+
|
1041
1145
|
# For regressor, the type of predict is float64
|
1042
|
-
elif self._sklearn_object._estimator_type ==
|
1146
|
+
elif self._sklearn_object._estimator_type == "regressor":
|
1043
1147
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1044
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1045
|
-
|
1046
|
-
|
1047
|
-
|
1148
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1149
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1150
|
+
)
|
1151
|
+
|
1048
1152
|
for prob_func in PROB_FUNCTIONS:
|
1049
1153
|
if hasattr(self, prob_func):
|
1050
1154
|
output_cols_prefix: str = f"{prob_func}_"
|
1051
1155
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1052
1156
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1053
|
-
self._model_signature_dict[prob_func] = ModelSignature(
|
1054
|
-
|
1055
|
-
|
1157
|
+
self._model_signature_dict[prob_func] = ModelSignature(
|
1158
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1159
|
+
)
|
1056
1160
|
|
1057
1161
|
# Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
|
1058
1162
|
items = list(self._model_signature_dict.items())
|
@@ -1065,10 +1169,10 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
1065
1169
|
"""Returns model signature of current class.
|
1066
1170
|
|
1067
1171
|
Raises:
|
1068
|
-
|
1172
|
+
SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
|
1069
1173
|
|
1070
1174
|
Returns:
|
1071
|
-
Dict
|
1175
|
+
Dict with each method and its input output signature
|
1072
1176
|
"""
|
1073
1177
|
if self._model_signature_dict is None:
|
1074
1178
|
raise exceptions.SnowflakeMLException(
|
@@ -1076,35 +1180,3 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
1076
1180
|
original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
|
1077
1181
|
)
|
1078
1182
|
return self._model_signature_dict
|
1079
|
-
|
1080
|
-
def to_sklearn(self) -> Any:
|
1081
|
-
"""Get sklearn.linear_model.PassiveAggressiveClassifier object.
|
1082
|
-
"""
|
1083
|
-
if self._sklearn_object is None:
|
1084
|
-
self._sklearn_object = self._create_sklearn_object()
|
1085
|
-
return self._sklearn_object
|
1086
|
-
|
1087
|
-
def to_xgboost(self) -> Any:
|
1088
|
-
raise exceptions.SnowflakeMLException(
|
1089
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1090
|
-
original_exception=AttributeError(
|
1091
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1092
|
-
"to_xgboost()",
|
1093
|
-
"to_sklearn()"
|
1094
|
-
)
|
1095
|
-
),
|
1096
|
-
)
|
1097
|
-
|
1098
|
-
def to_lightgbm(self) -> Any:
|
1099
|
-
raise exceptions.SnowflakeMLException(
|
1100
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1101
|
-
original_exception=AttributeError(
|
1102
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1103
|
-
"to_lightgbm()",
|
1104
|
-
"to_sklearn()"
|
1105
|
-
)
|
1106
|
-
),
|
1107
|
-
)
|
1108
|
-
|
1109
|
-
def _get_dependencies(self) -> List[str]:
|
1110
|
-
return self._deps
|