snowflake-ml-python 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +11 -1
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/feature_store/feature_store.py +151 -78
- snowflake/ml/feature_store/feature_view.py +12 -24
- snowflake/ml/fileset/sfcfs.py +56 -50
- snowflake/ml/fileset/stage_fs.py +48 -13
- snowflake/ml/model/_client/model/model_version_impl.py +2 -50
- snowflake/ml/model/_client/ops/model_ops.py +78 -29
- snowflake/ml/model/_client/sql/model.py +23 -2
- snowflake/ml/model/_client/sql/model_version.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +19 -54
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +8 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +36 -6
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -2
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +5 -5
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +195 -123
- snowflake/ml/modeling/cluster/affinity_propagation.py +195 -123
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +195 -123
- snowflake/ml/modeling/cluster/birch.py +195 -123
- snowflake/ml/modeling/cluster/bisecting_k_means.py +195 -123
- snowflake/ml/modeling/cluster/dbscan.py +195 -123
- snowflake/ml/modeling/cluster/feature_agglomeration.py +195 -123
- snowflake/ml/modeling/cluster/k_means.py +195 -123
- snowflake/ml/modeling/cluster/mean_shift.py +195 -123
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +195 -123
- snowflake/ml/modeling/cluster/optics.py +195 -123
- snowflake/ml/modeling/cluster/spectral_biclustering.py +195 -123
- snowflake/ml/modeling/cluster/spectral_clustering.py +195 -123
- snowflake/ml/modeling/cluster/spectral_coclustering.py +195 -123
- snowflake/ml/modeling/compose/column_transformer.py +195 -123
- snowflake/ml/modeling/compose/transformed_target_regressor.py +195 -123
- snowflake/ml/modeling/covariance/elliptic_envelope.py +195 -123
- snowflake/ml/modeling/covariance/empirical_covariance.py +195 -123
- snowflake/ml/modeling/covariance/graphical_lasso.py +195 -123
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +195 -123
- snowflake/ml/modeling/covariance/ledoit_wolf.py +195 -123
- snowflake/ml/modeling/covariance/min_cov_det.py +195 -123
- snowflake/ml/modeling/covariance/oas.py +195 -123
- snowflake/ml/modeling/covariance/shrunk_covariance.py +195 -123
- snowflake/ml/modeling/decomposition/dictionary_learning.py +195 -123
- snowflake/ml/modeling/decomposition/factor_analysis.py +195 -123
- snowflake/ml/modeling/decomposition/fast_ica.py +195 -123
- snowflake/ml/modeling/decomposition/incremental_pca.py +195 -123
- snowflake/ml/modeling/decomposition/kernel_pca.py +195 -123
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +195 -123
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +195 -123
- snowflake/ml/modeling/decomposition/pca.py +195 -123
- snowflake/ml/modeling/decomposition/sparse_pca.py +195 -123
- snowflake/ml/modeling/decomposition/truncated_svd.py +195 -123
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +195 -123
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +195 -123
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/bagging_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/bagging_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/isolation_forest.py +195 -123
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/stacking_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/voting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/voting_regressor.py +195 -123
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fdr.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fpr.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fwe.py +195 -123
- snowflake/ml/modeling/feature_selection/select_k_best.py +195 -123
- snowflake/ml/modeling/feature_selection/select_percentile.py +195 -123
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +195 -123
- snowflake/ml/modeling/feature_selection/variance_threshold.py +195 -123
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +9 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +195 -123
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +195 -123
- snowflake/ml/modeling/impute/iterative_imputer.py +195 -123
- snowflake/ml/modeling/impute/knn_imputer.py +195 -123
- snowflake/ml/modeling/impute/missing_indicator.py +195 -123
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +195 -123
- snowflake/ml/modeling/kernel_approximation/nystroem.py +195 -123
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +195 -123
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +195 -123
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +195 -123
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +195 -123
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +195 -123
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ard_regression.py +195 -123
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +195 -123
- snowflake/ml/modeling/linear_model/elastic_net.py +195 -123
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +195 -123
- snowflake/ml/modeling/linear_model/gamma_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/huber_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/lars.py +195 -123
- snowflake/ml/modeling/linear_model/lars_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +195 -123
- snowflake/ml/modeling/linear_model/linear_regression.py +195 -123
- snowflake/ml/modeling/linear_model/logistic_regression.py +195 -123
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +195 -123
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +195 -123
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/perceptron.py +195 -123
- snowflake/ml/modeling/linear_model/poisson_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ransac_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ridge.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_cv.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +195 -123
- snowflake/ml/modeling/manifold/isomap.py +195 -123
- snowflake/ml/modeling/manifold/mds.py +195 -123
- snowflake/ml/modeling/manifold/spectral_embedding.py +195 -123
- snowflake/ml/modeling/manifold/tsne.py +195 -123
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +195 -123
- snowflake/ml/modeling/mixture/gaussian_mixture.py +195 -123
- snowflake/ml/modeling/model_selection/grid_search_cv.py +42 -18
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +42 -18
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +195 -123
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +195 -123
- snowflake/ml/modeling/multiclass/output_code_classifier.py +195 -123
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/complement_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +195 -123
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +195 -123
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +195 -123
- snowflake/ml/modeling/neighbors/kernel_density.py +195 -123
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +195 -123
- snowflake/ml/modeling/neighbors/nearest_centroid.py +195 -123
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +195 -123
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +195 -123
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +195 -123
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +195 -123
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +195 -123
- snowflake/ml/modeling/neural_network/mlp_classifier.py +195 -123
- snowflake/ml/modeling/neural_network/mlp_regressor.py +195 -123
- snowflake/ml/modeling/pipeline/pipeline.py +4 -4
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +195 -123
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +195 -123
- snowflake/ml/modeling/semi_supervised/label_spreading.py +195 -123
- snowflake/ml/modeling/svm/linear_svc.py +195 -123
- snowflake/ml/modeling/svm/linear_svr.py +195 -123
- snowflake/ml/modeling/svm/nu_svc.py +195 -123
- snowflake/ml/modeling/svm/nu_svr.py +195 -123
- snowflake/ml/modeling/svm/svc.py +195 -123
- snowflake/ml/modeling/svm/svr.py +195 -123
- snowflake/ml/modeling/tree/decision_tree_classifier.py +195 -123
- snowflake/ml/modeling/tree/decision_tree_regressor.py +195 -123
- snowflake/ml/modeling/tree/extra_tree_classifier.py +195 -123
- snowflake/ml/modeling/tree/extra_tree_regressor.py +195 -123
- snowflake/ml/modeling/xgboost/xgb_classifier.py +195 -123
- snowflake/ml/modeling/xgboost/xgb_regressor.py +195 -123
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +195 -123
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +195 -123
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/METADATA +68 -57
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/RECORD +202 -200
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/top_level.txt +0 -0
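Of note in the file list above: the new packager handlers (snowflake/ml/model/_packager/model_handlers/catboost.py and lightgbm.py) indicate that native CatBoost and LightGBM models can be logged to the model registry in 1.4.1. A minimal, hedged sketch of that workflow, assuming an already-created Snowpark `session`; the model and object names below are illustrative and not taken from this diff:

```python
import lightgbm
import pandas as pd
from sklearn.datasets import make_regression
from snowflake.ml.registry import Registry

# Train a plain LightGBM model locally.
X, y = make_regression(n_samples=100, n_features=4, random_state=0)
X = pd.DataFrame(X, columns=[f"X{i}" for i in range(4)])
model = lightgbm.LGBMRegressor().fit(X, y)

# `session` is assumed to be an existing snowflake.snowpark.Session.
registry = Registry(session=session)
registry.log_model(
    model,
    model_name="MY_LGBM_MODEL",     # illustrative name
    version_name="V1",
    sample_input_data=X.head(5),    # used to infer the model signature
)
```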
--- a/snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py
+++ b/snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
     BatchInferenceKwargsTypedDict,
     ScoreKwargsTypedDict
 )
+from snowflake.ml.model._signatures import utils as model_signature_utils
+from snowflake.ml.model.model_signature import (
+    BaseFeatureSpec,
+    DataType,
+    FeatureSpec,
+    ModelSignature,
+    _infer_signature,
+    _rename_signature_with_snowflake_identifiers,
+)
 
 from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
 
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     validate_sklearn_args,
 )
 
-from snowflake.ml.model.model_signature import (
-    DataType,
-    FeatureSpec,
-    ModelSignature,
-    _infer_signature,
-    _rename_signature_with_snowflake_identifiers,
-    BaseFeatureSpec,
-)
-from snowflake.ml.model._signatures import utils as model_signature_utils
-
 _PROJECT = "ModelDevelopment"
 # Derive subproject from module name by removing "sklearn"
 # and converting module name from underscore to CamelCase
@@ -287,12 +286,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
         )
         return selected_cols
 
-    @telemetry.send_api_usage_telemetry(
-        project=_PROJECT,
-        subproject=_SUBPROJECT,
-        custom_tags=dict([("autogen", True)]),
-    )
-    def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "PassiveAggressiveRegressor":
+    def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "PassiveAggressiveRegressor":
         """Fit linear model with Passive Aggressive algorithm
         For more details on this function, see [sklearn.linear_model.PassiveAggressiveRegressor.fit]
         (https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.PassiveAggressiveRegressor.html#sklearn.linear_model.PassiveAggressiveRegressor.fit)
@@ -319,12 +313,14 @@ class PassiveAggressiveRegressor(BaseTransformer):
 
         self._snowpark_cols = dataset.select(self.input_cols).columns
 
-
+        # If we are already in a stored procedure, no need to kick off another one.
         if SNOWML_SPROC_ENV in os.environ:
             statement_params = telemetry.get_function_usage_statement_params(
                 project=_PROJECT,
                 subproject=_SUBPROJECT,
-                function_name=telemetry.get_statement_params_full_func_name(inspect.currentframe(), PassiveAggressiveRegressor.__class__.__name__),
+                function_name=telemetry.get_statement_params_full_func_name(
+                    inspect.currentframe(), PassiveAggressiveRegressor.__class__.__name__
+                ),
                 api_calls=[Session.call],
                 custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
             )
@@ -345,7 +341,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
         )
         self._sklearn_object = model_trainer.train()
         self._is_fitted = True
-        self.
+        self._generate_model_signatures(dataset)
         return self
 
     def _batch_inference_validate_snowpark(
@@ -421,7 +417,9 @@ class PassiveAggressiveRegressor(BaseTransformer):
             # when it is classifier, infer the datatype from label columns
             if expected_type_inferred == "" and 'predict' in self.model_signatures:
                 # Batch inference takes a single expected output column type. Use the first columns type for now.
-                label_cols_signatures = [row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols]
+                label_cols_signatures = [
+                    row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
+                ]
                 if len(label_cols_signatures) == 0:
                     error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
                     raise exceptions.SnowflakeMLException(
@@ -429,25 +427,22 @@ class PassiveAggressiveRegressor(BaseTransformer):
                         original_exception=ValueError(error_str),
                     )
 
-                expected_type_inferred = convert_sp_to_sf_type(
-                    label_cols_signatures[0].as_snowpark_type()
-                )
+                expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
 
             self._deps = self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
-            assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
+            assert isinstance(
+                dataset._session, Session
+            ) # mypy does not recognize the check in _batch_inference_validate_snowpark()
 
             transform_kwargs = dict(
-                session = dataset._session,
-                dependencies = self._deps,
-                drop_input_cols = self._drop_input_cols,
-                expected_output_cols_type = expected_type_inferred,
+                session=dataset._session,
+                dependencies=self._deps,
+                drop_input_cols=self._drop_input_cols,
+                expected_output_cols_type=expected_type_inferred,
             )
 
         elif isinstance(dataset, pd.DataFrame):
-            transform_kwargs = dict(
-                snowpark_input_cols = self._snowpark_cols,
-                drop_input_cols = self._drop_input_cols
-            )
+            transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
 
         transform_handlers = ModelTransformerBuilder.build(
             dataset=dataset,
@@ -487,7 +482,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
             Transformed dataset.
         """
         super()._check_dataset_type(dataset)
-        inference_method="transform"
+        inference_method = "transform"
 
         # This dictionary contains optional kwargs for batch inference. These kwargs
         # are specific to the type of dataset used.
@@ -524,17 +519,14 @@ class PassiveAggressiveRegressor(BaseTransformer):
             assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
 
             transform_kwargs = dict(
-                session = dataset._session,
-                dependencies = self._deps,
-                drop_input_cols = self._drop_input_cols,
-                expected_output_cols_type = expected_dtype,
+                session=dataset._session,
+                dependencies=self._deps,
+                drop_input_cols=self._drop_input_cols,
+                expected_output_cols_type=expected_dtype,
             )
 
         elif isinstance(dataset, pd.DataFrame):
-            transform_kwargs = dict(
-                snowpark_input_cols = self._snowpark_cols,
-                drop_input_cols = self._drop_input_cols
-            )
+            transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
 
         transform_handlers = ModelTransformerBuilder.build(
             dataset=dataset,
@@ -553,7 +545,11 @@ class PassiveAggressiveRegressor(BaseTransformer):
         return output_df
 
     @available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
-    def fit_predict(
+    def fit_predict(
+        self,
+        dataset: Union[DataFrame, pd.DataFrame],
+        output_cols_prefix: str = "fit_predict_",
+    ) -> Union[DataFrame, pd.DataFrame]:
         """ Method not supported for this class.
 
 
@@ -578,7 +574,9 @@ class PassiveAggressiveRegressor(BaseTransformer):
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
             drop_input_cols=self._drop_input_cols,
-            expected_output_cols_list=
+            expected_output_cols_list=(
+                self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
+            ),
         )
         self._sklearn_object = fitted_estimator
         self._is_fitted = True
@@ -595,6 +593,62 @@ class PassiveAggressiveRegressor(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
+
+    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
+        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
+        """
+        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
+        # The following condition is introduced for kneighbors methods, and not used in other methods
+        if output_cols:
+            output_cols = [
+                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
+                for c in output_cols
+            ]
+        elif getattr(self._sklearn_object, "classes_", None) is None:
+            output_cols = [output_cols_prefix]
+        elif self._sklearn_object is not None:
+            classes = self._sklearn_object.classes_
+            if isinstance(classes, numpy.ndarray):
+                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
+            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
+                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
+                output_cols = []
+                for i, cl in enumerate(classes):
+                    # For binary classification, there is only one output column for each class
+                    # ndarray as the two classes are complementary.
+                    if len(cl) == 2:
+                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
+                    else:
+                        output_cols.extend([
+                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
+                        ])
+        else:
+            output_cols = []
+
+        # Make sure column names are valid snowflake identifiers.
+        assert output_cols is not None # Make MyPy happy
+        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
+
+        return rv
+
+    def _align_expected_output_names(
+        self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
+    ) -> List[str]:
+        # in case the inferred output column names dimension is different
+        # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
+        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        output_df_columns = list(output_df_pd.columns)
+        output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
+        if self.sample_weight_col:
+            output_df_columns_set -= set(self.sample_weight_col)
+        # if the dimension of inferred output column names is correct; use it
+        if len(expected_output_cols_list) == len(output_df_columns_set):
+            return expected_output_cols_list
+        # otherwise, use the sklearn estimator's output
+        else:
+            return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
+
     @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -625,24 +679,28 @@ class PassiveAggressiveRegressor(BaseTransformer):
         # are specific to the type of dataset used.
         transform_kwargs: BatchInferenceKwargsTypedDict = dict()
 
+        expected_output_cols = self._get_output_column_names(output_cols_prefix)
+
         if isinstance(dataset, DataFrame):
             self._deps = self._batch_inference_validate_snowpark(
                 dataset=dataset,
                 inference_method=inference_method,
             )
-            assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
+            assert isinstance(
+                dataset._session, Session
+            ) # mypy does not recognize the check in _batch_inference_validate_snowpark()
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                drop_input_cols = self._drop_input_cols,
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_type="float",
             )
+            expected_output_cols = self._align_expected_output_names(
+                inference_method, dataset, expected_output_cols, output_cols_prefix
+            )
 
         elif isinstance(dataset, pd.DataFrame):
-            transform_kwargs = dict(
-                snowpark_input_cols = self._snowpark_cols,
-                drop_input_cols = self._drop_input_cols
-            )
+            transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
 
         transform_handlers = ModelTransformerBuilder.build(
             dataset=dataset,
@@ -654,7 +712,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
         output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
             inference_method=inference_method,
             input_cols=self.input_cols,
-            expected_output_cols=
+            expected_output_cols=expected_output_cols,
             **transform_kwargs
         )
         return output_df
@@ -684,7 +742,8 @@ class PassiveAggressiveRegressor(BaseTransformer):
             Output dataset with log probability of the sample for each class in the model.
         """
         super()._check_dataset_type(dataset)
-        inference_method="predict_log_proba"
+        inference_method = "predict_log_proba"
+        expected_output_cols = self._get_output_column_names(output_cols_prefix)
 
         # This dictionary contains optional kwargs for batch inference. These kwargs
         # are specific to the type of dataset used.
@@ -695,18 +754,20 @@ class PassiveAggressiveRegressor(BaseTransformer):
                 dataset=dataset,
                 inference_method=inference_method,
             )
-            assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
+            assert isinstance(
+                dataset._session, Session
+            ) # mypy does not recognize the check in _batch_inference_validate_snowpark()
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                drop_input_cols = self._drop_input_cols,
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_type="float",
             )
+            expected_output_cols = self._align_expected_output_names(
+                inference_method, dataset, expected_output_cols, output_cols_prefix
+            )
         elif isinstance(dataset, pd.DataFrame):
-            transform_kwargs = dict(
-                snowpark_input_cols = self._snowpark_cols,
-                drop_input_cols = self._drop_input_cols
-            )
+            transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
 
         transform_handlers = ModelTransformerBuilder.build(
             dataset=dataset,
@@ -719,7 +780,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
         output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
             inference_method=inference_method,
             input_cols=self.input_cols,
-            expected_output_cols=
+            expected_output_cols=expected_output_cols,
             **transform_kwargs
         )
         return output_df
@@ -745,30 +806,34 @@ class PassiveAggressiveRegressor(BaseTransformer):
             Output dataset with results of the decision function for the samples in input dataset.
         """
         super()._check_dataset_type(dataset)
-        inference_method="decision_function"
+        inference_method = "decision_function"
 
         # This dictionary contains optional kwargs for batch inference. These kwargs
         # are specific to the type of dataset used.
         transform_kwargs: BatchInferenceKwargsTypedDict = dict()
 
+        expected_output_cols = self._get_output_column_names(output_cols_prefix)
+
         if isinstance(dataset, DataFrame):
             self._deps = self._batch_inference_validate_snowpark(
                 dataset=dataset,
                 inference_method=inference_method,
             )
-            assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
+            assert isinstance(
+                dataset._session, Session
+            ) # mypy does not recognize the check in _batch_inference_validate_snowpark()
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                drop_input_cols = self._drop_input_cols,
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_type="float",
             )
+            expected_output_cols = self._align_expected_output_names(
+                inference_method, dataset, expected_output_cols, output_cols_prefix
+            )
 
         elif isinstance(dataset, pd.DataFrame):
-            transform_kwargs = dict(
-                snowpark_input_cols = self._snowpark_cols,
-                drop_input_cols = self._drop_input_cols
-            )
+            transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
 
         transform_handlers = ModelTransformerBuilder.build(
             dataset=dataset,
@@ -781,7 +846,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
         output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
             inference_method=inference_method,
             input_cols=self.input_cols,
-            expected_output_cols=
+            expected_output_cols=expected_output_cols,
             **transform_kwargs
         )
        return output_df
@@ -810,12 +875,14 @@ class PassiveAggressiveRegressor(BaseTransformer):
             Output dataset with probability of the sample for each class in the model.
         """
         super()._check_dataset_type(dataset)
-        inference_method="score_samples"
+        inference_method = "score_samples"
 
         # This dictionary contains optional kwargs for batch inference. These kwargs
         # are specific to the type of dataset used.
         transform_kwargs: BatchInferenceKwargsTypedDict = dict()
 
+        expected_output_cols = self._get_output_column_names(output_cols_prefix)
+
         if isinstance(dataset, DataFrame):
             self._deps = self._batch_inference_validate_snowpark(
                 dataset=dataset,
@@ -828,6 +895,9 @@ class PassiveAggressiveRegressor(BaseTransformer):
                 drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
+            expected_output_cols = self._align_expected_output_names(
+                inference_method, dataset, expected_output_cols, output_cols_prefix
+            )
 
         elif isinstance(dataset, pd.DataFrame):
             transform_kwargs = dict(
@@ -846,7 +916,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
         output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
             inference_method=inference_method,
             input_cols=self.input_cols,
-            expected_output_cols=
+            expected_output_cols=expected_output_cols,
             **transform_kwargs
         )
         return output_df
@@ -993,50 +1063,84 @@ class PassiveAggressiveRegressor(BaseTransformer):
         )
         return output_df
 
+
+
+    def to_sklearn(self) -> Any:
+        """Get sklearn.linear_model.PassiveAggressiveRegressor object.
+        """
+        if self._sklearn_object is None:
+            self._sklearn_object = self._create_sklearn_object()
+        return self._sklearn_object
+
+    def to_xgboost(self) -> Any:
+        raise exceptions.SnowflakeMLException(
+            error_code=error_codes.METHOD_NOT_ALLOWED,
+            original_exception=AttributeError(
+                modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
+                    "to_xgboost()",
+                    "to_sklearn()"
+                )
+            ),
+        )
+
+    def to_lightgbm(self) -> Any:
+        raise exceptions.SnowflakeMLException(
+            error_code=error_codes.METHOD_NOT_ALLOWED,
+            original_exception=AttributeError(
+                modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
+                    "to_lightgbm()",
+                    "to_sklearn()"
+                )
+            ),
+        )
 
-    def
+    def _get_dependencies(self) -> List[str]:
+        return self._deps
+
+
+    def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         self._model_signature_dict = dict()
 
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
 
-        inputs = list(_infer_signature(dataset[self.input_cols], "input"))
+        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
-            assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
+            assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
             # For classifier, the type of predict is the same as the type of label
-            if self._sklearn_object._estimator_type ==
-
+            if self._sklearn_object._estimator_type == "classifier":
+                # label columns is the desired type for output
                 outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
-                self._model_signature_dict["predict"] = ModelSignature(
-
-
+                self._model_signature_dict["predict"] = ModelSignature(
+                    inputs, ([] if self._drop_input_cols else inputs) + outputs
+                )
             # For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
             # For outlier models, returns -1 for outliers and 1 for inliers.
-            # Clusterer returns int64 cluster labels.
+            # Clusterer returns int64 cluster labels.
             elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
                 outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
-                self._model_signature_dict["predict"] = ModelSignature(
-
-
-
+                self._model_signature_dict["predict"] = ModelSignature(
+                    inputs, ([] if self._drop_input_cols else inputs) + outputs
+                )
+
             # For regressor, the type of predict is float64
-            elif self._sklearn_object._estimator_type ==
+            elif self._sklearn_object._estimator_type == "regressor":
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-                self._model_signature_dict["predict"] = ModelSignature(
-
-
-
+                self._model_signature_dict["predict"] = ModelSignature(
+                    inputs, ([] if self._drop_input_cols else inputs) + outputs
+                )
+
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(
-
-
+                self._model_signature_dict[prob_func] = ModelSignature(
+                    inputs, ([] if self._drop_input_cols else inputs) + outputs
+                )
 
         # Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
         items = list(self._model_signature_dict.items())
@@ -1049,10 +1153,10 @@ class PassiveAggressiveRegressor(BaseTransformer):
         """Returns model signature of current class.
 
         Raises:
-
+            SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
 
         Returns:
-            Dict
+            Dict with each method and its input output signature
         """
         if self._model_signature_dict is None:
             raise exceptions.SnowflakeMLException(
@@ -1060,35 +1164,3 @@ class PassiveAggressiveRegressor(BaseTransformer):
                 original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
             )
         return self._model_signature_dict
-
-    def to_sklearn(self) -> Any:
-        """Get sklearn.linear_model.PassiveAggressiveRegressor object.
-        """
-        if self._sklearn_object is None:
-            self._sklearn_object = self._create_sklearn_object()
-        return self._sklearn_object
-
-    def to_xgboost(self) -> Any:
-        raise exceptions.SnowflakeMLException(
-            error_code=error_codes.METHOD_NOT_ALLOWED,
-            original_exception=AttributeError(
-                modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
-                    "to_xgboost()",
-                    "to_sklearn()"
-                )
-            ),
-        )
-
-    def to_lightgbm(self) -> Any:
-        raise exceptions.SnowflakeMLException(
-            error_code=error_codes.METHOD_NOT_ALLOWED,
-            original_exception=AttributeError(
-                modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
-                    "to_lightgbm()",
-                    "to_sklearn()"
-                )
-            ),
-        )
-
-    def _get_dependencies(self) -> List[str]:
-        return self._deps