snowflake-ml-python 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +77 -32
- snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
- snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
- snowflake/ml/_internal/exceptions/error_codes.py +3 -0
- snowflake/ml/_internal/lineage/data_source.py +10 -0
- snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/dataset/__init__.py +10 -0
- snowflake/ml/dataset/dataset.py +454 -129
- snowflake/ml/dataset/dataset_factory.py +53 -0
- snowflake/ml/dataset/dataset_metadata.py +103 -0
- snowflake/ml/dataset/dataset_reader.py +202 -0
- snowflake/ml/feature_store/feature_store.py +531 -332
- snowflake/ml/feature_store/feature_view.py +40 -23
- snowflake/ml/fileset/embedded_stage_fs.py +146 -0
- snowflake/ml/fileset/sfcfs.py +56 -54
- snowflake/ml/fileset/snowfs.py +159 -0
- snowflake/ml/fileset/stage_fs.py +49 -17
- snowflake/ml/model/__init__.py +2 -2
- snowflake/ml/model/_api.py +16 -1
- snowflake/ml/model/_client/model/model_impl.py +27 -0
- snowflake/ml/model/_client/model/model_version_impl.py +137 -50
- snowflake/ml/model/_client/ops/model_ops.py +159 -40
- snowflake/ml/model/_client/sql/model.py +25 -2
- snowflake/ml/model/_client/sql/model_version.py +131 -2
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
- snowflake/ml/model/_model_composer/model_composer.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +38 -51
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +19 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_env/model_env.py +41 -0
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +37 -11
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -5
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
- snowflake/ml/modeling/_internal/model_trainer.py +7 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +29 -7
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +246 -175
- snowflake/ml/modeling/cluster/affinity_propagation.py +246 -175
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +246 -175
- snowflake/ml/modeling/cluster/birch.py +248 -175
- snowflake/ml/modeling/cluster/bisecting_k_means.py +248 -175
- snowflake/ml/modeling/cluster/dbscan.py +246 -175
- snowflake/ml/modeling/cluster/feature_agglomeration.py +248 -175
- snowflake/ml/modeling/cluster/k_means.py +248 -175
- snowflake/ml/modeling/cluster/mean_shift.py +246 -175
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +248 -175
- snowflake/ml/modeling/cluster/optics.py +246 -175
- snowflake/ml/modeling/cluster/spectral_biclustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_clustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_coclustering.py +246 -175
- snowflake/ml/modeling/compose/column_transformer.py +248 -175
- snowflake/ml/modeling/compose/transformed_target_regressor.py +246 -175
- snowflake/ml/modeling/covariance/elliptic_envelope.py +246 -175
- snowflake/ml/modeling/covariance/empirical_covariance.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +246 -175
- snowflake/ml/modeling/covariance/ledoit_wolf.py +246 -175
- snowflake/ml/modeling/covariance/min_cov_det.py +246 -175
- snowflake/ml/modeling/covariance/oas.py +246 -175
- snowflake/ml/modeling/covariance/shrunk_covariance.py +246 -175
- snowflake/ml/modeling/decomposition/dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/factor_analysis.py +248 -175
- snowflake/ml/modeling/decomposition/fast_ica.py +248 -175
- snowflake/ml/modeling/decomposition/incremental_pca.py +248 -175
- snowflake/ml/modeling/decomposition/kernel_pca.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/pca.py +248 -175
- snowflake/ml/modeling/decomposition/sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/truncated_svd.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/isolation_forest.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/stacking_regressor.py +248 -175
- snowflake/ml/modeling/ensemble/voting_classifier.py +248 -175
- snowflake/ml/modeling/ensemble/voting_regressor.py +248 -175
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fdr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fpr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fwe.py +248 -175
- snowflake/ml/modeling/feature_selection/select_k_best.py +248 -175
- snowflake/ml/modeling/feature_selection/select_percentile.py +248 -175
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +248 -175
- snowflake/ml/modeling/feature_selection/variance_threshold.py +248 -175
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +72 -37
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +246 -175
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +246 -175
- snowflake/ml/modeling/impute/iterative_imputer.py +248 -175
- snowflake/ml/modeling/impute/knn_imputer.py +248 -175
- snowflake/ml/modeling/impute/missing_indicator.py +248 -175
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/nystroem.py +248 -175
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +248 -175
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ard_regression.py +246 -175
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/gamma_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/huber_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/lars.py +246 -175
- snowflake/ml/modeling/linear_model/lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +246 -175
- snowflake/ml/modeling/linear_model/linear_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/perceptron.py +246 -175
- snowflake/ml/modeling/linear_model/poisson_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ransac_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ridge.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_cv.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +246 -175
- snowflake/ml/modeling/manifold/isomap.py +248 -175
- snowflake/ml/modeling/manifold/mds.py +248 -175
- snowflake/ml/modeling/manifold/spectral_embedding.py +248 -175
- snowflake/ml/modeling/manifold/tsne.py +248 -175
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +246 -175
- snowflake/ml/modeling/mixture/gaussian_mixture.py +246 -175
- snowflake/ml/modeling/model_selection/grid_search_cv.py +63 -41
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +80 -38
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/output_code_classifier.py +246 -175
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/complement_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neighbors/kernel_density.py +246 -175
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_centroid.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +246 -175
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +248 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +248 -175
- snowflake/ml/modeling/neural_network/mlp_classifier.py +246 -175
- snowflake/ml/modeling/neural_network/mlp_regressor.py +246 -175
- snowflake/ml/modeling/pipeline/pipeline.py +517 -35
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +13 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +248 -175
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +246 -175
- snowflake/ml/modeling/semi_supervised/label_spreading.py +246 -175
- snowflake/ml/modeling/svm/linear_svc.py +246 -175
- snowflake/ml/modeling/svm/linear_svr.py +246 -175
- snowflake/ml/modeling/svm/nu_svc.py +246 -175
- snowflake/ml/modeling/svm/nu_svr.py +246 -175
- snowflake/ml/modeling/svm/svc.py +246 -175
- snowflake/ml/modeling/svm/svr.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_regressor.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +246 -175
- snowflake/ml/registry/model_registry.py +3 -149
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +129 -57
- snowflake_ml_python-1.5.0.dist-info/RECORD +380 -0
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- snowflake/ml/registry/_artifact_manager.py +0 -156
- snowflake/ml/registry/artifact.py +0 -46
- snowflake_ml_python-1.4.0.dist-info/RECORD +0 -370
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
|
|
33
33
|
BatchInferenceKwargsTypedDict,
|
34
34
|
ScoreKwargsTypedDict
|
35
35
|
)
|
36
|
+
from snowflake.ml.model._signatures import utils as model_signature_utils
|
37
|
+
from snowflake.ml.model.model_signature import (
|
38
|
+
BaseFeatureSpec,
|
39
|
+
DataType,
|
40
|
+
FeatureSpec,
|
41
|
+
ModelSignature,
|
42
|
+
_infer_signature,
|
43
|
+
_rename_signature_with_snowflake_identifiers,
|
44
|
+
)
|
36
45
|
|
37
46
|
from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
|
38
47
|
|
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
|
|
43
52
|
validate_sklearn_args,
|
44
53
|
)
|
45
54
|
|
46
|
-
from snowflake.ml.model.model_signature import (
|
47
|
-
DataType,
|
48
|
-
FeatureSpec,
|
49
|
-
ModelSignature,
|
50
|
-
_infer_signature,
|
51
|
-
_rename_signature_with_snowflake_identifiers,
|
52
|
-
BaseFeatureSpec,
|
53
|
-
)
|
54
|
-
from snowflake.ml.model._signatures import utils as model_signature_utils
|
55
|
-
|
56
55
|
_PROJECT = "ModelDevelopment"
|
57
56
|
# Derive subproject from module name by removing "sklearn"
|
58
57
|
# and converting module name from underscore to CamelCase
|
@@ -61,12 +60,6 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
|
|
61
60
|
|
62
61
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
63
62
|
|
64
|
-
def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
|
65
|
-
def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
|
66
|
-
return False and callable(getattr(self._sklearn_object, "fit_transform", None))
|
67
|
-
return check
|
68
|
-
|
69
|
-
|
70
63
|
class PassiveAggressiveRegressor(BaseTransformer):
|
71
64
|
r"""Passive Aggressive Regressor
|
72
65
|
For more details on this class, see [sklearn.linear_model.PassiveAggressiveRegressor]
|
@@ -287,12 +280,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
287
280
|
)
|
288
281
|
return selected_cols
|
289
282
|
|
290
|
-
|
291
|
-
project=_PROJECT,
|
292
|
-
subproject=_SUBPROJECT,
|
293
|
-
custom_tags=dict([("autogen", True)]),
|
294
|
-
)
|
295
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "PassiveAggressiveRegressor":
|
283
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "PassiveAggressiveRegressor":
|
296
284
|
"""Fit linear model with Passive Aggressive algorithm
|
297
285
|
For more details on this function, see [sklearn.linear_model.PassiveAggressiveRegressor.fit]
|
298
286
|
(https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.PassiveAggressiveRegressor.html#sklearn.linear_model.PassiveAggressiveRegressor.fit)
|
@@ -319,12 +307,14 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
319
307
|
|
320
308
|
self._snowpark_cols = dataset.select(self.input_cols).columns
|
321
309
|
|
322
|
-
|
310
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
323
311
|
if SNOWML_SPROC_ENV in os.environ:
|
324
312
|
statement_params = telemetry.get_function_usage_statement_params(
|
325
313
|
project=_PROJECT,
|
326
314
|
subproject=_SUBPROJECT,
|
327
|
-
function_name=telemetry.get_statement_params_full_func_name(
|
315
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
316
|
+
inspect.currentframe(), PassiveAggressiveRegressor.__class__.__name__
|
317
|
+
),
|
328
318
|
api_calls=[Session.call],
|
329
319
|
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
330
320
|
)
|
@@ -345,27 +335,24 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
345
335
|
)
|
346
336
|
self._sklearn_object = model_trainer.train()
|
347
337
|
self._is_fitted = True
|
348
|
-
self.
|
338
|
+
self._generate_model_signatures(dataset)
|
349
339
|
return self
|
350
340
|
|
351
341
|
def _batch_inference_validate_snowpark(
|
352
342
|
self,
|
353
343
|
dataset: DataFrame,
|
354
344
|
inference_method: str,
|
355
|
-
) ->
|
356
|
-
"""Util method to run validate that batch inference can be run on a snowpark dataframe
|
357
|
-
return the available package that exists in the snowflake anaconda channel
|
345
|
+
) -> None:
|
346
|
+
"""Util method to run validate that batch inference can be run on a snowpark dataframe.
|
358
347
|
|
359
348
|
Args:
|
360
349
|
dataset: snowpark dataframe
|
361
350
|
inference_method: the inference method such as predict, score...
|
362
|
-
|
351
|
+
|
363
352
|
Raises:
|
364
353
|
SnowflakeMLException: If the estimator is not fitted, raise error
|
365
354
|
SnowflakeMLException: If the session is None, raise error
|
366
355
|
|
367
|
-
Returns:
|
368
|
-
A list of available package that exists in the snowflake anaconda channel
|
369
356
|
"""
|
370
357
|
if not self._is_fitted:
|
371
358
|
raise exceptions.SnowflakeMLException(
|
@@ -383,9 +370,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
383
370
|
"Session must not specified for snowpark dataset."
|
384
371
|
),
|
385
372
|
)
|
386
|
-
|
387
|
-
return pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
|
388
|
-
pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
|
373
|
+
|
389
374
|
|
390
375
|
@available_if(original_estimator_has_callable("predict")) # type: ignore[misc]
|
391
376
|
@telemetry.send_api_usage_telemetry(
|
@@ -421,7 +406,9 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
421
406
|
# when it is classifier, infer the datatype from label columns
|
422
407
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
423
408
|
# Batch inference takes a single expected output column type. Use the first columns type for now.
|
424
|
-
label_cols_signatures = [
|
409
|
+
label_cols_signatures = [
|
410
|
+
row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
|
411
|
+
]
|
425
412
|
if len(label_cols_signatures) == 0:
|
426
413
|
error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
|
427
414
|
raise exceptions.SnowflakeMLException(
|
@@ -429,25 +416,23 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
429
416
|
original_exception=ValueError(error_str),
|
430
417
|
)
|
431
418
|
|
432
|
-
expected_type_inferred = convert_sp_to_sf_type(
|
433
|
-
label_cols_signatures[0].as_snowpark_type()
|
434
|
-
)
|
419
|
+
expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
|
435
420
|
|
436
|
-
self.
|
437
|
-
|
421
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
422
|
+
self._deps = self._get_dependencies()
|
423
|
+
assert isinstance(
|
424
|
+
dataset._session, Session
|
425
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
438
426
|
|
439
427
|
transform_kwargs = dict(
|
440
|
-
session
|
441
|
-
dependencies
|
442
|
-
drop_input_cols
|
443
|
-
expected_output_cols_type
|
428
|
+
session=dataset._session,
|
429
|
+
dependencies=self._deps,
|
430
|
+
drop_input_cols=self._drop_input_cols,
|
431
|
+
expected_output_cols_type=expected_type_inferred,
|
444
432
|
)
|
445
433
|
|
446
434
|
elif isinstance(dataset, pd.DataFrame):
|
447
|
-
transform_kwargs = dict(
|
448
|
-
snowpark_input_cols = self._snowpark_cols,
|
449
|
-
drop_input_cols = self._drop_input_cols
|
450
|
-
)
|
435
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
451
436
|
|
452
437
|
transform_handlers = ModelTransformerBuilder.build(
|
453
438
|
dataset=dataset,
|
@@ -487,7 +472,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
487
472
|
Transformed dataset.
|
488
473
|
"""
|
489
474
|
super()._check_dataset_type(dataset)
|
490
|
-
inference_method="transform"
|
475
|
+
inference_method = "transform"
|
491
476
|
|
492
477
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
493
478
|
# are specific to the type of dataset used.
|
@@ -517,24 +502,19 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
517
502
|
if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
|
518
503
|
expected_dtype = convert_sp_to_sf_type(output_types[0])
|
519
504
|
|
520
|
-
self.
|
521
|
-
|
522
|
-
inference_method=inference_method,
|
523
|
-
)
|
505
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
506
|
+
self._deps = self._get_dependencies()
|
524
507
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
525
508
|
|
526
509
|
transform_kwargs = dict(
|
527
|
-
session
|
528
|
-
dependencies
|
529
|
-
drop_input_cols
|
530
|
-
expected_output_cols_type
|
510
|
+
session=dataset._session,
|
511
|
+
dependencies=self._deps,
|
512
|
+
drop_input_cols=self._drop_input_cols,
|
513
|
+
expected_output_cols_type=expected_dtype,
|
531
514
|
)
|
532
515
|
|
533
516
|
elif isinstance(dataset, pd.DataFrame):
|
534
|
-
transform_kwargs = dict(
|
535
|
-
snowpark_input_cols = self._snowpark_cols,
|
536
|
-
drop_input_cols = self._drop_input_cols
|
537
|
-
)
|
517
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
538
518
|
|
539
519
|
transform_handlers = ModelTransformerBuilder.build(
|
540
520
|
dataset=dataset,
|
@@ -553,7 +533,11 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
553
533
|
return output_df
|
554
534
|
|
555
535
|
@available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
|
556
|
-
def fit_predict(
|
536
|
+
def fit_predict(
|
537
|
+
self,
|
538
|
+
dataset: Union[DataFrame, pd.DataFrame],
|
539
|
+
output_cols_prefix: str = "fit_predict_",
|
540
|
+
) -> Union[DataFrame, pd.DataFrame]:
|
557
541
|
""" Method not supported for this class.
|
558
542
|
|
559
543
|
|
@@ -578,22 +562,104 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
578
562
|
)
|
579
563
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
580
564
|
drop_input_cols=self._drop_input_cols,
|
581
|
-
expected_output_cols_list=
|
565
|
+
expected_output_cols_list=(
|
566
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
567
|
+
),
|
582
568
|
)
|
583
569
|
self._sklearn_object = fitted_estimator
|
584
570
|
self._is_fitted = True
|
585
571
|
return output_result
|
586
572
|
|
573
|
+
|
574
|
+
@available_if(original_estimator_has_callable("fit_transform")) # type: ignore[misc]
|
575
|
+
def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "fit_transform_",) -> Union[DataFrame, pd.DataFrame]:
|
576
|
+
""" Method not supported for this class.
|
577
|
+
|
587
578
|
|
588
|
-
|
589
|
-
|
590
|
-
|
579
|
+
Raises:
|
580
|
+
TypeError: Supported dataset types: snowpark.DataFrame, pandas.DataFrame.
|
581
|
+
|
582
|
+
Args:
|
583
|
+
dataset: Union[snowflake.snowpark.DataFrame, pandas.DataFrame]
|
584
|
+
Snowpark or Pandas DataFrame.
|
585
|
+
output_cols_prefix: Prefix for the response columns
|
591
586
|
Returns:
|
592
587
|
Transformed dataset.
|
593
588
|
"""
|
594
|
-
self.
|
595
|
-
|
596
|
-
|
589
|
+
self._infer_input_output_cols(dataset)
|
590
|
+
super()._check_dataset_type(dataset)
|
591
|
+
model_trainer = ModelTrainerBuilder.build_fit_transform(
|
592
|
+
estimator=self._sklearn_object,
|
593
|
+
dataset=dataset,
|
594
|
+
input_cols=self.input_cols,
|
595
|
+
label_cols=self.label_cols,
|
596
|
+
sample_weight_col=self.sample_weight_col,
|
597
|
+
autogenerated=self._autogenerated,
|
598
|
+
subproject=_SUBPROJECT,
|
599
|
+
)
|
600
|
+
output_result, fitted_estimator = model_trainer.train_fit_transform(
|
601
|
+
drop_input_cols=self._drop_input_cols,
|
602
|
+
expected_output_cols_list=self.output_cols,
|
603
|
+
)
|
604
|
+
self._sklearn_object = fitted_estimator
|
605
|
+
self._is_fitted = True
|
606
|
+
return output_result
|
607
|
+
|
608
|
+
|
609
|
+
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
610
|
+
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
611
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
612
|
+
"""
|
613
|
+
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
614
|
+
# The following condition is introduced for kneighbors methods, and not used in other methods
|
615
|
+
if output_cols:
|
616
|
+
output_cols = [
|
617
|
+
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
618
|
+
for c in output_cols
|
619
|
+
]
|
620
|
+
elif getattr(self._sklearn_object, "classes_", None) is None:
|
621
|
+
output_cols = [output_cols_prefix]
|
622
|
+
elif self._sklearn_object is not None:
|
623
|
+
classes = self._sklearn_object.classes_
|
624
|
+
if isinstance(classes, numpy.ndarray):
|
625
|
+
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
626
|
+
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
627
|
+
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
628
|
+
output_cols = []
|
629
|
+
for i, cl in enumerate(classes):
|
630
|
+
# For binary classification, there is only one output column for each class
|
631
|
+
# ndarray as the two classes are complementary.
|
632
|
+
if len(cl) == 2:
|
633
|
+
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
634
|
+
else:
|
635
|
+
output_cols.extend([
|
636
|
+
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
637
|
+
])
|
638
|
+
else:
|
639
|
+
output_cols = []
|
640
|
+
|
641
|
+
# Make sure column names are valid snowflake identifiers.
|
642
|
+
assert output_cols is not None # Make MyPy happy
|
643
|
+
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
644
|
+
|
645
|
+
return rv
|
646
|
+
|
647
|
+
def _align_expected_output_names(
|
648
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
649
|
+
) -> List[str]:
|
650
|
+
# in case the inferred output column names dimension is different
|
651
|
+
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
652
|
+
output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
|
653
|
+
output_df_columns = list(output_df_pd.columns)
|
654
|
+
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
655
|
+
if self.sample_weight_col:
|
656
|
+
output_df_columns_set -= set(self.sample_weight_col)
|
657
|
+
# if the dimension of inferred output column names is correct; use it
|
658
|
+
if len(expected_output_cols_list) == len(output_df_columns_set):
|
659
|
+
return expected_output_cols_list
|
660
|
+
# otherwise, use the sklearn estimator's output
|
661
|
+
else:
|
662
|
+
return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
597
663
|
|
598
664
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
599
665
|
@telemetry.send_api_usage_telemetry(
|
@@ -625,24 +691,26 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
625
691
|
# are specific to the type of dataset used.
|
626
692
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
627
693
|
|
694
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
695
|
+
|
628
696
|
if isinstance(dataset, DataFrame):
|
629
|
-
self.
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
697
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
698
|
+
self._deps = self._get_dependencies()
|
699
|
+
assert isinstance(
|
700
|
+
dataset._session, Session
|
701
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
634
702
|
transform_kwargs = dict(
|
635
703
|
session=dataset._session,
|
636
704
|
dependencies=self._deps,
|
637
|
-
drop_input_cols
|
705
|
+
drop_input_cols=self._drop_input_cols,
|
638
706
|
expected_output_cols_type="float",
|
639
707
|
)
|
708
|
+
expected_output_cols = self._align_expected_output_names(
|
709
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
710
|
+
)
|
640
711
|
|
641
712
|
elif isinstance(dataset, pd.DataFrame):
|
642
|
-
transform_kwargs = dict(
|
643
|
-
snowpark_input_cols = self._snowpark_cols,
|
644
|
-
drop_input_cols = self._drop_input_cols
|
645
|
-
)
|
713
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
646
714
|
|
647
715
|
transform_handlers = ModelTransformerBuilder.build(
|
648
716
|
dataset=dataset,
|
@@ -654,7 +722,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
654
722
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
655
723
|
inference_method=inference_method,
|
656
724
|
input_cols=self.input_cols,
|
657
|
-
expected_output_cols=
|
725
|
+
expected_output_cols=expected_output_cols,
|
658
726
|
**transform_kwargs
|
659
727
|
)
|
660
728
|
return output_df
|
@@ -684,29 +752,30 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
684
752
|
Output dataset with log probability of the sample for each class in the model.
|
685
753
|
"""
|
686
754
|
super()._check_dataset_type(dataset)
|
687
|
-
inference_method="predict_log_proba"
|
755
|
+
inference_method = "predict_log_proba"
|
756
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
688
757
|
|
689
758
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
690
759
|
# are specific to the type of dataset used.
|
691
760
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
692
761
|
|
693
762
|
if isinstance(dataset, DataFrame):
|
694
|
-
self.
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
763
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
764
|
+
self._deps = self._get_dependencies()
|
765
|
+
assert isinstance(
|
766
|
+
dataset._session, Session
|
767
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
699
768
|
transform_kwargs = dict(
|
700
769
|
session=dataset._session,
|
701
770
|
dependencies=self._deps,
|
702
|
-
drop_input_cols
|
771
|
+
drop_input_cols=self._drop_input_cols,
|
703
772
|
expected_output_cols_type="float",
|
704
773
|
)
|
774
|
+
expected_output_cols = self._align_expected_output_names(
|
775
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
776
|
+
)
|
705
777
|
elif isinstance(dataset, pd.DataFrame):
|
706
|
-
transform_kwargs = dict(
|
707
|
-
snowpark_input_cols = self._snowpark_cols,
|
708
|
-
drop_input_cols = self._drop_input_cols
|
709
|
-
)
|
778
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
710
779
|
|
711
780
|
transform_handlers = ModelTransformerBuilder.build(
|
712
781
|
dataset=dataset,
|
@@ -719,7 +788,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
719
788
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
720
789
|
inference_method=inference_method,
|
721
790
|
input_cols=self.input_cols,
|
722
|
-
expected_output_cols=
|
791
|
+
expected_output_cols=expected_output_cols,
|
723
792
|
**transform_kwargs
|
724
793
|
)
|
725
794
|
return output_df
|
@@ -745,30 +814,32 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
745
814
|
Output dataset with results of the decision function for the samples in input dataset.
|
746
815
|
"""
|
747
816
|
super()._check_dataset_type(dataset)
|
748
|
-
inference_method="decision_function"
|
817
|
+
inference_method = "decision_function"
|
749
818
|
|
750
819
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
751
820
|
# are specific to the type of dataset used.
|
752
821
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
753
822
|
|
823
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
824
|
+
|
754
825
|
if isinstance(dataset, DataFrame):
|
755
|
-
self.
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
826
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
827
|
+
self._deps = self._get_dependencies()
|
828
|
+
assert isinstance(
|
829
|
+
dataset._session, Session
|
830
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
760
831
|
transform_kwargs = dict(
|
761
832
|
session=dataset._session,
|
762
833
|
dependencies=self._deps,
|
763
|
-
drop_input_cols
|
834
|
+
drop_input_cols=self._drop_input_cols,
|
764
835
|
expected_output_cols_type="float",
|
765
836
|
)
|
837
|
+
expected_output_cols = self._align_expected_output_names(
|
838
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
839
|
+
)
|
766
840
|
|
767
841
|
elif isinstance(dataset, pd.DataFrame):
|
768
|
-
transform_kwargs = dict(
|
769
|
-
snowpark_input_cols = self._snowpark_cols,
|
770
|
-
drop_input_cols = self._drop_input_cols
|
771
|
-
)
|
842
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
772
843
|
|
773
844
|
transform_handlers = ModelTransformerBuilder.build(
|
774
845
|
dataset=dataset,
|
@@ -781,7 +852,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
781
852
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
782
853
|
inference_method=inference_method,
|
783
854
|
input_cols=self.input_cols,
|
784
|
-
expected_output_cols=
|
855
|
+
expected_output_cols=expected_output_cols,
|
785
856
|
**transform_kwargs
|
786
857
|
)
|
787
858
|
return output_df
|
@@ -810,17 +881,17 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
810
881
|
Output dataset with probability of the sample for each class in the model.
|
811
882
|
"""
|
812
883
|
super()._check_dataset_type(dataset)
|
813
|
-
inference_method="score_samples"
|
884
|
+
inference_method = "score_samples"
|
814
885
|
|
815
886
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
816
887
|
# are specific to the type of dataset used.
|
817
888
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
818
889
|
|
890
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
891
|
+
|
819
892
|
if isinstance(dataset, DataFrame):
|
820
|
-
self.
|
821
|
-
|
822
|
-
inference_method=inference_method,
|
823
|
-
)
|
893
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
894
|
+
self._deps = self._get_dependencies()
|
824
895
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
825
896
|
transform_kwargs = dict(
|
826
897
|
session=dataset._session,
|
@@ -828,6 +899,9 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
828
899
|
drop_input_cols = self._drop_input_cols,
|
829
900
|
expected_output_cols_type="float",
|
830
901
|
)
|
902
|
+
expected_output_cols = self._align_expected_output_names(
|
903
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
904
|
+
)
|
831
905
|
|
832
906
|
elif isinstance(dataset, pd.DataFrame):
|
833
907
|
transform_kwargs = dict(
|
@@ -846,7 +920,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
846
920
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
847
921
|
inference_method=inference_method,
|
848
922
|
input_cols=self.input_cols,
|
849
|
-
expected_output_cols=
|
923
|
+
expected_output_cols=expected_output_cols,
|
850
924
|
**transform_kwargs
|
851
925
|
)
|
852
926
|
return output_df
|
@@ -881,17 +955,15 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
881
955
|
transform_kwargs: ScoreKwargsTypedDict = dict()
|
882
956
|
|
883
957
|
if isinstance(dataset, DataFrame):
|
884
|
-
self.
|
885
|
-
|
886
|
-
inference_method="score",
|
887
|
-
)
|
958
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method="score")
|
959
|
+
self._deps = self._get_dependencies()
|
888
960
|
selected_cols = self._get_active_columns()
|
889
961
|
if len(selected_cols) > 0:
|
890
962
|
dataset = dataset.select(selected_cols)
|
891
963
|
assert isinstance(dataset._session, Session) # keep mypy happy
|
892
964
|
transform_kwargs = dict(
|
893
965
|
session=dataset._session,
|
894
|
-
dependencies=
|
966
|
+
dependencies=self._deps,
|
895
967
|
score_sproc_imports=['sklearn'],
|
896
968
|
)
|
897
969
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -956,11 +1028,8 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
956
1028
|
|
957
1029
|
if isinstance(dataset, DataFrame):
|
958
1030
|
|
959
|
-
self.
|
960
|
-
|
961
|
-
inference_method=inference_method,
|
962
|
-
|
963
|
-
)
|
1031
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
1032
|
+
self._deps = self._get_dependencies()
|
964
1033
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
965
1034
|
transform_kwargs = dict(
|
966
1035
|
session = dataset._session,
|
@@ -993,50 +1062,84 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
993
1062
|
)
|
994
1063
|
return output_df
|
995
1064
|
|
1065
|
+
|
1066
|
+
|
1067
|
+
def to_sklearn(self) -> Any:
|
1068
|
+
"""Get sklearn.linear_model.PassiveAggressiveRegressor object.
|
1069
|
+
"""
|
1070
|
+
if self._sklearn_object is None:
|
1071
|
+
self._sklearn_object = self._create_sklearn_object()
|
1072
|
+
return self._sklearn_object
|
1073
|
+
|
1074
|
+
def to_xgboost(self) -> Any:
|
1075
|
+
raise exceptions.SnowflakeMLException(
|
1076
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1077
|
+
original_exception=AttributeError(
|
1078
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1079
|
+
"to_xgboost()",
|
1080
|
+
"to_sklearn()"
|
1081
|
+
)
|
1082
|
+
),
|
1083
|
+
)
|
1084
|
+
|
1085
|
+
def to_lightgbm(self) -> Any:
|
1086
|
+
raise exceptions.SnowflakeMLException(
|
1087
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1088
|
+
original_exception=AttributeError(
|
1089
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1090
|
+
"to_lightgbm()",
|
1091
|
+
"to_sklearn()"
|
1092
|
+
)
|
1093
|
+
),
|
1094
|
+
)
|
1095
|
+
|
1096
|
+
def _get_dependencies(self) -> List[str]:
|
1097
|
+
return self._deps
|
1098
|
+
|
996
1099
|
|
997
|
-
def
|
1100
|
+
def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
998
1101
|
self._model_signature_dict = dict()
|
999
1102
|
|
1000
1103
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1001
1104
|
|
1002
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input"))
|
1105
|
+
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1003
1106
|
outputs: List[BaseFeatureSpec] = []
|
1004
1107
|
if hasattr(self, "predict"):
|
1005
1108
|
# keep mypy happy
|
1006
|
-
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1109
|
+
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1007
1110
|
# For classifier, the type of predict is the same as the type of label
|
1008
|
-
if self._sklearn_object._estimator_type ==
|
1009
|
-
|
1111
|
+
if self._sklearn_object._estimator_type == "classifier":
|
1112
|
+
# label columns is the desired type for output
|
1010
1113
|
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1011
1114
|
# rename the output columns
|
1012
1115
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1013
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1014
|
-
|
1015
|
-
|
1116
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1117
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1118
|
+
)
|
1016
1119
|
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
1017
1120
|
# For outlier models, returns -1 for outliers and 1 for inliers.
|
1018
|
-
# Clusterer returns int64 cluster labels.
|
1121
|
+
# Clusterer returns int64 cluster labels.
|
1019
1122
|
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
1020
1123
|
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
1021
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1022
|
-
|
1023
|
-
|
1024
|
-
|
1124
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1125
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1126
|
+
)
|
1127
|
+
|
1025
1128
|
# For regressor, the type of predict is float64
|
1026
|
-
elif self._sklearn_object._estimator_type ==
|
1129
|
+
elif self._sklearn_object._estimator_type == "regressor":
|
1027
1130
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1028
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1029
|
-
|
1030
|
-
|
1031
|
-
|
1131
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1132
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1133
|
+
)
|
1134
|
+
|
1032
1135
|
for prob_func in PROB_FUNCTIONS:
|
1033
1136
|
if hasattr(self, prob_func):
|
1034
1137
|
output_cols_prefix: str = f"{prob_func}_"
|
1035
1138
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1036
1139
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1037
|
-
self._model_signature_dict[prob_func] = ModelSignature(
|
1038
|
-
|
1039
|
-
|
1140
|
+
self._model_signature_dict[prob_func] = ModelSignature(
|
1141
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1142
|
+
)
|
1040
1143
|
|
1041
1144
|
# Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
|
1042
1145
|
items = list(self._model_signature_dict.items())
|
@@ -1049,10 +1152,10 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
1049
1152
|
"""Returns model signature of current class.
|
1050
1153
|
|
1051
1154
|
Raises:
|
1052
|
-
|
1155
|
+
SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
|
1053
1156
|
|
1054
1157
|
Returns:
|
1055
|
-
Dict
|
1158
|
+
Dict with each method and its input output signature
|
1056
1159
|
"""
|
1057
1160
|
if self._model_signature_dict is None:
|
1058
1161
|
raise exceptions.SnowflakeMLException(
|
@@ -1060,35 +1163,3 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
1060
1163
|
original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
|
1061
1164
|
)
|
1062
1165
|
return self._model_signature_dict
|
1063
|
-
|
1064
|
-
def to_sklearn(self) -> Any:
|
1065
|
-
"""Get sklearn.linear_model.PassiveAggressiveRegressor object.
|
1066
|
-
"""
|
1067
|
-
if self._sklearn_object is None:
|
1068
|
-
self._sklearn_object = self._create_sklearn_object()
|
1069
|
-
return self._sklearn_object
|
1070
|
-
|
1071
|
-
def to_xgboost(self) -> Any:
|
1072
|
-
raise exceptions.SnowflakeMLException(
|
1073
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1074
|
-
original_exception=AttributeError(
|
1075
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1076
|
-
"to_xgboost()",
|
1077
|
-
"to_sklearn()"
|
1078
|
-
)
|
1079
|
-
),
|
1080
|
-
)
|
1081
|
-
|
1082
|
-
def to_lightgbm(self) -> Any:
|
1083
|
-
raise exceptions.SnowflakeMLException(
|
1084
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1085
|
-
original_exception=AttributeError(
|
1086
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1087
|
-
"to_lightgbm()",
|
1088
|
-
"to_sklearn()"
|
1089
|
-
)
|
1090
|
-
),
|
1091
|
-
)
|
1092
|
-
|
1093
|
-
def _get_dependencies(self) -> List[str]:
|
1094
|
-
return self._deps
|