snowflake-ml-python 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +77 -32
- snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
- snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
- snowflake/ml/_internal/exceptions/error_codes.py +3 -0
- snowflake/ml/_internal/lineage/data_source.py +10 -0
- snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/dataset/__init__.py +10 -0
- snowflake/ml/dataset/dataset.py +454 -129
- snowflake/ml/dataset/dataset_factory.py +53 -0
- snowflake/ml/dataset/dataset_metadata.py +103 -0
- snowflake/ml/dataset/dataset_reader.py +202 -0
- snowflake/ml/feature_store/feature_store.py +531 -332
- snowflake/ml/feature_store/feature_view.py +40 -23
- snowflake/ml/fileset/embedded_stage_fs.py +146 -0
- snowflake/ml/fileset/sfcfs.py +56 -54
- snowflake/ml/fileset/snowfs.py +159 -0
- snowflake/ml/fileset/stage_fs.py +49 -17
- snowflake/ml/model/__init__.py +2 -2
- snowflake/ml/model/_api.py +16 -1
- snowflake/ml/model/_client/model/model_impl.py +27 -0
- snowflake/ml/model/_client/model/model_version_impl.py +137 -50
- snowflake/ml/model/_client/ops/model_ops.py +159 -40
- snowflake/ml/model/_client/sql/model.py +25 -2
- snowflake/ml/model/_client/sql/model_version.py +131 -2
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
- snowflake/ml/model/_model_composer/model_composer.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +38 -51
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +19 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_env/model_env.py +41 -0
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +37 -11
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -5
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
- snowflake/ml/modeling/_internal/model_trainer.py +7 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +29 -7
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +246 -175
- snowflake/ml/modeling/cluster/affinity_propagation.py +246 -175
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +246 -175
- snowflake/ml/modeling/cluster/birch.py +248 -175
- snowflake/ml/modeling/cluster/bisecting_k_means.py +248 -175
- snowflake/ml/modeling/cluster/dbscan.py +246 -175
- snowflake/ml/modeling/cluster/feature_agglomeration.py +248 -175
- snowflake/ml/modeling/cluster/k_means.py +248 -175
- snowflake/ml/modeling/cluster/mean_shift.py +246 -175
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +248 -175
- snowflake/ml/modeling/cluster/optics.py +246 -175
- snowflake/ml/modeling/cluster/spectral_biclustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_clustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_coclustering.py +246 -175
- snowflake/ml/modeling/compose/column_transformer.py +248 -175
- snowflake/ml/modeling/compose/transformed_target_regressor.py +246 -175
- snowflake/ml/modeling/covariance/elliptic_envelope.py +246 -175
- snowflake/ml/modeling/covariance/empirical_covariance.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +246 -175
- snowflake/ml/modeling/covariance/ledoit_wolf.py +246 -175
- snowflake/ml/modeling/covariance/min_cov_det.py +246 -175
- snowflake/ml/modeling/covariance/oas.py +246 -175
- snowflake/ml/modeling/covariance/shrunk_covariance.py +246 -175
- snowflake/ml/modeling/decomposition/dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/factor_analysis.py +248 -175
- snowflake/ml/modeling/decomposition/fast_ica.py +248 -175
- snowflake/ml/modeling/decomposition/incremental_pca.py +248 -175
- snowflake/ml/modeling/decomposition/kernel_pca.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/pca.py +248 -175
- snowflake/ml/modeling/decomposition/sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/truncated_svd.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/isolation_forest.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/stacking_regressor.py +248 -175
- snowflake/ml/modeling/ensemble/voting_classifier.py +248 -175
- snowflake/ml/modeling/ensemble/voting_regressor.py +248 -175
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fdr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fpr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fwe.py +248 -175
- snowflake/ml/modeling/feature_selection/select_k_best.py +248 -175
- snowflake/ml/modeling/feature_selection/select_percentile.py +248 -175
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +248 -175
- snowflake/ml/modeling/feature_selection/variance_threshold.py +248 -175
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +72 -37
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +246 -175
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +246 -175
- snowflake/ml/modeling/impute/iterative_imputer.py +248 -175
- snowflake/ml/modeling/impute/knn_imputer.py +248 -175
- snowflake/ml/modeling/impute/missing_indicator.py +248 -175
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/nystroem.py +248 -175
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +248 -175
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ard_regression.py +246 -175
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/gamma_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/huber_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/lars.py +246 -175
- snowflake/ml/modeling/linear_model/lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +246 -175
- snowflake/ml/modeling/linear_model/linear_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/perceptron.py +246 -175
- snowflake/ml/modeling/linear_model/poisson_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ransac_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ridge.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_cv.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +246 -175
- snowflake/ml/modeling/manifold/isomap.py +248 -175
- snowflake/ml/modeling/manifold/mds.py +248 -175
- snowflake/ml/modeling/manifold/spectral_embedding.py +248 -175
- snowflake/ml/modeling/manifold/tsne.py +248 -175
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +246 -175
- snowflake/ml/modeling/mixture/gaussian_mixture.py +246 -175
- snowflake/ml/modeling/model_selection/grid_search_cv.py +63 -41
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +80 -38
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/output_code_classifier.py +246 -175
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/complement_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neighbors/kernel_density.py +246 -175
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_centroid.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +246 -175
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +248 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +248 -175
- snowflake/ml/modeling/neural_network/mlp_classifier.py +246 -175
- snowflake/ml/modeling/neural_network/mlp_regressor.py +246 -175
- snowflake/ml/modeling/pipeline/pipeline.py +517 -35
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +13 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +248 -175
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +246 -175
- snowflake/ml/modeling/semi_supervised/label_spreading.py +246 -175
- snowflake/ml/modeling/svm/linear_svc.py +246 -175
- snowflake/ml/modeling/svm/linear_svr.py +246 -175
- snowflake/ml/modeling/svm/nu_svc.py +246 -175
- snowflake/ml/modeling/svm/nu_svr.py +246 -175
- snowflake/ml/modeling/svm/svc.py +246 -175
- snowflake/ml/modeling/svm/svr.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_regressor.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +246 -175
- snowflake/ml/registry/model_registry.py +3 -149
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +129 -57
- snowflake_ml_python-1.5.0.dist-info/RECORD +380 -0
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- snowflake/ml/registry/_artifact_manager.py +0 -156
- snowflake/ml/registry/artifact.py +0 -46
- snowflake_ml_python-1.4.0.dist-info/RECORD +0 -370
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
|
|
33
33
|
BatchInferenceKwargsTypedDict,
|
34
34
|
ScoreKwargsTypedDict
|
35
35
|
)
|
36
|
+
from snowflake.ml.model._signatures import utils as model_signature_utils
|
37
|
+
from snowflake.ml.model.model_signature import (
|
38
|
+
BaseFeatureSpec,
|
39
|
+
DataType,
|
40
|
+
FeatureSpec,
|
41
|
+
ModelSignature,
|
42
|
+
_infer_signature,
|
43
|
+
_rename_signature_with_snowflake_identifiers,
|
44
|
+
)
|
36
45
|
|
37
46
|
from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
|
38
47
|
|
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
|
|
43
52
|
validate_sklearn_args,
|
44
53
|
)
|
45
54
|
|
46
|
-
from snowflake.ml.model.model_signature import (
|
47
|
-
DataType,
|
48
|
-
FeatureSpec,
|
49
|
-
ModelSignature,
|
50
|
-
_infer_signature,
|
51
|
-
_rename_signature_with_snowflake_identifiers,
|
52
|
-
BaseFeatureSpec,
|
53
|
-
)
|
54
|
-
from snowflake.ml.model._signatures import utils as model_signature_utils
|
55
|
-
|
56
55
|
_PROJECT = "ModelDevelopment"
|
57
56
|
# Derive subproject from module name by removing "sklearn"
|
58
57
|
# and converting module name from underscore to CamelCase
|
@@ -61,12 +60,6 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
|
|
61
60
|
|
62
61
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
63
62
|
|
64
|
-
def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
|
65
|
-
def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
|
66
|
-
return False and callable(getattr(self._sklearn_object, "fit_transform", None))
|
67
|
-
return check
|
68
|
-
|
69
|
-
|
70
63
|
class SGDRegressor(BaseTransformer):
|
71
64
|
r"""Linear model fitted by minimizing a regularized empirical loss with SGD
|
72
65
|
For more details on this class, see [sklearn.linear_model.SGDRegressor]
|
@@ -354,12 +347,7 @@ class SGDRegressor(BaseTransformer):
|
|
354
347
|
)
|
355
348
|
return selected_cols
|
356
349
|
|
357
|
-
|
358
|
-
project=_PROJECT,
|
359
|
-
subproject=_SUBPROJECT,
|
360
|
-
custom_tags=dict([("autogen", True)]),
|
361
|
-
)
|
362
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "SGDRegressor":
|
350
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "SGDRegressor":
|
363
351
|
"""Fit linear model with Stochastic Gradient Descent
|
364
352
|
For more details on this function, see [sklearn.linear_model.SGDRegressor.fit]
|
365
353
|
(https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDRegressor.html#sklearn.linear_model.SGDRegressor.fit)
|
@@ -386,12 +374,14 @@ class SGDRegressor(BaseTransformer):
|
|
386
374
|
|
387
375
|
self._snowpark_cols = dataset.select(self.input_cols).columns
|
388
376
|
|
389
|
-
|
377
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
390
378
|
if SNOWML_SPROC_ENV in os.environ:
|
391
379
|
statement_params = telemetry.get_function_usage_statement_params(
|
392
380
|
project=_PROJECT,
|
393
381
|
subproject=_SUBPROJECT,
|
394
|
-
function_name=telemetry.get_statement_params_full_func_name(
|
382
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
383
|
+
inspect.currentframe(), SGDRegressor.__class__.__name__
|
384
|
+
),
|
395
385
|
api_calls=[Session.call],
|
396
386
|
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
397
387
|
)
|
@@ -412,27 +402,24 @@ class SGDRegressor(BaseTransformer):
|
|
412
402
|
)
|
413
403
|
self._sklearn_object = model_trainer.train()
|
414
404
|
self._is_fitted = True
|
415
|
-
self.
|
405
|
+
self._generate_model_signatures(dataset)
|
416
406
|
return self
|
417
407
|
|
418
408
|
def _batch_inference_validate_snowpark(
|
419
409
|
self,
|
420
410
|
dataset: DataFrame,
|
421
411
|
inference_method: str,
|
422
|
-
) ->
|
423
|
-
"""Util method to run validate that batch inference can be run on a snowpark dataframe
|
424
|
-
return the available package that exists in the snowflake anaconda channel
|
412
|
+
) -> None:
|
413
|
+
"""Util method to run validate that batch inference can be run on a snowpark dataframe.
|
425
414
|
|
426
415
|
Args:
|
427
416
|
dataset: snowpark dataframe
|
428
417
|
inference_method: the inference method such as predict, score...
|
429
|
-
|
418
|
+
|
430
419
|
Raises:
|
431
420
|
SnowflakeMLException: If the estimator is not fitted, raise error
|
432
421
|
SnowflakeMLException: If the session is None, raise error
|
433
422
|
|
434
|
-
Returns:
|
435
|
-
A list of available package that exists in the snowflake anaconda channel
|
436
423
|
"""
|
437
424
|
if not self._is_fitted:
|
438
425
|
raise exceptions.SnowflakeMLException(
|
@@ -450,9 +437,7 @@ class SGDRegressor(BaseTransformer):
|
|
450
437
|
"Session must not specified for snowpark dataset."
|
451
438
|
),
|
452
439
|
)
|
453
|
-
|
454
|
-
return pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
|
455
|
-
pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
|
440
|
+
|
456
441
|
|
457
442
|
@available_if(original_estimator_has_callable("predict")) # type: ignore[misc]
|
458
443
|
@telemetry.send_api_usage_telemetry(
|
@@ -488,7 +473,9 @@ class SGDRegressor(BaseTransformer):
|
|
488
473
|
# when it is classifier, infer the datatype from label columns
|
489
474
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
490
475
|
# Batch inference takes a single expected output column type. Use the first columns type for now.
|
491
|
-
label_cols_signatures = [
|
476
|
+
label_cols_signatures = [
|
477
|
+
row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
|
478
|
+
]
|
492
479
|
if len(label_cols_signatures) == 0:
|
493
480
|
error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
|
494
481
|
raise exceptions.SnowflakeMLException(
|
@@ -496,25 +483,23 @@ class SGDRegressor(BaseTransformer):
|
|
496
483
|
original_exception=ValueError(error_str),
|
497
484
|
)
|
498
485
|
|
499
|
-
expected_type_inferred = convert_sp_to_sf_type(
|
500
|
-
label_cols_signatures[0].as_snowpark_type()
|
501
|
-
)
|
486
|
+
expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
|
502
487
|
|
503
|
-
self.
|
504
|
-
|
488
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
489
|
+
self._deps = self._get_dependencies()
|
490
|
+
assert isinstance(
|
491
|
+
dataset._session, Session
|
492
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
505
493
|
|
506
494
|
transform_kwargs = dict(
|
507
|
-
session
|
508
|
-
dependencies
|
509
|
-
drop_input_cols
|
510
|
-
expected_output_cols_type
|
495
|
+
session=dataset._session,
|
496
|
+
dependencies=self._deps,
|
497
|
+
drop_input_cols=self._drop_input_cols,
|
498
|
+
expected_output_cols_type=expected_type_inferred,
|
511
499
|
)
|
512
500
|
|
513
501
|
elif isinstance(dataset, pd.DataFrame):
|
514
|
-
transform_kwargs = dict(
|
515
|
-
snowpark_input_cols = self._snowpark_cols,
|
516
|
-
drop_input_cols = self._drop_input_cols
|
517
|
-
)
|
502
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
518
503
|
|
519
504
|
transform_handlers = ModelTransformerBuilder.build(
|
520
505
|
dataset=dataset,
|
@@ -554,7 +539,7 @@ class SGDRegressor(BaseTransformer):
|
|
554
539
|
Transformed dataset.
|
555
540
|
"""
|
556
541
|
super()._check_dataset_type(dataset)
|
557
|
-
inference_method="transform"
|
542
|
+
inference_method = "transform"
|
558
543
|
|
559
544
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
560
545
|
# are specific to the type of dataset used.
|
@@ -584,24 +569,19 @@ class SGDRegressor(BaseTransformer):
|
|
584
569
|
if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
|
585
570
|
expected_dtype = convert_sp_to_sf_type(output_types[0])
|
586
571
|
|
587
|
-
self.
|
588
|
-
|
589
|
-
inference_method=inference_method,
|
590
|
-
)
|
572
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
573
|
+
self._deps = self._get_dependencies()
|
591
574
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
592
575
|
|
593
576
|
transform_kwargs = dict(
|
594
|
-
session
|
595
|
-
dependencies
|
596
|
-
drop_input_cols
|
597
|
-
expected_output_cols_type
|
577
|
+
session=dataset._session,
|
578
|
+
dependencies=self._deps,
|
579
|
+
drop_input_cols=self._drop_input_cols,
|
580
|
+
expected_output_cols_type=expected_dtype,
|
598
581
|
)
|
599
582
|
|
600
583
|
elif isinstance(dataset, pd.DataFrame):
|
601
|
-
transform_kwargs = dict(
|
602
|
-
snowpark_input_cols = self._snowpark_cols,
|
603
|
-
drop_input_cols = self._drop_input_cols
|
604
|
-
)
|
584
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
605
585
|
|
606
586
|
transform_handlers = ModelTransformerBuilder.build(
|
607
587
|
dataset=dataset,
|
@@ -620,7 +600,11 @@ class SGDRegressor(BaseTransformer):
|
|
620
600
|
return output_df
|
621
601
|
|
622
602
|
@available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
|
623
|
-
def fit_predict(
|
603
|
+
def fit_predict(
|
604
|
+
self,
|
605
|
+
dataset: Union[DataFrame, pd.DataFrame],
|
606
|
+
output_cols_prefix: str = "fit_predict_",
|
607
|
+
) -> Union[DataFrame, pd.DataFrame]:
|
624
608
|
""" Method not supported for this class.
|
625
609
|
|
626
610
|
|
@@ -645,22 +629,104 @@ class SGDRegressor(BaseTransformer):
|
|
645
629
|
)
|
646
630
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
647
631
|
drop_input_cols=self._drop_input_cols,
|
648
|
-
expected_output_cols_list=
|
632
|
+
expected_output_cols_list=(
|
633
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
634
|
+
),
|
649
635
|
)
|
650
636
|
self._sklearn_object = fitted_estimator
|
651
637
|
self._is_fitted = True
|
652
638
|
return output_result
|
653
639
|
|
640
|
+
|
641
|
+
@available_if(original_estimator_has_callable("fit_transform")) # type: ignore[misc]
|
642
|
+
def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "fit_transform_",) -> Union[DataFrame, pd.DataFrame]:
|
643
|
+
""" Method not supported for this class.
|
644
|
+
|
654
645
|
|
655
|
-
|
656
|
-
|
657
|
-
|
646
|
+
Raises:
|
647
|
+
TypeError: Supported dataset types: snowpark.DataFrame, pandas.DataFrame.
|
648
|
+
|
649
|
+
Args:
|
650
|
+
dataset: Union[snowflake.snowpark.DataFrame, pandas.DataFrame]
|
651
|
+
Snowpark or Pandas DataFrame.
|
652
|
+
output_cols_prefix: Prefix for the response columns
|
658
653
|
Returns:
|
659
654
|
Transformed dataset.
|
660
655
|
"""
|
661
|
-
self.
|
662
|
-
|
663
|
-
|
656
|
+
self._infer_input_output_cols(dataset)
|
657
|
+
super()._check_dataset_type(dataset)
|
658
|
+
model_trainer = ModelTrainerBuilder.build_fit_transform(
|
659
|
+
estimator=self._sklearn_object,
|
660
|
+
dataset=dataset,
|
661
|
+
input_cols=self.input_cols,
|
662
|
+
label_cols=self.label_cols,
|
663
|
+
sample_weight_col=self.sample_weight_col,
|
664
|
+
autogenerated=self._autogenerated,
|
665
|
+
subproject=_SUBPROJECT,
|
666
|
+
)
|
667
|
+
output_result, fitted_estimator = model_trainer.train_fit_transform(
|
668
|
+
drop_input_cols=self._drop_input_cols,
|
669
|
+
expected_output_cols_list=self.output_cols,
|
670
|
+
)
|
671
|
+
self._sklearn_object = fitted_estimator
|
672
|
+
self._is_fitted = True
|
673
|
+
return output_result
|
674
|
+
|
675
|
+
|
676
|
+
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
677
|
+
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
678
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
679
|
+
"""
|
680
|
+
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
681
|
+
# The following condition is introduced for kneighbors methods, and not used in other methods
|
682
|
+
if output_cols:
|
683
|
+
output_cols = [
|
684
|
+
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
685
|
+
for c in output_cols
|
686
|
+
]
|
687
|
+
elif getattr(self._sklearn_object, "classes_", None) is None:
|
688
|
+
output_cols = [output_cols_prefix]
|
689
|
+
elif self._sklearn_object is not None:
|
690
|
+
classes = self._sklearn_object.classes_
|
691
|
+
if isinstance(classes, numpy.ndarray):
|
692
|
+
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
693
|
+
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
694
|
+
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
695
|
+
output_cols = []
|
696
|
+
for i, cl in enumerate(classes):
|
697
|
+
# For binary classification, there is only one output column for each class
|
698
|
+
# ndarray as the two classes are complementary.
|
699
|
+
if len(cl) == 2:
|
700
|
+
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
701
|
+
else:
|
702
|
+
output_cols.extend([
|
703
|
+
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
704
|
+
])
|
705
|
+
else:
|
706
|
+
output_cols = []
|
707
|
+
|
708
|
+
# Make sure column names are valid snowflake identifiers.
|
709
|
+
assert output_cols is not None # Make MyPy happy
|
710
|
+
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
711
|
+
|
712
|
+
return rv
|
713
|
+
|
714
|
+
def _align_expected_output_names(
|
715
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
716
|
+
) -> List[str]:
|
717
|
+
# in case the inferred output column names dimension is different
|
718
|
+
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
719
|
+
output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
|
720
|
+
output_df_columns = list(output_df_pd.columns)
|
721
|
+
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
722
|
+
if self.sample_weight_col:
|
723
|
+
output_df_columns_set -= set(self.sample_weight_col)
|
724
|
+
# if the dimension of inferred output column names is correct; use it
|
725
|
+
if len(expected_output_cols_list) == len(output_df_columns_set):
|
726
|
+
return expected_output_cols_list
|
727
|
+
# otherwise, use the sklearn estimator's output
|
728
|
+
else:
|
729
|
+
return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
664
730
|
|
665
731
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
666
732
|
@telemetry.send_api_usage_telemetry(
|
@@ -692,24 +758,26 @@ class SGDRegressor(BaseTransformer):
|
|
692
758
|
# are specific to the type of dataset used.
|
693
759
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
694
760
|
|
761
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
762
|
+
|
695
763
|
if isinstance(dataset, DataFrame):
|
696
|
-
self.
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
764
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
765
|
+
self._deps = self._get_dependencies()
|
766
|
+
assert isinstance(
|
767
|
+
dataset._session, Session
|
768
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
701
769
|
transform_kwargs = dict(
|
702
770
|
session=dataset._session,
|
703
771
|
dependencies=self._deps,
|
704
|
-
drop_input_cols
|
772
|
+
drop_input_cols=self._drop_input_cols,
|
705
773
|
expected_output_cols_type="float",
|
706
774
|
)
|
775
|
+
expected_output_cols = self._align_expected_output_names(
|
776
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
777
|
+
)
|
707
778
|
|
708
779
|
elif isinstance(dataset, pd.DataFrame):
|
709
|
-
transform_kwargs = dict(
|
710
|
-
snowpark_input_cols = self._snowpark_cols,
|
711
|
-
drop_input_cols = self._drop_input_cols
|
712
|
-
)
|
780
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
713
781
|
|
714
782
|
transform_handlers = ModelTransformerBuilder.build(
|
715
783
|
dataset=dataset,
|
@@ -721,7 +789,7 @@ class SGDRegressor(BaseTransformer):
|
|
721
789
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
722
790
|
inference_method=inference_method,
|
723
791
|
input_cols=self.input_cols,
|
724
|
-
expected_output_cols=
|
792
|
+
expected_output_cols=expected_output_cols,
|
725
793
|
**transform_kwargs
|
726
794
|
)
|
727
795
|
return output_df
|
@@ -751,29 +819,30 @@ class SGDRegressor(BaseTransformer):
|
|
751
819
|
Output dataset with log probability of the sample for each class in the model.
|
752
820
|
"""
|
753
821
|
super()._check_dataset_type(dataset)
|
754
|
-
inference_method="predict_log_proba"
|
822
|
+
inference_method = "predict_log_proba"
|
823
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
755
824
|
|
756
825
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
757
826
|
# are specific to the type of dataset used.
|
758
827
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
759
828
|
|
760
829
|
if isinstance(dataset, DataFrame):
|
761
|
-
self.
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
|
830
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
831
|
+
self._deps = self._get_dependencies()
|
832
|
+
assert isinstance(
|
833
|
+
dataset._session, Session
|
834
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
766
835
|
transform_kwargs = dict(
|
767
836
|
session=dataset._session,
|
768
837
|
dependencies=self._deps,
|
769
|
-
drop_input_cols
|
838
|
+
drop_input_cols=self._drop_input_cols,
|
770
839
|
expected_output_cols_type="float",
|
771
840
|
)
|
841
|
+
expected_output_cols = self._align_expected_output_names(
|
842
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
843
|
+
)
|
772
844
|
elif isinstance(dataset, pd.DataFrame):
|
773
|
-
transform_kwargs = dict(
|
774
|
-
snowpark_input_cols = self._snowpark_cols,
|
775
|
-
drop_input_cols = self._drop_input_cols
|
776
|
-
)
|
845
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
777
846
|
|
778
847
|
transform_handlers = ModelTransformerBuilder.build(
|
779
848
|
dataset=dataset,
|
@@ -786,7 +855,7 @@ class SGDRegressor(BaseTransformer):
|
|
786
855
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
787
856
|
inference_method=inference_method,
|
788
857
|
input_cols=self.input_cols,
|
789
|
-
expected_output_cols=
|
858
|
+
expected_output_cols=expected_output_cols,
|
790
859
|
**transform_kwargs
|
791
860
|
)
|
792
861
|
return output_df
|
@@ -812,30 +881,32 @@ class SGDRegressor(BaseTransformer):
|
|
812
881
|
Output dataset with results of the decision function for the samples in input dataset.
|
813
882
|
"""
|
814
883
|
super()._check_dataset_type(dataset)
|
815
|
-
inference_method="decision_function"
|
884
|
+
inference_method = "decision_function"
|
816
885
|
|
817
886
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
818
887
|
# are specific to the type of dataset used.
|
819
888
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
820
889
|
|
890
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
891
|
+
|
821
892
|
if isinstance(dataset, DataFrame):
|
822
|
-
self.
|
823
|
-
|
824
|
-
|
825
|
-
|
826
|
-
|
893
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
894
|
+
self._deps = self._get_dependencies()
|
895
|
+
assert isinstance(
|
896
|
+
dataset._session, Session
|
897
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
827
898
|
transform_kwargs = dict(
|
828
899
|
session=dataset._session,
|
829
900
|
dependencies=self._deps,
|
830
|
-
drop_input_cols
|
901
|
+
drop_input_cols=self._drop_input_cols,
|
831
902
|
expected_output_cols_type="float",
|
832
903
|
)
|
904
|
+
expected_output_cols = self._align_expected_output_names(
|
905
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
906
|
+
)
|
833
907
|
|
834
908
|
elif isinstance(dataset, pd.DataFrame):
|
835
|
-
transform_kwargs = dict(
|
836
|
-
snowpark_input_cols = self._snowpark_cols,
|
837
|
-
drop_input_cols = self._drop_input_cols
|
838
|
-
)
|
909
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
839
910
|
|
840
911
|
transform_handlers = ModelTransformerBuilder.build(
|
841
912
|
dataset=dataset,
|
@@ -848,7 +919,7 @@ class SGDRegressor(BaseTransformer):
|
|
848
919
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
849
920
|
inference_method=inference_method,
|
850
921
|
input_cols=self.input_cols,
|
851
|
-
expected_output_cols=
|
922
|
+
expected_output_cols=expected_output_cols,
|
852
923
|
**transform_kwargs
|
853
924
|
)
|
854
925
|
return output_df
|
@@ -877,17 +948,17 @@ class SGDRegressor(BaseTransformer):
|
|
877
948
|
Output dataset with probability of the sample for each class in the model.
|
878
949
|
"""
|
879
950
|
super()._check_dataset_type(dataset)
|
880
|
-
inference_method="score_samples"
|
951
|
+
inference_method = "score_samples"
|
881
952
|
|
882
953
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
883
954
|
# are specific to the type of dataset used.
|
884
955
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
885
956
|
|
957
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
958
|
+
|
886
959
|
if isinstance(dataset, DataFrame):
|
887
|
-
self.
|
888
|
-
|
889
|
-
inference_method=inference_method,
|
890
|
-
)
|
960
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
961
|
+
self._deps = self._get_dependencies()
|
891
962
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
892
963
|
transform_kwargs = dict(
|
893
964
|
session=dataset._session,
|
@@ -895,6 +966,9 @@ class SGDRegressor(BaseTransformer):
|
|
895
966
|
drop_input_cols = self._drop_input_cols,
|
896
967
|
expected_output_cols_type="float",
|
897
968
|
)
|
969
|
+
expected_output_cols = self._align_expected_output_names(
|
970
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
971
|
+
)
|
898
972
|
|
899
973
|
elif isinstance(dataset, pd.DataFrame):
|
900
974
|
transform_kwargs = dict(
|
@@ -913,7 +987,7 @@ class SGDRegressor(BaseTransformer):
|
|
913
987
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
914
988
|
inference_method=inference_method,
|
915
989
|
input_cols=self.input_cols,
|
916
|
-
expected_output_cols=
|
990
|
+
expected_output_cols=expected_output_cols,
|
917
991
|
**transform_kwargs
|
918
992
|
)
|
919
993
|
return output_df
|
@@ -948,17 +1022,15 @@ class SGDRegressor(BaseTransformer):
|
|
948
1022
|
transform_kwargs: ScoreKwargsTypedDict = dict()
|
949
1023
|
|
950
1024
|
if isinstance(dataset, DataFrame):
|
951
|
-
self.
|
952
|
-
|
953
|
-
inference_method="score",
|
954
|
-
)
|
1025
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method="score")
|
1026
|
+
self._deps = self._get_dependencies()
|
955
1027
|
selected_cols = self._get_active_columns()
|
956
1028
|
if len(selected_cols) > 0:
|
957
1029
|
dataset = dataset.select(selected_cols)
|
958
1030
|
assert isinstance(dataset._session, Session) # keep mypy happy
|
959
1031
|
transform_kwargs = dict(
|
960
1032
|
session=dataset._session,
|
961
|
-
dependencies=
|
1033
|
+
dependencies=self._deps,
|
962
1034
|
score_sproc_imports=['sklearn'],
|
963
1035
|
)
|
964
1036
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -1023,11 +1095,8 @@ class SGDRegressor(BaseTransformer):
|
|
1023
1095
|
|
1024
1096
|
if isinstance(dataset, DataFrame):
|
1025
1097
|
|
1026
|
-
self.
|
1027
|
-
|
1028
|
-
inference_method=inference_method,
|
1029
|
-
|
1030
|
-
)
|
1098
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
1099
|
+
self._deps = self._get_dependencies()
|
1031
1100
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
1032
1101
|
transform_kwargs = dict(
|
1033
1102
|
session = dataset._session,
|
@@ -1060,50 +1129,84 @@ class SGDRegressor(BaseTransformer):
|
|
1060
1129
|
)
|
1061
1130
|
return output_df
|
1062
1131
|
|
1132
|
+
|
1133
|
+
|
1134
|
+
def to_sklearn(self) -> Any:
|
1135
|
+
"""Get sklearn.linear_model.SGDRegressor object.
|
1136
|
+
"""
|
1137
|
+
if self._sklearn_object is None:
|
1138
|
+
self._sklearn_object = self._create_sklearn_object()
|
1139
|
+
return self._sklearn_object
|
1140
|
+
|
1141
|
+
def to_xgboost(self) -> Any:
|
1142
|
+
raise exceptions.SnowflakeMLException(
|
1143
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1144
|
+
original_exception=AttributeError(
|
1145
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1146
|
+
"to_xgboost()",
|
1147
|
+
"to_sklearn()"
|
1148
|
+
)
|
1149
|
+
),
|
1150
|
+
)
|
1151
|
+
|
1152
|
+
def to_lightgbm(self) -> Any:
|
1153
|
+
raise exceptions.SnowflakeMLException(
|
1154
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1155
|
+
original_exception=AttributeError(
|
1156
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1157
|
+
"to_lightgbm()",
|
1158
|
+
"to_sklearn()"
|
1159
|
+
)
|
1160
|
+
),
|
1161
|
+
)
|
1162
|
+
|
1163
|
+
def _get_dependencies(self) -> List[str]:
|
1164
|
+
return self._deps
|
1165
|
+
|
1063
1166
|
|
1064
|
-
def
|
1167
|
+
def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
1065
1168
|
self._model_signature_dict = dict()
|
1066
1169
|
|
1067
1170
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1068
1171
|
|
1069
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input"))
|
1172
|
+
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1070
1173
|
outputs: List[BaseFeatureSpec] = []
|
1071
1174
|
if hasattr(self, "predict"):
|
1072
1175
|
# keep mypy happy
|
1073
|
-
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1176
|
+
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1074
1177
|
# For classifier, the type of predict is the same as the type of label
|
1075
|
-
if self._sklearn_object._estimator_type ==
|
1076
|
-
|
1178
|
+
if self._sklearn_object._estimator_type == "classifier":
|
1179
|
+
# label columns is the desired type for output
|
1077
1180
|
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1078
1181
|
# rename the output columns
|
1079
1182
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1080
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1081
|
-
|
1082
|
-
|
1183
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1184
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1185
|
+
)
|
1083
1186
|
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
1084
1187
|
# For outlier models, returns -1 for outliers and 1 for inliers.
|
1085
|
-
# Clusterer returns int64 cluster labels.
|
1188
|
+
# Clusterer returns int64 cluster labels.
|
1086
1189
|
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
1087
1190
|
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
1088
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1089
|
-
|
1090
|
-
|
1091
|
-
|
1191
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1192
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1193
|
+
)
|
1194
|
+
|
1092
1195
|
# For regressor, the type of predict is float64
|
1093
|
-
elif self._sklearn_object._estimator_type ==
|
1196
|
+
elif self._sklearn_object._estimator_type == "regressor":
|
1094
1197
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1095
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1096
|
-
|
1097
|
-
|
1098
|
-
|
1198
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1199
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1200
|
+
)
|
1201
|
+
|
1099
1202
|
for prob_func in PROB_FUNCTIONS:
|
1100
1203
|
if hasattr(self, prob_func):
|
1101
1204
|
output_cols_prefix: str = f"{prob_func}_"
|
1102
1205
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1103
1206
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1104
|
-
self._model_signature_dict[prob_func] = ModelSignature(
|
1105
|
-
|
1106
|
-
|
1207
|
+
self._model_signature_dict[prob_func] = ModelSignature(
|
1208
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1209
|
+
)
|
1107
1210
|
|
1108
1211
|
# Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
|
1109
1212
|
items = list(self._model_signature_dict.items())
|
@@ -1116,10 +1219,10 @@ class SGDRegressor(BaseTransformer):
|
|
1116
1219
|
"""Returns model signature of current class.
|
1117
1220
|
|
1118
1221
|
Raises:
|
1119
|
-
|
1222
|
+
SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
|
1120
1223
|
|
1121
1224
|
Returns:
|
1122
|
-
Dict
|
1225
|
+
Dict with each method and its input output signature
|
1123
1226
|
"""
|
1124
1227
|
if self._model_signature_dict is None:
|
1125
1228
|
raise exceptions.SnowflakeMLException(
|
@@ -1127,35 +1230,3 @@ class SGDRegressor(BaseTransformer):
|
|
1127
1230
|
original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
|
1128
1231
|
)
|
1129
1232
|
return self._model_signature_dict
|
1130
|
-
|
1131
|
-
def to_sklearn(self) -> Any:
|
1132
|
-
"""Get sklearn.linear_model.SGDRegressor object.
|
1133
|
-
"""
|
1134
|
-
if self._sklearn_object is None:
|
1135
|
-
self._sklearn_object = self._create_sklearn_object()
|
1136
|
-
return self._sklearn_object
|
1137
|
-
|
1138
|
-
def to_xgboost(self) -> Any:
|
1139
|
-
raise exceptions.SnowflakeMLException(
|
1140
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1141
|
-
original_exception=AttributeError(
|
1142
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1143
|
-
"to_xgboost()",
|
1144
|
-
"to_sklearn()"
|
1145
|
-
)
|
1146
|
-
),
|
1147
|
-
)
|
1148
|
-
|
1149
|
-
def to_lightgbm(self) -> Any:
|
1150
|
-
raise exceptions.SnowflakeMLException(
|
1151
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1152
|
-
original_exception=AttributeError(
|
1153
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1154
|
-
"to_lightgbm()",
|
1155
|
-
"to_sklearn()"
|
1156
|
-
)
|
1157
|
-
),
|
1158
|
-
)
|
1159
|
-
|
1160
|
-
def _get_dependencies(self) -> List[str]:
|
1161
|
-
return self._deps
|