snowflake-ml-python 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +11 -1
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/feature_store/feature_store.py +151 -78
- snowflake/ml/feature_store/feature_view.py +12 -24
- snowflake/ml/fileset/sfcfs.py +56 -50
- snowflake/ml/fileset/stage_fs.py +48 -13
- snowflake/ml/model/_client/model/model_version_impl.py +2 -50
- snowflake/ml/model/_client/ops/model_ops.py +78 -29
- snowflake/ml/model/_client/sql/model.py +23 -2
- snowflake/ml/model/_client/sql/model_version.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +19 -54
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +8 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +36 -6
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -2
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +5 -5
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +195 -123
- snowflake/ml/modeling/cluster/affinity_propagation.py +195 -123
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +195 -123
- snowflake/ml/modeling/cluster/birch.py +195 -123
- snowflake/ml/modeling/cluster/bisecting_k_means.py +195 -123
- snowflake/ml/modeling/cluster/dbscan.py +195 -123
- snowflake/ml/modeling/cluster/feature_agglomeration.py +195 -123
- snowflake/ml/modeling/cluster/k_means.py +195 -123
- snowflake/ml/modeling/cluster/mean_shift.py +195 -123
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +195 -123
- snowflake/ml/modeling/cluster/optics.py +195 -123
- snowflake/ml/modeling/cluster/spectral_biclustering.py +195 -123
- snowflake/ml/modeling/cluster/spectral_clustering.py +195 -123
- snowflake/ml/modeling/cluster/spectral_coclustering.py +195 -123
- snowflake/ml/modeling/compose/column_transformer.py +195 -123
- snowflake/ml/modeling/compose/transformed_target_regressor.py +195 -123
- snowflake/ml/modeling/covariance/elliptic_envelope.py +195 -123
- snowflake/ml/modeling/covariance/empirical_covariance.py +195 -123
- snowflake/ml/modeling/covariance/graphical_lasso.py +195 -123
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +195 -123
- snowflake/ml/modeling/covariance/ledoit_wolf.py +195 -123
- snowflake/ml/modeling/covariance/min_cov_det.py +195 -123
- snowflake/ml/modeling/covariance/oas.py +195 -123
- snowflake/ml/modeling/covariance/shrunk_covariance.py +195 -123
- snowflake/ml/modeling/decomposition/dictionary_learning.py +195 -123
- snowflake/ml/modeling/decomposition/factor_analysis.py +195 -123
- snowflake/ml/modeling/decomposition/fast_ica.py +195 -123
- snowflake/ml/modeling/decomposition/incremental_pca.py +195 -123
- snowflake/ml/modeling/decomposition/kernel_pca.py +195 -123
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +195 -123
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +195 -123
- snowflake/ml/modeling/decomposition/pca.py +195 -123
- snowflake/ml/modeling/decomposition/sparse_pca.py +195 -123
- snowflake/ml/modeling/decomposition/truncated_svd.py +195 -123
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +195 -123
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +195 -123
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/bagging_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/bagging_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/isolation_forest.py +195 -123
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/stacking_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/voting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/voting_regressor.py +195 -123
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fdr.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fpr.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fwe.py +195 -123
- snowflake/ml/modeling/feature_selection/select_k_best.py +195 -123
- snowflake/ml/modeling/feature_selection/select_percentile.py +195 -123
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +195 -123
- snowflake/ml/modeling/feature_selection/variance_threshold.py +195 -123
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +9 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +195 -123
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +195 -123
- snowflake/ml/modeling/impute/iterative_imputer.py +195 -123
- snowflake/ml/modeling/impute/knn_imputer.py +195 -123
- snowflake/ml/modeling/impute/missing_indicator.py +195 -123
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +195 -123
- snowflake/ml/modeling/kernel_approximation/nystroem.py +195 -123
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +195 -123
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +195 -123
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +195 -123
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +195 -123
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +195 -123
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ard_regression.py +195 -123
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +195 -123
- snowflake/ml/modeling/linear_model/elastic_net.py +195 -123
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +195 -123
- snowflake/ml/modeling/linear_model/gamma_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/huber_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/lars.py +195 -123
- snowflake/ml/modeling/linear_model/lars_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +195 -123
- snowflake/ml/modeling/linear_model/linear_regression.py +195 -123
- snowflake/ml/modeling/linear_model/logistic_regression.py +195 -123
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +195 -123
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +195 -123
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/perceptron.py +195 -123
- snowflake/ml/modeling/linear_model/poisson_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ransac_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ridge.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_cv.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +195 -123
- snowflake/ml/modeling/manifold/isomap.py +195 -123
- snowflake/ml/modeling/manifold/mds.py +195 -123
- snowflake/ml/modeling/manifold/spectral_embedding.py +195 -123
- snowflake/ml/modeling/manifold/tsne.py +195 -123
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +195 -123
- snowflake/ml/modeling/mixture/gaussian_mixture.py +195 -123
- snowflake/ml/modeling/model_selection/grid_search_cv.py +42 -18
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +42 -18
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +195 -123
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +195 -123
- snowflake/ml/modeling/multiclass/output_code_classifier.py +195 -123
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/complement_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +195 -123
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +195 -123
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +195 -123
- snowflake/ml/modeling/neighbors/kernel_density.py +195 -123
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +195 -123
- snowflake/ml/modeling/neighbors/nearest_centroid.py +195 -123
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +195 -123
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +195 -123
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +195 -123
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +195 -123
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +195 -123
- snowflake/ml/modeling/neural_network/mlp_classifier.py +195 -123
- snowflake/ml/modeling/neural_network/mlp_regressor.py +195 -123
- snowflake/ml/modeling/pipeline/pipeline.py +4 -4
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +195 -123
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +195 -123
- snowflake/ml/modeling/semi_supervised/label_spreading.py +195 -123
- snowflake/ml/modeling/svm/linear_svc.py +195 -123
- snowflake/ml/modeling/svm/linear_svr.py +195 -123
- snowflake/ml/modeling/svm/nu_svc.py +195 -123
- snowflake/ml/modeling/svm/nu_svr.py +195 -123
- snowflake/ml/modeling/svm/svc.py +195 -123
- snowflake/ml/modeling/svm/svr.py +195 -123
- snowflake/ml/modeling/tree/decision_tree_classifier.py +195 -123
- snowflake/ml/modeling/tree/decision_tree_regressor.py +195 -123
- snowflake/ml/modeling/tree/extra_tree_classifier.py +195 -123
- snowflake/ml/modeling/tree/extra_tree_regressor.py +195 -123
- snowflake/ml/modeling/xgboost/xgb_classifier.py +195 -123
- snowflake/ml/modeling/xgboost/xgb_regressor.py +195 -123
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +195 -123
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +195 -123
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/METADATA +68 -57
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/RECORD +202 -200
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
|
|
33
33
|
BatchInferenceKwargsTypedDict,
|
34
34
|
ScoreKwargsTypedDict
|
35
35
|
)
|
36
|
+
from snowflake.ml.model._signatures import utils as model_signature_utils
|
37
|
+
from snowflake.ml.model.model_signature import (
|
38
|
+
BaseFeatureSpec,
|
39
|
+
DataType,
|
40
|
+
FeatureSpec,
|
41
|
+
ModelSignature,
|
42
|
+
_infer_signature,
|
43
|
+
_rename_signature_with_snowflake_identifiers,
|
44
|
+
)
|
36
45
|
|
37
46
|
from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
|
38
47
|
|
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
|
|
43
52
|
validate_sklearn_args,
|
44
53
|
)
|
45
54
|
|
46
|
-
from snowflake.ml.model.model_signature import (
|
47
|
-
DataType,
|
48
|
-
FeatureSpec,
|
49
|
-
ModelSignature,
|
50
|
-
_infer_signature,
|
51
|
-
_rename_signature_with_snowflake_identifiers,
|
52
|
-
BaseFeatureSpec,
|
53
|
-
)
|
54
|
-
from snowflake.ml.model._signatures import utils as model_signature_utils
|
55
|
-
|
56
55
|
_PROJECT = "ModelDevelopment"
|
57
56
|
# Derive subproject from module name by removing "sklearn"
|
58
57
|
# and converting module name from underscore to CamelCase
|
@@ -297,12 +296,7 @@ class Ridge(BaseTransformer):
|
|
297
296
|
)
|
298
297
|
return selected_cols
|
299
298
|
|
300
|
-
|
301
|
-
project=_PROJECT,
|
302
|
-
subproject=_SUBPROJECT,
|
303
|
-
custom_tags=dict([("autogen", True)]),
|
304
|
-
)
|
305
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "Ridge":
|
299
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "Ridge":
|
306
300
|
"""Fit Ridge regression model
|
307
301
|
For more details on this function, see [sklearn.linear_model.Ridge.fit]
|
308
302
|
(https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html#sklearn.linear_model.Ridge.fit)
|
@@ -329,12 +323,14 @@ class Ridge(BaseTransformer):
|
|
329
323
|
|
330
324
|
self._snowpark_cols = dataset.select(self.input_cols).columns
|
331
325
|
|
332
|
-
|
326
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
333
327
|
if SNOWML_SPROC_ENV in os.environ:
|
334
328
|
statement_params = telemetry.get_function_usage_statement_params(
|
335
329
|
project=_PROJECT,
|
336
330
|
subproject=_SUBPROJECT,
|
337
|
-
function_name=telemetry.get_statement_params_full_func_name(
|
331
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
332
|
+
inspect.currentframe(), Ridge.__class__.__name__
|
333
|
+
),
|
338
334
|
api_calls=[Session.call],
|
339
335
|
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
340
336
|
)
|
@@ -355,7 +351,7 @@ class Ridge(BaseTransformer):
|
|
355
351
|
)
|
356
352
|
self._sklearn_object = model_trainer.train()
|
357
353
|
self._is_fitted = True
|
358
|
-
self.
|
354
|
+
self._generate_model_signatures(dataset)
|
359
355
|
return self
|
360
356
|
|
361
357
|
def _batch_inference_validate_snowpark(
|
@@ -431,7 +427,9 @@ class Ridge(BaseTransformer):
|
|
431
427
|
# when it is classifier, infer the datatype from label columns
|
432
428
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
433
429
|
# Batch inference takes a single expected output column type. Use the first columns type for now.
|
434
|
-
label_cols_signatures = [
|
430
|
+
label_cols_signatures = [
|
431
|
+
row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
|
432
|
+
]
|
435
433
|
if len(label_cols_signatures) == 0:
|
436
434
|
error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
|
437
435
|
raise exceptions.SnowflakeMLException(
|
@@ -439,25 +437,22 @@ class Ridge(BaseTransformer):
|
|
439
437
|
original_exception=ValueError(error_str),
|
440
438
|
)
|
441
439
|
|
442
|
-
expected_type_inferred = convert_sp_to_sf_type(
|
443
|
-
label_cols_signatures[0].as_snowpark_type()
|
444
|
-
)
|
440
|
+
expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
|
445
441
|
|
446
442
|
self._deps = self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
447
|
-
assert isinstance(
|
443
|
+
assert isinstance(
|
444
|
+
dataset._session, Session
|
445
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
448
446
|
|
449
447
|
transform_kwargs = dict(
|
450
|
-
session
|
451
|
-
dependencies
|
452
|
-
drop_input_cols
|
453
|
-
expected_output_cols_type
|
448
|
+
session=dataset._session,
|
449
|
+
dependencies=self._deps,
|
450
|
+
drop_input_cols=self._drop_input_cols,
|
451
|
+
expected_output_cols_type=expected_type_inferred,
|
454
452
|
)
|
455
453
|
|
456
454
|
elif isinstance(dataset, pd.DataFrame):
|
457
|
-
transform_kwargs = dict(
|
458
|
-
snowpark_input_cols = self._snowpark_cols,
|
459
|
-
drop_input_cols = self._drop_input_cols
|
460
|
-
)
|
455
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
461
456
|
|
462
457
|
transform_handlers = ModelTransformerBuilder.build(
|
463
458
|
dataset=dataset,
|
@@ -497,7 +492,7 @@ class Ridge(BaseTransformer):
|
|
497
492
|
Transformed dataset.
|
498
493
|
"""
|
499
494
|
super()._check_dataset_type(dataset)
|
500
|
-
inference_method="transform"
|
495
|
+
inference_method = "transform"
|
501
496
|
|
502
497
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
503
498
|
# are specific to the type of dataset used.
|
@@ -534,17 +529,14 @@ class Ridge(BaseTransformer):
|
|
534
529
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
535
530
|
|
536
531
|
transform_kwargs = dict(
|
537
|
-
session
|
538
|
-
dependencies
|
539
|
-
drop_input_cols
|
540
|
-
expected_output_cols_type
|
532
|
+
session=dataset._session,
|
533
|
+
dependencies=self._deps,
|
534
|
+
drop_input_cols=self._drop_input_cols,
|
535
|
+
expected_output_cols_type=expected_dtype,
|
541
536
|
)
|
542
537
|
|
543
538
|
elif isinstance(dataset, pd.DataFrame):
|
544
|
-
transform_kwargs = dict(
|
545
|
-
snowpark_input_cols = self._snowpark_cols,
|
546
|
-
drop_input_cols = self._drop_input_cols
|
547
|
-
)
|
539
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
548
540
|
|
549
541
|
transform_handlers = ModelTransformerBuilder.build(
|
550
542
|
dataset=dataset,
|
@@ -563,7 +555,11 @@ class Ridge(BaseTransformer):
|
|
563
555
|
return output_df
|
564
556
|
|
565
557
|
@available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
|
566
|
-
def fit_predict(
|
558
|
+
def fit_predict(
|
559
|
+
self,
|
560
|
+
dataset: Union[DataFrame, pd.DataFrame],
|
561
|
+
output_cols_prefix: str = "fit_predict_",
|
562
|
+
) -> Union[DataFrame, pd.DataFrame]:
|
567
563
|
""" Method not supported for this class.
|
568
564
|
|
569
565
|
|
@@ -588,7 +584,9 @@ class Ridge(BaseTransformer):
|
|
588
584
|
)
|
589
585
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
590
586
|
drop_input_cols=self._drop_input_cols,
|
591
|
-
expected_output_cols_list=
|
587
|
+
expected_output_cols_list=(
|
588
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
589
|
+
),
|
592
590
|
)
|
593
591
|
self._sklearn_object = fitted_estimator
|
594
592
|
self._is_fitted = True
|
@@ -605,6 +603,62 @@ class Ridge(BaseTransformer):
|
|
605
603
|
assert self._sklearn_object is not None
|
606
604
|
return self._sklearn_object.embedding_
|
607
605
|
|
606
|
+
|
607
|
+
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
608
|
+
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
609
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
610
|
+
"""
|
611
|
+
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
612
|
+
# The following condition is introduced for kneighbors methods, and not used in other methods
|
613
|
+
if output_cols:
|
614
|
+
output_cols = [
|
615
|
+
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
616
|
+
for c in output_cols
|
617
|
+
]
|
618
|
+
elif getattr(self._sklearn_object, "classes_", None) is None:
|
619
|
+
output_cols = [output_cols_prefix]
|
620
|
+
elif self._sklearn_object is not None:
|
621
|
+
classes = self._sklearn_object.classes_
|
622
|
+
if isinstance(classes, numpy.ndarray):
|
623
|
+
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
624
|
+
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
625
|
+
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
626
|
+
output_cols = []
|
627
|
+
for i, cl in enumerate(classes):
|
628
|
+
# For binary classification, there is only one output column for each class
|
629
|
+
# ndarray as the two classes are complementary.
|
630
|
+
if len(cl) == 2:
|
631
|
+
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
632
|
+
else:
|
633
|
+
output_cols.extend([
|
634
|
+
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
635
|
+
])
|
636
|
+
else:
|
637
|
+
output_cols = []
|
638
|
+
|
639
|
+
# Make sure column names are valid snowflake identifiers.
|
640
|
+
assert output_cols is not None # Make MyPy happy
|
641
|
+
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
642
|
+
|
643
|
+
return rv
|
644
|
+
|
645
|
+
def _align_expected_output_names(
|
646
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
647
|
+
) -> List[str]:
|
648
|
+
# in case the inferred output column names dimension is different
|
649
|
+
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
650
|
+
output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
|
651
|
+
output_df_columns = list(output_df_pd.columns)
|
652
|
+
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
653
|
+
if self.sample_weight_col:
|
654
|
+
output_df_columns_set -= set(self.sample_weight_col)
|
655
|
+
# if the dimension of inferred output column names is correct; use it
|
656
|
+
if len(expected_output_cols_list) == len(output_df_columns_set):
|
657
|
+
return expected_output_cols_list
|
658
|
+
# otherwise, use the sklearn estimator's output
|
659
|
+
else:
|
660
|
+
return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
661
|
+
|
608
662
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
609
663
|
@telemetry.send_api_usage_telemetry(
|
610
664
|
project=_PROJECT,
|
@@ -635,24 +689,28 @@ class Ridge(BaseTransformer):
|
|
635
689
|
# are specific to the type of dataset used.
|
636
690
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
637
691
|
|
692
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
693
|
+
|
638
694
|
if isinstance(dataset, DataFrame):
|
639
695
|
self._deps = self._batch_inference_validate_snowpark(
|
640
696
|
dataset=dataset,
|
641
697
|
inference_method=inference_method,
|
642
698
|
)
|
643
|
-
assert isinstance(
|
699
|
+
assert isinstance(
|
700
|
+
dataset._session, Session
|
701
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
644
702
|
transform_kwargs = dict(
|
645
703
|
session=dataset._session,
|
646
704
|
dependencies=self._deps,
|
647
|
-
drop_input_cols
|
705
|
+
drop_input_cols=self._drop_input_cols,
|
648
706
|
expected_output_cols_type="float",
|
649
707
|
)
|
708
|
+
expected_output_cols = self._align_expected_output_names(
|
709
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
710
|
+
)
|
650
711
|
|
651
712
|
elif isinstance(dataset, pd.DataFrame):
|
652
|
-
transform_kwargs = dict(
|
653
|
-
snowpark_input_cols = self._snowpark_cols,
|
654
|
-
drop_input_cols = self._drop_input_cols
|
655
|
-
)
|
713
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
656
714
|
|
657
715
|
transform_handlers = ModelTransformerBuilder.build(
|
658
716
|
dataset=dataset,
|
@@ -664,7 +722,7 @@ class Ridge(BaseTransformer):
|
|
664
722
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
665
723
|
inference_method=inference_method,
|
666
724
|
input_cols=self.input_cols,
|
667
|
-
expected_output_cols=
|
725
|
+
expected_output_cols=expected_output_cols,
|
668
726
|
**transform_kwargs
|
669
727
|
)
|
670
728
|
return output_df
|
@@ -694,7 +752,8 @@ class Ridge(BaseTransformer):
|
|
694
752
|
Output dataset with log probability of the sample for each class in the model.
|
695
753
|
"""
|
696
754
|
super()._check_dataset_type(dataset)
|
697
|
-
inference_method="predict_log_proba"
|
755
|
+
inference_method = "predict_log_proba"
|
756
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
698
757
|
|
699
758
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
700
759
|
# are specific to the type of dataset used.
|
@@ -705,18 +764,20 @@ class Ridge(BaseTransformer):
|
|
705
764
|
dataset=dataset,
|
706
765
|
inference_method=inference_method,
|
707
766
|
)
|
708
|
-
assert isinstance(
|
767
|
+
assert isinstance(
|
768
|
+
dataset._session, Session
|
769
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
709
770
|
transform_kwargs = dict(
|
710
771
|
session=dataset._session,
|
711
772
|
dependencies=self._deps,
|
712
|
-
drop_input_cols
|
773
|
+
drop_input_cols=self._drop_input_cols,
|
713
774
|
expected_output_cols_type="float",
|
714
775
|
)
|
776
|
+
expected_output_cols = self._align_expected_output_names(
|
777
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
778
|
+
)
|
715
779
|
elif isinstance(dataset, pd.DataFrame):
|
716
|
-
transform_kwargs = dict(
|
717
|
-
snowpark_input_cols = self._snowpark_cols,
|
718
|
-
drop_input_cols = self._drop_input_cols
|
719
|
-
)
|
780
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
720
781
|
|
721
782
|
transform_handlers = ModelTransformerBuilder.build(
|
722
783
|
dataset=dataset,
|
@@ -729,7 +790,7 @@ class Ridge(BaseTransformer):
|
|
729
790
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
730
791
|
inference_method=inference_method,
|
731
792
|
input_cols=self.input_cols,
|
732
|
-
expected_output_cols=
|
793
|
+
expected_output_cols=expected_output_cols,
|
733
794
|
**transform_kwargs
|
734
795
|
)
|
735
796
|
return output_df
|
@@ -755,30 +816,34 @@ class Ridge(BaseTransformer):
|
|
755
816
|
Output dataset with results of the decision function for the samples in input dataset.
|
756
817
|
"""
|
757
818
|
super()._check_dataset_type(dataset)
|
758
|
-
inference_method="decision_function"
|
819
|
+
inference_method = "decision_function"
|
759
820
|
|
760
821
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
761
822
|
# are specific to the type of dataset used.
|
762
823
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
763
824
|
|
825
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
826
|
+
|
764
827
|
if isinstance(dataset, DataFrame):
|
765
828
|
self._deps = self._batch_inference_validate_snowpark(
|
766
829
|
dataset=dataset,
|
767
830
|
inference_method=inference_method,
|
768
831
|
)
|
769
|
-
assert isinstance(
|
832
|
+
assert isinstance(
|
833
|
+
dataset._session, Session
|
834
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
770
835
|
transform_kwargs = dict(
|
771
836
|
session=dataset._session,
|
772
837
|
dependencies=self._deps,
|
773
|
-
drop_input_cols
|
838
|
+
drop_input_cols=self._drop_input_cols,
|
774
839
|
expected_output_cols_type="float",
|
775
840
|
)
|
841
|
+
expected_output_cols = self._align_expected_output_names(
|
842
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
843
|
+
)
|
776
844
|
|
777
845
|
elif isinstance(dataset, pd.DataFrame):
|
778
|
-
transform_kwargs = dict(
|
779
|
-
snowpark_input_cols = self._snowpark_cols,
|
780
|
-
drop_input_cols = self._drop_input_cols
|
781
|
-
)
|
846
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
782
847
|
|
783
848
|
transform_handlers = ModelTransformerBuilder.build(
|
784
849
|
dataset=dataset,
|
@@ -791,7 +856,7 @@ class Ridge(BaseTransformer):
|
|
791
856
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
792
857
|
inference_method=inference_method,
|
793
858
|
input_cols=self.input_cols,
|
794
|
-
expected_output_cols=
|
859
|
+
expected_output_cols=expected_output_cols,
|
795
860
|
**transform_kwargs
|
796
861
|
)
|
797
862
|
return output_df
|
@@ -820,12 +885,14 @@ class Ridge(BaseTransformer):
|
|
820
885
|
Output dataset with probability of the sample for each class in the model.
|
821
886
|
"""
|
822
887
|
super()._check_dataset_type(dataset)
|
823
|
-
inference_method="score_samples"
|
888
|
+
inference_method = "score_samples"
|
824
889
|
|
825
890
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
826
891
|
# are specific to the type of dataset used.
|
827
892
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
828
893
|
|
894
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
895
|
+
|
829
896
|
if isinstance(dataset, DataFrame):
|
830
897
|
self._deps = self._batch_inference_validate_snowpark(
|
831
898
|
dataset=dataset,
|
@@ -838,6 +905,9 @@ class Ridge(BaseTransformer):
|
|
838
905
|
drop_input_cols = self._drop_input_cols,
|
839
906
|
expected_output_cols_type="float",
|
840
907
|
)
|
908
|
+
expected_output_cols = self._align_expected_output_names(
|
909
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
910
|
+
)
|
841
911
|
|
842
912
|
elif isinstance(dataset, pd.DataFrame):
|
843
913
|
transform_kwargs = dict(
|
@@ -856,7 +926,7 @@ class Ridge(BaseTransformer):
|
|
856
926
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
857
927
|
inference_method=inference_method,
|
858
928
|
input_cols=self.input_cols,
|
859
|
-
expected_output_cols=
|
929
|
+
expected_output_cols=expected_output_cols,
|
860
930
|
**transform_kwargs
|
861
931
|
)
|
862
932
|
return output_df
|
@@ -1003,50 +1073,84 @@ class Ridge(BaseTransformer):
|
|
1003
1073
|
)
|
1004
1074
|
return output_df
|
1005
1075
|
|
1076
|
+
|
1077
|
+
|
1078
|
+
def to_sklearn(self) -> Any:
|
1079
|
+
"""Get sklearn.linear_model.Ridge object.
|
1080
|
+
"""
|
1081
|
+
if self._sklearn_object is None:
|
1082
|
+
self._sklearn_object = self._create_sklearn_object()
|
1083
|
+
return self._sklearn_object
|
1084
|
+
|
1085
|
+
def to_xgboost(self) -> Any:
|
1086
|
+
raise exceptions.SnowflakeMLException(
|
1087
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1088
|
+
original_exception=AttributeError(
|
1089
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1090
|
+
"to_xgboost()",
|
1091
|
+
"to_sklearn()"
|
1092
|
+
)
|
1093
|
+
),
|
1094
|
+
)
|
1095
|
+
|
1096
|
+
def to_lightgbm(self) -> Any:
|
1097
|
+
raise exceptions.SnowflakeMLException(
|
1098
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1099
|
+
original_exception=AttributeError(
|
1100
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1101
|
+
"to_lightgbm()",
|
1102
|
+
"to_sklearn()"
|
1103
|
+
)
|
1104
|
+
),
|
1105
|
+
)
|
1006
1106
|
|
1007
|
-
def
|
1107
|
+
def _get_dependencies(self) -> List[str]:
|
1108
|
+
return self._deps
|
1109
|
+
|
1110
|
+
|
1111
|
+
def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
1008
1112
|
self._model_signature_dict = dict()
|
1009
1113
|
|
1010
1114
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1011
1115
|
|
1012
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input"))
|
1116
|
+
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1013
1117
|
outputs: List[BaseFeatureSpec] = []
|
1014
1118
|
if hasattr(self, "predict"):
|
1015
1119
|
# keep mypy happy
|
1016
|
-
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1120
|
+
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1017
1121
|
# For classifier, the type of predict is the same as the type of label
|
1018
|
-
if self._sklearn_object._estimator_type ==
|
1019
|
-
|
1122
|
+
if self._sklearn_object._estimator_type == "classifier":
|
1123
|
+
# label columns is the desired type for output
|
1020
1124
|
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1021
1125
|
# rename the output columns
|
1022
1126
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1023
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1024
|
-
|
1025
|
-
|
1127
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1128
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1129
|
+
)
|
1026
1130
|
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
1027
1131
|
# For outlier models, returns -1 for outliers and 1 for inliers.
|
1028
|
-
# Clusterer returns int64 cluster labels.
|
1132
|
+
# Clusterer returns int64 cluster labels.
|
1029
1133
|
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
1030
1134
|
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
1031
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1032
|
-
|
1033
|
-
|
1034
|
-
|
1135
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1136
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1137
|
+
)
|
1138
|
+
|
1035
1139
|
# For regressor, the type of predict is float64
|
1036
|
-
elif self._sklearn_object._estimator_type ==
|
1140
|
+
elif self._sklearn_object._estimator_type == "regressor":
|
1037
1141
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1038
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1039
|
-
|
1040
|
-
|
1041
|
-
|
1142
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1143
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1144
|
+
)
|
1145
|
+
|
1042
1146
|
for prob_func in PROB_FUNCTIONS:
|
1043
1147
|
if hasattr(self, prob_func):
|
1044
1148
|
output_cols_prefix: str = f"{prob_func}_"
|
1045
1149
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1046
1150
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1047
|
-
self._model_signature_dict[prob_func] = ModelSignature(
|
1048
|
-
|
1049
|
-
|
1151
|
+
self._model_signature_dict[prob_func] = ModelSignature(
|
1152
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1153
|
+
)
|
1050
1154
|
|
1051
1155
|
# Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
|
1052
1156
|
items = list(self._model_signature_dict.items())
|
@@ -1059,10 +1163,10 @@ class Ridge(BaseTransformer):
|
|
1059
1163
|
"""Returns model signature of current class.
|
1060
1164
|
|
1061
1165
|
Raises:
|
1062
|
-
|
1166
|
+
SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
|
1063
1167
|
|
1064
1168
|
Returns:
|
1065
|
-
Dict
|
1169
|
+
Dict with each method and its input output signature
|
1066
1170
|
"""
|
1067
1171
|
if self._model_signature_dict is None:
|
1068
1172
|
raise exceptions.SnowflakeMLException(
|
@@ -1070,35 +1174,3 @@ class Ridge(BaseTransformer):
|
|
1070
1174
|
original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
|
1071
1175
|
)
|
1072
1176
|
return self._model_signature_dict
|
1073
|
-
|
1074
|
-
def to_sklearn(self) -> Any:
|
1075
|
-
"""Get sklearn.linear_model.Ridge object.
|
1076
|
-
"""
|
1077
|
-
if self._sklearn_object is None:
|
1078
|
-
self._sklearn_object = self._create_sklearn_object()
|
1079
|
-
return self._sklearn_object
|
1080
|
-
|
1081
|
-
def to_xgboost(self) -> Any:
|
1082
|
-
raise exceptions.SnowflakeMLException(
|
1083
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1084
|
-
original_exception=AttributeError(
|
1085
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1086
|
-
"to_xgboost()",
|
1087
|
-
"to_sklearn()"
|
1088
|
-
)
|
1089
|
-
),
|
1090
|
-
)
|
1091
|
-
|
1092
|
-
def to_lightgbm(self) -> Any:
|
1093
|
-
raise exceptions.SnowflakeMLException(
|
1094
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1095
|
-
original_exception=AttributeError(
|
1096
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1097
|
-
"to_lightgbm()",
|
1098
|
-
"to_sklearn()"
|
1099
|
-
)
|
1100
|
-
),
|
1101
|
-
)
|
1102
|
-
|
1103
|
-
def _get_dependencies(self) -> List[str]:
|
1104
|
-
return self._deps
|