snowflake-ml-python 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +11 -1
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/feature_store/feature_store.py +151 -78
- snowflake/ml/feature_store/feature_view.py +12 -24
- snowflake/ml/fileset/sfcfs.py +56 -50
- snowflake/ml/fileset/stage_fs.py +48 -13
- snowflake/ml/model/_client/model/model_version_impl.py +2 -50
- snowflake/ml/model/_client/ops/model_ops.py +78 -29
- snowflake/ml/model/_client/sql/model.py +23 -2
- snowflake/ml/model/_client/sql/model_version.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +19 -54
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +8 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +36 -6
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -2
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +5 -5
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +195 -123
- snowflake/ml/modeling/cluster/affinity_propagation.py +195 -123
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +195 -123
- snowflake/ml/modeling/cluster/birch.py +195 -123
- snowflake/ml/modeling/cluster/bisecting_k_means.py +195 -123
- snowflake/ml/modeling/cluster/dbscan.py +195 -123
- snowflake/ml/modeling/cluster/feature_agglomeration.py +195 -123
- snowflake/ml/modeling/cluster/k_means.py +195 -123
- snowflake/ml/modeling/cluster/mean_shift.py +195 -123
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +195 -123
- snowflake/ml/modeling/cluster/optics.py +195 -123
- snowflake/ml/modeling/cluster/spectral_biclustering.py +195 -123
- snowflake/ml/modeling/cluster/spectral_clustering.py +195 -123
- snowflake/ml/modeling/cluster/spectral_coclustering.py +195 -123
- snowflake/ml/modeling/compose/column_transformer.py +195 -123
- snowflake/ml/modeling/compose/transformed_target_regressor.py +195 -123
- snowflake/ml/modeling/covariance/elliptic_envelope.py +195 -123
- snowflake/ml/modeling/covariance/empirical_covariance.py +195 -123
- snowflake/ml/modeling/covariance/graphical_lasso.py +195 -123
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +195 -123
- snowflake/ml/modeling/covariance/ledoit_wolf.py +195 -123
- snowflake/ml/modeling/covariance/min_cov_det.py +195 -123
- snowflake/ml/modeling/covariance/oas.py +195 -123
- snowflake/ml/modeling/covariance/shrunk_covariance.py +195 -123
- snowflake/ml/modeling/decomposition/dictionary_learning.py +195 -123
- snowflake/ml/modeling/decomposition/factor_analysis.py +195 -123
- snowflake/ml/modeling/decomposition/fast_ica.py +195 -123
- snowflake/ml/modeling/decomposition/incremental_pca.py +195 -123
- snowflake/ml/modeling/decomposition/kernel_pca.py +195 -123
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +195 -123
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +195 -123
- snowflake/ml/modeling/decomposition/pca.py +195 -123
- snowflake/ml/modeling/decomposition/sparse_pca.py +195 -123
- snowflake/ml/modeling/decomposition/truncated_svd.py +195 -123
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +195 -123
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +195 -123
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/bagging_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/bagging_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/isolation_forest.py +195 -123
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/stacking_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/voting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/voting_regressor.py +195 -123
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fdr.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fpr.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fwe.py +195 -123
- snowflake/ml/modeling/feature_selection/select_k_best.py +195 -123
- snowflake/ml/modeling/feature_selection/select_percentile.py +195 -123
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +195 -123
- snowflake/ml/modeling/feature_selection/variance_threshold.py +195 -123
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +9 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +195 -123
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +195 -123
- snowflake/ml/modeling/impute/iterative_imputer.py +195 -123
- snowflake/ml/modeling/impute/knn_imputer.py +195 -123
- snowflake/ml/modeling/impute/missing_indicator.py +195 -123
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +195 -123
- snowflake/ml/modeling/kernel_approximation/nystroem.py +195 -123
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +195 -123
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +195 -123
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +195 -123
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +195 -123
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +195 -123
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ard_regression.py +195 -123
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +195 -123
- snowflake/ml/modeling/linear_model/elastic_net.py +195 -123
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +195 -123
- snowflake/ml/modeling/linear_model/gamma_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/huber_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/lars.py +195 -123
- snowflake/ml/modeling/linear_model/lars_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +195 -123
- snowflake/ml/modeling/linear_model/linear_regression.py +195 -123
- snowflake/ml/modeling/linear_model/logistic_regression.py +195 -123
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +195 -123
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +195 -123
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/perceptron.py +195 -123
- snowflake/ml/modeling/linear_model/poisson_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ransac_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ridge.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_cv.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +195 -123
- snowflake/ml/modeling/manifold/isomap.py +195 -123
- snowflake/ml/modeling/manifold/mds.py +195 -123
- snowflake/ml/modeling/manifold/spectral_embedding.py +195 -123
- snowflake/ml/modeling/manifold/tsne.py +195 -123
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +195 -123
- snowflake/ml/modeling/mixture/gaussian_mixture.py +195 -123
- snowflake/ml/modeling/model_selection/grid_search_cv.py +42 -18
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +42 -18
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +195 -123
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +195 -123
- snowflake/ml/modeling/multiclass/output_code_classifier.py +195 -123
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/complement_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +195 -123
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +195 -123
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +195 -123
- snowflake/ml/modeling/neighbors/kernel_density.py +195 -123
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +195 -123
- snowflake/ml/modeling/neighbors/nearest_centroid.py +195 -123
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +195 -123
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +195 -123
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +195 -123
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +195 -123
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +195 -123
- snowflake/ml/modeling/neural_network/mlp_classifier.py +195 -123
- snowflake/ml/modeling/neural_network/mlp_regressor.py +195 -123
- snowflake/ml/modeling/pipeline/pipeline.py +4 -4
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +195 -123
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +195 -123
- snowflake/ml/modeling/semi_supervised/label_spreading.py +195 -123
- snowflake/ml/modeling/svm/linear_svc.py +195 -123
- snowflake/ml/modeling/svm/linear_svr.py +195 -123
- snowflake/ml/modeling/svm/nu_svc.py +195 -123
- snowflake/ml/modeling/svm/nu_svr.py +195 -123
- snowflake/ml/modeling/svm/svc.py +195 -123
- snowflake/ml/modeling/svm/svr.py +195 -123
- snowflake/ml/modeling/tree/decision_tree_classifier.py +195 -123
- snowflake/ml/modeling/tree/decision_tree_regressor.py +195 -123
- snowflake/ml/modeling/tree/extra_tree_classifier.py +195 -123
- snowflake/ml/modeling/tree/extra_tree_regressor.py +195 -123
- snowflake/ml/modeling/xgboost/xgb_classifier.py +195 -123
- snowflake/ml/modeling/xgboost/xgb_regressor.py +195 -123
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +195 -123
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +195 -123
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/METADATA +68 -57
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/RECORD +202 -200
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
|
|
33
33
|
BatchInferenceKwargsTypedDict,
|
34
34
|
ScoreKwargsTypedDict
|
35
35
|
)
|
36
|
+
from snowflake.ml.model._signatures import utils as model_signature_utils
|
37
|
+
from snowflake.ml.model.model_signature import (
|
38
|
+
BaseFeatureSpec,
|
39
|
+
DataType,
|
40
|
+
FeatureSpec,
|
41
|
+
ModelSignature,
|
42
|
+
_infer_signature,
|
43
|
+
_rename_signature_with_snowflake_identifiers,
|
44
|
+
)
|
36
45
|
|
37
46
|
from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
|
38
47
|
|
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
|
|
43
52
|
validate_sklearn_args,
|
44
53
|
)
|
45
54
|
|
46
|
-
from snowflake.ml.model.model_signature import (
|
47
|
-
DataType,
|
48
|
-
FeatureSpec,
|
49
|
-
ModelSignature,
|
50
|
-
_infer_signature,
|
51
|
-
_rename_signature_with_snowflake_identifiers,
|
52
|
-
BaseFeatureSpec,
|
53
|
-
)
|
54
|
-
from snowflake.ml.model._signatures import utils as model_signature_utils
|
55
|
-
|
56
55
|
_PROJECT = "ModelDevelopment"
|
57
56
|
# Derive subproject from module name by removing "sklearn"
|
58
57
|
# and converting module name from underscore to CamelCase
|
@@ -262,12 +261,7 @@ class Lasso(BaseTransformer):
|
|
262
261
|
)
|
263
262
|
return selected_cols
|
264
263
|
|
265
|
-
|
266
|
-
project=_PROJECT,
|
267
|
-
subproject=_SUBPROJECT,
|
268
|
-
custom_tags=dict([("autogen", True)]),
|
269
|
-
)
|
270
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "Lasso":
|
264
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "Lasso":
|
271
265
|
"""Fit model with coordinate descent
|
272
266
|
For more details on this function, see [sklearn.linear_model.Lasso.fit]
|
273
267
|
(https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Lasso.html#sklearn.linear_model.Lasso.fit)
|
@@ -294,12 +288,14 @@ class Lasso(BaseTransformer):
|
|
294
288
|
|
295
289
|
self._snowpark_cols = dataset.select(self.input_cols).columns
|
296
290
|
|
297
|
-
|
291
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
298
292
|
if SNOWML_SPROC_ENV in os.environ:
|
299
293
|
statement_params = telemetry.get_function_usage_statement_params(
|
300
294
|
project=_PROJECT,
|
301
295
|
subproject=_SUBPROJECT,
|
302
|
-
function_name=telemetry.get_statement_params_full_func_name(
|
296
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
297
|
+
inspect.currentframe(), Lasso.__class__.__name__
|
298
|
+
),
|
303
299
|
api_calls=[Session.call],
|
304
300
|
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
305
301
|
)
|
@@ -320,7 +316,7 @@ class Lasso(BaseTransformer):
|
|
320
316
|
)
|
321
317
|
self._sklearn_object = model_trainer.train()
|
322
318
|
self._is_fitted = True
|
323
|
-
self.
|
319
|
+
self._generate_model_signatures(dataset)
|
324
320
|
return self
|
325
321
|
|
326
322
|
def _batch_inference_validate_snowpark(
|
@@ -396,7 +392,9 @@ class Lasso(BaseTransformer):
|
|
396
392
|
# when it is classifier, infer the datatype from label columns
|
397
393
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
398
394
|
# Batch inference takes a single expected output column type. Use the first columns type for now.
|
399
|
-
label_cols_signatures = [
|
395
|
+
label_cols_signatures = [
|
396
|
+
row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
|
397
|
+
]
|
400
398
|
if len(label_cols_signatures) == 0:
|
401
399
|
error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
|
402
400
|
raise exceptions.SnowflakeMLException(
|
@@ -404,25 +402,22 @@ class Lasso(BaseTransformer):
|
|
404
402
|
original_exception=ValueError(error_str),
|
405
403
|
)
|
406
404
|
|
407
|
-
expected_type_inferred = convert_sp_to_sf_type(
|
408
|
-
label_cols_signatures[0].as_snowpark_type()
|
409
|
-
)
|
405
|
+
expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
|
410
406
|
|
411
407
|
self._deps = self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
412
|
-
assert isinstance(
|
408
|
+
assert isinstance(
|
409
|
+
dataset._session, Session
|
410
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
413
411
|
|
414
412
|
transform_kwargs = dict(
|
415
|
-
session
|
416
|
-
dependencies
|
417
|
-
drop_input_cols
|
418
|
-
expected_output_cols_type
|
413
|
+
session=dataset._session,
|
414
|
+
dependencies=self._deps,
|
415
|
+
drop_input_cols=self._drop_input_cols,
|
416
|
+
expected_output_cols_type=expected_type_inferred,
|
419
417
|
)
|
420
418
|
|
421
419
|
elif isinstance(dataset, pd.DataFrame):
|
422
|
-
transform_kwargs = dict(
|
423
|
-
snowpark_input_cols = self._snowpark_cols,
|
424
|
-
drop_input_cols = self._drop_input_cols
|
425
|
-
)
|
420
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
426
421
|
|
427
422
|
transform_handlers = ModelTransformerBuilder.build(
|
428
423
|
dataset=dataset,
|
@@ -462,7 +457,7 @@ class Lasso(BaseTransformer):
|
|
462
457
|
Transformed dataset.
|
463
458
|
"""
|
464
459
|
super()._check_dataset_type(dataset)
|
465
|
-
inference_method="transform"
|
460
|
+
inference_method = "transform"
|
466
461
|
|
467
462
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
468
463
|
# are specific to the type of dataset used.
|
@@ -499,17 +494,14 @@ class Lasso(BaseTransformer):
|
|
499
494
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
500
495
|
|
501
496
|
transform_kwargs = dict(
|
502
|
-
session
|
503
|
-
dependencies
|
504
|
-
drop_input_cols
|
505
|
-
expected_output_cols_type
|
497
|
+
session=dataset._session,
|
498
|
+
dependencies=self._deps,
|
499
|
+
drop_input_cols=self._drop_input_cols,
|
500
|
+
expected_output_cols_type=expected_dtype,
|
506
501
|
)
|
507
502
|
|
508
503
|
elif isinstance(dataset, pd.DataFrame):
|
509
|
-
transform_kwargs = dict(
|
510
|
-
snowpark_input_cols = self._snowpark_cols,
|
511
|
-
drop_input_cols = self._drop_input_cols
|
512
|
-
)
|
504
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
513
505
|
|
514
506
|
transform_handlers = ModelTransformerBuilder.build(
|
515
507
|
dataset=dataset,
|
@@ -528,7 +520,11 @@ class Lasso(BaseTransformer):
|
|
528
520
|
return output_df
|
529
521
|
|
530
522
|
@available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
|
531
|
-
def fit_predict(
|
523
|
+
def fit_predict(
|
524
|
+
self,
|
525
|
+
dataset: Union[DataFrame, pd.DataFrame],
|
526
|
+
output_cols_prefix: str = "fit_predict_",
|
527
|
+
) -> Union[DataFrame, pd.DataFrame]:
|
532
528
|
""" Method not supported for this class.
|
533
529
|
|
534
530
|
|
@@ -553,7 +549,9 @@ class Lasso(BaseTransformer):
|
|
553
549
|
)
|
554
550
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
555
551
|
drop_input_cols=self._drop_input_cols,
|
556
|
-
expected_output_cols_list=
|
552
|
+
expected_output_cols_list=(
|
553
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
554
|
+
),
|
557
555
|
)
|
558
556
|
self._sklearn_object = fitted_estimator
|
559
557
|
self._is_fitted = True
|
@@ -570,6 +568,62 @@ class Lasso(BaseTransformer):
|
|
570
568
|
assert self._sklearn_object is not None
|
571
569
|
return self._sklearn_object.embedding_
|
572
570
|
|
571
|
+
|
572
|
+
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
573
|
+
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
574
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
575
|
+
"""
|
576
|
+
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
577
|
+
# The following condition is introduced for kneighbors methods, and not used in other methods
|
578
|
+
if output_cols:
|
579
|
+
output_cols = [
|
580
|
+
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
581
|
+
for c in output_cols
|
582
|
+
]
|
583
|
+
elif getattr(self._sklearn_object, "classes_", None) is None:
|
584
|
+
output_cols = [output_cols_prefix]
|
585
|
+
elif self._sklearn_object is not None:
|
586
|
+
classes = self._sklearn_object.classes_
|
587
|
+
if isinstance(classes, numpy.ndarray):
|
588
|
+
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
589
|
+
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
590
|
+
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
591
|
+
output_cols = []
|
592
|
+
for i, cl in enumerate(classes):
|
593
|
+
# For binary classification, there is only one output column for each class
|
594
|
+
# ndarray as the two classes are complementary.
|
595
|
+
if len(cl) == 2:
|
596
|
+
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
597
|
+
else:
|
598
|
+
output_cols.extend([
|
599
|
+
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
600
|
+
])
|
601
|
+
else:
|
602
|
+
output_cols = []
|
603
|
+
|
604
|
+
# Make sure column names are valid snowflake identifiers.
|
605
|
+
assert output_cols is not None # Make MyPy happy
|
606
|
+
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
607
|
+
|
608
|
+
return rv
|
609
|
+
|
610
|
+
def _align_expected_output_names(
|
611
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
612
|
+
) -> List[str]:
|
613
|
+
# in case the inferred output column names dimension is different
|
614
|
+
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
615
|
+
output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
|
616
|
+
output_df_columns = list(output_df_pd.columns)
|
617
|
+
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
618
|
+
if self.sample_weight_col:
|
619
|
+
output_df_columns_set -= set(self.sample_weight_col)
|
620
|
+
# if the dimension of inferred output column names is correct; use it
|
621
|
+
if len(expected_output_cols_list) == len(output_df_columns_set):
|
622
|
+
return expected_output_cols_list
|
623
|
+
# otherwise, use the sklearn estimator's output
|
624
|
+
else:
|
625
|
+
return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
626
|
+
|
573
627
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
574
628
|
@telemetry.send_api_usage_telemetry(
|
575
629
|
project=_PROJECT,
|
@@ -600,24 +654,28 @@ class Lasso(BaseTransformer):
|
|
600
654
|
# are specific to the type of dataset used.
|
601
655
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
602
656
|
|
657
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
658
|
+
|
603
659
|
if isinstance(dataset, DataFrame):
|
604
660
|
self._deps = self._batch_inference_validate_snowpark(
|
605
661
|
dataset=dataset,
|
606
662
|
inference_method=inference_method,
|
607
663
|
)
|
608
|
-
assert isinstance(
|
664
|
+
assert isinstance(
|
665
|
+
dataset._session, Session
|
666
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
609
667
|
transform_kwargs = dict(
|
610
668
|
session=dataset._session,
|
611
669
|
dependencies=self._deps,
|
612
|
-
drop_input_cols
|
670
|
+
drop_input_cols=self._drop_input_cols,
|
613
671
|
expected_output_cols_type="float",
|
614
672
|
)
|
673
|
+
expected_output_cols = self._align_expected_output_names(
|
674
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
675
|
+
)
|
615
676
|
|
616
677
|
elif isinstance(dataset, pd.DataFrame):
|
617
|
-
transform_kwargs = dict(
|
618
|
-
snowpark_input_cols = self._snowpark_cols,
|
619
|
-
drop_input_cols = self._drop_input_cols
|
620
|
-
)
|
678
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
621
679
|
|
622
680
|
transform_handlers = ModelTransformerBuilder.build(
|
623
681
|
dataset=dataset,
|
@@ -629,7 +687,7 @@ class Lasso(BaseTransformer):
|
|
629
687
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
630
688
|
inference_method=inference_method,
|
631
689
|
input_cols=self.input_cols,
|
632
|
-
expected_output_cols=
|
690
|
+
expected_output_cols=expected_output_cols,
|
633
691
|
**transform_kwargs
|
634
692
|
)
|
635
693
|
return output_df
|
@@ -659,7 +717,8 @@ class Lasso(BaseTransformer):
|
|
659
717
|
Output dataset with log probability of the sample for each class in the model.
|
660
718
|
"""
|
661
719
|
super()._check_dataset_type(dataset)
|
662
|
-
inference_method="predict_log_proba"
|
720
|
+
inference_method = "predict_log_proba"
|
721
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
663
722
|
|
664
723
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
665
724
|
# are specific to the type of dataset used.
|
@@ -670,18 +729,20 @@ class Lasso(BaseTransformer):
|
|
670
729
|
dataset=dataset,
|
671
730
|
inference_method=inference_method,
|
672
731
|
)
|
673
|
-
assert isinstance(
|
732
|
+
assert isinstance(
|
733
|
+
dataset._session, Session
|
734
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
674
735
|
transform_kwargs = dict(
|
675
736
|
session=dataset._session,
|
676
737
|
dependencies=self._deps,
|
677
|
-
drop_input_cols
|
738
|
+
drop_input_cols=self._drop_input_cols,
|
678
739
|
expected_output_cols_type="float",
|
679
740
|
)
|
741
|
+
expected_output_cols = self._align_expected_output_names(
|
742
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
743
|
+
)
|
680
744
|
elif isinstance(dataset, pd.DataFrame):
|
681
|
-
transform_kwargs = dict(
|
682
|
-
snowpark_input_cols = self._snowpark_cols,
|
683
|
-
drop_input_cols = self._drop_input_cols
|
684
|
-
)
|
745
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
685
746
|
|
686
747
|
transform_handlers = ModelTransformerBuilder.build(
|
687
748
|
dataset=dataset,
|
@@ -694,7 +755,7 @@ class Lasso(BaseTransformer):
|
|
694
755
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
695
756
|
inference_method=inference_method,
|
696
757
|
input_cols=self.input_cols,
|
697
|
-
expected_output_cols=
|
758
|
+
expected_output_cols=expected_output_cols,
|
698
759
|
**transform_kwargs
|
699
760
|
)
|
700
761
|
return output_df
|
@@ -720,30 +781,34 @@ class Lasso(BaseTransformer):
|
|
720
781
|
Output dataset with results of the decision function for the samples in input dataset.
|
721
782
|
"""
|
722
783
|
super()._check_dataset_type(dataset)
|
723
|
-
inference_method="decision_function"
|
784
|
+
inference_method = "decision_function"
|
724
785
|
|
725
786
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
726
787
|
# are specific to the type of dataset used.
|
727
788
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
728
789
|
|
790
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
791
|
+
|
729
792
|
if isinstance(dataset, DataFrame):
|
730
793
|
self._deps = self._batch_inference_validate_snowpark(
|
731
794
|
dataset=dataset,
|
732
795
|
inference_method=inference_method,
|
733
796
|
)
|
734
|
-
assert isinstance(
|
797
|
+
assert isinstance(
|
798
|
+
dataset._session, Session
|
799
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
735
800
|
transform_kwargs = dict(
|
736
801
|
session=dataset._session,
|
737
802
|
dependencies=self._deps,
|
738
|
-
drop_input_cols
|
803
|
+
drop_input_cols=self._drop_input_cols,
|
739
804
|
expected_output_cols_type="float",
|
740
805
|
)
|
806
|
+
expected_output_cols = self._align_expected_output_names(
|
807
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
808
|
+
)
|
741
809
|
|
742
810
|
elif isinstance(dataset, pd.DataFrame):
|
743
|
-
transform_kwargs = dict(
|
744
|
-
snowpark_input_cols = self._snowpark_cols,
|
745
|
-
drop_input_cols = self._drop_input_cols
|
746
|
-
)
|
811
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
747
812
|
|
748
813
|
transform_handlers = ModelTransformerBuilder.build(
|
749
814
|
dataset=dataset,
|
@@ -756,7 +821,7 @@ class Lasso(BaseTransformer):
|
|
756
821
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
757
822
|
inference_method=inference_method,
|
758
823
|
input_cols=self.input_cols,
|
759
|
-
expected_output_cols=
|
824
|
+
expected_output_cols=expected_output_cols,
|
760
825
|
**transform_kwargs
|
761
826
|
)
|
762
827
|
return output_df
|
@@ -785,12 +850,14 @@ class Lasso(BaseTransformer):
|
|
785
850
|
Output dataset with probability of the sample for each class in the model.
|
786
851
|
"""
|
787
852
|
super()._check_dataset_type(dataset)
|
788
|
-
inference_method="score_samples"
|
853
|
+
inference_method = "score_samples"
|
789
854
|
|
790
855
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
791
856
|
# are specific to the type of dataset used.
|
792
857
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
793
858
|
|
859
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
860
|
+
|
794
861
|
if isinstance(dataset, DataFrame):
|
795
862
|
self._deps = self._batch_inference_validate_snowpark(
|
796
863
|
dataset=dataset,
|
@@ -803,6 +870,9 @@ class Lasso(BaseTransformer):
|
|
803
870
|
drop_input_cols = self._drop_input_cols,
|
804
871
|
expected_output_cols_type="float",
|
805
872
|
)
|
873
|
+
expected_output_cols = self._align_expected_output_names(
|
874
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
875
|
+
)
|
806
876
|
|
807
877
|
elif isinstance(dataset, pd.DataFrame):
|
808
878
|
transform_kwargs = dict(
|
@@ -821,7 +891,7 @@ class Lasso(BaseTransformer):
|
|
821
891
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
822
892
|
inference_method=inference_method,
|
823
893
|
input_cols=self.input_cols,
|
824
|
-
expected_output_cols=
|
894
|
+
expected_output_cols=expected_output_cols,
|
825
895
|
**transform_kwargs
|
826
896
|
)
|
827
897
|
return output_df
|
@@ -968,50 +1038,84 @@ class Lasso(BaseTransformer):
|
|
968
1038
|
)
|
969
1039
|
return output_df
|
970
1040
|
|
1041
|
+
|
1042
|
+
|
1043
|
+
def to_sklearn(self) -> Any:
|
1044
|
+
"""Get sklearn.linear_model.Lasso object.
|
1045
|
+
"""
|
1046
|
+
if self._sklearn_object is None:
|
1047
|
+
self._sklearn_object = self._create_sklearn_object()
|
1048
|
+
return self._sklearn_object
|
1049
|
+
|
1050
|
+
def to_xgboost(self) -> Any:
|
1051
|
+
raise exceptions.SnowflakeMLException(
|
1052
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1053
|
+
original_exception=AttributeError(
|
1054
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1055
|
+
"to_xgboost()",
|
1056
|
+
"to_sklearn()"
|
1057
|
+
)
|
1058
|
+
),
|
1059
|
+
)
|
1060
|
+
|
1061
|
+
def to_lightgbm(self) -> Any:
|
1062
|
+
raise exceptions.SnowflakeMLException(
|
1063
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1064
|
+
original_exception=AttributeError(
|
1065
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1066
|
+
"to_lightgbm()",
|
1067
|
+
"to_sklearn()"
|
1068
|
+
)
|
1069
|
+
),
|
1070
|
+
)
|
971
1071
|
|
972
|
-
def
|
1072
|
+
def _get_dependencies(self) -> List[str]:
|
1073
|
+
return self._deps
|
1074
|
+
|
1075
|
+
|
1076
|
+
def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
973
1077
|
self._model_signature_dict = dict()
|
974
1078
|
|
975
1079
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
976
1080
|
|
977
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input"))
|
1081
|
+
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
978
1082
|
outputs: List[BaseFeatureSpec] = []
|
979
1083
|
if hasattr(self, "predict"):
|
980
1084
|
# keep mypy happy
|
981
|
-
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1085
|
+
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
982
1086
|
# For classifier, the type of predict is the same as the type of label
|
983
|
-
if self._sklearn_object._estimator_type ==
|
984
|
-
|
1087
|
+
if self._sklearn_object._estimator_type == "classifier":
|
1088
|
+
# label columns is the desired type for output
|
985
1089
|
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
986
1090
|
# rename the output columns
|
987
1091
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
988
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
989
|
-
|
990
|
-
|
1092
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1093
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1094
|
+
)
|
991
1095
|
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
992
1096
|
# For outlier models, returns -1 for outliers and 1 for inliers.
|
993
|
-
# Clusterer returns int64 cluster labels.
|
1097
|
+
# Clusterer returns int64 cluster labels.
|
994
1098
|
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
995
1099
|
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
996
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
997
|
-
|
998
|
-
|
999
|
-
|
1100
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1101
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1102
|
+
)
|
1103
|
+
|
1000
1104
|
# For regressor, the type of predict is float64
|
1001
|
-
elif self._sklearn_object._estimator_type ==
|
1105
|
+
elif self._sklearn_object._estimator_type == "regressor":
|
1002
1106
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1003
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1004
|
-
|
1005
|
-
|
1006
|
-
|
1107
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1108
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1109
|
+
)
|
1110
|
+
|
1007
1111
|
for prob_func in PROB_FUNCTIONS:
|
1008
1112
|
if hasattr(self, prob_func):
|
1009
1113
|
output_cols_prefix: str = f"{prob_func}_"
|
1010
1114
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1011
1115
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1012
|
-
self._model_signature_dict[prob_func] = ModelSignature(
|
1013
|
-
|
1014
|
-
|
1116
|
+
self._model_signature_dict[prob_func] = ModelSignature(
|
1117
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1118
|
+
)
|
1015
1119
|
|
1016
1120
|
# Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
|
1017
1121
|
items = list(self._model_signature_dict.items())
|
@@ -1024,10 +1128,10 @@ class Lasso(BaseTransformer):
|
|
1024
1128
|
"""Returns model signature of current class.
|
1025
1129
|
|
1026
1130
|
Raises:
|
1027
|
-
|
1131
|
+
SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
|
1028
1132
|
|
1029
1133
|
Returns:
|
1030
|
-
Dict
|
1134
|
+
Dict with each method and its input output signature
|
1031
1135
|
"""
|
1032
1136
|
if self._model_signature_dict is None:
|
1033
1137
|
raise exceptions.SnowflakeMLException(
|
@@ -1035,35 +1139,3 @@ class Lasso(BaseTransformer):
|
|
1035
1139
|
original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
|
1036
1140
|
)
|
1037
1141
|
return self._model_signature_dict
|
1038
|
-
|
1039
|
-
def to_sklearn(self) -> Any:
|
1040
|
-
"""Get sklearn.linear_model.Lasso object.
|
1041
|
-
"""
|
1042
|
-
if self._sklearn_object is None:
|
1043
|
-
self._sklearn_object = self._create_sklearn_object()
|
1044
|
-
return self._sklearn_object
|
1045
|
-
|
1046
|
-
def to_xgboost(self) -> Any:
|
1047
|
-
raise exceptions.SnowflakeMLException(
|
1048
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1049
|
-
original_exception=AttributeError(
|
1050
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1051
|
-
"to_xgboost()",
|
1052
|
-
"to_sklearn()"
|
1053
|
-
)
|
1054
|
-
),
|
1055
|
-
)
|
1056
|
-
|
1057
|
-
def to_lightgbm(self) -> Any:
|
1058
|
-
raise exceptions.SnowflakeMLException(
|
1059
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1060
|
-
original_exception=AttributeError(
|
1061
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1062
|
-
"to_lightgbm()",
|
1063
|
-
"to_sklearn()"
|
1064
|
-
)
|
1065
|
-
),
|
1066
|
-
)
|
1067
|
-
|
1068
|
-
def _get_dependencies(self) -> List[str]:
|
1069
|
-
return self._deps
|