snowflake-ml-python 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +11 -1
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/feature_store/feature_store.py +151 -78
- snowflake/ml/feature_store/feature_view.py +12 -24
- snowflake/ml/fileset/sfcfs.py +56 -50
- snowflake/ml/fileset/stage_fs.py +48 -13
- snowflake/ml/model/_client/model/model_version_impl.py +2 -50
- snowflake/ml/model/_client/ops/model_ops.py +78 -29
- snowflake/ml/model/_client/sql/model.py +23 -2
- snowflake/ml/model/_client/sql/model_version.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +19 -54
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +8 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +36 -6
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -2
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +5 -5
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +195 -123
- snowflake/ml/modeling/cluster/affinity_propagation.py +195 -123
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +195 -123
- snowflake/ml/modeling/cluster/birch.py +195 -123
- snowflake/ml/modeling/cluster/bisecting_k_means.py +195 -123
- snowflake/ml/modeling/cluster/dbscan.py +195 -123
- snowflake/ml/modeling/cluster/feature_agglomeration.py +195 -123
- snowflake/ml/modeling/cluster/k_means.py +195 -123
- snowflake/ml/modeling/cluster/mean_shift.py +195 -123
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +195 -123
- snowflake/ml/modeling/cluster/optics.py +195 -123
- snowflake/ml/modeling/cluster/spectral_biclustering.py +195 -123
- snowflake/ml/modeling/cluster/spectral_clustering.py +195 -123
- snowflake/ml/modeling/cluster/spectral_coclustering.py +195 -123
- snowflake/ml/modeling/compose/column_transformer.py +195 -123
- snowflake/ml/modeling/compose/transformed_target_regressor.py +195 -123
- snowflake/ml/modeling/covariance/elliptic_envelope.py +195 -123
- snowflake/ml/modeling/covariance/empirical_covariance.py +195 -123
- snowflake/ml/modeling/covariance/graphical_lasso.py +195 -123
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +195 -123
- snowflake/ml/modeling/covariance/ledoit_wolf.py +195 -123
- snowflake/ml/modeling/covariance/min_cov_det.py +195 -123
- snowflake/ml/modeling/covariance/oas.py +195 -123
- snowflake/ml/modeling/covariance/shrunk_covariance.py +195 -123
- snowflake/ml/modeling/decomposition/dictionary_learning.py +195 -123
- snowflake/ml/modeling/decomposition/factor_analysis.py +195 -123
- snowflake/ml/modeling/decomposition/fast_ica.py +195 -123
- snowflake/ml/modeling/decomposition/incremental_pca.py +195 -123
- snowflake/ml/modeling/decomposition/kernel_pca.py +195 -123
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +195 -123
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +195 -123
- snowflake/ml/modeling/decomposition/pca.py +195 -123
- snowflake/ml/modeling/decomposition/sparse_pca.py +195 -123
- snowflake/ml/modeling/decomposition/truncated_svd.py +195 -123
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +195 -123
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +195 -123
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/bagging_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/bagging_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/isolation_forest.py +195 -123
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/stacking_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/voting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/voting_regressor.py +195 -123
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fdr.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fpr.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fwe.py +195 -123
- snowflake/ml/modeling/feature_selection/select_k_best.py +195 -123
- snowflake/ml/modeling/feature_selection/select_percentile.py +195 -123
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +195 -123
- snowflake/ml/modeling/feature_selection/variance_threshold.py +195 -123
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +9 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +195 -123
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +195 -123
- snowflake/ml/modeling/impute/iterative_imputer.py +195 -123
- snowflake/ml/modeling/impute/knn_imputer.py +195 -123
- snowflake/ml/modeling/impute/missing_indicator.py +195 -123
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +195 -123
- snowflake/ml/modeling/kernel_approximation/nystroem.py +195 -123
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +195 -123
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +195 -123
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +195 -123
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +195 -123
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +195 -123
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ard_regression.py +195 -123
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +195 -123
- snowflake/ml/modeling/linear_model/elastic_net.py +195 -123
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +195 -123
- snowflake/ml/modeling/linear_model/gamma_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/huber_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/lars.py +195 -123
- snowflake/ml/modeling/linear_model/lars_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +195 -123
- snowflake/ml/modeling/linear_model/linear_regression.py +195 -123
- snowflake/ml/modeling/linear_model/logistic_regression.py +195 -123
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +195 -123
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +195 -123
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/perceptron.py +195 -123
- snowflake/ml/modeling/linear_model/poisson_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ransac_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ridge.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_cv.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +195 -123
- snowflake/ml/modeling/manifold/isomap.py +195 -123
- snowflake/ml/modeling/manifold/mds.py +195 -123
- snowflake/ml/modeling/manifold/spectral_embedding.py +195 -123
- snowflake/ml/modeling/manifold/tsne.py +195 -123
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +195 -123
- snowflake/ml/modeling/mixture/gaussian_mixture.py +195 -123
- snowflake/ml/modeling/model_selection/grid_search_cv.py +42 -18
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +42 -18
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +195 -123
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +195 -123
- snowflake/ml/modeling/multiclass/output_code_classifier.py +195 -123
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/complement_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +195 -123
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +195 -123
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +195 -123
- snowflake/ml/modeling/neighbors/kernel_density.py +195 -123
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +195 -123
- snowflake/ml/modeling/neighbors/nearest_centroid.py +195 -123
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +195 -123
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +195 -123
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +195 -123
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +195 -123
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +195 -123
- snowflake/ml/modeling/neural_network/mlp_classifier.py +195 -123
- snowflake/ml/modeling/neural_network/mlp_regressor.py +195 -123
- snowflake/ml/modeling/pipeline/pipeline.py +4 -4
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +195 -123
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +195 -123
- snowflake/ml/modeling/semi_supervised/label_spreading.py +195 -123
- snowflake/ml/modeling/svm/linear_svc.py +195 -123
- snowflake/ml/modeling/svm/linear_svr.py +195 -123
- snowflake/ml/modeling/svm/nu_svc.py +195 -123
- snowflake/ml/modeling/svm/nu_svr.py +195 -123
- snowflake/ml/modeling/svm/svc.py +195 -123
- snowflake/ml/modeling/svm/svr.py +195 -123
- snowflake/ml/modeling/tree/decision_tree_classifier.py +195 -123
- snowflake/ml/modeling/tree/decision_tree_regressor.py +195 -123
- snowflake/ml/modeling/tree/extra_tree_classifier.py +195 -123
- snowflake/ml/modeling/tree/extra_tree_regressor.py +195 -123
- snowflake/ml/modeling/xgboost/xgb_classifier.py +195 -123
- snowflake/ml/modeling/xgboost/xgb_regressor.py +195 -123
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +195 -123
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +195 -123
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/METADATA +68 -57
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/RECORD +202 -200
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
|
|
33
33
|
BatchInferenceKwargsTypedDict,
|
34
34
|
ScoreKwargsTypedDict
|
35
35
|
)
|
36
|
+
from snowflake.ml.model._signatures import utils as model_signature_utils
|
37
|
+
from snowflake.ml.model.model_signature import (
|
38
|
+
BaseFeatureSpec,
|
39
|
+
DataType,
|
40
|
+
FeatureSpec,
|
41
|
+
ModelSignature,
|
42
|
+
_infer_signature,
|
43
|
+
_rename_signature_with_snowflake_identifiers,
|
44
|
+
)
|
36
45
|
|
37
46
|
from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
|
38
47
|
|
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
|
|
43
52
|
validate_sklearn_args,
|
44
53
|
)
|
45
54
|
|
46
|
-
from snowflake.ml.model.model_signature import (
|
47
|
-
DataType,
|
48
|
-
FeatureSpec,
|
49
|
-
ModelSignature,
|
50
|
-
_infer_signature,
|
51
|
-
_rename_signature_with_snowflake_identifiers,
|
52
|
-
BaseFeatureSpec,
|
53
|
-
)
|
54
|
-
from snowflake.ml.model._signatures import utils as model_signature_utils
|
55
|
-
|
56
55
|
_PROJECT = "ModelDevelopment"
|
57
56
|
# Derive subproject from module name by removing "sklearn"
|
58
57
|
# and converting module name from underscore to CamelCase
|
@@ -266,12 +265,7 @@ class LassoLarsIC(BaseTransformer):
|
|
266
265
|
)
|
267
266
|
return selected_cols
|
268
267
|
|
269
|
-
|
270
|
-
project=_PROJECT,
|
271
|
-
subproject=_SUBPROJECT,
|
272
|
-
custom_tags=dict([("autogen", True)]),
|
273
|
-
)
|
274
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "LassoLarsIC":
|
268
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "LassoLarsIC":
|
275
269
|
"""Fit the model using X, y as training data
|
276
270
|
For more details on this function, see [sklearn.linear_model.LassoLarsIC.fit]
|
277
271
|
(https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LassoLarsIC.html#sklearn.linear_model.LassoLarsIC.fit)
|
@@ -298,12 +292,14 @@ class LassoLarsIC(BaseTransformer):
|
|
298
292
|
|
299
293
|
self._snowpark_cols = dataset.select(self.input_cols).columns
|
300
294
|
|
301
|
-
|
295
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
302
296
|
if SNOWML_SPROC_ENV in os.environ:
|
303
297
|
statement_params = telemetry.get_function_usage_statement_params(
|
304
298
|
project=_PROJECT,
|
305
299
|
subproject=_SUBPROJECT,
|
306
|
-
function_name=telemetry.get_statement_params_full_func_name(
|
300
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
301
|
+
inspect.currentframe(), LassoLarsIC.__class__.__name__
|
302
|
+
),
|
307
303
|
api_calls=[Session.call],
|
308
304
|
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
309
305
|
)
|
@@ -324,7 +320,7 @@ class LassoLarsIC(BaseTransformer):
|
|
324
320
|
)
|
325
321
|
self._sklearn_object = model_trainer.train()
|
326
322
|
self._is_fitted = True
|
327
|
-
self.
|
323
|
+
self._generate_model_signatures(dataset)
|
328
324
|
return self
|
329
325
|
|
330
326
|
def _batch_inference_validate_snowpark(
|
@@ -400,7 +396,9 @@ class LassoLarsIC(BaseTransformer):
|
|
400
396
|
# when it is classifier, infer the datatype from label columns
|
401
397
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
402
398
|
# Batch inference takes a single expected output column type. Use the first columns type for now.
|
403
|
-
label_cols_signatures = [
|
399
|
+
label_cols_signatures = [
|
400
|
+
row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
|
401
|
+
]
|
404
402
|
if len(label_cols_signatures) == 0:
|
405
403
|
error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
|
406
404
|
raise exceptions.SnowflakeMLException(
|
@@ -408,25 +406,22 @@ class LassoLarsIC(BaseTransformer):
|
|
408
406
|
original_exception=ValueError(error_str),
|
409
407
|
)
|
410
408
|
|
411
|
-
expected_type_inferred = convert_sp_to_sf_type(
|
412
|
-
label_cols_signatures[0].as_snowpark_type()
|
413
|
-
)
|
409
|
+
expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
|
414
410
|
|
415
411
|
self._deps = self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
416
|
-
assert isinstance(
|
412
|
+
assert isinstance(
|
413
|
+
dataset._session, Session
|
414
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
417
415
|
|
418
416
|
transform_kwargs = dict(
|
419
|
-
session
|
420
|
-
dependencies
|
421
|
-
drop_input_cols
|
422
|
-
expected_output_cols_type
|
417
|
+
session=dataset._session,
|
418
|
+
dependencies=self._deps,
|
419
|
+
drop_input_cols=self._drop_input_cols,
|
420
|
+
expected_output_cols_type=expected_type_inferred,
|
423
421
|
)
|
424
422
|
|
425
423
|
elif isinstance(dataset, pd.DataFrame):
|
426
|
-
transform_kwargs = dict(
|
427
|
-
snowpark_input_cols = self._snowpark_cols,
|
428
|
-
drop_input_cols = self._drop_input_cols
|
429
|
-
)
|
424
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
430
425
|
|
431
426
|
transform_handlers = ModelTransformerBuilder.build(
|
432
427
|
dataset=dataset,
|
@@ -466,7 +461,7 @@ class LassoLarsIC(BaseTransformer):
|
|
466
461
|
Transformed dataset.
|
467
462
|
"""
|
468
463
|
super()._check_dataset_type(dataset)
|
469
|
-
inference_method="transform"
|
464
|
+
inference_method = "transform"
|
470
465
|
|
471
466
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
472
467
|
# are specific to the type of dataset used.
|
@@ -503,17 +498,14 @@ class LassoLarsIC(BaseTransformer):
|
|
503
498
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
504
499
|
|
505
500
|
transform_kwargs = dict(
|
506
|
-
session
|
507
|
-
dependencies
|
508
|
-
drop_input_cols
|
509
|
-
expected_output_cols_type
|
501
|
+
session=dataset._session,
|
502
|
+
dependencies=self._deps,
|
503
|
+
drop_input_cols=self._drop_input_cols,
|
504
|
+
expected_output_cols_type=expected_dtype,
|
510
505
|
)
|
511
506
|
|
512
507
|
elif isinstance(dataset, pd.DataFrame):
|
513
|
-
transform_kwargs = dict(
|
514
|
-
snowpark_input_cols = self._snowpark_cols,
|
515
|
-
drop_input_cols = self._drop_input_cols
|
516
|
-
)
|
508
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
517
509
|
|
518
510
|
transform_handlers = ModelTransformerBuilder.build(
|
519
511
|
dataset=dataset,
|
@@ -532,7 +524,11 @@ class LassoLarsIC(BaseTransformer):
|
|
532
524
|
return output_df
|
533
525
|
|
534
526
|
@available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
|
535
|
-
def fit_predict(
|
527
|
+
def fit_predict(
|
528
|
+
self,
|
529
|
+
dataset: Union[DataFrame, pd.DataFrame],
|
530
|
+
output_cols_prefix: str = "fit_predict_",
|
531
|
+
) -> Union[DataFrame, pd.DataFrame]:
|
536
532
|
""" Method not supported for this class.
|
537
533
|
|
538
534
|
|
@@ -557,7 +553,9 @@ class LassoLarsIC(BaseTransformer):
|
|
557
553
|
)
|
558
554
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
559
555
|
drop_input_cols=self._drop_input_cols,
|
560
|
-
expected_output_cols_list=
|
556
|
+
expected_output_cols_list=(
|
557
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
558
|
+
),
|
561
559
|
)
|
562
560
|
self._sklearn_object = fitted_estimator
|
563
561
|
self._is_fitted = True
|
@@ -574,6 +572,62 @@ class LassoLarsIC(BaseTransformer):
|
|
574
572
|
assert self._sklearn_object is not None
|
575
573
|
return self._sklearn_object.embedding_
|
576
574
|
|
575
|
+
|
576
|
+
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
577
|
+
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
578
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
579
|
+
"""
|
580
|
+
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
581
|
+
# The following condition is introduced for kneighbors methods, and not used in other methods
|
582
|
+
if output_cols:
|
583
|
+
output_cols = [
|
584
|
+
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
585
|
+
for c in output_cols
|
586
|
+
]
|
587
|
+
elif getattr(self._sklearn_object, "classes_", None) is None:
|
588
|
+
output_cols = [output_cols_prefix]
|
589
|
+
elif self._sklearn_object is not None:
|
590
|
+
classes = self._sklearn_object.classes_
|
591
|
+
if isinstance(classes, numpy.ndarray):
|
592
|
+
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
593
|
+
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
594
|
+
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
595
|
+
output_cols = []
|
596
|
+
for i, cl in enumerate(classes):
|
597
|
+
# For binary classification, there is only one output column for each class
|
598
|
+
# ndarray as the two classes are complementary.
|
599
|
+
if len(cl) == 2:
|
600
|
+
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
601
|
+
else:
|
602
|
+
output_cols.extend([
|
603
|
+
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
604
|
+
])
|
605
|
+
else:
|
606
|
+
output_cols = []
|
607
|
+
|
608
|
+
# Make sure column names are valid snowflake identifiers.
|
609
|
+
assert output_cols is not None # Make MyPy happy
|
610
|
+
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
611
|
+
|
612
|
+
return rv
|
613
|
+
|
614
|
+
def _align_expected_output_names(
|
615
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
616
|
+
) -> List[str]:
|
617
|
+
# in case the inferred output column names dimension is different
|
618
|
+
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
619
|
+
output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
|
620
|
+
output_df_columns = list(output_df_pd.columns)
|
621
|
+
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
622
|
+
if self.sample_weight_col:
|
623
|
+
output_df_columns_set -= set(self.sample_weight_col)
|
624
|
+
# if the dimension of inferred output column names is correct; use it
|
625
|
+
if len(expected_output_cols_list) == len(output_df_columns_set):
|
626
|
+
return expected_output_cols_list
|
627
|
+
# otherwise, use the sklearn estimator's output
|
628
|
+
else:
|
629
|
+
return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
630
|
+
|
577
631
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
578
632
|
@telemetry.send_api_usage_telemetry(
|
579
633
|
project=_PROJECT,
|
@@ -604,24 +658,28 @@ class LassoLarsIC(BaseTransformer):
|
|
604
658
|
# are specific to the type of dataset used.
|
605
659
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
606
660
|
|
661
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
662
|
+
|
607
663
|
if isinstance(dataset, DataFrame):
|
608
664
|
self._deps = self._batch_inference_validate_snowpark(
|
609
665
|
dataset=dataset,
|
610
666
|
inference_method=inference_method,
|
611
667
|
)
|
612
|
-
assert isinstance(
|
668
|
+
assert isinstance(
|
669
|
+
dataset._session, Session
|
670
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
613
671
|
transform_kwargs = dict(
|
614
672
|
session=dataset._session,
|
615
673
|
dependencies=self._deps,
|
616
|
-
drop_input_cols
|
674
|
+
drop_input_cols=self._drop_input_cols,
|
617
675
|
expected_output_cols_type="float",
|
618
676
|
)
|
677
|
+
expected_output_cols = self._align_expected_output_names(
|
678
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
679
|
+
)
|
619
680
|
|
620
681
|
elif isinstance(dataset, pd.DataFrame):
|
621
|
-
transform_kwargs = dict(
|
622
|
-
snowpark_input_cols = self._snowpark_cols,
|
623
|
-
drop_input_cols = self._drop_input_cols
|
624
|
-
)
|
682
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
625
683
|
|
626
684
|
transform_handlers = ModelTransformerBuilder.build(
|
627
685
|
dataset=dataset,
|
@@ -633,7 +691,7 @@ class LassoLarsIC(BaseTransformer):
|
|
633
691
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
634
692
|
inference_method=inference_method,
|
635
693
|
input_cols=self.input_cols,
|
636
|
-
expected_output_cols=
|
694
|
+
expected_output_cols=expected_output_cols,
|
637
695
|
**transform_kwargs
|
638
696
|
)
|
639
697
|
return output_df
|
@@ -663,7 +721,8 @@ class LassoLarsIC(BaseTransformer):
|
|
663
721
|
Output dataset with log probability of the sample for each class in the model.
|
664
722
|
"""
|
665
723
|
super()._check_dataset_type(dataset)
|
666
|
-
inference_method="predict_log_proba"
|
724
|
+
inference_method = "predict_log_proba"
|
725
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
667
726
|
|
668
727
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
669
728
|
# are specific to the type of dataset used.
|
@@ -674,18 +733,20 @@ class LassoLarsIC(BaseTransformer):
|
|
674
733
|
dataset=dataset,
|
675
734
|
inference_method=inference_method,
|
676
735
|
)
|
677
|
-
assert isinstance(
|
736
|
+
assert isinstance(
|
737
|
+
dataset._session, Session
|
738
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
678
739
|
transform_kwargs = dict(
|
679
740
|
session=dataset._session,
|
680
741
|
dependencies=self._deps,
|
681
|
-
drop_input_cols
|
742
|
+
drop_input_cols=self._drop_input_cols,
|
682
743
|
expected_output_cols_type="float",
|
683
744
|
)
|
745
|
+
expected_output_cols = self._align_expected_output_names(
|
746
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
747
|
+
)
|
684
748
|
elif isinstance(dataset, pd.DataFrame):
|
685
|
-
transform_kwargs = dict(
|
686
|
-
snowpark_input_cols = self._snowpark_cols,
|
687
|
-
drop_input_cols = self._drop_input_cols
|
688
|
-
)
|
749
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
689
750
|
|
690
751
|
transform_handlers = ModelTransformerBuilder.build(
|
691
752
|
dataset=dataset,
|
@@ -698,7 +759,7 @@ class LassoLarsIC(BaseTransformer):
|
|
698
759
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
699
760
|
inference_method=inference_method,
|
700
761
|
input_cols=self.input_cols,
|
701
|
-
expected_output_cols=
|
762
|
+
expected_output_cols=expected_output_cols,
|
702
763
|
**transform_kwargs
|
703
764
|
)
|
704
765
|
return output_df
|
@@ -724,30 +785,34 @@ class LassoLarsIC(BaseTransformer):
|
|
724
785
|
Output dataset with results of the decision function for the samples in input dataset.
|
725
786
|
"""
|
726
787
|
super()._check_dataset_type(dataset)
|
727
|
-
inference_method="decision_function"
|
788
|
+
inference_method = "decision_function"
|
728
789
|
|
729
790
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
730
791
|
# are specific to the type of dataset used.
|
731
792
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
732
793
|
|
794
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
795
|
+
|
733
796
|
if isinstance(dataset, DataFrame):
|
734
797
|
self._deps = self._batch_inference_validate_snowpark(
|
735
798
|
dataset=dataset,
|
736
799
|
inference_method=inference_method,
|
737
800
|
)
|
738
|
-
assert isinstance(
|
801
|
+
assert isinstance(
|
802
|
+
dataset._session, Session
|
803
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
739
804
|
transform_kwargs = dict(
|
740
805
|
session=dataset._session,
|
741
806
|
dependencies=self._deps,
|
742
|
-
drop_input_cols
|
807
|
+
drop_input_cols=self._drop_input_cols,
|
743
808
|
expected_output_cols_type="float",
|
744
809
|
)
|
810
|
+
expected_output_cols = self._align_expected_output_names(
|
811
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
812
|
+
)
|
745
813
|
|
746
814
|
elif isinstance(dataset, pd.DataFrame):
|
747
|
-
transform_kwargs = dict(
|
748
|
-
snowpark_input_cols = self._snowpark_cols,
|
749
|
-
drop_input_cols = self._drop_input_cols
|
750
|
-
)
|
815
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
751
816
|
|
752
817
|
transform_handlers = ModelTransformerBuilder.build(
|
753
818
|
dataset=dataset,
|
@@ -760,7 +825,7 @@ class LassoLarsIC(BaseTransformer):
|
|
760
825
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
761
826
|
inference_method=inference_method,
|
762
827
|
input_cols=self.input_cols,
|
763
|
-
expected_output_cols=
|
828
|
+
expected_output_cols=expected_output_cols,
|
764
829
|
**transform_kwargs
|
765
830
|
)
|
766
831
|
return output_df
|
@@ -789,12 +854,14 @@ class LassoLarsIC(BaseTransformer):
|
|
789
854
|
Output dataset with probability of the sample for each class in the model.
|
790
855
|
"""
|
791
856
|
super()._check_dataset_type(dataset)
|
792
|
-
inference_method="score_samples"
|
857
|
+
inference_method = "score_samples"
|
793
858
|
|
794
859
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
795
860
|
# are specific to the type of dataset used.
|
796
861
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
797
862
|
|
863
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
864
|
+
|
798
865
|
if isinstance(dataset, DataFrame):
|
799
866
|
self._deps = self._batch_inference_validate_snowpark(
|
800
867
|
dataset=dataset,
|
@@ -807,6 +874,9 @@ class LassoLarsIC(BaseTransformer):
|
|
807
874
|
drop_input_cols = self._drop_input_cols,
|
808
875
|
expected_output_cols_type="float",
|
809
876
|
)
|
877
|
+
expected_output_cols = self._align_expected_output_names(
|
878
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
879
|
+
)
|
810
880
|
|
811
881
|
elif isinstance(dataset, pd.DataFrame):
|
812
882
|
transform_kwargs = dict(
|
@@ -825,7 +895,7 @@ class LassoLarsIC(BaseTransformer):
|
|
825
895
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
826
896
|
inference_method=inference_method,
|
827
897
|
input_cols=self.input_cols,
|
828
|
-
expected_output_cols=
|
898
|
+
expected_output_cols=expected_output_cols,
|
829
899
|
**transform_kwargs
|
830
900
|
)
|
831
901
|
return output_df
|
@@ -972,50 +1042,84 @@ class LassoLarsIC(BaseTransformer):
|
|
972
1042
|
)
|
973
1043
|
return output_df
|
974
1044
|
|
1045
|
+
|
1046
|
+
|
1047
|
+
def to_sklearn(self) -> Any:
|
1048
|
+
"""Get sklearn.linear_model.LassoLarsIC object.
|
1049
|
+
"""
|
1050
|
+
if self._sklearn_object is None:
|
1051
|
+
self._sklearn_object = self._create_sklearn_object()
|
1052
|
+
return self._sklearn_object
|
1053
|
+
|
1054
|
+
def to_xgboost(self) -> Any:
|
1055
|
+
raise exceptions.SnowflakeMLException(
|
1056
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1057
|
+
original_exception=AttributeError(
|
1058
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1059
|
+
"to_xgboost()",
|
1060
|
+
"to_sklearn()"
|
1061
|
+
)
|
1062
|
+
),
|
1063
|
+
)
|
1064
|
+
|
1065
|
+
def to_lightgbm(self) -> Any:
|
1066
|
+
raise exceptions.SnowflakeMLException(
|
1067
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1068
|
+
original_exception=AttributeError(
|
1069
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1070
|
+
"to_lightgbm()",
|
1071
|
+
"to_sklearn()"
|
1072
|
+
)
|
1073
|
+
),
|
1074
|
+
)
|
975
1075
|
|
976
|
-
def
|
1076
|
+
def _get_dependencies(self) -> List[str]:
|
1077
|
+
return self._deps
|
1078
|
+
|
1079
|
+
|
1080
|
+
def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
977
1081
|
self._model_signature_dict = dict()
|
978
1082
|
|
979
1083
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
980
1084
|
|
981
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input"))
|
1085
|
+
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
982
1086
|
outputs: List[BaseFeatureSpec] = []
|
983
1087
|
if hasattr(self, "predict"):
|
984
1088
|
# keep mypy happy
|
985
|
-
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1089
|
+
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
986
1090
|
# For classifier, the type of predict is the same as the type of label
|
987
|
-
if self._sklearn_object._estimator_type ==
|
988
|
-
|
1091
|
+
if self._sklearn_object._estimator_type == "classifier":
|
1092
|
+
# label columns is the desired type for output
|
989
1093
|
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
990
1094
|
# rename the output columns
|
991
1095
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
992
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
993
|
-
|
994
|
-
|
1096
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1097
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1098
|
+
)
|
995
1099
|
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
996
1100
|
# For outlier models, returns -1 for outliers and 1 for inliers.
|
997
|
-
# Clusterer returns int64 cluster labels.
|
1101
|
+
# Clusterer returns int64 cluster labels.
|
998
1102
|
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
999
1103
|
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
1000
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1001
|
-
|
1002
|
-
|
1003
|
-
|
1104
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1105
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1106
|
+
)
|
1107
|
+
|
1004
1108
|
# For regressor, the type of predict is float64
|
1005
|
-
elif self._sklearn_object._estimator_type ==
|
1109
|
+
elif self._sklearn_object._estimator_type == "regressor":
|
1006
1110
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1007
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1008
|
-
|
1009
|
-
|
1010
|
-
|
1111
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1112
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1113
|
+
)
|
1114
|
+
|
1011
1115
|
for prob_func in PROB_FUNCTIONS:
|
1012
1116
|
if hasattr(self, prob_func):
|
1013
1117
|
output_cols_prefix: str = f"{prob_func}_"
|
1014
1118
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1015
1119
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1016
|
-
self._model_signature_dict[prob_func] = ModelSignature(
|
1017
|
-
|
1018
|
-
|
1120
|
+
self._model_signature_dict[prob_func] = ModelSignature(
|
1121
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1122
|
+
)
|
1019
1123
|
|
1020
1124
|
# Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
|
1021
1125
|
items = list(self._model_signature_dict.items())
|
@@ -1028,10 +1132,10 @@ class LassoLarsIC(BaseTransformer):
|
|
1028
1132
|
"""Returns model signature of current class.
|
1029
1133
|
|
1030
1134
|
Raises:
|
1031
|
-
|
1135
|
+
SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
|
1032
1136
|
|
1033
1137
|
Returns:
|
1034
|
-
Dict
|
1138
|
+
Dict with each method and its input output signature
|
1035
1139
|
"""
|
1036
1140
|
if self._model_signature_dict is None:
|
1037
1141
|
raise exceptions.SnowflakeMLException(
|
@@ -1039,35 +1143,3 @@ class LassoLarsIC(BaseTransformer):
|
|
1039
1143
|
original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
|
1040
1144
|
)
|
1041
1145
|
return self._model_signature_dict
|
1042
|
-
|
1043
|
-
def to_sklearn(self) -> Any:
|
1044
|
-
"""Get sklearn.linear_model.LassoLarsIC object.
|
1045
|
-
"""
|
1046
|
-
if self._sklearn_object is None:
|
1047
|
-
self._sklearn_object = self._create_sklearn_object()
|
1048
|
-
return self._sklearn_object
|
1049
|
-
|
1050
|
-
def to_xgboost(self) -> Any:
|
1051
|
-
raise exceptions.SnowflakeMLException(
|
1052
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1053
|
-
original_exception=AttributeError(
|
1054
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1055
|
-
"to_xgboost()",
|
1056
|
-
"to_sklearn()"
|
1057
|
-
)
|
1058
|
-
),
|
1059
|
-
)
|
1060
|
-
|
1061
|
-
def to_lightgbm(self) -> Any:
|
1062
|
-
raise exceptions.SnowflakeMLException(
|
1063
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1064
|
-
original_exception=AttributeError(
|
1065
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1066
|
-
"to_lightgbm()",
|
1067
|
-
"to_sklearn()"
|
1068
|
-
)
|
1069
|
-
),
|
1070
|
-
)
|
1071
|
-
|
1072
|
-
def _get_dependencies(self) -> List[str]:
|
1073
|
-
return self._deps
|