snowflake-ml-python 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +11 -1
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/feature_store/feature_store.py +151 -78
- snowflake/ml/feature_store/feature_view.py +12 -24
- snowflake/ml/fileset/sfcfs.py +56 -50
- snowflake/ml/fileset/stage_fs.py +48 -13
- snowflake/ml/model/_client/model/model_version_impl.py +2 -50
- snowflake/ml/model/_client/ops/model_ops.py +78 -29
- snowflake/ml/model/_client/sql/model.py +23 -2
- snowflake/ml/model/_client/sql/model_version.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +19 -54
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +8 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +36 -6
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -2
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +5 -5
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +195 -123
- snowflake/ml/modeling/cluster/affinity_propagation.py +195 -123
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +195 -123
- snowflake/ml/modeling/cluster/birch.py +195 -123
- snowflake/ml/modeling/cluster/bisecting_k_means.py +195 -123
- snowflake/ml/modeling/cluster/dbscan.py +195 -123
- snowflake/ml/modeling/cluster/feature_agglomeration.py +195 -123
- snowflake/ml/modeling/cluster/k_means.py +195 -123
- snowflake/ml/modeling/cluster/mean_shift.py +195 -123
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +195 -123
- snowflake/ml/modeling/cluster/optics.py +195 -123
- snowflake/ml/modeling/cluster/spectral_biclustering.py +195 -123
- snowflake/ml/modeling/cluster/spectral_clustering.py +195 -123
- snowflake/ml/modeling/cluster/spectral_coclustering.py +195 -123
- snowflake/ml/modeling/compose/column_transformer.py +195 -123
- snowflake/ml/modeling/compose/transformed_target_regressor.py +195 -123
- snowflake/ml/modeling/covariance/elliptic_envelope.py +195 -123
- snowflake/ml/modeling/covariance/empirical_covariance.py +195 -123
- snowflake/ml/modeling/covariance/graphical_lasso.py +195 -123
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +195 -123
- snowflake/ml/modeling/covariance/ledoit_wolf.py +195 -123
- snowflake/ml/modeling/covariance/min_cov_det.py +195 -123
- snowflake/ml/modeling/covariance/oas.py +195 -123
- snowflake/ml/modeling/covariance/shrunk_covariance.py +195 -123
- snowflake/ml/modeling/decomposition/dictionary_learning.py +195 -123
- snowflake/ml/modeling/decomposition/factor_analysis.py +195 -123
- snowflake/ml/modeling/decomposition/fast_ica.py +195 -123
- snowflake/ml/modeling/decomposition/incremental_pca.py +195 -123
- snowflake/ml/modeling/decomposition/kernel_pca.py +195 -123
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +195 -123
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +195 -123
- snowflake/ml/modeling/decomposition/pca.py +195 -123
- snowflake/ml/modeling/decomposition/sparse_pca.py +195 -123
- snowflake/ml/modeling/decomposition/truncated_svd.py +195 -123
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +195 -123
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +195 -123
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/bagging_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/bagging_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/isolation_forest.py +195 -123
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/stacking_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/voting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/voting_regressor.py +195 -123
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fdr.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fpr.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fwe.py +195 -123
- snowflake/ml/modeling/feature_selection/select_k_best.py +195 -123
- snowflake/ml/modeling/feature_selection/select_percentile.py +195 -123
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +195 -123
- snowflake/ml/modeling/feature_selection/variance_threshold.py +195 -123
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +9 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +195 -123
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +195 -123
- snowflake/ml/modeling/impute/iterative_imputer.py +195 -123
- snowflake/ml/modeling/impute/knn_imputer.py +195 -123
- snowflake/ml/modeling/impute/missing_indicator.py +195 -123
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +195 -123
- snowflake/ml/modeling/kernel_approximation/nystroem.py +195 -123
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +195 -123
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +195 -123
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +195 -123
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +195 -123
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +195 -123
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ard_regression.py +195 -123
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +195 -123
- snowflake/ml/modeling/linear_model/elastic_net.py +195 -123
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +195 -123
- snowflake/ml/modeling/linear_model/gamma_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/huber_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/lars.py +195 -123
- snowflake/ml/modeling/linear_model/lars_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +195 -123
- snowflake/ml/modeling/linear_model/linear_regression.py +195 -123
- snowflake/ml/modeling/linear_model/logistic_regression.py +195 -123
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +195 -123
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +195 -123
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/perceptron.py +195 -123
- snowflake/ml/modeling/linear_model/poisson_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ransac_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ridge.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_cv.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +195 -123
- snowflake/ml/modeling/manifold/isomap.py +195 -123
- snowflake/ml/modeling/manifold/mds.py +195 -123
- snowflake/ml/modeling/manifold/spectral_embedding.py +195 -123
- snowflake/ml/modeling/manifold/tsne.py +195 -123
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +195 -123
- snowflake/ml/modeling/mixture/gaussian_mixture.py +195 -123
- snowflake/ml/modeling/model_selection/grid_search_cv.py +42 -18
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +42 -18
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +195 -123
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +195 -123
- snowflake/ml/modeling/multiclass/output_code_classifier.py +195 -123
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/complement_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +195 -123
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +195 -123
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +195 -123
- snowflake/ml/modeling/neighbors/kernel_density.py +195 -123
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +195 -123
- snowflake/ml/modeling/neighbors/nearest_centroid.py +195 -123
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +195 -123
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +195 -123
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +195 -123
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +195 -123
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +195 -123
- snowflake/ml/modeling/neural_network/mlp_classifier.py +195 -123
- snowflake/ml/modeling/neural_network/mlp_regressor.py +195 -123
- snowflake/ml/modeling/pipeline/pipeline.py +4 -4
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +195 -123
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +195 -123
- snowflake/ml/modeling/semi_supervised/label_spreading.py +195 -123
- snowflake/ml/modeling/svm/linear_svc.py +195 -123
- snowflake/ml/modeling/svm/linear_svr.py +195 -123
- snowflake/ml/modeling/svm/nu_svc.py +195 -123
- snowflake/ml/modeling/svm/nu_svr.py +195 -123
- snowflake/ml/modeling/svm/svc.py +195 -123
- snowflake/ml/modeling/svm/svr.py +195 -123
- snowflake/ml/modeling/tree/decision_tree_classifier.py +195 -123
- snowflake/ml/modeling/tree/decision_tree_regressor.py +195 -123
- snowflake/ml/modeling/tree/extra_tree_classifier.py +195 -123
- snowflake/ml/modeling/tree/extra_tree_regressor.py +195 -123
- snowflake/ml/modeling/xgboost/xgb_classifier.py +195 -123
- snowflake/ml/modeling/xgboost/xgb_regressor.py +195 -123
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +195 -123
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +195 -123
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/METADATA +68 -57
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/RECORD +202 -200
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
|
|
33
33
|
BatchInferenceKwargsTypedDict,
|
34
34
|
ScoreKwargsTypedDict
|
35
35
|
)
|
36
|
+
from snowflake.ml.model._signatures import utils as model_signature_utils
|
37
|
+
from snowflake.ml.model.model_signature import (
|
38
|
+
BaseFeatureSpec,
|
39
|
+
DataType,
|
40
|
+
FeatureSpec,
|
41
|
+
ModelSignature,
|
42
|
+
_infer_signature,
|
43
|
+
_rename_signature_with_snowflake_identifiers,
|
44
|
+
)
|
36
45
|
|
37
46
|
from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
|
38
47
|
|
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
|
|
43
52
|
validate_sklearn_args,
|
44
53
|
)
|
45
54
|
|
46
|
-
from snowflake.ml.model.model_signature import (
|
47
|
-
DataType,
|
48
|
-
FeatureSpec,
|
49
|
-
ModelSignature,
|
50
|
-
_infer_signature,
|
51
|
-
_rename_signature_with_snowflake_identifiers,
|
52
|
-
BaseFeatureSpec,
|
53
|
-
)
|
54
|
-
from snowflake.ml.model._signatures import utils as model_signature_utils
|
55
|
-
|
56
55
|
_PROJECT = "ModelDevelopment"
|
57
56
|
# Derive subproject from module name by removing "sklearn"
|
58
57
|
# and converting module name from underscore to CamelCase
|
@@ -275,12 +274,7 @@ class KNeighborsRegressor(BaseTransformer):
|
|
275
274
|
)
|
276
275
|
return selected_cols
|
277
276
|
|
278
|
-
|
279
|
-
project=_PROJECT,
|
280
|
-
subproject=_SUBPROJECT,
|
281
|
-
custom_tags=dict([("autogen", True)]),
|
282
|
-
)
|
283
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "KNeighborsRegressor":
|
277
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "KNeighborsRegressor":
|
284
278
|
"""Fit the k-nearest neighbors regressor from the training dataset
|
285
279
|
For more details on this function, see [sklearn.neighbors.KNeighborsRegressor.fit]
|
286
280
|
(https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsRegressor.html#sklearn.neighbors.KNeighborsRegressor.fit)
|
@@ -307,12 +301,14 @@ class KNeighborsRegressor(BaseTransformer):
|
|
307
301
|
|
308
302
|
self._snowpark_cols = dataset.select(self.input_cols).columns
|
309
303
|
|
310
|
-
|
304
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
311
305
|
if SNOWML_SPROC_ENV in os.environ:
|
312
306
|
statement_params = telemetry.get_function_usage_statement_params(
|
313
307
|
project=_PROJECT,
|
314
308
|
subproject=_SUBPROJECT,
|
315
|
-
function_name=telemetry.get_statement_params_full_func_name(
|
309
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
310
|
+
inspect.currentframe(), KNeighborsRegressor.__class__.__name__
|
311
|
+
),
|
316
312
|
api_calls=[Session.call],
|
317
313
|
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
318
314
|
)
|
@@ -333,7 +329,7 @@ class KNeighborsRegressor(BaseTransformer):
|
|
333
329
|
)
|
334
330
|
self._sklearn_object = model_trainer.train()
|
335
331
|
self._is_fitted = True
|
336
|
-
self.
|
332
|
+
self._generate_model_signatures(dataset)
|
337
333
|
return self
|
338
334
|
|
339
335
|
def _batch_inference_validate_snowpark(
|
@@ -409,7 +405,9 @@ class KNeighborsRegressor(BaseTransformer):
|
|
409
405
|
# when it is classifier, infer the datatype from label columns
|
410
406
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
411
407
|
# Batch inference takes a single expected output column type. Use the first columns type for now.
|
412
|
-
label_cols_signatures = [
|
408
|
+
label_cols_signatures = [
|
409
|
+
row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
|
410
|
+
]
|
413
411
|
if len(label_cols_signatures) == 0:
|
414
412
|
error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
|
415
413
|
raise exceptions.SnowflakeMLException(
|
@@ -417,25 +415,22 @@ class KNeighborsRegressor(BaseTransformer):
|
|
417
415
|
original_exception=ValueError(error_str),
|
418
416
|
)
|
419
417
|
|
420
|
-
expected_type_inferred = convert_sp_to_sf_type(
|
421
|
-
label_cols_signatures[0].as_snowpark_type()
|
422
|
-
)
|
418
|
+
expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
|
423
419
|
|
424
420
|
self._deps = self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
425
|
-
assert isinstance(
|
421
|
+
assert isinstance(
|
422
|
+
dataset._session, Session
|
423
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
426
424
|
|
427
425
|
transform_kwargs = dict(
|
428
|
-
session
|
429
|
-
dependencies
|
430
|
-
drop_input_cols
|
431
|
-
expected_output_cols_type
|
426
|
+
session=dataset._session,
|
427
|
+
dependencies=self._deps,
|
428
|
+
drop_input_cols=self._drop_input_cols,
|
429
|
+
expected_output_cols_type=expected_type_inferred,
|
432
430
|
)
|
433
431
|
|
434
432
|
elif isinstance(dataset, pd.DataFrame):
|
435
|
-
transform_kwargs = dict(
|
436
|
-
snowpark_input_cols = self._snowpark_cols,
|
437
|
-
drop_input_cols = self._drop_input_cols
|
438
|
-
)
|
433
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
439
434
|
|
440
435
|
transform_handlers = ModelTransformerBuilder.build(
|
441
436
|
dataset=dataset,
|
@@ -475,7 +470,7 @@ class KNeighborsRegressor(BaseTransformer):
|
|
475
470
|
Transformed dataset.
|
476
471
|
"""
|
477
472
|
super()._check_dataset_type(dataset)
|
478
|
-
inference_method="transform"
|
473
|
+
inference_method = "transform"
|
479
474
|
|
480
475
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
481
476
|
# are specific to the type of dataset used.
|
@@ -512,17 +507,14 @@ class KNeighborsRegressor(BaseTransformer):
|
|
512
507
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
513
508
|
|
514
509
|
transform_kwargs = dict(
|
515
|
-
session
|
516
|
-
dependencies
|
517
|
-
drop_input_cols
|
518
|
-
expected_output_cols_type
|
510
|
+
session=dataset._session,
|
511
|
+
dependencies=self._deps,
|
512
|
+
drop_input_cols=self._drop_input_cols,
|
513
|
+
expected_output_cols_type=expected_dtype,
|
519
514
|
)
|
520
515
|
|
521
516
|
elif isinstance(dataset, pd.DataFrame):
|
522
|
-
transform_kwargs = dict(
|
523
|
-
snowpark_input_cols = self._snowpark_cols,
|
524
|
-
drop_input_cols = self._drop_input_cols
|
525
|
-
)
|
517
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
526
518
|
|
527
519
|
transform_handlers = ModelTransformerBuilder.build(
|
528
520
|
dataset=dataset,
|
@@ -541,7 +533,11 @@ class KNeighborsRegressor(BaseTransformer):
|
|
541
533
|
return output_df
|
542
534
|
|
543
535
|
@available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
|
544
|
-
def fit_predict(
|
536
|
+
def fit_predict(
|
537
|
+
self,
|
538
|
+
dataset: Union[DataFrame, pd.DataFrame],
|
539
|
+
output_cols_prefix: str = "fit_predict_",
|
540
|
+
) -> Union[DataFrame, pd.DataFrame]:
|
545
541
|
""" Method not supported for this class.
|
546
542
|
|
547
543
|
|
@@ -566,7 +562,9 @@ class KNeighborsRegressor(BaseTransformer):
|
|
566
562
|
)
|
567
563
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
568
564
|
drop_input_cols=self._drop_input_cols,
|
569
|
-
expected_output_cols_list=
|
565
|
+
expected_output_cols_list=(
|
566
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
567
|
+
),
|
570
568
|
)
|
571
569
|
self._sklearn_object = fitted_estimator
|
572
570
|
self._is_fitted = True
|
@@ -583,6 +581,62 @@ class KNeighborsRegressor(BaseTransformer):
|
|
583
581
|
assert self._sklearn_object is not None
|
584
582
|
return self._sklearn_object.embedding_
|
585
583
|
|
584
|
+
|
585
|
+
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
586
|
+
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
587
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
588
|
+
"""
|
589
|
+
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
590
|
+
# The following condition is introduced for kneighbors methods, and not used in other methods
|
591
|
+
if output_cols:
|
592
|
+
output_cols = [
|
593
|
+
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
594
|
+
for c in output_cols
|
595
|
+
]
|
596
|
+
elif getattr(self._sklearn_object, "classes_", None) is None:
|
597
|
+
output_cols = [output_cols_prefix]
|
598
|
+
elif self._sklearn_object is not None:
|
599
|
+
classes = self._sklearn_object.classes_
|
600
|
+
if isinstance(classes, numpy.ndarray):
|
601
|
+
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
602
|
+
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
603
|
+
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
604
|
+
output_cols = []
|
605
|
+
for i, cl in enumerate(classes):
|
606
|
+
# For binary classification, there is only one output column for each class
|
607
|
+
# ndarray as the two classes are complementary.
|
608
|
+
if len(cl) == 2:
|
609
|
+
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
610
|
+
else:
|
611
|
+
output_cols.extend([
|
612
|
+
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
613
|
+
])
|
614
|
+
else:
|
615
|
+
output_cols = []
|
616
|
+
|
617
|
+
# Make sure column names are valid snowflake identifiers.
|
618
|
+
assert output_cols is not None # Make MyPy happy
|
619
|
+
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
620
|
+
|
621
|
+
return rv
|
622
|
+
|
623
|
+
def _align_expected_output_names(
|
624
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
625
|
+
) -> List[str]:
|
626
|
+
# in case the inferred output column names dimension is different
|
627
|
+
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
628
|
+
output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
|
629
|
+
output_df_columns = list(output_df_pd.columns)
|
630
|
+
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
631
|
+
if self.sample_weight_col:
|
632
|
+
output_df_columns_set -= set(self.sample_weight_col)
|
633
|
+
# if the dimension of inferred output column names is correct; use it
|
634
|
+
if len(expected_output_cols_list) == len(output_df_columns_set):
|
635
|
+
return expected_output_cols_list
|
636
|
+
# otherwise, use the sklearn estimator's output
|
637
|
+
else:
|
638
|
+
return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
639
|
+
|
586
640
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
587
641
|
@telemetry.send_api_usage_telemetry(
|
588
642
|
project=_PROJECT,
|
@@ -613,24 +667,28 @@ class KNeighborsRegressor(BaseTransformer):
|
|
613
667
|
# are specific to the type of dataset used.
|
614
668
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
615
669
|
|
670
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
671
|
+
|
616
672
|
if isinstance(dataset, DataFrame):
|
617
673
|
self._deps = self._batch_inference_validate_snowpark(
|
618
674
|
dataset=dataset,
|
619
675
|
inference_method=inference_method,
|
620
676
|
)
|
621
|
-
assert isinstance(
|
677
|
+
assert isinstance(
|
678
|
+
dataset._session, Session
|
679
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
622
680
|
transform_kwargs = dict(
|
623
681
|
session=dataset._session,
|
624
682
|
dependencies=self._deps,
|
625
|
-
drop_input_cols
|
683
|
+
drop_input_cols=self._drop_input_cols,
|
626
684
|
expected_output_cols_type="float",
|
627
685
|
)
|
686
|
+
expected_output_cols = self._align_expected_output_names(
|
687
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
688
|
+
)
|
628
689
|
|
629
690
|
elif isinstance(dataset, pd.DataFrame):
|
630
|
-
transform_kwargs = dict(
|
631
|
-
snowpark_input_cols = self._snowpark_cols,
|
632
|
-
drop_input_cols = self._drop_input_cols
|
633
|
-
)
|
691
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
634
692
|
|
635
693
|
transform_handlers = ModelTransformerBuilder.build(
|
636
694
|
dataset=dataset,
|
@@ -642,7 +700,7 @@ class KNeighborsRegressor(BaseTransformer):
|
|
642
700
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
643
701
|
inference_method=inference_method,
|
644
702
|
input_cols=self.input_cols,
|
645
|
-
expected_output_cols=
|
703
|
+
expected_output_cols=expected_output_cols,
|
646
704
|
**transform_kwargs
|
647
705
|
)
|
648
706
|
return output_df
|
@@ -672,7 +730,8 @@ class KNeighborsRegressor(BaseTransformer):
|
|
672
730
|
Output dataset with log probability of the sample for each class in the model.
|
673
731
|
"""
|
674
732
|
super()._check_dataset_type(dataset)
|
675
|
-
inference_method="predict_log_proba"
|
733
|
+
inference_method = "predict_log_proba"
|
734
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
676
735
|
|
677
736
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
678
737
|
# are specific to the type of dataset used.
|
@@ -683,18 +742,20 @@ class KNeighborsRegressor(BaseTransformer):
|
|
683
742
|
dataset=dataset,
|
684
743
|
inference_method=inference_method,
|
685
744
|
)
|
686
|
-
assert isinstance(
|
745
|
+
assert isinstance(
|
746
|
+
dataset._session, Session
|
747
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
687
748
|
transform_kwargs = dict(
|
688
749
|
session=dataset._session,
|
689
750
|
dependencies=self._deps,
|
690
|
-
drop_input_cols
|
751
|
+
drop_input_cols=self._drop_input_cols,
|
691
752
|
expected_output_cols_type="float",
|
692
753
|
)
|
754
|
+
expected_output_cols = self._align_expected_output_names(
|
755
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
756
|
+
)
|
693
757
|
elif isinstance(dataset, pd.DataFrame):
|
694
|
-
transform_kwargs = dict(
|
695
|
-
snowpark_input_cols = self._snowpark_cols,
|
696
|
-
drop_input_cols = self._drop_input_cols
|
697
|
-
)
|
758
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
698
759
|
|
699
760
|
transform_handlers = ModelTransformerBuilder.build(
|
700
761
|
dataset=dataset,
|
@@ -707,7 +768,7 @@ class KNeighborsRegressor(BaseTransformer):
|
|
707
768
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
708
769
|
inference_method=inference_method,
|
709
770
|
input_cols=self.input_cols,
|
710
|
-
expected_output_cols=
|
771
|
+
expected_output_cols=expected_output_cols,
|
711
772
|
**transform_kwargs
|
712
773
|
)
|
713
774
|
return output_df
|
@@ -733,30 +794,34 @@ class KNeighborsRegressor(BaseTransformer):
|
|
733
794
|
Output dataset with results of the decision function for the samples in input dataset.
|
734
795
|
"""
|
735
796
|
super()._check_dataset_type(dataset)
|
736
|
-
inference_method="decision_function"
|
797
|
+
inference_method = "decision_function"
|
737
798
|
|
738
799
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
739
800
|
# are specific to the type of dataset used.
|
740
801
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
741
802
|
|
803
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
804
|
+
|
742
805
|
if isinstance(dataset, DataFrame):
|
743
806
|
self._deps = self._batch_inference_validate_snowpark(
|
744
807
|
dataset=dataset,
|
745
808
|
inference_method=inference_method,
|
746
809
|
)
|
747
|
-
assert isinstance(
|
810
|
+
assert isinstance(
|
811
|
+
dataset._session, Session
|
812
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
748
813
|
transform_kwargs = dict(
|
749
814
|
session=dataset._session,
|
750
815
|
dependencies=self._deps,
|
751
|
-
drop_input_cols
|
816
|
+
drop_input_cols=self._drop_input_cols,
|
752
817
|
expected_output_cols_type="float",
|
753
818
|
)
|
819
|
+
expected_output_cols = self._align_expected_output_names(
|
820
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
821
|
+
)
|
754
822
|
|
755
823
|
elif isinstance(dataset, pd.DataFrame):
|
756
|
-
transform_kwargs = dict(
|
757
|
-
snowpark_input_cols = self._snowpark_cols,
|
758
|
-
drop_input_cols = self._drop_input_cols
|
759
|
-
)
|
824
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
760
825
|
|
761
826
|
transform_handlers = ModelTransformerBuilder.build(
|
762
827
|
dataset=dataset,
|
@@ -769,7 +834,7 @@ class KNeighborsRegressor(BaseTransformer):
|
|
769
834
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
770
835
|
inference_method=inference_method,
|
771
836
|
input_cols=self.input_cols,
|
772
|
-
expected_output_cols=
|
837
|
+
expected_output_cols=expected_output_cols,
|
773
838
|
**transform_kwargs
|
774
839
|
)
|
775
840
|
return output_df
|
@@ -798,12 +863,14 @@ class KNeighborsRegressor(BaseTransformer):
|
|
798
863
|
Output dataset with probability of the sample for each class in the model.
|
799
864
|
"""
|
800
865
|
super()._check_dataset_type(dataset)
|
801
|
-
inference_method="score_samples"
|
866
|
+
inference_method = "score_samples"
|
802
867
|
|
803
868
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
804
869
|
# are specific to the type of dataset used.
|
805
870
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
806
871
|
|
872
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
873
|
+
|
807
874
|
if isinstance(dataset, DataFrame):
|
808
875
|
self._deps = self._batch_inference_validate_snowpark(
|
809
876
|
dataset=dataset,
|
@@ -816,6 +883,9 @@ class KNeighborsRegressor(BaseTransformer):
|
|
816
883
|
drop_input_cols = self._drop_input_cols,
|
817
884
|
expected_output_cols_type="float",
|
818
885
|
)
|
886
|
+
expected_output_cols = self._align_expected_output_names(
|
887
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
888
|
+
)
|
819
889
|
|
820
890
|
elif isinstance(dataset, pd.DataFrame):
|
821
891
|
transform_kwargs = dict(
|
@@ -834,7 +904,7 @@ class KNeighborsRegressor(BaseTransformer):
|
|
834
904
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
835
905
|
inference_method=inference_method,
|
836
906
|
input_cols=self.input_cols,
|
837
|
-
expected_output_cols=
|
907
|
+
expected_output_cols=expected_output_cols,
|
838
908
|
**transform_kwargs
|
839
909
|
)
|
840
910
|
return output_df
|
@@ -983,50 +1053,84 @@ class KNeighborsRegressor(BaseTransformer):
|
|
983
1053
|
)
|
984
1054
|
return output_df
|
985
1055
|
|
1056
|
+
|
1057
|
+
|
1058
|
+
def to_sklearn(self) -> Any:
|
1059
|
+
"""Get sklearn.neighbors.KNeighborsRegressor object.
|
1060
|
+
"""
|
1061
|
+
if self._sklearn_object is None:
|
1062
|
+
self._sklearn_object = self._create_sklearn_object()
|
1063
|
+
return self._sklearn_object
|
1064
|
+
|
1065
|
+
def to_xgboost(self) -> Any:
|
1066
|
+
raise exceptions.SnowflakeMLException(
|
1067
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1068
|
+
original_exception=AttributeError(
|
1069
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1070
|
+
"to_xgboost()",
|
1071
|
+
"to_sklearn()"
|
1072
|
+
)
|
1073
|
+
),
|
1074
|
+
)
|
1075
|
+
|
1076
|
+
def to_lightgbm(self) -> Any:
|
1077
|
+
raise exceptions.SnowflakeMLException(
|
1078
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1079
|
+
original_exception=AttributeError(
|
1080
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1081
|
+
"to_lightgbm()",
|
1082
|
+
"to_sklearn()"
|
1083
|
+
)
|
1084
|
+
),
|
1085
|
+
)
|
986
1086
|
|
987
|
-
def
|
1087
|
+
def _get_dependencies(self) -> List[str]:
|
1088
|
+
return self._deps
|
1089
|
+
|
1090
|
+
|
1091
|
+
def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
988
1092
|
self._model_signature_dict = dict()
|
989
1093
|
|
990
1094
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
991
1095
|
|
992
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input"))
|
1096
|
+
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
993
1097
|
outputs: List[BaseFeatureSpec] = []
|
994
1098
|
if hasattr(self, "predict"):
|
995
1099
|
# keep mypy happy
|
996
|
-
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1100
|
+
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
997
1101
|
# For classifier, the type of predict is the same as the type of label
|
998
|
-
if self._sklearn_object._estimator_type ==
|
999
|
-
|
1102
|
+
if self._sklearn_object._estimator_type == "classifier":
|
1103
|
+
# label columns is the desired type for output
|
1000
1104
|
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1001
1105
|
# rename the output columns
|
1002
1106
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1003
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1004
|
-
|
1005
|
-
|
1107
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1108
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1109
|
+
)
|
1006
1110
|
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
1007
1111
|
# For outlier models, returns -1 for outliers and 1 for inliers.
|
1008
|
-
# Clusterer returns int64 cluster labels.
|
1112
|
+
# Clusterer returns int64 cluster labels.
|
1009
1113
|
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
1010
1114
|
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
1011
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1012
|
-
|
1013
|
-
|
1014
|
-
|
1115
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1116
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1117
|
+
)
|
1118
|
+
|
1015
1119
|
# For regressor, the type of predict is float64
|
1016
|
-
elif self._sklearn_object._estimator_type ==
|
1120
|
+
elif self._sklearn_object._estimator_type == "regressor":
|
1017
1121
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1018
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1019
|
-
|
1020
|
-
|
1021
|
-
|
1122
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1123
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1124
|
+
)
|
1125
|
+
|
1022
1126
|
for prob_func in PROB_FUNCTIONS:
|
1023
1127
|
if hasattr(self, prob_func):
|
1024
1128
|
output_cols_prefix: str = f"{prob_func}_"
|
1025
1129
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1026
1130
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1027
|
-
self._model_signature_dict[prob_func] = ModelSignature(
|
1028
|
-
|
1029
|
-
|
1131
|
+
self._model_signature_dict[prob_func] = ModelSignature(
|
1132
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1133
|
+
)
|
1030
1134
|
|
1031
1135
|
# Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
|
1032
1136
|
items = list(self._model_signature_dict.items())
|
@@ -1039,10 +1143,10 @@ class KNeighborsRegressor(BaseTransformer):
|
|
1039
1143
|
"""Returns model signature of current class.
|
1040
1144
|
|
1041
1145
|
Raises:
|
1042
|
-
|
1146
|
+
SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
|
1043
1147
|
|
1044
1148
|
Returns:
|
1045
|
-
Dict
|
1149
|
+
Dict with each method and its input output signature
|
1046
1150
|
"""
|
1047
1151
|
if self._model_signature_dict is None:
|
1048
1152
|
raise exceptions.SnowflakeMLException(
|
@@ -1050,35 +1154,3 @@ class KNeighborsRegressor(BaseTransformer):
|
|
1050
1154
|
original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
|
1051
1155
|
)
|
1052
1156
|
return self._model_signature_dict
|
1053
|
-
|
1054
|
-
def to_sklearn(self) -> Any:
|
1055
|
-
"""Get sklearn.neighbors.KNeighborsRegressor object.
|
1056
|
-
"""
|
1057
|
-
if self._sklearn_object is None:
|
1058
|
-
self._sklearn_object = self._create_sklearn_object()
|
1059
|
-
return self._sklearn_object
|
1060
|
-
|
1061
|
-
def to_xgboost(self) -> Any:
|
1062
|
-
raise exceptions.SnowflakeMLException(
|
1063
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1064
|
-
original_exception=AttributeError(
|
1065
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1066
|
-
"to_xgboost()",
|
1067
|
-
"to_sklearn()"
|
1068
|
-
)
|
1069
|
-
),
|
1070
|
-
)
|
1071
|
-
|
1072
|
-
def to_lightgbm(self) -> Any:
|
1073
|
-
raise exceptions.SnowflakeMLException(
|
1074
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1075
|
-
original_exception=AttributeError(
|
1076
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1077
|
-
"to_lightgbm()",
|
1078
|
-
"to_sklearn()"
|
1079
|
-
)
|
1080
|
-
),
|
1081
|
-
)
|
1082
|
-
|
1083
|
-
def _get_dependencies(self) -> List[str]:
|
1084
|
-
return self._deps
|