snowflake-ml-python 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +11 -1
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/feature_store/feature_store.py +151 -78
- snowflake/ml/feature_store/feature_view.py +12 -24
- snowflake/ml/fileset/sfcfs.py +56 -50
- snowflake/ml/fileset/stage_fs.py +48 -13
- snowflake/ml/model/_client/model/model_version_impl.py +2 -50
- snowflake/ml/model/_client/ops/model_ops.py +78 -29
- snowflake/ml/model/_client/sql/model.py +23 -2
- snowflake/ml/model/_client/sql/model_version.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +19 -54
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +8 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +36 -6
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -2
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +5 -5
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +195 -123
- snowflake/ml/modeling/cluster/affinity_propagation.py +195 -123
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +195 -123
- snowflake/ml/modeling/cluster/birch.py +195 -123
- snowflake/ml/modeling/cluster/bisecting_k_means.py +195 -123
- snowflake/ml/modeling/cluster/dbscan.py +195 -123
- snowflake/ml/modeling/cluster/feature_agglomeration.py +195 -123
- snowflake/ml/modeling/cluster/k_means.py +195 -123
- snowflake/ml/modeling/cluster/mean_shift.py +195 -123
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +195 -123
- snowflake/ml/modeling/cluster/optics.py +195 -123
- snowflake/ml/modeling/cluster/spectral_biclustering.py +195 -123
- snowflake/ml/modeling/cluster/spectral_clustering.py +195 -123
- snowflake/ml/modeling/cluster/spectral_coclustering.py +195 -123
- snowflake/ml/modeling/compose/column_transformer.py +195 -123
- snowflake/ml/modeling/compose/transformed_target_regressor.py +195 -123
- snowflake/ml/modeling/covariance/elliptic_envelope.py +195 -123
- snowflake/ml/modeling/covariance/empirical_covariance.py +195 -123
- snowflake/ml/modeling/covariance/graphical_lasso.py +195 -123
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +195 -123
- snowflake/ml/modeling/covariance/ledoit_wolf.py +195 -123
- snowflake/ml/modeling/covariance/min_cov_det.py +195 -123
- snowflake/ml/modeling/covariance/oas.py +195 -123
- snowflake/ml/modeling/covariance/shrunk_covariance.py +195 -123
- snowflake/ml/modeling/decomposition/dictionary_learning.py +195 -123
- snowflake/ml/modeling/decomposition/factor_analysis.py +195 -123
- snowflake/ml/modeling/decomposition/fast_ica.py +195 -123
- snowflake/ml/modeling/decomposition/incremental_pca.py +195 -123
- snowflake/ml/modeling/decomposition/kernel_pca.py +195 -123
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +195 -123
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +195 -123
- snowflake/ml/modeling/decomposition/pca.py +195 -123
- snowflake/ml/modeling/decomposition/sparse_pca.py +195 -123
- snowflake/ml/modeling/decomposition/truncated_svd.py +195 -123
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +195 -123
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +195 -123
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/bagging_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/bagging_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/isolation_forest.py +195 -123
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/stacking_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/voting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/voting_regressor.py +195 -123
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fdr.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fpr.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fwe.py +195 -123
- snowflake/ml/modeling/feature_selection/select_k_best.py +195 -123
- snowflake/ml/modeling/feature_selection/select_percentile.py +195 -123
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +195 -123
- snowflake/ml/modeling/feature_selection/variance_threshold.py +195 -123
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +9 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +195 -123
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +195 -123
- snowflake/ml/modeling/impute/iterative_imputer.py +195 -123
- snowflake/ml/modeling/impute/knn_imputer.py +195 -123
- snowflake/ml/modeling/impute/missing_indicator.py +195 -123
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +195 -123
- snowflake/ml/modeling/kernel_approximation/nystroem.py +195 -123
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +195 -123
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +195 -123
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +195 -123
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +195 -123
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +195 -123
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ard_regression.py +195 -123
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +195 -123
- snowflake/ml/modeling/linear_model/elastic_net.py +195 -123
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +195 -123
- snowflake/ml/modeling/linear_model/gamma_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/huber_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/lars.py +195 -123
- snowflake/ml/modeling/linear_model/lars_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +195 -123
- snowflake/ml/modeling/linear_model/linear_regression.py +195 -123
- snowflake/ml/modeling/linear_model/logistic_regression.py +195 -123
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +195 -123
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +195 -123
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/perceptron.py +195 -123
- snowflake/ml/modeling/linear_model/poisson_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ransac_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ridge.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_cv.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +195 -123
- snowflake/ml/modeling/manifold/isomap.py +195 -123
- snowflake/ml/modeling/manifold/mds.py +195 -123
- snowflake/ml/modeling/manifold/spectral_embedding.py +195 -123
- snowflake/ml/modeling/manifold/tsne.py +195 -123
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +195 -123
- snowflake/ml/modeling/mixture/gaussian_mixture.py +195 -123
- snowflake/ml/modeling/model_selection/grid_search_cv.py +42 -18
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +42 -18
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +195 -123
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +195 -123
- snowflake/ml/modeling/multiclass/output_code_classifier.py +195 -123
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/complement_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +195 -123
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +195 -123
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +195 -123
- snowflake/ml/modeling/neighbors/kernel_density.py +195 -123
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +195 -123
- snowflake/ml/modeling/neighbors/nearest_centroid.py +195 -123
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +195 -123
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +195 -123
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +195 -123
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +195 -123
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +195 -123
- snowflake/ml/modeling/neural_network/mlp_classifier.py +195 -123
- snowflake/ml/modeling/neural_network/mlp_regressor.py +195 -123
- snowflake/ml/modeling/pipeline/pipeline.py +4 -4
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +195 -123
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +195 -123
- snowflake/ml/modeling/semi_supervised/label_spreading.py +195 -123
- snowflake/ml/modeling/svm/linear_svc.py +195 -123
- snowflake/ml/modeling/svm/linear_svr.py +195 -123
- snowflake/ml/modeling/svm/nu_svc.py +195 -123
- snowflake/ml/modeling/svm/nu_svr.py +195 -123
- snowflake/ml/modeling/svm/svc.py +195 -123
- snowflake/ml/modeling/svm/svr.py +195 -123
- snowflake/ml/modeling/tree/decision_tree_classifier.py +195 -123
- snowflake/ml/modeling/tree/decision_tree_regressor.py +195 -123
- snowflake/ml/modeling/tree/extra_tree_classifier.py +195 -123
- snowflake/ml/modeling/tree/extra_tree_regressor.py +195 -123
- snowflake/ml/modeling/xgboost/xgb_classifier.py +195 -123
- snowflake/ml/modeling/xgboost/xgb_regressor.py +195 -123
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +195 -123
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +195 -123
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/METADATA +68 -57
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/RECORD +202 -200
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
|
|
33
33
|
BatchInferenceKwargsTypedDict,
|
34
34
|
ScoreKwargsTypedDict
|
35
35
|
)
|
36
|
+
from snowflake.ml.model._signatures import utils as model_signature_utils
|
37
|
+
from snowflake.ml.model.model_signature import (
|
38
|
+
BaseFeatureSpec,
|
39
|
+
DataType,
|
40
|
+
FeatureSpec,
|
41
|
+
ModelSignature,
|
42
|
+
_infer_signature,
|
43
|
+
_rename_signature_with_snowflake_identifiers,
|
44
|
+
)
|
36
45
|
|
37
46
|
from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
|
38
47
|
|
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
|
|
43
52
|
validate_sklearn_args,
|
44
53
|
)
|
45
54
|
|
46
|
-
from snowflake.ml.model.model_signature import (
|
47
|
-
DataType,
|
48
|
-
FeatureSpec,
|
49
|
-
ModelSignature,
|
50
|
-
_infer_signature,
|
51
|
-
_rename_signature_with_snowflake_identifiers,
|
52
|
-
BaseFeatureSpec,
|
53
|
-
)
|
54
|
-
from snowflake.ml.model._signatures import utils as model_signature_utils
|
55
|
-
|
56
55
|
_PROJECT = "ModelDevelopment"
|
57
56
|
# Derive subproject from module name by removing "sklearn"
|
58
57
|
# and converting module name from underscore to CamelCase
|
@@ -299,12 +298,7 @@ class NuSVC(BaseTransformer):
|
|
299
298
|
)
|
300
299
|
return selected_cols
|
301
300
|
|
302
|
-
|
303
|
-
project=_PROJECT,
|
304
|
-
subproject=_SUBPROJECT,
|
305
|
-
custom_tags=dict([("autogen", True)]),
|
306
|
-
)
|
307
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "NuSVC":
|
301
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "NuSVC":
|
308
302
|
"""Fit the SVM model according to the given training data
|
309
303
|
For more details on this function, see [sklearn.svm.NuSVC.fit]
|
310
304
|
(https://scikit-learn.org/stable/modules/generated/sklearn.svm.NuSVC.html#sklearn.svm.NuSVC.fit)
|
@@ -331,12 +325,14 @@ class NuSVC(BaseTransformer):
|
|
331
325
|
|
332
326
|
self._snowpark_cols = dataset.select(self.input_cols).columns
|
333
327
|
|
334
|
-
|
328
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
335
329
|
if SNOWML_SPROC_ENV in os.environ:
|
336
330
|
statement_params = telemetry.get_function_usage_statement_params(
|
337
331
|
project=_PROJECT,
|
338
332
|
subproject=_SUBPROJECT,
|
339
|
-
function_name=telemetry.get_statement_params_full_func_name(
|
333
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
334
|
+
inspect.currentframe(), NuSVC.__class__.__name__
|
335
|
+
),
|
340
336
|
api_calls=[Session.call],
|
341
337
|
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
342
338
|
)
|
@@ -357,7 +353,7 @@ class NuSVC(BaseTransformer):
|
|
357
353
|
)
|
358
354
|
self._sklearn_object = model_trainer.train()
|
359
355
|
self._is_fitted = True
|
360
|
-
self.
|
356
|
+
self._generate_model_signatures(dataset)
|
361
357
|
return self
|
362
358
|
|
363
359
|
def _batch_inference_validate_snowpark(
|
@@ -433,7 +429,9 @@ class NuSVC(BaseTransformer):
|
|
433
429
|
# when it is classifier, infer the datatype from label columns
|
434
430
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
435
431
|
# Batch inference takes a single expected output column type. Use the first columns type for now.
|
436
|
-
label_cols_signatures = [
|
432
|
+
label_cols_signatures = [
|
433
|
+
row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
|
434
|
+
]
|
437
435
|
if len(label_cols_signatures) == 0:
|
438
436
|
error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
|
439
437
|
raise exceptions.SnowflakeMLException(
|
@@ -441,25 +439,22 @@ class NuSVC(BaseTransformer):
|
|
441
439
|
original_exception=ValueError(error_str),
|
442
440
|
)
|
443
441
|
|
444
|
-
expected_type_inferred = convert_sp_to_sf_type(
|
445
|
-
label_cols_signatures[0].as_snowpark_type()
|
446
|
-
)
|
442
|
+
expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
|
447
443
|
|
448
444
|
self._deps = self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
449
|
-
assert isinstance(
|
445
|
+
assert isinstance(
|
446
|
+
dataset._session, Session
|
447
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
450
448
|
|
451
449
|
transform_kwargs = dict(
|
452
|
-
session
|
453
|
-
dependencies
|
454
|
-
drop_input_cols
|
455
|
-
expected_output_cols_type
|
450
|
+
session=dataset._session,
|
451
|
+
dependencies=self._deps,
|
452
|
+
drop_input_cols=self._drop_input_cols,
|
453
|
+
expected_output_cols_type=expected_type_inferred,
|
456
454
|
)
|
457
455
|
|
458
456
|
elif isinstance(dataset, pd.DataFrame):
|
459
|
-
transform_kwargs = dict(
|
460
|
-
snowpark_input_cols = self._snowpark_cols,
|
461
|
-
drop_input_cols = self._drop_input_cols
|
462
|
-
)
|
457
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
463
458
|
|
464
459
|
transform_handlers = ModelTransformerBuilder.build(
|
465
460
|
dataset=dataset,
|
@@ -499,7 +494,7 @@ class NuSVC(BaseTransformer):
|
|
499
494
|
Transformed dataset.
|
500
495
|
"""
|
501
496
|
super()._check_dataset_type(dataset)
|
502
|
-
inference_method="transform"
|
497
|
+
inference_method = "transform"
|
503
498
|
|
504
499
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
505
500
|
# are specific to the type of dataset used.
|
@@ -536,17 +531,14 @@ class NuSVC(BaseTransformer):
|
|
536
531
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
537
532
|
|
538
533
|
transform_kwargs = dict(
|
539
|
-
session
|
540
|
-
dependencies
|
541
|
-
drop_input_cols
|
542
|
-
expected_output_cols_type
|
534
|
+
session=dataset._session,
|
535
|
+
dependencies=self._deps,
|
536
|
+
drop_input_cols=self._drop_input_cols,
|
537
|
+
expected_output_cols_type=expected_dtype,
|
543
538
|
)
|
544
539
|
|
545
540
|
elif isinstance(dataset, pd.DataFrame):
|
546
|
-
transform_kwargs = dict(
|
547
|
-
snowpark_input_cols = self._snowpark_cols,
|
548
|
-
drop_input_cols = self._drop_input_cols
|
549
|
-
)
|
541
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
550
542
|
|
551
543
|
transform_handlers = ModelTransformerBuilder.build(
|
552
544
|
dataset=dataset,
|
@@ -565,7 +557,11 @@ class NuSVC(BaseTransformer):
|
|
565
557
|
return output_df
|
566
558
|
|
567
559
|
@available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
|
568
|
-
def fit_predict(
|
560
|
+
def fit_predict(
|
561
|
+
self,
|
562
|
+
dataset: Union[DataFrame, pd.DataFrame],
|
563
|
+
output_cols_prefix: str = "fit_predict_",
|
564
|
+
) -> Union[DataFrame, pd.DataFrame]:
|
569
565
|
""" Method not supported for this class.
|
570
566
|
|
571
567
|
|
@@ -590,7 +586,9 @@ class NuSVC(BaseTransformer):
|
|
590
586
|
)
|
591
587
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
592
588
|
drop_input_cols=self._drop_input_cols,
|
593
|
-
expected_output_cols_list=
|
589
|
+
expected_output_cols_list=(
|
590
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
591
|
+
),
|
594
592
|
)
|
595
593
|
self._sklearn_object = fitted_estimator
|
596
594
|
self._is_fitted = True
|
@@ -607,6 +605,62 @@ class NuSVC(BaseTransformer):
|
|
607
605
|
assert self._sklearn_object is not None
|
608
606
|
return self._sklearn_object.embedding_
|
609
607
|
|
608
|
+
|
609
|
+
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
610
|
+
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
611
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
612
|
+
"""
|
613
|
+
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
614
|
+
# The following condition is introduced for kneighbors methods, and not used in other methods
|
615
|
+
if output_cols:
|
616
|
+
output_cols = [
|
617
|
+
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
618
|
+
for c in output_cols
|
619
|
+
]
|
620
|
+
elif getattr(self._sklearn_object, "classes_", None) is None:
|
621
|
+
output_cols = [output_cols_prefix]
|
622
|
+
elif self._sklearn_object is not None:
|
623
|
+
classes = self._sklearn_object.classes_
|
624
|
+
if isinstance(classes, numpy.ndarray):
|
625
|
+
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
626
|
+
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
627
|
+
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
628
|
+
output_cols = []
|
629
|
+
for i, cl in enumerate(classes):
|
630
|
+
# For binary classification, there is only one output column for each class
|
631
|
+
# ndarray as the two classes are complementary.
|
632
|
+
if len(cl) == 2:
|
633
|
+
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
634
|
+
else:
|
635
|
+
output_cols.extend([
|
636
|
+
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
637
|
+
])
|
638
|
+
else:
|
639
|
+
output_cols = []
|
640
|
+
|
641
|
+
# Make sure column names are valid snowflake identifiers.
|
642
|
+
assert output_cols is not None # Make MyPy happy
|
643
|
+
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
644
|
+
|
645
|
+
return rv
|
646
|
+
|
647
|
+
def _align_expected_output_names(
|
648
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
649
|
+
) -> List[str]:
|
650
|
+
# in case the inferred output column names dimension is different
|
651
|
+
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
652
|
+
output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
|
653
|
+
output_df_columns = list(output_df_pd.columns)
|
654
|
+
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
655
|
+
if self.sample_weight_col:
|
656
|
+
output_df_columns_set -= set(self.sample_weight_col)
|
657
|
+
# if the dimension of inferred output column names is correct; use it
|
658
|
+
if len(expected_output_cols_list) == len(output_df_columns_set):
|
659
|
+
return expected_output_cols_list
|
660
|
+
# otherwise, use the sklearn estimator's output
|
661
|
+
else:
|
662
|
+
return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
663
|
+
|
610
664
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
611
665
|
@telemetry.send_api_usage_telemetry(
|
612
666
|
project=_PROJECT,
|
@@ -639,24 +693,28 @@ class NuSVC(BaseTransformer):
|
|
639
693
|
# are specific to the type of dataset used.
|
640
694
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
641
695
|
|
696
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
697
|
+
|
642
698
|
if isinstance(dataset, DataFrame):
|
643
699
|
self._deps = self._batch_inference_validate_snowpark(
|
644
700
|
dataset=dataset,
|
645
701
|
inference_method=inference_method,
|
646
702
|
)
|
647
|
-
assert isinstance(
|
703
|
+
assert isinstance(
|
704
|
+
dataset._session, Session
|
705
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
648
706
|
transform_kwargs = dict(
|
649
707
|
session=dataset._session,
|
650
708
|
dependencies=self._deps,
|
651
|
-
drop_input_cols
|
709
|
+
drop_input_cols=self._drop_input_cols,
|
652
710
|
expected_output_cols_type="float",
|
653
711
|
)
|
712
|
+
expected_output_cols = self._align_expected_output_names(
|
713
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
714
|
+
)
|
654
715
|
|
655
716
|
elif isinstance(dataset, pd.DataFrame):
|
656
|
-
transform_kwargs = dict(
|
657
|
-
snowpark_input_cols = self._snowpark_cols,
|
658
|
-
drop_input_cols = self._drop_input_cols
|
659
|
-
)
|
717
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
660
718
|
|
661
719
|
transform_handlers = ModelTransformerBuilder.build(
|
662
720
|
dataset=dataset,
|
@@ -668,7 +726,7 @@ class NuSVC(BaseTransformer):
|
|
668
726
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
669
727
|
inference_method=inference_method,
|
670
728
|
input_cols=self.input_cols,
|
671
|
-
expected_output_cols=
|
729
|
+
expected_output_cols=expected_output_cols,
|
672
730
|
**transform_kwargs
|
673
731
|
)
|
674
732
|
return output_df
|
@@ -700,7 +758,8 @@ class NuSVC(BaseTransformer):
|
|
700
758
|
Output dataset with log probability of the sample for each class in the model.
|
701
759
|
"""
|
702
760
|
super()._check_dataset_type(dataset)
|
703
|
-
inference_method="predict_log_proba"
|
761
|
+
inference_method = "predict_log_proba"
|
762
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
704
763
|
|
705
764
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
706
765
|
# are specific to the type of dataset used.
|
@@ -711,18 +770,20 @@ class NuSVC(BaseTransformer):
|
|
711
770
|
dataset=dataset,
|
712
771
|
inference_method=inference_method,
|
713
772
|
)
|
714
|
-
assert isinstance(
|
773
|
+
assert isinstance(
|
774
|
+
dataset._session, Session
|
775
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
715
776
|
transform_kwargs = dict(
|
716
777
|
session=dataset._session,
|
717
778
|
dependencies=self._deps,
|
718
|
-
drop_input_cols
|
779
|
+
drop_input_cols=self._drop_input_cols,
|
719
780
|
expected_output_cols_type="float",
|
720
781
|
)
|
782
|
+
expected_output_cols = self._align_expected_output_names(
|
783
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
784
|
+
)
|
721
785
|
elif isinstance(dataset, pd.DataFrame):
|
722
|
-
transform_kwargs = dict(
|
723
|
-
snowpark_input_cols = self._snowpark_cols,
|
724
|
-
drop_input_cols = self._drop_input_cols
|
725
|
-
)
|
786
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
726
787
|
|
727
788
|
transform_handlers = ModelTransformerBuilder.build(
|
728
789
|
dataset=dataset,
|
@@ -735,7 +796,7 @@ class NuSVC(BaseTransformer):
|
|
735
796
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
736
797
|
inference_method=inference_method,
|
737
798
|
input_cols=self.input_cols,
|
738
|
-
expected_output_cols=
|
799
|
+
expected_output_cols=expected_output_cols,
|
739
800
|
**transform_kwargs
|
740
801
|
)
|
741
802
|
return output_df
|
@@ -763,30 +824,34 @@ class NuSVC(BaseTransformer):
|
|
763
824
|
Output dataset with results of the decision function for the samples in input dataset.
|
764
825
|
"""
|
765
826
|
super()._check_dataset_type(dataset)
|
766
|
-
inference_method="decision_function"
|
827
|
+
inference_method = "decision_function"
|
767
828
|
|
768
829
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
769
830
|
# are specific to the type of dataset used.
|
770
831
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
771
832
|
|
833
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
834
|
+
|
772
835
|
if isinstance(dataset, DataFrame):
|
773
836
|
self._deps = self._batch_inference_validate_snowpark(
|
774
837
|
dataset=dataset,
|
775
838
|
inference_method=inference_method,
|
776
839
|
)
|
777
|
-
assert isinstance(
|
840
|
+
assert isinstance(
|
841
|
+
dataset._session, Session
|
842
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
778
843
|
transform_kwargs = dict(
|
779
844
|
session=dataset._session,
|
780
845
|
dependencies=self._deps,
|
781
|
-
drop_input_cols
|
846
|
+
drop_input_cols=self._drop_input_cols,
|
782
847
|
expected_output_cols_type="float",
|
783
848
|
)
|
849
|
+
expected_output_cols = self._align_expected_output_names(
|
850
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
851
|
+
)
|
784
852
|
|
785
853
|
elif isinstance(dataset, pd.DataFrame):
|
786
|
-
transform_kwargs = dict(
|
787
|
-
snowpark_input_cols = self._snowpark_cols,
|
788
|
-
drop_input_cols = self._drop_input_cols
|
789
|
-
)
|
854
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
790
855
|
|
791
856
|
transform_handlers = ModelTransformerBuilder.build(
|
792
857
|
dataset=dataset,
|
@@ -799,7 +864,7 @@ class NuSVC(BaseTransformer):
|
|
799
864
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
800
865
|
inference_method=inference_method,
|
801
866
|
input_cols=self.input_cols,
|
802
|
-
expected_output_cols=
|
867
|
+
expected_output_cols=expected_output_cols,
|
803
868
|
**transform_kwargs
|
804
869
|
)
|
805
870
|
return output_df
|
@@ -828,12 +893,14 @@ class NuSVC(BaseTransformer):
|
|
828
893
|
Output dataset with probability of the sample for each class in the model.
|
829
894
|
"""
|
830
895
|
super()._check_dataset_type(dataset)
|
831
|
-
inference_method="score_samples"
|
896
|
+
inference_method = "score_samples"
|
832
897
|
|
833
898
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
834
899
|
# are specific to the type of dataset used.
|
835
900
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
836
901
|
|
902
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
903
|
+
|
837
904
|
if isinstance(dataset, DataFrame):
|
838
905
|
self._deps = self._batch_inference_validate_snowpark(
|
839
906
|
dataset=dataset,
|
@@ -846,6 +913,9 @@ class NuSVC(BaseTransformer):
|
|
846
913
|
drop_input_cols = self._drop_input_cols,
|
847
914
|
expected_output_cols_type="float",
|
848
915
|
)
|
916
|
+
expected_output_cols = self._align_expected_output_names(
|
917
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
918
|
+
)
|
849
919
|
|
850
920
|
elif isinstance(dataset, pd.DataFrame):
|
851
921
|
transform_kwargs = dict(
|
@@ -864,7 +934,7 @@ class NuSVC(BaseTransformer):
|
|
864
934
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
865
935
|
inference_method=inference_method,
|
866
936
|
input_cols=self.input_cols,
|
867
|
-
expected_output_cols=
|
937
|
+
expected_output_cols=expected_output_cols,
|
868
938
|
**transform_kwargs
|
869
939
|
)
|
870
940
|
return output_df
|
@@ -1011,50 +1081,84 @@ class NuSVC(BaseTransformer):
|
|
1011
1081
|
)
|
1012
1082
|
return output_df
|
1013
1083
|
|
1084
|
+
|
1085
|
+
|
1086
|
+
def to_sklearn(self) -> Any:
|
1087
|
+
"""Get sklearn.svm.NuSVC object.
|
1088
|
+
"""
|
1089
|
+
if self._sklearn_object is None:
|
1090
|
+
self._sklearn_object = self._create_sklearn_object()
|
1091
|
+
return self._sklearn_object
|
1092
|
+
|
1093
|
+
def to_xgboost(self) -> Any:
|
1094
|
+
raise exceptions.SnowflakeMLException(
|
1095
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1096
|
+
original_exception=AttributeError(
|
1097
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1098
|
+
"to_xgboost()",
|
1099
|
+
"to_sklearn()"
|
1100
|
+
)
|
1101
|
+
),
|
1102
|
+
)
|
1103
|
+
|
1104
|
+
def to_lightgbm(self) -> Any:
|
1105
|
+
raise exceptions.SnowflakeMLException(
|
1106
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1107
|
+
original_exception=AttributeError(
|
1108
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1109
|
+
"to_lightgbm()",
|
1110
|
+
"to_sklearn()"
|
1111
|
+
)
|
1112
|
+
),
|
1113
|
+
)
|
1014
1114
|
|
1015
|
-
def
|
1115
|
+
def _get_dependencies(self) -> List[str]:
|
1116
|
+
return self._deps
|
1117
|
+
|
1118
|
+
|
1119
|
+
def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
1016
1120
|
self._model_signature_dict = dict()
|
1017
1121
|
|
1018
1122
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1019
1123
|
|
1020
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input"))
|
1124
|
+
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1021
1125
|
outputs: List[BaseFeatureSpec] = []
|
1022
1126
|
if hasattr(self, "predict"):
|
1023
1127
|
# keep mypy happy
|
1024
|
-
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1128
|
+
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1025
1129
|
# For classifier, the type of predict is the same as the type of label
|
1026
|
-
if self._sklearn_object._estimator_type ==
|
1027
|
-
|
1130
|
+
if self._sklearn_object._estimator_type == "classifier":
|
1131
|
+
# label columns is the desired type for output
|
1028
1132
|
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1029
1133
|
# rename the output columns
|
1030
1134
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1031
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1032
|
-
|
1033
|
-
|
1135
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1136
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1137
|
+
)
|
1034
1138
|
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
1035
1139
|
# For outlier models, returns -1 for outliers and 1 for inliers.
|
1036
|
-
# Clusterer returns int64 cluster labels.
|
1140
|
+
# Clusterer returns int64 cluster labels.
|
1037
1141
|
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
1038
1142
|
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
1039
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1040
|
-
|
1041
|
-
|
1042
|
-
|
1143
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1144
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1145
|
+
)
|
1146
|
+
|
1043
1147
|
# For regressor, the type of predict is float64
|
1044
|
-
elif self._sklearn_object._estimator_type ==
|
1148
|
+
elif self._sklearn_object._estimator_type == "regressor":
|
1045
1149
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1046
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1047
|
-
|
1048
|
-
|
1049
|
-
|
1150
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1151
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1152
|
+
)
|
1153
|
+
|
1050
1154
|
for prob_func in PROB_FUNCTIONS:
|
1051
1155
|
if hasattr(self, prob_func):
|
1052
1156
|
output_cols_prefix: str = f"{prob_func}_"
|
1053
1157
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1054
1158
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1055
|
-
self._model_signature_dict[prob_func] = ModelSignature(
|
1056
|
-
|
1057
|
-
|
1159
|
+
self._model_signature_dict[prob_func] = ModelSignature(
|
1160
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1161
|
+
)
|
1058
1162
|
|
1059
1163
|
# Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
|
1060
1164
|
items = list(self._model_signature_dict.items())
|
@@ -1067,10 +1171,10 @@ class NuSVC(BaseTransformer):
|
|
1067
1171
|
"""Returns model signature of current class.
|
1068
1172
|
|
1069
1173
|
Raises:
|
1070
|
-
|
1174
|
+
SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
|
1071
1175
|
|
1072
1176
|
Returns:
|
1073
|
-
Dict
|
1177
|
+
Dict with each method and its input output signature
|
1074
1178
|
"""
|
1075
1179
|
if self._model_signature_dict is None:
|
1076
1180
|
raise exceptions.SnowflakeMLException(
|
@@ -1078,35 +1182,3 @@ class NuSVC(BaseTransformer):
|
|
1078
1182
|
original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
|
1079
1183
|
)
|
1080
1184
|
return self._model_signature_dict
|
1081
|
-
|
1082
|
-
def to_sklearn(self) -> Any:
|
1083
|
-
"""Get sklearn.svm.NuSVC object.
|
1084
|
-
"""
|
1085
|
-
if self._sklearn_object is None:
|
1086
|
-
self._sklearn_object = self._create_sklearn_object()
|
1087
|
-
return self._sklearn_object
|
1088
|
-
|
1089
|
-
def to_xgboost(self) -> Any:
|
1090
|
-
raise exceptions.SnowflakeMLException(
|
1091
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1092
|
-
original_exception=AttributeError(
|
1093
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1094
|
-
"to_xgboost()",
|
1095
|
-
"to_sklearn()"
|
1096
|
-
)
|
1097
|
-
),
|
1098
|
-
)
|
1099
|
-
|
1100
|
-
def to_lightgbm(self) -> Any:
|
1101
|
-
raise exceptions.SnowflakeMLException(
|
1102
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1103
|
-
original_exception=AttributeError(
|
1104
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1105
|
-
"to_lightgbm()",
|
1106
|
-
"to_sklearn()"
|
1107
|
-
)
|
1108
|
-
),
|
1109
|
-
)
|
1110
|
-
|
1111
|
-
def _get_dependencies(self) -> List[str]:
|
1112
|
-
return self._deps
|