snowflake-ml-python 1.6.1__py3-none-any.whl → 1.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/telemetry.py +142 -20
- snowflake/ml/_internal/utils/identifier.py +48 -11
- snowflake/ml/_internal/utils/snowflake_env.py +23 -13
- snowflake/ml/_internal/utils/sql_identifier.py +1 -1
- snowflake/ml/_internal/utils/table_manager.py +19 -1
- snowflake/ml/_internal/utils/uri.py +2 -2
- snowflake/ml/data/data_connector.py +33 -7
- snowflake/ml/data/torch_utils.py +68 -0
- snowflake/ml/dataset/dataset.py +1 -3
- snowflake/ml/feature_store/feature_store.py +41 -17
- snowflake/ml/feature_store/feature_view.py +2 -2
- snowflake/ml/fileset/embedded_stage_fs.py +1 -1
- snowflake/ml/fileset/fileset.py +1 -1
- snowflake/ml/fileset/sfcfs.py +9 -3
- snowflake/ml/model/_client/model/model_version_impl.py +22 -7
- snowflake/ml/model/_client/ops/model_ops.py +39 -3
- snowflake/ml/model/_client/ops/service_ops.py +198 -7
- snowflake/ml/model/_client/service/model_deployment_spec.py +4 -5
- snowflake/ml/model/_client/service/model_deployment_spec_schema.py +1 -2
- snowflake/ml/model/_client/sql/service.py +85 -18
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +1 -1
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +3 -3
- snowflake/ml/model/_model_composer/model_composer.py +2 -0
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +3 -8
- snowflake/ml/model/_packager/model_handlers/_utils.py +46 -14
- snowflake/ml/model/_packager/model_handlers/catboost.py +17 -15
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +23 -15
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +15 -57
- snowflake/ml/model/_packager/model_handlers/llm.py +4 -2
- snowflake/ml/model/_packager/model_handlers/model_objective_utils.py +116 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +36 -24
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +119 -6
- snowflake/ml/model/_packager/model_handlers/torchscript.py +2 -2
- snowflake/ml/model/_packager/model_handlers/xgboost.py +48 -48
- snowflake/ml/model/_packager/model_meta/model_meta.py +10 -7
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +0 -8
- snowflake/ml/model/_packager/model_packager.py +2 -0
- snowflake/ml/model/_signatures/pytorch_handler.py +1 -1
- snowflake/ml/model/_signatures/utils.py +9 -0
- snowflake/ml/model/models/llm.py +3 -1
- snowflake/ml/model/type_hints.py +9 -1
- snowflake/ml/modeling/_internal/constants.py +1 -0
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +5 -5
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +9 -6
- snowflake/ml/modeling/_internal/model_specifications.py +2 -0
- snowflake/ml/modeling/_internal/model_trainer.py +1 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +2 -2
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +5 -5
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +113 -160
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +60 -21
- snowflake/ml/modeling/cluster/affinity_propagation.py +60 -21
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +60 -21
- snowflake/ml/modeling/cluster/birch.py +60 -21
- snowflake/ml/modeling/cluster/bisecting_k_means.py +60 -21
- snowflake/ml/modeling/cluster/dbscan.py +60 -21
- snowflake/ml/modeling/cluster/feature_agglomeration.py +60 -21
- snowflake/ml/modeling/cluster/k_means.py +60 -21
- snowflake/ml/modeling/cluster/mean_shift.py +60 -21
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +60 -21
- snowflake/ml/modeling/cluster/optics.py +60 -21
- snowflake/ml/modeling/cluster/spectral_biclustering.py +60 -21
- snowflake/ml/modeling/cluster/spectral_clustering.py +60 -21
- snowflake/ml/modeling/cluster/spectral_coclustering.py +60 -21
- snowflake/ml/modeling/compose/column_transformer.py +60 -21
- snowflake/ml/modeling/compose/transformed_target_regressor.py +60 -21
- snowflake/ml/modeling/covariance/elliptic_envelope.py +60 -21
- snowflake/ml/modeling/covariance/empirical_covariance.py +60 -21
- snowflake/ml/modeling/covariance/graphical_lasso.py +60 -21
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +60 -21
- snowflake/ml/modeling/covariance/ledoit_wolf.py +60 -21
- snowflake/ml/modeling/covariance/min_cov_det.py +60 -21
- snowflake/ml/modeling/covariance/oas.py +60 -21
- snowflake/ml/modeling/covariance/shrunk_covariance.py +60 -21
- snowflake/ml/modeling/decomposition/dictionary_learning.py +60 -21
- snowflake/ml/modeling/decomposition/factor_analysis.py +60 -21
- snowflake/ml/modeling/decomposition/fast_ica.py +60 -21
- snowflake/ml/modeling/decomposition/incremental_pca.py +60 -21
- snowflake/ml/modeling/decomposition/kernel_pca.py +60 -21
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +60 -21
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +60 -21
- snowflake/ml/modeling/decomposition/pca.py +60 -21
- snowflake/ml/modeling/decomposition/sparse_pca.py +60 -21
- snowflake/ml/modeling/decomposition/truncated_svd.py +60 -21
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +60 -21
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +60 -21
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +60 -21
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +60 -21
- snowflake/ml/modeling/ensemble/bagging_classifier.py +60 -21
- snowflake/ml/modeling/ensemble/bagging_regressor.py +60 -21
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +60 -21
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +60 -21
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +60 -21
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +60 -21
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +60 -21
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +60 -21
- snowflake/ml/modeling/ensemble/isolation_forest.py +60 -21
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +60 -21
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +60 -21
- snowflake/ml/modeling/ensemble/stacking_regressor.py +60 -21
- snowflake/ml/modeling/ensemble/voting_classifier.py +60 -21
- snowflake/ml/modeling/ensemble/voting_regressor.py +60 -21
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +60 -21
- snowflake/ml/modeling/feature_selection/select_fdr.py +60 -21
- snowflake/ml/modeling/feature_selection/select_fpr.py +60 -21
- snowflake/ml/modeling/feature_selection/select_fwe.py +60 -21
- snowflake/ml/modeling/feature_selection/select_k_best.py +60 -21
- snowflake/ml/modeling/feature_selection/select_percentile.py +60 -21
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +60 -21
- snowflake/ml/modeling/feature_selection/variance_threshold.py +60 -21
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +60 -21
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +60 -21
- snowflake/ml/modeling/impute/iterative_imputer.py +60 -21
- snowflake/ml/modeling/impute/knn_imputer.py +60 -21
- snowflake/ml/modeling/impute/missing_indicator.py +60 -21
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +60 -21
- snowflake/ml/modeling/kernel_approximation/nystroem.py +60 -21
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +60 -21
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +60 -21
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +60 -21
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +60 -21
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +60 -21
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +60 -21
- snowflake/ml/modeling/linear_model/ard_regression.py +60 -21
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +60 -21
- snowflake/ml/modeling/linear_model/elastic_net.py +60 -21
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +60 -21
- snowflake/ml/modeling/linear_model/gamma_regressor.py +60 -21
- snowflake/ml/modeling/linear_model/huber_regressor.py +60 -21
- snowflake/ml/modeling/linear_model/lars.py +60 -21
- snowflake/ml/modeling/linear_model/lars_cv.py +60 -21
- snowflake/ml/modeling/linear_model/lasso.py +60 -21
- snowflake/ml/modeling/linear_model/lasso_cv.py +60 -21
- snowflake/ml/modeling/linear_model/lasso_lars.py +60 -21
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +60 -21
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +60 -21
- snowflake/ml/modeling/linear_model/linear_regression.py +60 -21
- snowflake/ml/modeling/linear_model/logistic_regression.py +60 -21
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +60 -21
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +60 -21
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +60 -21
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +60 -21
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +60 -21
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +60 -21
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +60 -21
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +60 -21
- snowflake/ml/modeling/linear_model/perceptron.py +60 -21
- snowflake/ml/modeling/linear_model/poisson_regressor.py +60 -21
- snowflake/ml/modeling/linear_model/ransac_regressor.py +60 -21
- snowflake/ml/modeling/linear_model/ridge.py +60 -21
- snowflake/ml/modeling/linear_model/ridge_classifier.py +60 -21
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +60 -21
- snowflake/ml/modeling/linear_model/ridge_cv.py +60 -21
- snowflake/ml/modeling/linear_model/sgd_classifier.py +60 -21
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +60 -21
- snowflake/ml/modeling/linear_model/sgd_regressor.py +60 -21
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +60 -21
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +60 -21
- snowflake/ml/modeling/manifold/isomap.py +60 -21
- snowflake/ml/modeling/manifold/mds.py +60 -21
- snowflake/ml/modeling/manifold/spectral_embedding.py +60 -21
- snowflake/ml/modeling/manifold/tsne.py +60 -21
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +60 -21
- snowflake/ml/modeling/mixture/gaussian_mixture.py +60 -21
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +60 -21
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +60 -21
- snowflake/ml/modeling/multiclass/output_code_classifier.py +60 -21
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +60 -21
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +60 -21
- snowflake/ml/modeling/naive_bayes/complement_nb.py +60 -21
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +60 -21
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +60 -21
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +60 -21
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +60 -21
- snowflake/ml/modeling/neighbors/kernel_density.py +60 -21
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +60 -21
- snowflake/ml/modeling/neighbors/nearest_centroid.py +60 -21
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +60 -21
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +60 -21
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +60 -21
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +60 -21
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +60 -21
- snowflake/ml/modeling/neural_network/mlp_classifier.py +60 -21
- snowflake/ml/modeling/neural_network/mlp_regressor.py +60 -21
- snowflake/ml/modeling/parameters/disable_model_tracer.py +5 -0
- snowflake/ml/modeling/pipeline/pipeline.py +1 -12
- snowflake/ml/modeling/preprocessing/polynomial_features.py +60 -21
- snowflake/ml/modeling/semi_supervised/label_propagation.py +60 -21
- snowflake/ml/modeling/semi_supervised/label_spreading.py +60 -21
- snowflake/ml/modeling/svm/linear_svc.py +60 -21
- snowflake/ml/modeling/svm/linear_svr.py +60 -21
- snowflake/ml/modeling/svm/nu_svc.py +60 -21
- snowflake/ml/modeling/svm/nu_svr.py +60 -21
- snowflake/ml/modeling/svm/svc.py +60 -21
- snowflake/ml/modeling/svm/svr.py +60 -21
- snowflake/ml/modeling/tree/decision_tree_classifier.py +60 -21
- snowflake/ml/modeling/tree/decision_tree_regressor.py +60 -21
- snowflake/ml/modeling/tree/extra_tree_classifier.py +60 -21
- snowflake/ml/modeling/tree/extra_tree_regressor.py +60 -21
- snowflake/ml/modeling/xgboost/xgb_classifier.py +63 -23
- snowflake/ml/modeling/xgboost/xgb_regressor.py +63 -23
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +63 -23
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +63 -23
- snowflake/ml/registry/_manager/model_manager.py +4 -0
- snowflake/ml/registry/model_registry.py +1 -1
- snowflake/ml/registry/registry.py +1 -2
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.2.dist-info}/METADATA +23 -4
- {snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.2.dist-info}/RECORD +211 -209
- {snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.2.dist-info}/WHEEL +1 -1
- snowflake/ml/data/torch_dataset.py +0 -33
- {snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.2.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.2.dist-info}/top_level.txt +0 -0
@@ -4,18 +4,17 @@
|
|
4
4
|
#
|
5
5
|
import inspect
|
6
6
|
import os
|
7
|
-
import
|
8
|
-
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
9
|
-
from typing_extensions import TypeGuard
|
7
|
+
from typing import Iterable, Optional, Union, List, Any, Dict, Set, Tuple
|
10
8
|
from uuid import uuid4
|
11
9
|
|
12
10
|
import cloudpickle as cp
|
13
|
-
import pandas as pd
|
14
11
|
import numpy as np
|
12
|
+
import pandas as pd
|
15
13
|
from numpy import typing as npt
|
16
14
|
|
17
15
|
|
18
16
|
import numpy
|
17
|
+
import sklearn
|
19
18
|
import xgboost
|
20
19
|
from sklearn.utils.metaestimators import available_if
|
21
20
|
|
@@ -23,12 +22,11 @@ from snowflake.ml.modeling.framework.base import BaseTransformer, _process_cols
|
|
23
22
|
from snowflake.ml._internal import telemetry
|
24
23
|
from snowflake.ml._internal.exceptions import error_codes, exceptions, modeling_error_messages
|
25
24
|
from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
|
26
|
-
from snowflake.ml._internal.utils import
|
25
|
+
from snowflake.ml._internal.utils import identifier
|
27
26
|
from snowflake.snowpark import DataFrame, Session
|
28
27
|
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
29
28
|
from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
|
30
29
|
from snowflake.ml.modeling._internal.transformer_protocols import (
|
31
|
-
ModelTransformHandlers,
|
32
30
|
BatchInferenceKwargsTypedDict,
|
33
31
|
ScoreKwargsTypedDict
|
34
32
|
)
|
@@ -361,7 +359,7 @@ class XGBRegressor(BaseTransformer):
|
|
361
359
|
self.set_sample_weight_col(sample_weight_col)
|
362
360
|
self._use_external_memory_version = use_external_memory_version
|
363
361
|
self._batch_size = batch_size
|
364
|
-
deps: Set[str] = set([f'numpy=={np.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
|
362
|
+
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
|
365
363
|
|
366
364
|
self._deps = list(deps)
|
367
365
|
|
@@ -694,12 +692,23 @@ class XGBRegressor(BaseTransformer):
|
|
694
692
|
autogenerated=self._autogenerated,
|
695
693
|
subproject=_SUBPROJECT,
|
696
694
|
)
|
697
|
-
|
698
|
-
|
699
|
-
expected_output_cols_list=(
|
700
|
-
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
701
|
-
),
|
695
|
+
expected_output_cols = (
|
696
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
702
697
|
)
|
698
|
+
if isinstance(dataset, DataFrame):
|
699
|
+
expected_output_cols, example_output_pd_df = self._align_expected_output(
|
700
|
+
"fit_predict", dataset, expected_output_cols, output_cols_prefix
|
701
|
+
)
|
702
|
+
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
703
|
+
drop_input_cols=self._drop_input_cols,
|
704
|
+
expected_output_cols_list=expected_output_cols,
|
705
|
+
example_output_pd_df=example_output_pd_df,
|
706
|
+
)
|
707
|
+
else:
|
708
|
+
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
709
|
+
drop_input_cols=self._drop_input_cols,
|
710
|
+
expected_output_cols_list=expected_output_cols,
|
711
|
+
)
|
703
712
|
self._sklearn_object = fitted_estimator
|
704
713
|
self._is_fitted = True
|
705
714
|
return output_result
|
@@ -778,12 +787,41 @@ class XGBRegressor(BaseTransformer):
|
|
778
787
|
|
779
788
|
return rv
|
780
789
|
|
781
|
-
def
|
782
|
-
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
783
|
-
) -> List[str]:
|
790
|
+
def _align_expected_output(
|
791
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str,
|
792
|
+
) -> Tuple[List[str], pd.DataFrame]:
|
793
|
+
""" Run 1 line of data with the desired method, and return one tuple that consists of the output column names
|
794
|
+
and output dataframe with 1 line.
|
795
|
+
If the method is fit_predict, run 2 lines of data.
|
796
|
+
"""
|
784
797
|
# in case the inferred output column names dimension is different
|
785
798
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
786
|
-
|
799
|
+
|
800
|
+
# For fit_predict method, a minimum of 2 is required by MinCovDet, BayesianGaussianMixture
|
801
|
+
# so change the minimum of number of rows to 2
|
802
|
+
num_examples = 2
|
803
|
+
statement_params = telemetry.get_function_usage_statement_params(
|
804
|
+
project=_PROJECT,
|
805
|
+
subproject=_SUBPROJECT,
|
806
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
807
|
+
inspect.currentframe(), XGBRegressor.__class__.__name__
|
808
|
+
),
|
809
|
+
api_calls=[Session.call],
|
810
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
811
|
+
)
|
812
|
+
if output_cols_prefix == "fit_predict_":
|
813
|
+
if hasattr(self._sklearn_object, "n_clusters"):
|
814
|
+
# cluster classes such as BisectingKMeansTest requires # of examples >= n_clusters
|
815
|
+
num_examples = self._sklearn_object.n_clusters
|
816
|
+
elif hasattr(self._sklearn_object, "min_samples"):
|
817
|
+
# OPTICS default min_samples 5, which requires at least 5 lines of data
|
818
|
+
num_examples = self._sklearn_object.min_samples
|
819
|
+
elif hasattr(self._sklearn_object, "n_neighbors") and hasattr(self._sklearn_object, "n_samples"):
|
820
|
+
# LocalOutlierFactor expects n_neighbors <= n_samples
|
821
|
+
num_examples = self._sklearn_object.n_neighbors
|
822
|
+
sample_pd_df = dataset.select(self.input_cols).limit(num_examples).to_pandas(statement_params=statement_params)
|
823
|
+
else:
|
824
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas(statement_params=statement_params)
|
787
825
|
|
788
826
|
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
789
827
|
# seen during the fit.
|
@@ -795,12 +833,14 @@ class XGBRegressor(BaseTransformer):
|
|
795
833
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
796
834
|
if self.sample_weight_col:
|
797
835
|
output_df_columns_set -= set(self.sample_weight_col)
|
836
|
+
|
798
837
|
# if the dimension of inferred output column names is correct; use it
|
799
838
|
if len(expected_output_cols_list) == len(output_df_columns_set):
|
800
|
-
return expected_output_cols_list
|
839
|
+
return expected_output_cols_list, output_df_pd
|
801
840
|
# otherwise, use the sklearn estimator's output
|
802
841
|
else:
|
803
|
-
|
842
|
+
expected_output_cols_list = sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
843
|
+
return expected_output_cols_list, output_df_pd[expected_output_cols_list]
|
804
844
|
|
805
845
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
806
846
|
@telemetry.send_api_usage_telemetry(
|
@@ -846,7 +886,7 @@ class XGBRegressor(BaseTransformer):
|
|
846
886
|
drop_input_cols=self._drop_input_cols,
|
847
887
|
expected_output_cols_type="float",
|
848
888
|
)
|
849
|
-
expected_output_cols = self.
|
889
|
+
expected_output_cols, _ = self._align_expected_output(
|
850
890
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
851
891
|
)
|
852
892
|
|
@@ -912,7 +952,7 @@ class XGBRegressor(BaseTransformer):
|
|
912
952
|
drop_input_cols=self._drop_input_cols,
|
913
953
|
expected_output_cols_type="float",
|
914
954
|
)
|
915
|
-
expected_output_cols = self.
|
955
|
+
expected_output_cols, _ = self._align_expected_output(
|
916
956
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
917
957
|
)
|
918
958
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -975,7 +1015,7 @@ class XGBRegressor(BaseTransformer):
|
|
975
1015
|
drop_input_cols=self._drop_input_cols,
|
976
1016
|
expected_output_cols_type="float",
|
977
1017
|
)
|
978
|
-
expected_output_cols = self.
|
1018
|
+
expected_output_cols, _ = self._align_expected_output(
|
979
1019
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
980
1020
|
)
|
981
1021
|
|
@@ -1040,7 +1080,7 @@ class XGBRegressor(BaseTransformer):
|
|
1040
1080
|
drop_input_cols = self._drop_input_cols,
|
1041
1081
|
expected_output_cols_type="float",
|
1042
1082
|
)
|
1043
|
-
expected_output_cols = self.
|
1083
|
+
expected_output_cols, _ = self._align_expected_output(
|
1044
1084
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
1045
1085
|
)
|
1046
1086
|
|
@@ -1105,7 +1145,7 @@ class XGBRegressor(BaseTransformer):
|
|
1105
1145
|
transform_kwargs = dict(
|
1106
1146
|
session=dataset._session,
|
1107
1147
|
dependencies=self._deps,
|
1108
|
-
score_sproc_imports=['xgboost'],
|
1148
|
+
score_sproc_imports=['xgboost', 'sklearn'],
|
1109
1149
|
)
|
1110
1150
|
elif isinstance(dataset, pd.DataFrame):
|
1111
1151
|
# pandas_handler.score() does not require any extra kwargs.
|
@@ -4,18 +4,17 @@
|
|
4
4
|
#
|
5
5
|
import inspect
|
6
6
|
import os
|
7
|
-
import
|
8
|
-
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
9
|
-
from typing_extensions import TypeGuard
|
7
|
+
from typing import Iterable, Optional, Union, List, Any, Dict, Set, Tuple
|
10
8
|
from uuid import uuid4
|
11
9
|
|
12
10
|
import cloudpickle as cp
|
13
|
-
import pandas as pd
|
14
11
|
import numpy as np
|
12
|
+
import pandas as pd
|
15
13
|
from numpy import typing as npt
|
16
14
|
|
17
15
|
|
18
16
|
import numpy
|
17
|
+
import sklearn
|
19
18
|
import xgboost
|
20
19
|
from sklearn.utils.metaestimators import available_if
|
21
20
|
|
@@ -23,12 +22,11 @@ from snowflake.ml.modeling.framework.base import BaseTransformer, _process_cols
|
|
23
22
|
from snowflake.ml._internal import telemetry
|
24
23
|
from snowflake.ml._internal.exceptions import error_codes, exceptions, modeling_error_messages
|
25
24
|
from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
|
26
|
-
from snowflake.ml._internal.utils import
|
25
|
+
from snowflake.ml._internal.utils import identifier
|
27
26
|
from snowflake.snowpark import DataFrame, Session
|
28
27
|
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
29
28
|
from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
|
30
29
|
from snowflake.ml.modeling._internal.transformer_protocols import (
|
31
|
-
ModelTransformHandlers,
|
32
30
|
BatchInferenceKwargsTypedDict,
|
33
31
|
ScoreKwargsTypedDict
|
34
32
|
)
|
@@ -363,7 +361,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
363
361
|
self.set_sample_weight_col(sample_weight_col)
|
364
362
|
self._use_external_memory_version = use_external_memory_version
|
365
363
|
self._batch_size = batch_size
|
366
|
-
deps: Set[str] = set([f'numpy=={np.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
|
364
|
+
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
|
367
365
|
|
368
366
|
self._deps = list(deps)
|
369
367
|
|
@@ -699,12 +697,23 @@ class XGBRFClassifier(BaseTransformer):
|
|
699
697
|
autogenerated=self._autogenerated,
|
700
698
|
subproject=_SUBPROJECT,
|
701
699
|
)
|
702
|
-
|
703
|
-
|
704
|
-
expected_output_cols_list=(
|
705
|
-
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
706
|
-
),
|
700
|
+
expected_output_cols = (
|
701
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
707
702
|
)
|
703
|
+
if isinstance(dataset, DataFrame):
|
704
|
+
expected_output_cols, example_output_pd_df = self._align_expected_output(
|
705
|
+
"fit_predict", dataset, expected_output_cols, output_cols_prefix
|
706
|
+
)
|
707
|
+
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
708
|
+
drop_input_cols=self._drop_input_cols,
|
709
|
+
expected_output_cols_list=expected_output_cols,
|
710
|
+
example_output_pd_df=example_output_pd_df,
|
711
|
+
)
|
712
|
+
else:
|
713
|
+
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
714
|
+
drop_input_cols=self._drop_input_cols,
|
715
|
+
expected_output_cols_list=expected_output_cols,
|
716
|
+
)
|
708
717
|
self._sklearn_object = fitted_estimator
|
709
718
|
self._is_fitted = True
|
710
719
|
return output_result
|
@@ -783,12 +792,41 @@ class XGBRFClassifier(BaseTransformer):
|
|
783
792
|
|
784
793
|
return rv
|
785
794
|
|
786
|
-
def
|
787
|
-
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
788
|
-
) -> List[str]:
|
795
|
+
def _align_expected_output(
|
796
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str,
|
797
|
+
) -> Tuple[List[str], pd.DataFrame]:
|
798
|
+
""" Run 1 line of data with the desired method, and return one tuple that consists of the output column names
|
799
|
+
and output dataframe with 1 line.
|
800
|
+
If the method is fit_predict, run 2 lines of data.
|
801
|
+
"""
|
789
802
|
# in case the inferred output column names dimension is different
|
790
803
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
791
|
-
|
804
|
+
|
805
|
+
# For fit_predict method, a minimum of 2 is required by MinCovDet, BayesianGaussianMixture
|
806
|
+
# so change the minimum of number of rows to 2
|
807
|
+
num_examples = 2
|
808
|
+
statement_params = telemetry.get_function_usage_statement_params(
|
809
|
+
project=_PROJECT,
|
810
|
+
subproject=_SUBPROJECT,
|
811
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
812
|
+
inspect.currentframe(), XGBRFClassifier.__class__.__name__
|
813
|
+
),
|
814
|
+
api_calls=[Session.call],
|
815
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
816
|
+
)
|
817
|
+
if output_cols_prefix == "fit_predict_":
|
818
|
+
if hasattr(self._sklearn_object, "n_clusters"):
|
819
|
+
# cluster classes such as BisectingKMeansTest requires # of examples >= n_clusters
|
820
|
+
num_examples = self._sklearn_object.n_clusters
|
821
|
+
elif hasattr(self._sklearn_object, "min_samples"):
|
822
|
+
# OPTICS default min_samples 5, which requires at least 5 lines of data
|
823
|
+
num_examples = self._sklearn_object.min_samples
|
824
|
+
elif hasattr(self._sklearn_object, "n_neighbors") and hasattr(self._sklearn_object, "n_samples"):
|
825
|
+
# LocalOutlierFactor expects n_neighbors <= n_samples
|
826
|
+
num_examples = self._sklearn_object.n_neighbors
|
827
|
+
sample_pd_df = dataset.select(self.input_cols).limit(num_examples).to_pandas(statement_params=statement_params)
|
828
|
+
else:
|
829
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas(statement_params=statement_params)
|
792
830
|
|
793
831
|
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
794
832
|
# seen during the fit.
|
@@ -800,12 +838,14 @@ class XGBRFClassifier(BaseTransformer):
|
|
800
838
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
801
839
|
if self.sample_weight_col:
|
802
840
|
output_df_columns_set -= set(self.sample_weight_col)
|
841
|
+
|
803
842
|
# if the dimension of inferred output column names is correct; use it
|
804
843
|
if len(expected_output_cols_list) == len(output_df_columns_set):
|
805
|
-
return expected_output_cols_list
|
844
|
+
return expected_output_cols_list, output_df_pd
|
806
845
|
# otherwise, use the sklearn estimator's output
|
807
846
|
else:
|
808
|
-
|
847
|
+
expected_output_cols_list = sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
848
|
+
return expected_output_cols_list, output_df_pd[expected_output_cols_list]
|
809
849
|
|
810
850
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
811
851
|
@telemetry.send_api_usage_telemetry(
|
@@ -853,7 +893,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
853
893
|
drop_input_cols=self._drop_input_cols,
|
854
894
|
expected_output_cols_type="float",
|
855
895
|
)
|
856
|
-
expected_output_cols = self.
|
896
|
+
expected_output_cols, _ = self._align_expected_output(
|
857
897
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
858
898
|
)
|
859
899
|
|
@@ -921,7 +961,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
921
961
|
drop_input_cols=self._drop_input_cols,
|
922
962
|
expected_output_cols_type="float",
|
923
963
|
)
|
924
|
-
expected_output_cols = self.
|
964
|
+
expected_output_cols, _ = self._align_expected_output(
|
925
965
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
926
966
|
)
|
927
967
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -984,7 +1024,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
984
1024
|
drop_input_cols=self._drop_input_cols,
|
985
1025
|
expected_output_cols_type="float",
|
986
1026
|
)
|
987
|
-
expected_output_cols = self.
|
1027
|
+
expected_output_cols, _ = self._align_expected_output(
|
988
1028
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
989
1029
|
)
|
990
1030
|
|
@@ -1049,7 +1089,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
1049
1089
|
drop_input_cols = self._drop_input_cols,
|
1050
1090
|
expected_output_cols_type="float",
|
1051
1091
|
)
|
1052
|
-
expected_output_cols = self.
|
1092
|
+
expected_output_cols, _ = self._align_expected_output(
|
1053
1093
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
1054
1094
|
)
|
1055
1095
|
|
@@ -1114,7 +1154,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
1114
1154
|
transform_kwargs = dict(
|
1115
1155
|
session=dataset._session,
|
1116
1156
|
dependencies=self._deps,
|
1117
|
-
score_sproc_imports=['xgboost'],
|
1157
|
+
score_sproc_imports=['xgboost', 'sklearn'],
|
1118
1158
|
)
|
1119
1159
|
elif isinstance(dataset, pd.DataFrame):
|
1120
1160
|
# pandas_handler.score() does not require any extra kwargs.
|
@@ -4,18 +4,17 @@
|
|
4
4
|
#
|
5
5
|
import inspect
|
6
6
|
import os
|
7
|
-
import
|
8
|
-
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
9
|
-
from typing_extensions import TypeGuard
|
7
|
+
from typing import Iterable, Optional, Union, List, Any, Dict, Set, Tuple
|
10
8
|
from uuid import uuid4
|
11
9
|
|
12
10
|
import cloudpickle as cp
|
13
|
-
import pandas as pd
|
14
11
|
import numpy as np
|
12
|
+
import pandas as pd
|
15
13
|
from numpy import typing as npt
|
16
14
|
|
17
15
|
|
18
16
|
import numpy
|
17
|
+
import sklearn
|
19
18
|
import xgboost
|
20
19
|
from sklearn.utils.metaestimators import available_if
|
21
20
|
|
@@ -23,12 +22,11 @@ from snowflake.ml.modeling.framework.base import BaseTransformer, _process_cols
|
|
23
22
|
from snowflake.ml._internal import telemetry
|
24
23
|
from snowflake.ml._internal.exceptions import error_codes, exceptions, modeling_error_messages
|
25
24
|
from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
|
26
|
-
from snowflake.ml._internal.utils import
|
25
|
+
from snowflake.ml._internal.utils import identifier
|
27
26
|
from snowflake.snowpark import DataFrame, Session
|
28
27
|
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
29
28
|
from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
|
30
29
|
from snowflake.ml.modeling._internal.transformer_protocols import (
|
31
|
-
ModelTransformHandlers,
|
32
30
|
BatchInferenceKwargsTypedDict,
|
33
31
|
ScoreKwargsTypedDict
|
34
32
|
)
|
@@ -363,7 +361,7 @@ class XGBRFRegressor(BaseTransformer):
|
|
363
361
|
self.set_sample_weight_col(sample_weight_col)
|
364
362
|
self._use_external_memory_version = use_external_memory_version
|
365
363
|
self._batch_size = batch_size
|
366
|
-
deps: Set[str] = set([f'numpy=={np.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
|
364
|
+
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
|
367
365
|
|
368
366
|
self._deps = list(deps)
|
369
367
|
|
@@ -699,12 +697,23 @@ class XGBRFRegressor(BaseTransformer):
|
|
699
697
|
autogenerated=self._autogenerated,
|
700
698
|
subproject=_SUBPROJECT,
|
701
699
|
)
|
702
|
-
|
703
|
-
|
704
|
-
expected_output_cols_list=(
|
705
|
-
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
706
|
-
),
|
700
|
+
expected_output_cols = (
|
701
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
707
702
|
)
|
703
|
+
if isinstance(dataset, DataFrame):
|
704
|
+
expected_output_cols, example_output_pd_df = self._align_expected_output(
|
705
|
+
"fit_predict", dataset, expected_output_cols, output_cols_prefix
|
706
|
+
)
|
707
|
+
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
708
|
+
drop_input_cols=self._drop_input_cols,
|
709
|
+
expected_output_cols_list=expected_output_cols,
|
710
|
+
example_output_pd_df=example_output_pd_df,
|
711
|
+
)
|
712
|
+
else:
|
713
|
+
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
714
|
+
drop_input_cols=self._drop_input_cols,
|
715
|
+
expected_output_cols_list=expected_output_cols,
|
716
|
+
)
|
708
717
|
self._sklearn_object = fitted_estimator
|
709
718
|
self._is_fitted = True
|
710
719
|
return output_result
|
@@ -783,12 +792,41 @@ class XGBRFRegressor(BaseTransformer):
|
|
783
792
|
|
784
793
|
return rv
|
785
794
|
|
786
|
-
def
|
787
|
-
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
788
|
-
) -> List[str]:
|
795
|
+
def _align_expected_output(
|
796
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str,
|
797
|
+
) -> Tuple[List[str], pd.DataFrame]:
|
798
|
+
""" Run 1 line of data with the desired method, and return one tuple that consists of the output column names
|
799
|
+
and output dataframe with 1 line.
|
800
|
+
If the method is fit_predict, run 2 lines of data.
|
801
|
+
"""
|
789
802
|
# in case the inferred output column names dimension is different
|
790
803
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
791
|
-
|
804
|
+
|
805
|
+
# For fit_predict method, a minimum of 2 is required by MinCovDet, BayesianGaussianMixture
|
806
|
+
# so change the minimum of number of rows to 2
|
807
|
+
num_examples = 2
|
808
|
+
statement_params = telemetry.get_function_usage_statement_params(
|
809
|
+
project=_PROJECT,
|
810
|
+
subproject=_SUBPROJECT,
|
811
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
812
|
+
inspect.currentframe(), XGBRFRegressor.__class__.__name__
|
813
|
+
),
|
814
|
+
api_calls=[Session.call],
|
815
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
816
|
+
)
|
817
|
+
if output_cols_prefix == "fit_predict_":
|
818
|
+
if hasattr(self._sklearn_object, "n_clusters"):
|
819
|
+
# cluster classes such as BisectingKMeansTest requires # of examples >= n_clusters
|
820
|
+
num_examples = self._sklearn_object.n_clusters
|
821
|
+
elif hasattr(self._sklearn_object, "min_samples"):
|
822
|
+
# OPTICS default min_samples 5, which requires at least 5 lines of data
|
823
|
+
num_examples = self._sklearn_object.min_samples
|
824
|
+
elif hasattr(self._sklearn_object, "n_neighbors") and hasattr(self._sklearn_object, "n_samples"):
|
825
|
+
# LocalOutlierFactor expects n_neighbors <= n_samples
|
826
|
+
num_examples = self._sklearn_object.n_neighbors
|
827
|
+
sample_pd_df = dataset.select(self.input_cols).limit(num_examples).to_pandas(statement_params=statement_params)
|
828
|
+
else:
|
829
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas(statement_params=statement_params)
|
792
830
|
|
793
831
|
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
794
832
|
# seen during the fit.
|
@@ -800,12 +838,14 @@ class XGBRFRegressor(BaseTransformer):
|
|
800
838
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
801
839
|
if self.sample_weight_col:
|
802
840
|
output_df_columns_set -= set(self.sample_weight_col)
|
841
|
+
|
803
842
|
# if the dimension of inferred output column names is correct; use it
|
804
843
|
if len(expected_output_cols_list) == len(output_df_columns_set):
|
805
|
-
return expected_output_cols_list
|
844
|
+
return expected_output_cols_list, output_df_pd
|
806
845
|
# otherwise, use the sklearn estimator's output
|
807
846
|
else:
|
808
|
-
|
847
|
+
expected_output_cols_list = sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
848
|
+
return expected_output_cols_list, output_df_pd[expected_output_cols_list]
|
809
849
|
|
810
850
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
811
851
|
@telemetry.send_api_usage_telemetry(
|
@@ -851,7 +891,7 @@ class XGBRFRegressor(BaseTransformer):
|
|
851
891
|
drop_input_cols=self._drop_input_cols,
|
852
892
|
expected_output_cols_type="float",
|
853
893
|
)
|
854
|
-
expected_output_cols = self.
|
894
|
+
expected_output_cols, _ = self._align_expected_output(
|
855
895
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
856
896
|
)
|
857
897
|
|
@@ -917,7 +957,7 @@ class XGBRFRegressor(BaseTransformer):
|
|
917
957
|
drop_input_cols=self._drop_input_cols,
|
918
958
|
expected_output_cols_type="float",
|
919
959
|
)
|
920
|
-
expected_output_cols = self.
|
960
|
+
expected_output_cols, _ = self._align_expected_output(
|
921
961
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
922
962
|
)
|
923
963
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -980,7 +1020,7 @@ class XGBRFRegressor(BaseTransformer):
|
|
980
1020
|
drop_input_cols=self._drop_input_cols,
|
981
1021
|
expected_output_cols_type="float",
|
982
1022
|
)
|
983
|
-
expected_output_cols = self.
|
1023
|
+
expected_output_cols, _ = self._align_expected_output(
|
984
1024
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
985
1025
|
)
|
986
1026
|
|
@@ -1045,7 +1085,7 @@ class XGBRFRegressor(BaseTransformer):
|
|
1045
1085
|
drop_input_cols = self._drop_input_cols,
|
1046
1086
|
expected_output_cols_type="float",
|
1047
1087
|
)
|
1048
|
-
expected_output_cols = self.
|
1088
|
+
expected_output_cols, _ = self._align_expected_output(
|
1049
1089
|
inference_method, dataset, expected_output_cols, output_cols_prefix
|
1050
1090
|
)
|
1051
1091
|
|
@@ -1110,7 +1150,7 @@ class XGBRFRegressor(BaseTransformer):
|
|
1110
1150
|
transform_kwargs = dict(
|
1111
1151
|
session=dataset._session,
|
1112
1152
|
dependencies=self._deps,
|
1113
|
-
score_sproc_imports=['xgboost'],
|
1153
|
+
score_sproc_imports=['xgboost', 'sklearn'],
|
1114
1154
|
)
|
1115
1155
|
elif isinstance(dataset, pd.DataFrame):
|
1116
1156
|
# pandas_handler.score() does not require any extra kwargs.
|
@@ -50,6 +50,7 @@ class ModelManager:
|
|
50
50
|
sample_input_data: Optional[model_types.SupportedDataType] = None,
|
51
51
|
code_paths: Optional[List[str]] = None,
|
52
52
|
ext_modules: Optional[List[ModuleType]] = None,
|
53
|
+
model_objective: model_types.ModelObjective = model_types.ModelObjective.UNKNOWN,
|
53
54
|
options: Optional[model_types.ModelSaveOption] = None,
|
54
55
|
statement_params: Optional[Dict[str, Any]] = None,
|
55
56
|
) -> model_version_impl.ModelVersion:
|
@@ -89,6 +90,7 @@ class ModelManager:
|
|
89
90
|
sample_input_data=sample_input_data,
|
90
91
|
code_paths=code_paths,
|
91
92
|
ext_modules=ext_modules,
|
93
|
+
model_objective=model_objective,
|
92
94
|
options=options,
|
93
95
|
statement_params=statement_params,
|
94
96
|
)
|
@@ -108,6 +110,7 @@ class ModelManager:
|
|
108
110
|
sample_input_data: Optional[model_types.SupportedDataType] = None,
|
109
111
|
code_paths: Optional[List[str]] = None,
|
110
112
|
ext_modules: Optional[List[ModuleType]] = None,
|
113
|
+
model_objective: model_types.ModelObjective = model_types.ModelObjective.UNKNOWN,
|
111
114
|
options: Optional[model_types.ModelSaveOption] = None,
|
112
115
|
statement_params: Optional[Dict[str, Any]] = None,
|
113
116
|
) -> model_version_impl.ModelVersion:
|
@@ -156,6 +159,7 @@ class ModelManager:
|
|
156
159
|
code_paths=code_paths,
|
157
160
|
ext_modules=ext_modules,
|
158
161
|
options=options,
|
162
|
+
model_objective=model_objective,
|
159
163
|
)
|
160
164
|
statement_params = telemetry.add_statement_params_custom_tags(
|
161
165
|
statement_params, model_metadata.telemetry_metadata()
|
@@ -576,7 +576,7 @@ fully integrated into the new registry.
|
|
576
576
|
raw_stage_path = uri.get_snowflake_stage_path_from_uri(model_uri)
|
577
577
|
if not raw_stage_path:
|
578
578
|
return None
|
579
|
-
(db, schema, stage, _) = identifier.
|
579
|
+
(db, schema, stage, _) = identifier.parse_snowflake_stage_path(raw_stage_path)
|
580
580
|
return identifier.get_schema_level_object_identifier(db, schema, stage)
|
581
581
|
|
582
582
|
def _list_selected_models(
|
@@ -244,8 +244,7 @@ class Registry:
|
|
244
244
|
warnings.warn(
|
245
245
|
"Models logged specifying `pip_requirements` can not be executed "
|
246
246
|
"in Snowflake Warehouse where all dependencies are required to be retrieved "
|
247
|
-
"from Snowflake Anaconda Channel.
|
248
|
-
"to log model with pip dependencies.",
|
247
|
+
"from Snowflake Anaconda Channel.",
|
249
248
|
category=UserWarning,
|
250
249
|
stacklevel=1,
|
251
250
|
)
|
snowflake/ml/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
VERSION="1.6.
|
1
|
+
VERSION="1.6.2"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: snowflake-ml-python
|
3
|
-
Version: 1.6.
|
3
|
+
Version: 1.6.2
|
4
4
|
Summary: The machine learning client library that is used for interacting with Snowflake to build machine learning solutions.
|
5
5
|
Author-email: "Snowflake, Inc" <support@snowflake.com>
|
6
6
|
License:
|
@@ -253,7 +253,7 @@ Requires-Dist: snowflake-connector-python[pandas] <4,>=3.5.0
|
|
253
253
|
Requires-Dist: snowflake-snowpark-python <2,>=1.17.0
|
254
254
|
Requires-Dist: sqlparse <1,>=0.4
|
255
255
|
Requires-Dist: typing-extensions <5,>=4.1.0
|
256
|
-
Requires-Dist: xgboost <2,>=1.7.3
|
256
|
+
Requires-Dist: xgboost <2.1,>=1.7.3
|
257
257
|
Provides-Extra: all
|
258
258
|
Requires-Dist: catboost <2,>=1.2.0 ; extra == 'all'
|
259
259
|
Requires-Dist: lightgbm <5,>=3.3.5 ; extra == 'all'
|
@@ -373,7 +373,27 @@ be compatibility issues. Server-side functionality that `snowflake-ml-python` de
|
|
373
373
|
|
374
374
|
# Release History
|
375
375
|
|
376
|
-
## 1.6.
|
376
|
+
## 1.6.2 (TBD)
|
377
|
+
|
378
|
+
### Bug Fixes
|
379
|
+
|
380
|
+
- Modeling: Support XGBoost 2.0.x; the `xgboost` requirement is relaxed from `<2` to `<2.1`.
|
381
|
+
|
382
|
+
- Data: Fix multiple epoch iteration over `DataConnector.to_torch_datapipe()` DataPipes.
|
383
|
+
- Generic: Fix a bug where an invalid name provided to an argument that expects a fully qualified name would
|
384
|
+
be parsed incorrectly. An exception is now raised instead.
|
385
|
+
- Model Explainability: Handle explanations for multiclass XGBoost classification models
|
386
|
+
- Model Explainability: Workarounds and better error handling for XGB>2.1.0 not working with SHAP==0.42.1
|
387
|
+
|
388
|
+
### New Features
|
389
|
+
|
390
|
+
- Data: Add top-level exports for `DataConnector` and `DataSource` to `snowflake.ml.data`.
|
391
|
+
- Data: Add native batching support via `batch_size` and `drop_last_batch` arguments to `DataConnector.to_torch_dataset()`
|
392
|
+
- Feature Store: `update_feature_view()` now accepts a feature view object as an argument.
|
393
|
+
|
394
|
+
### Behavior Changes
|
395
|
+
|
396
|
+
## 1.6.1 (2024-08-12)
|
377
397
|
|
378
398
|
### Bug Fixes
|
379
399
|
|
@@ -390,7 +410,6 @@ be compatibility issues. Server-side functionality that `snowflake-ml-python` de
|
|
390
410
|
### New Features
|
391
411
|
|
392
412
|
- Enable `set_params` to set the parameters of the underlying sklearn estimator, if the snowflake-ml model has been fit.
|
393
|
-
- Data: Add top-level exports for `DataConnector` and `DataSource` to `snowflake.ml.data`.
|
394
413
|
- Data: Add `snowflake.ml.data.ingestor_utils` module with utility functions helpful for `DataIngestor` implementations.
|
395
414
|
- Data: Add new `to_torch_dataset()` connector to `DataConnector` to replace deprecated DataPipe.
|
396
415
|
- Registry: Option to `enable_explainability` set to True by default for XGBoost, LightGBM and CatBoost as PuPr feature.
|