snowflake-ml-python 1.7.3__py3-none-any.whl → 1.7.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/_complete.py +19 -0
- snowflake/ml/_internal/env_utils.py +64 -21
- snowflake/ml/_internal/platform_capabilities.py +87 -0
- snowflake/ml/_internal/relax_version_strategy.py +16 -0
- snowflake/ml/_internal/telemetry.py +21 -0
- snowflake/ml/data/_internal/arrow_ingestor.py +1 -1
- snowflake/ml/dataset/dataset.py +0 -1
- snowflake/ml/feature_store/feature_store.py +18 -0
- snowflake/ml/feature_store/feature_view.py +46 -1
- snowflake/ml/fileset/fileset.py +6 -0
- snowflake/ml/jobs/__init__.py +21 -0
- snowflake/ml/jobs/_utils/constants.py +57 -0
- snowflake/ml/jobs/_utils/payload_utils.py +438 -0
- snowflake/ml/jobs/_utils/spec_utils.py +296 -0
- snowflake/ml/jobs/_utils/types.py +39 -0
- snowflake/ml/jobs/decorators.py +71 -0
- snowflake/ml/jobs/job.py +113 -0
- snowflake/ml/jobs/manager.py +298 -0
- snowflake/ml/model/_client/ops/model_ops.py +11 -2
- snowflake/ml/model/_client/ops/service_ops.py +1 -11
- snowflake/ml/model/_client/sql/service.py +13 -6
- snowflake/ml/model/_packager/model_env/model_env.py +45 -28
- snowflake/ml/model/_packager/model_handlers/_utils.py +19 -6
- snowflake/ml/model/_packager/model_handlers/custom.py +1 -2
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +17 -0
- snowflake/ml/model/_packager/model_handlers/keras.py +230 -0
- snowflake/ml/model/_packager/model_handlers/pytorch.py +1 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +28 -3
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +74 -21
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +27 -49
- snowflake/ml/model/_packager/model_handlers_migrator/tensorflow_migrator_2023_12_01.py +48 -0
- snowflake/ml/model/_packager/model_meta/model_meta.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +3 -0
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +2 -2
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +4 -1
- snowflake/ml/model/_packager/model_task/model_task_utils.py +5 -1
- snowflake/ml/model/_signatures/base_handler.py +1 -2
- snowflake/ml/model/_signatures/builtins_handler.py +2 -2
- snowflake/ml/model/_signatures/core.py +2 -2
- snowflake/ml/model/_signatures/numpy_handler.py +11 -12
- snowflake/ml/model/_signatures/pandas_handler.py +11 -9
- snowflake/ml/model/_signatures/pytorch_handler.py +3 -6
- snowflake/ml/model/_signatures/snowpark_handler.py +3 -3
- snowflake/ml/model/_signatures/tensorflow_handler.py +2 -7
- snowflake/ml/model/model_signature.py +25 -4
- snowflake/ml/model/type_hints.py +15 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +14 -1
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +6 -3
- snowflake/ml/modeling/cluster/affinity_propagation.py +6 -3
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +6 -3
- snowflake/ml/modeling/cluster/birch.py +6 -3
- snowflake/ml/modeling/cluster/bisecting_k_means.py +6 -3
- snowflake/ml/modeling/cluster/dbscan.py +6 -3
- snowflake/ml/modeling/cluster/feature_agglomeration.py +6 -3
- snowflake/ml/modeling/cluster/k_means.py +6 -3
- snowflake/ml/modeling/cluster/mean_shift.py +6 -3
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +6 -3
- snowflake/ml/modeling/cluster/optics.py +6 -3
- snowflake/ml/modeling/cluster/spectral_biclustering.py +6 -3
- snowflake/ml/modeling/cluster/spectral_clustering.py +6 -3
- snowflake/ml/modeling/cluster/spectral_coclustering.py +6 -3
- snowflake/ml/modeling/compose/column_transformer.py +6 -3
- snowflake/ml/modeling/compose/transformed_target_regressor.py +6 -3
- snowflake/ml/modeling/covariance/elliptic_envelope.py +6 -3
- snowflake/ml/modeling/covariance/empirical_covariance.py +6 -3
- snowflake/ml/modeling/covariance/graphical_lasso.py +6 -3
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +6 -3
- snowflake/ml/modeling/covariance/ledoit_wolf.py +6 -3
- snowflake/ml/modeling/covariance/min_cov_det.py +6 -3
- snowflake/ml/modeling/covariance/oas.py +6 -3
- snowflake/ml/modeling/covariance/shrunk_covariance.py +6 -3
- snowflake/ml/modeling/decomposition/dictionary_learning.py +6 -3
- snowflake/ml/modeling/decomposition/factor_analysis.py +6 -3
- snowflake/ml/modeling/decomposition/fast_ica.py +6 -3
- snowflake/ml/modeling/decomposition/incremental_pca.py +6 -3
- snowflake/ml/modeling/decomposition/kernel_pca.py +6 -3
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +6 -3
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +6 -3
- snowflake/ml/modeling/decomposition/pca.py +6 -3
- snowflake/ml/modeling/decomposition/sparse_pca.py +6 -3
- snowflake/ml/modeling/decomposition/truncated_svd.py +6 -3
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +6 -3
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +6 -3
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/bagging_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/bagging_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/isolation_forest.py +6 -3
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/stacking_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/voting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/voting_regressor.py +6 -3
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fdr.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fpr.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fwe.py +6 -3
- snowflake/ml/modeling/feature_selection/select_k_best.py +6 -3
- snowflake/ml/modeling/feature_selection/select_percentile.py +6 -3
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +6 -3
- snowflake/ml/modeling/feature_selection/variance_threshold.py +6 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +6 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +6 -3
- snowflake/ml/modeling/impute/iterative_imputer.py +6 -3
- snowflake/ml/modeling/impute/knn_imputer.py +6 -3
- snowflake/ml/modeling/impute/missing_indicator.py +6 -3
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +6 -3
- snowflake/ml/modeling/kernel_approximation/nystroem.py +6 -3
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +6 -3
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +6 -3
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +6 -3
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +6 -3
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +6 -3
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ard_regression.py +6 -3
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +6 -3
- snowflake/ml/modeling/linear_model/elastic_net.py +6 -3
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +6 -3
- snowflake/ml/modeling/linear_model/gamma_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/huber_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/lars.py +6 -3
- snowflake/ml/modeling/linear_model/lars_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +6 -3
- snowflake/ml/modeling/linear_model/linear_regression.py +6 -3
- snowflake/ml/modeling/linear_model/logistic_regression.py +6 -3
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +6 -3
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +6 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/perceptron.py +6 -3
- snowflake/ml/modeling/linear_model/poisson_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ransac_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ridge.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_cv.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +6 -3
- snowflake/ml/modeling/manifold/isomap.py +6 -3
- snowflake/ml/modeling/manifold/mds.py +6 -3
- snowflake/ml/modeling/manifold/spectral_embedding.py +6 -3
- snowflake/ml/modeling/manifold/tsne.py +6 -3
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +6 -3
- snowflake/ml/modeling/mixture/gaussian_mixture.py +6 -3
- snowflake/ml/modeling/model_selection/grid_search_cv.py +17 -2
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +17 -2
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +6 -3
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +6 -3
- snowflake/ml/modeling/multiclass/output_code_classifier.py +6 -3
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/complement_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +6 -3
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +6 -3
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +6 -3
- snowflake/ml/modeling/neighbors/kernel_density.py +6 -3
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +6 -3
- snowflake/ml/modeling/neighbors/nearest_centroid.py +6 -3
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +6 -3
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +6 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +6 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +6 -3
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +6 -3
- snowflake/ml/modeling/neural_network/mlp_classifier.py +6 -3
- snowflake/ml/modeling/neural_network/mlp_regressor.py +6 -3
- snowflake/ml/modeling/pipeline/pipeline.py +28 -3
- snowflake/ml/modeling/preprocessing/polynomial_features.py +8 -5
- snowflake/ml/modeling/semi_supervised/label_propagation.py +6 -3
- snowflake/ml/modeling/semi_supervised/label_spreading.py +6 -3
- snowflake/ml/modeling/svm/linear_svc.py +6 -3
- snowflake/ml/modeling/svm/linear_svr.py +6 -3
- snowflake/ml/modeling/svm/nu_svc.py +6 -3
- snowflake/ml/modeling/svm/nu_svr.py +6 -3
- snowflake/ml/modeling/svm/svc.py +6 -3
- snowflake/ml/modeling/svm/svr.py +6 -3
- snowflake/ml/modeling/tree/decision_tree_classifier.py +6 -3
- snowflake/ml/modeling/tree/decision_tree_regressor.py +6 -3
- snowflake/ml/modeling/tree/extra_tree_classifier.py +6 -3
- snowflake/ml/modeling/tree/extra_tree_regressor.py +6 -3
- snowflake/ml/modeling/xgboost/xgb_classifier.py +6 -3
- snowflake/ml/modeling/xgboost/xgb_regressor.py +6 -3
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +6 -3
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +6 -3
- snowflake/ml/registry/registry.py +34 -4
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/METADATA +81 -33
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/RECORD +208 -196
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/WHEEL +1 -1
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/top_level.txt +0 -0
snowflake/ml/modeling/svm/svc.py
CHANGED
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.svm".replace("sklearn.",
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class SVC(BaseTransformer):
|
61
64
|
r"""C-Support Vector Classification
|
62
65
|
For more details on this class, see [sklearn.svm.SVC]
|
@@ -511,7 +514,7 @@ class SVC(BaseTransformer):
|
|
511
514
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
512
515
|
expected_dtype = "array"
|
513
516
|
else:
|
514
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
517
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
515
518
|
# We can only infer the output types from the input types if the following two statements are true:
|
516
519
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
517
520
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1174,7 +1177,7 @@ class SVC(BaseTransformer):
|
|
1174
1177
|
|
1175
1178
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1176
1179
|
|
1177
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1180
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1178
1181
|
outputs: List[BaseFeatureSpec] = []
|
1179
1182
|
if hasattr(self, "predict"):
|
1180
1183
|
# keep mypy happy
|
@@ -1182,7 +1185,7 @@ class SVC(BaseTransformer):
|
|
1182
1185
|
# For classifier, the type of predict is the same as the type of label
|
1183
1186
|
if self._sklearn_object._estimator_type == "classifier":
|
1184
1187
|
# label columns is the desired type for output
|
1185
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1188
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1186
1189
|
# rename the output columns
|
1187
1190
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1188
1191
|
self._model_signature_dict["predict"] = ModelSignature(
|
snowflake/ml/modeling/svm/svr.py
CHANGED
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.svm".replace("sklearn.",
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class SVR(BaseTransformer):
|
61
64
|
r"""Epsilon-Support Vector Regression
|
62
65
|
For more details on this class, see [sklearn.svm.SVR]
|
@@ -470,7 +473,7 @@ class SVR(BaseTransformer):
|
|
470
473
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
471
474
|
expected_dtype = "array"
|
472
475
|
else:
|
473
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
476
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
474
477
|
# We can only infer the output types from the input types if the following two statements are true:
|
475
478
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
476
479
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1127,7 +1130,7 @@ class SVR(BaseTransformer):
|
|
1127
1130
|
|
1128
1131
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1129
1132
|
|
1130
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1133
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1131
1134
|
outputs: List[BaseFeatureSpec] = []
|
1132
1135
|
if hasattr(self, "predict"):
|
1133
1136
|
# keep mypy happy
|
@@ -1135,7 +1138,7 @@ class SVR(BaseTransformer):
|
|
1135
1138
|
# For classifier, the type of predict is the same as the type of label
|
1136
1139
|
if self._sklearn_object._estimator_type == "classifier":
|
1137
1140
|
# label columns is the desired type for output
|
1138
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1141
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1139
1142
|
# rename the output columns
|
1140
1143
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1141
1144
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.tree".replace("sklearn."
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class DecisionTreeClassifier(BaseTransformer):
|
61
64
|
r"""A decision tree classifier
|
62
65
|
For more details on this class, see [sklearn.tree.DecisionTreeClassifier]
|
@@ -554,7 +557,7 @@ class DecisionTreeClassifier(BaseTransformer):
|
|
554
557
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
555
558
|
expected_dtype = "array"
|
556
559
|
else:
|
557
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
560
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
558
561
|
# We can only infer the output types from the input types if the following two statements are true:
|
559
562
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
560
563
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1215,7 +1218,7 @@ class DecisionTreeClassifier(BaseTransformer):
|
|
1215
1218
|
|
1216
1219
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1217
1220
|
|
1218
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1221
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1219
1222
|
outputs: List[BaseFeatureSpec] = []
|
1220
1223
|
if hasattr(self, "predict"):
|
1221
1224
|
# keep mypy happy
|
@@ -1223,7 +1226,7 @@ class DecisionTreeClassifier(BaseTransformer):
|
|
1223
1226
|
# For classifier, the type of predict is the same as the type of label
|
1224
1227
|
if self._sklearn_object._estimator_type == "classifier":
|
1225
1228
|
# label columns is the desired type for output
|
1226
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1229
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1227
1230
|
# rename the output columns
|
1228
1231
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1229
1232
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.tree".replace("sklearn."
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class DecisionTreeRegressor(BaseTransformer):
|
61
64
|
r"""A decision tree regressor
|
62
65
|
For more details on this class, see [sklearn.tree.DecisionTreeRegressor]
|
@@ -533,7 +536,7 @@ class DecisionTreeRegressor(BaseTransformer):
|
|
533
536
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
534
537
|
expected_dtype = "array"
|
535
538
|
else:
|
536
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
539
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
537
540
|
# We can only infer the output types from the input types if the following two statements are true:
|
538
541
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
539
542
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1190,7 +1193,7 @@ class DecisionTreeRegressor(BaseTransformer):
|
|
1190
1193
|
|
1191
1194
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1192
1195
|
|
1193
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1196
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1194
1197
|
outputs: List[BaseFeatureSpec] = []
|
1195
1198
|
if hasattr(self, "predict"):
|
1196
1199
|
# keep mypy happy
|
@@ -1198,7 +1201,7 @@ class DecisionTreeRegressor(BaseTransformer):
|
|
1198
1201
|
# For classifier, the type of predict is the same as the type of label
|
1199
1202
|
if self._sklearn_object._estimator_type == "classifier":
|
1200
1203
|
# label columns is the desired type for output
|
1201
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1204
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1202
1205
|
# rename the output columns
|
1203
1206
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1204
1207
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.tree".replace("sklearn."
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class ExtraTreeClassifier(BaseTransformer):
|
61
64
|
r"""An extremely randomized tree classifier
|
62
65
|
For more details on this class, see [sklearn.tree.ExtraTreeClassifier]
|
@@ -546,7 +549,7 @@ class ExtraTreeClassifier(BaseTransformer):
|
|
546
549
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
547
550
|
expected_dtype = "array"
|
548
551
|
else:
|
549
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
552
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
550
553
|
# We can only infer the output types from the input types if the following two statements are true:
|
551
554
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
552
555
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1207,7 +1210,7 @@ class ExtraTreeClassifier(BaseTransformer):
|
|
1207
1210
|
|
1208
1211
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1209
1212
|
|
1210
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1213
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1211
1214
|
outputs: List[BaseFeatureSpec] = []
|
1212
1215
|
if hasattr(self, "predict"):
|
1213
1216
|
# keep mypy happy
|
@@ -1215,7 +1218,7 @@ class ExtraTreeClassifier(BaseTransformer):
|
|
1215
1218
|
# For classifier, the type of predict is the same as the type of label
|
1216
1219
|
if self._sklearn_object._estimator_type == "classifier":
|
1217
1220
|
# label columns is the desired type for output
|
1218
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1221
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1219
1222
|
# rename the output columns
|
1220
1223
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1221
1224
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.tree".replace("sklearn."
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class ExtraTreeRegressor(BaseTransformer):
|
61
64
|
r"""An extremely randomized tree regressor
|
62
65
|
For more details on this class, see [sklearn.tree.ExtraTreeRegressor]
|
@@ -525,7 +528,7 @@ class ExtraTreeRegressor(BaseTransformer):
|
|
525
528
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
526
529
|
expected_dtype = "array"
|
527
530
|
else:
|
528
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
531
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
529
532
|
# We can only infer the output types from the input types if the following two statements are true:
|
530
533
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
531
534
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1182,7 +1185,7 @@ class ExtraTreeRegressor(BaseTransformer):
|
|
1182
1185
|
|
1183
1186
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1184
1187
|
|
1185
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1188
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1186
1189
|
outputs: List[BaseFeatureSpec] = []
|
1187
1190
|
if hasattr(self, "predict"):
|
1188
1191
|
# keep mypy happy
|
@@ -1190,7 +1193,7 @@ class ExtraTreeRegressor(BaseTransformer):
|
|
1190
1193
|
# For classifier, the type of predict is the same as the type of label
|
1191
1194
|
if self._sklearn_object._estimator_type == "classifier":
|
1192
1195
|
# label columns is the desired type for output
|
1193
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1196
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1194
1197
|
# rename the output columns
|
1195
1198
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1196
1199
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "xgboost".replace("sklearn.", "")
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class XGBClassifier(BaseTransformer):
|
61
64
|
r"""Implementation of the scikit-learn API for XGBoost classification
|
62
65
|
For more details on this class, see [xgboost.XGBClassifier]
|
@@ -701,7 +704,7 @@ class XGBClassifier(BaseTransformer):
|
|
701
704
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
702
705
|
expected_dtype = "array"
|
703
706
|
else:
|
704
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
707
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
705
708
|
# We can only infer the output types from the input types if the following two statements are true:
|
706
709
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
707
710
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1362,7 +1365,7 @@ class XGBClassifier(BaseTransformer):
|
|
1362
1365
|
|
1363
1366
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1364
1367
|
|
1365
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1368
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1366
1369
|
outputs: List[BaseFeatureSpec] = []
|
1367
1370
|
if hasattr(self, "predict"):
|
1368
1371
|
# keep mypy happy
|
@@ -1370,7 +1373,7 @@ class XGBClassifier(BaseTransformer):
|
|
1370
1373
|
# For classifier, the type of predict is the same as the type of label
|
1371
1374
|
if self._sklearn_object._estimator_type == "classifier":
|
1372
1375
|
# label columns is the desired type for output
|
1373
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1376
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1374
1377
|
# rename the output columns
|
1375
1378
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1376
1379
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "xgboost".replace("sklearn.", "")
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class XGBRegressor(BaseTransformer):
|
61
64
|
r"""Implementation of the scikit-learn API for XGBoost regression
|
62
65
|
For more details on this class, see [xgboost.XGBRegressor]
|
@@ -702,7 +705,7 @@ class XGBRegressor(BaseTransformer):
|
|
702
705
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
703
706
|
expected_dtype = "array"
|
704
707
|
else:
|
705
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
708
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
706
709
|
# We can only infer the output types from the input types if the following two statements are true:
|
707
710
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
708
711
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1359,7 +1362,7 @@ class XGBRegressor(BaseTransformer):
|
|
1359
1362
|
|
1360
1363
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1361
1364
|
|
1362
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1365
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1363
1366
|
outputs: List[BaseFeatureSpec] = []
|
1364
1367
|
if hasattr(self, "predict"):
|
1365
1368
|
# keep mypy happy
|
@@ -1367,7 +1370,7 @@ class XGBRegressor(BaseTransformer):
|
|
1367
1370
|
# For classifier, the type of predict is the same as the type of label
|
1368
1371
|
if self._sklearn_object._estimator_type == "classifier":
|
1369
1372
|
# label columns is the desired type for output
|
1370
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1373
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1371
1374
|
# rename the output columns
|
1372
1375
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1373
1376
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "xgboost".replace("sklearn.", "")
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class XGBRFClassifier(BaseTransformer):
|
61
64
|
r"""scikit-learn API for XGBoost random forest classification
|
62
65
|
For more details on this class, see [xgboost.XGBRFClassifier]
|
@@ -707,7 +710,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
707
710
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
708
711
|
expected_dtype = "array"
|
709
712
|
else:
|
710
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
713
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
711
714
|
# We can only infer the output types from the input types if the following two statements are true:
|
712
715
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
713
716
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1368,7 +1371,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
1368
1371
|
|
1369
1372
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1370
1373
|
|
1371
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1374
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1372
1375
|
outputs: List[BaseFeatureSpec] = []
|
1373
1376
|
if hasattr(self, "predict"):
|
1374
1377
|
# keep mypy happy
|
@@ -1376,7 +1379,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
1376
1379
|
# For classifier, the type of predict is the same as the type of label
|
1377
1380
|
if self._sklearn_object._estimator_type == "classifier":
|
1378
1381
|
# label columns is the desired type for output
|
1379
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1382
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1380
1383
|
# rename the output columns
|
1381
1384
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1382
1385
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "xgboost".replace("sklearn.", "")
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class XGBRFRegressor(BaseTransformer):
|
61
64
|
r"""scikit-learn API for XGBoost random forest regression
|
62
65
|
For more details on this class, see [xgboost.XGBRFRegressor]
|
@@ -707,7 +710,7 @@ class XGBRFRegressor(BaseTransformer):
|
|
707
710
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
708
711
|
expected_dtype = "array"
|
709
712
|
else:
|
710
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
713
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
711
714
|
# We can only infer the output types from the input types if the following two statements are true:
|
712
715
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
713
716
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1364,7 +1367,7 @@ class XGBRFRegressor(BaseTransformer):
|
|
1364
1367
|
|
1365
1368
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1366
1369
|
|
1367
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1370
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1368
1371
|
outputs: List[BaseFeatureSpec] = []
|
1369
1372
|
if hasattr(self, "predict"):
|
1370
1373
|
# keep mypy happy
|
@@ -1372,7 +1375,7 @@ class XGBRFRegressor(BaseTransformer):
|
|
1372
1375
|
# For classifier, the type of predict is the same as the type of label
|
1373
1376
|
if self._sklearn_object._estimator_type == "classifier":
|
1374
1377
|
# label columns is the desired type for output
|
1375
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1378
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1376
1379
|
# rename the output columns
|
1377
1380
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1378
1381
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -78,7 +78,7 @@ class Registry:
|
|
78
78
|
session, database_name=self._database_name, schema_name=self._schema_name
|
79
79
|
)
|
80
80
|
|
81
|
-
self.enable_monitoring = options.get("enable_monitoring",
|
81
|
+
self.enable_monitoring = options.get("enable_monitoring", True) if options else True
|
82
82
|
if self.enable_monitoring:
|
83
83
|
monitor_statement_params = telemetry.get_statement_params(
|
84
84
|
project=telemetry.TelemetryProject.MLOPS.value,
|
@@ -162,8 +162,12 @@ class Registry:
|
|
162
162
|
- relax_version: Whether to relax the version constraints of the dependencies when running in the
|
163
163
|
Warehouse. It detects any ==x.y.z in specifiers and replaces it with >=x.y, <(x+1). Defaults to True.
|
164
164
|
- function_type: Set the method function type globally. To set method function types individually see
|
165
|
-
|
166
|
-
- method_options: Per-method saving options
|
165
|
+
function_type in model_options.
|
166
|
+
- method_options: Per-method saving options. This dictionary has method names as keys and dictionary
|
167
|
+
values with the desired options.
|
168
|
+
|
169
|
+
The following are the available method options:
|
170
|
+
|
167
171
|
- case_sensitive: Indicates whether the method and its signature should be case sensitive.
|
168
172
|
This means when you refer the method in the SQL, you need to double quote it.
|
169
173
|
This will be helpful if you need case to tell apart your methods or features, or you have
|
@@ -283,7 +287,11 @@ class Registry:
|
|
283
287
|
Warehouse. It detects any ==x.y.z in specifiers and replaces it with >=x.y, <(x+1). Defaults to True.
|
284
288
|
- function_type: Set the method function type globally. To set method function types individually see
|
285
289
|
function_type in model_options.
|
286
|
-
- method_options: Per-method saving options
|
290
|
+
- method_options: Per-method saving options. This dictionary has method names as keys and dictionary
|
291
|
+
values with the desired options. See the example below.
|
292
|
+
|
293
|
+
The following are the available method options:
|
294
|
+
|
287
295
|
- case_sensitive: Indicates whether the method and its signature should be case sensitive.
|
288
296
|
This means when you refer the method in the SQL, you need to double quote it.
|
289
297
|
This will be helpful if you need case to tell apart your methods or features, or you have
|
@@ -294,6 +302,28 @@ class Registry:
|
|
294
302
|
|
295
303
|
Returns:
|
296
304
|
ModelVersion: ModelVersion object corresponding to the model just logged.
|
305
|
+
|
306
|
+
Example::
|
307
|
+
|
308
|
+
from snowflake.ml.registry import Registry
|
309
|
+
|
310
|
+
# create a session
|
311
|
+
session = ...
|
312
|
+
|
313
|
+
registry = Registry(session=session)
|
314
|
+
|
315
|
+
# Define `method_options` for each inference method if needed.
|
316
|
+
method_options={
|
317
|
+
"predict": {
|
318
|
+
"case_sensitive": True
|
319
|
+
}
|
320
|
+
}
|
321
|
+
|
322
|
+
registry.log_model(
|
323
|
+
model=model,
|
324
|
+
model_name="my_model",
|
325
|
+
method_options=method_options,
|
326
|
+
)
|
297
327
|
"""
|
298
328
|
statement_params = telemetry.get_statement_params(
|
299
329
|
project=_TELEMETRY_PROJECT,
|
snowflake/ml/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
VERSION="1.7.3"
|
1
|
+
VERSION="1.7.5"
|