snowflake-ml-python 1.7.3__py3-none-any.whl → 1.7.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/_complete.py +19 -0
- snowflake/ml/_internal/env_utils.py +64 -21
- snowflake/ml/_internal/platform_capabilities.py +87 -0
- snowflake/ml/_internal/relax_version_strategy.py +16 -0
- snowflake/ml/_internal/telemetry.py +21 -0
- snowflake/ml/data/_internal/arrow_ingestor.py +1 -1
- snowflake/ml/dataset/dataset.py +0 -1
- snowflake/ml/feature_store/feature_store.py +18 -0
- snowflake/ml/feature_store/feature_view.py +46 -1
- snowflake/ml/fileset/fileset.py +6 -0
- snowflake/ml/jobs/__init__.py +21 -0
- snowflake/ml/jobs/_utils/constants.py +57 -0
- snowflake/ml/jobs/_utils/payload_utils.py +438 -0
- snowflake/ml/jobs/_utils/spec_utils.py +296 -0
- snowflake/ml/jobs/_utils/types.py +39 -0
- snowflake/ml/jobs/decorators.py +71 -0
- snowflake/ml/jobs/job.py +113 -0
- snowflake/ml/jobs/manager.py +298 -0
- snowflake/ml/model/_client/ops/model_ops.py +11 -2
- snowflake/ml/model/_client/ops/service_ops.py +1 -11
- snowflake/ml/model/_client/sql/service.py +13 -6
- snowflake/ml/model/_packager/model_env/model_env.py +45 -28
- snowflake/ml/model/_packager/model_handlers/_utils.py +19 -6
- snowflake/ml/model/_packager/model_handlers/custom.py +1 -2
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +17 -0
- snowflake/ml/model/_packager/model_handlers/keras.py +230 -0
- snowflake/ml/model/_packager/model_handlers/pytorch.py +1 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +28 -3
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +74 -21
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +27 -49
- snowflake/ml/model/_packager/model_handlers_migrator/tensorflow_migrator_2023_12_01.py +48 -0
- snowflake/ml/model/_packager/model_meta/model_meta.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +3 -0
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +2 -2
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +4 -1
- snowflake/ml/model/_packager/model_task/model_task_utils.py +5 -1
- snowflake/ml/model/_signatures/base_handler.py +1 -2
- snowflake/ml/model/_signatures/builtins_handler.py +2 -2
- snowflake/ml/model/_signatures/core.py +2 -2
- snowflake/ml/model/_signatures/numpy_handler.py +11 -12
- snowflake/ml/model/_signatures/pandas_handler.py +11 -9
- snowflake/ml/model/_signatures/pytorch_handler.py +3 -6
- snowflake/ml/model/_signatures/snowpark_handler.py +3 -3
- snowflake/ml/model/_signatures/tensorflow_handler.py +2 -7
- snowflake/ml/model/model_signature.py +25 -4
- snowflake/ml/model/type_hints.py +15 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +14 -1
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +6 -3
- snowflake/ml/modeling/cluster/affinity_propagation.py +6 -3
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +6 -3
- snowflake/ml/modeling/cluster/birch.py +6 -3
- snowflake/ml/modeling/cluster/bisecting_k_means.py +6 -3
- snowflake/ml/modeling/cluster/dbscan.py +6 -3
- snowflake/ml/modeling/cluster/feature_agglomeration.py +6 -3
- snowflake/ml/modeling/cluster/k_means.py +6 -3
- snowflake/ml/modeling/cluster/mean_shift.py +6 -3
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +6 -3
- snowflake/ml/modeling/cluster/optics.py +6 -3
- snowflake/ml/modeling/cluster/spectral_biclustering.py +6 -3
- snowflake/ml/modeling/cluster/spectral_clustering.py +6 -3
- snowflake/ml/modeling/cluster/spectral_coclustering.py +6 -3
- snowflake/ml/modeling/compose/column_transformer.py +6 -3
- snowflake/ml/modeling/compose/transformed_target_regressor.py +6 -3
- snowflake/ml/modeling/covariance/elliptic_envelope.py +6 -3
- snowflake/ml/modeling/covariance/empirical_covariance.py +6 -3
- snowflake/ml/modeling/covariance/graphical_lasso.py +6 -3
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +6 -3
- snowflake/ml/modeling/covariance/ledoit_wolf.py +6 -3
- snowflake/ml/modeling/covariance/min_cov_det.py +6 -3
- snowflake/ml/modeling/covariance/oas.py +6 -3
- snowflake/ml/modeling/covariance/shrunk_covariance.py +6 -3
- snowflake/ml/modeling/decomposition/dictionary_learning.py +6 -3
- snowflake/ml/modeling/decomposition/factor_analysis.py +6 -3
- snowflake/ml/modeling/decomposition/fast_ica.py +6 -3
- snowflake/ml/modeling/decomposition/incremental_pca.py +6 -3
- snowflake/ml/modeling/decomposition/kernel_pca.py +6 -3
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +6 -3
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +6 -3
- snowflake/ml/modeling/decomposition/pca.py +6 -3
- snowflake/ml/modeling/decomposition/sparse_pca.py +6 -3
- snowflake/ml/modeling/decomposition/truncated_svd.py +6 -3
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +6 -3
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +6 -3
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/bagging_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/bagging_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/isolation_forest.py +6 -3
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/stacking_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/voting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/voting_regressor.py +6 -3
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fdr.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fpr.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fwe.py +6 -3
- snowflake/ml/modeling/feature_selection/select_k_best.py +6 -3
- snowflake/ml/modeling/feature_selection/select_percentile.py +6 -3
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +6 -3
- snowflake/ml/modeling/feature_selection/variance_threshold.py +6 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +6 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +6 -3
- snowflake/ml/modeling/impute/iterative_imputer.py +6 -3
- snowflake/ml/modeling/impute/knn_imputer.py +6 -3
- snowflake/ml/modeling/impute/missing_indicator.py +6 -3
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +6 -3
- snowflake/ml/modeling/kernel_approximation/nystroem.py +6 -3
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +6 -3
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +6 -3
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +6 -3
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +6 -3
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +6 -3
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ard_regression.py +6 -3
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +6 -3
- snowflake/ml/modeling/linear_model/elastic_net.py +6 -3
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +6 -3
- snowflake/ml/modeling/linear_model/gamma_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/huber_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/lars.py +6 -3
- snowflake/ml/modeling/linear_model/lars_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +6 -3
- snowflake/ml/modeling/linear_model/linear_regression.py +6 -3
- snowflake/ml/modeling/linear_model/logistic_regression.py +6 -3
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +6 -3
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +6 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/perceptron.py +6 -3
- snowflake/ml/modeling/linear_model/poisson_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ransac_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ridge.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_cv.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +6 -3
- snowflake/ml/modeling/manifold/isomap.py +6 -3
- snowflake/ml/modeling/manifold/mds.py +6 -3
- snowflake/ml/modeling/manifold/spectral_embedding.py +6 -3
- snowflake/ml/modeling/manifold/tsne.py +6 -3
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +6 -3
- snowflake/ml/modeling/mixture/gaussian_mixture.py +6 -3
- snowflake/ml/modeling/model_selection/grid_search_cv.py +17 -2
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +17 -2
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +6 -3
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +6 -3
- snowflake/ml/modeling/multiclass/output_code_classifier.py +6 -3
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/complement_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +6 -3
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +6 -3
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +6 -3
- snowflake/ml/modeling/neighbors/kernel_density.py +6 -3
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +6 -3
- snowflake/ml/modeling/neighbors/nearest_centroid.py +6 -3
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +6 -3
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +6 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +6 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +6 -3
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +6 -3
- snowflake/ml/modeling/neural_network/mlp_classifier.py +6 -3
- snowflake/ml/modeling/neural_network/mlp_regressor.py +6 -3
- snowflake/ml/modeling/pipeline/pipeline.py +28 -3
- snowflake/ml/modeling/preprocessing/polynomial_features.py +8 -5
- snowflake/ml/modeling/semi_supervised/label_propagation.py +6 -3
- snowflake/ml/modeling/semi_supervised/label_spreading.py +6 -3
- snowflake/ml/modeling/svm/linear_svc.py +6 -3
- snowflake/ml/modeling/svm/linear_svr.py +6 -3
- snowflake/ml/modeling/svm/nu_svc.py +6 -3
- snowflake/ml/modeling/svm/nu_svr.py +6 -3
- snowflake/ml/modeling/svm/svc.py +6 -3
- snowflake/ml/modeling/svm/svr.py +6 -3
- snowflake/ml/modeling/tree/decision_tree_classifier.py +6 -3
- snowflake/ml/modeling/tree/decision_tree_regressor.py +6 -3
- snowflake/ml/modeling/tree/extra_tree_classifier.py +6 -3
- snowflake/ml/modeling/tree/extra_tree_regressor.py +6 -3
- snowflake/ml/modeling/xgboost/xgb_classifier.py +6 -3
- snowflake/ml/modeling/xgboost/xgb_regressor.py +6 -3
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +6 -3
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +6 -3
- snowflake/ml/registry/registry.py +34 -4
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/METADATA +81 -33
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/RECORD +208 -196
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/WHEEL +1 -1
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.7.3.dist-info → snowflake_ml_python-1.7.5.dist-info}/top_level.txt +0 -0
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.decomposition".replace("
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class TruncatedSVD(BaseTransformer):
|
61
64
|
r"""Dimensionality reduction using truncated SVD (aka LSA)
|
62
65
|
For more details on this class, see [sklearn.decomposition.TruncatedSVD]
|
@@ -447,7 +450,7 @@ class TruncatedSVD(BaseTransformer):
|
|
447
450
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
448
451
|
expected_dtype = "array"
|
449
452
|
else:
|
450
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
453
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
451
454
|
# We can only infer the output types from the input types if the following two statemetns are true:
|
452
455
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
453
456
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1104,7 +1107,7 @@ class TruncatedSVD(BaseTransformer):
|
|
1104
1107
|
|
1105
1108
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1106
1109
|
|
1107
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1110
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1108
1111
|
outputs: List[BaseFeatureSpec] = []
|
1109
1112
|
if hasattr(self, "predict"):
|
1110
1113
|
# keep mypy happy
|
@@ -1112,7 +1115,7 @@ class TruncatedSVD(BaseTransformer):
|
|
1112
1115
|
# For classifier, the type of predict is the same as the type of label
|
1113
1116
|
if self._sklearn_object._estimator_type == "classifier":
|
1114
1117
|
# label columns is the desired type for output
|
1115
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1118
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1116
1119
|
# rename the output columns
|
1117
1120
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1118
1121
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.discriminant_analysis".r
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class LinearDiscriminantAnalysis(BaseTransformer):
|
61
64
|
r"""Linear Discriminant Analysis
|
62
65
|
For more details on this class, see [sklearn.discriminant_analysis.LinearDiscriminantAnalysis]
|
@@ -470,7 +473,7 @@ class LinearDiscriminantAnalysis(BaseTransformer):
|
|
470
473
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
471
474
|
expected_dtype = "array"
|
472
475
|
else:
|
473
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
476
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
474
477
|
# We can only infer the output types from the input types if the following two statemetns are true:
|
475
478
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
476
479
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1135,7 +1138,7 @@ class LinearDiscriminantAnalysis(BaseTransformer):
|
|
1135
1138
|
|
1136
1139
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1137
1140
|
|
1138
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1141
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1139
1142
|
outputs: List[BaseFeatureSpec] = []
|
1140
1143
|
if hasattr(self, "predict"):
|
1141
1144
|
# keep mypy happy
|
@@ -1143,7 +1146,7 @@ class LinearDiscriminantAnalysis(BaseTransformer):
|
|
1143
1146
|
# For classifier, the type of predict is the same as the type of label
|
1144
1147
|
if self._sklearn_object._estimator_type == "classifier":
|
1145
1148
|
# label columns is the desired type for output
|
1146
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1149
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1147
1150
|
# rename the output columns
|
1148
1151
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1149
1152
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.discriminant_analysis".r
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class QuadraticDiscriminantAnalysis(BaseTransformer):
|
61
64
|
r"""Quadratic Discriminant Analysis
|
62
65
|
For more details on this class, see [sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis]
|
@@ -424,7 +427,7 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
|
|
424
427
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
425
428
|
expected_dtype = "array"
|
426
429
|
else:
|
427
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
430
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
428
431
|
# We can only infer the output types from the input types if the following two statemetns are true:
|
429
432
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
430
433
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1087,7 +1090,7 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
|
|
1087
1090
|
|
1088
1091
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1089
1092
|
|
1090
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1093
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1091
1094
|
outputs: List[BaseFeatureSpec] = []
|
1092
1095
|
if hasattr(self, "predict"):
|
1093
1096
|
# keep mypy happy
|
@@ -1095,7 +1098,7 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
|
|
1095
1098
|
# For classifier, the type of predict is the same as the type of label
|
1096
1099
|
if self._sklearn_object._estimator_type == "classifier":
|
1097
1100
|
# label columns is the desired type for output
|
1098
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1101
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1099
1102
|
# rename the output columns
|
1100
1103
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1101
1104
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklea
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class AdaBoostClassifier(BaseTransformer):
|
61
64
|
r"""An AdaBoost classifier
|
62
65
|
For more details on this class, see [sklearn.ensemble.AdaBoostClassifier]
|
@@ -438,7 +441,7 @@ class AdaBoostClassifier(BaseTransformer):
|
|
438
441
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
439
442
|
expected_dtype = "array"
|
440
443
|
else:
|
441
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
444
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
442
445
|
# We can only infer the output types from the input types if the following two statemetns are true:
|
443
446
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
444
447
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1101,7 +1104,7 @@ class AdaBoostClassifier(BaseTransformer):
|
|
1101
1104
|
|
1102
1105
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1103
1106
|
|
1104
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1107
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1105
1108
|
outputs: List[BaseFeatureSpec] = []
|
1106
1109
|
if hasattr(self, "predict"):
|
1107
1110
|
# keep mypy happy
|
@@ -1109,7 +1112,7 @@ class AdaBoostClassifier(BaseTransformer):
|
|
1109
1112
|
# For classifier, the type of predict is the same as the type of label
|
1110
1113
|
if self._sklearn_object._estimator_type == "classifier":
|
1111
1114
|
# label columns is the desired type for output
|
1112
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1115
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1113
1116
|
# rename the output columns
|
1114
1117
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1115
1118
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklea
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class AdaBoostRegressor(BaseTransformer):
|
61
64
|
r"""An AdaBoost regressor
|
62
65
|
For more details on this class, see [sklearn.ensemble.AdaBoostRegressor]
|
@@ -436,7 +439,7 @@ class AdaBoostRegressor(BaseTransformer):
|
|
436
439
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
437
440
|
expected_dtype = "array"
|
438
441
|
else:
|
439
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
442
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
440
443
|
# We can only infer the output types from the input types if the following two statemetns are true:
|
441
444
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
442
445
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1093,7 +1096,7 @@ class AdaBoostRegressor(BaseTransformer):
|
|
1093
1096
|
|
1094
1097
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1095
1098
|
|
1096
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1099
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1097
1100
|
outputs: List[BaseFeatureSpec] = []
|
1098
1101
|
if hasattr(self, "predict"):
|
1099
1102
|
# keep mypy happy
|
@@ -1101,7 +1104,7 @@ class AdaBoostRegressor(BaseTransformer):
|
|
1101
1104
|
# For classifier, the type of predict is the same as the type of label
|
1102
1105
|
if self._sklearn_object._estimator_type == "classifier":
|
1103
1106
|
# label columns is the desired type for output
|
1104
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1107
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1105
1108
|
# rename the output columns
|
1106
1109
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1107
1110
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklea
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class BaggingClassifier(BaseTransformer):
|
61
64
|
r"""A Bagging classifier
|
62
65
|
For more details on this class, see [sklearn.ensemble.BaggingClassifier]
|
@@ -474,7 +477,7 @@ class BaggingClassifier(BaseTransformer):
|
|
474
477
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
475
478
|
expected_dtype = "array"
|
476
479
|
else:
|
477
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
480
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
478
481
|
# We can only infer the output types from the input types if the following two statemetns are true:
|
479
482
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
480
483
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1137,7 +1140,7 @@ class BaggingClassifier(BaseTransformer):
|
|
1137
1140
|
|
1138
1141
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1139
1142
|
|
1140
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1143
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1141
1144
|
outputs: List[BaseFeatureSpec] = []
|
1142
1145
|
if hasattr(self, "predict"):
|
1143
1146
|
# keep mypy happy
|
@@ -1145,7 +1148,7 @@ class BaggingClassifier(BaseTransformer):
|
|
1145
1148
|
# For classifier, the type of predict is the same as the type of label
|
1146
1149
|
if self._sklearn_object._estimator_type == "classifier":
|
1147
1150
|
# label columns is the desired type for output
|
1148
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1151
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1149
1152
|
# rename the output columns
|
1150
1153
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1151
1154
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklea
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class BaggingRegressor(BaseTransformer):
|
61
64
|
r"""A Bagging regressor
|
62
65
|
For more details on this class, see [sklearn.ensemble.BaggingRegressor]
|
@@ -474,7 +477,7 @@ class BaggingRegressor(BaseTransformer):
|
|
474
477
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
475
478
|
expected_dtype = "array"
|
476
479
|
else:
|
477
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
480
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
478
481
|
# We can only infer the output types from the input types if the following two statements are true:
|
479
482
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
480
483
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1131,7 +1134,7 @@ class BaggingRegressor(BaseTransformer):
|
|
1131
1134
|
|
1132
1135
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1133
1136
|
|
1134
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1137
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1135
1138
|
outputs: List[BaseFeatureSpec] = []
|
1136
1139
|
if hasattr(self, "predict"):
|
1137
1140
|
# keep mypy happy
|
@@ -1139,7 +1142,7 @@ class BaggingRegressor(BaseTransformer):
|
|
1139
1142
|
# For classifier, the type of predict is the same as the type of label
|
1140
1143
|
if self._sklearn_object._estimator_type == "classifier":
|
1141
1144
|
# label columns is the desired type for output
|
1142
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1145
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1143
1146
|
# rename the output columns
|
1144
1147
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1145
1148
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklea
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class ExtraTreesClassifier(BaseTransformer):
|
61
64
|
r"""An extra-trees classifier
|
62
65
|
For more details on this class, see [sklearn.ensemble.ExtraTreesClassifier]
|
@@ -603,7 +606,7 @@ class ExtraTreesClassifier(BaseTransformer):
|
|
603
606
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
604
607
|
expected_dtype = "array"
|
605
608
|
else:
|
606
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
609
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
607
610
|
# We can only infer the output types from the input types if the following two statements are true:
|
608
611
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
609
612
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1264,7 +1267,7 @@ class ExtraTreesClassifier(BaseTransformer):
|
|
1264
1267
|
|
1265
1268
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1266
1269
|
|
1267
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1270
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1268
1271
|
outputs: List[BaseFeatureSpec] = []
|
1269
1272
|
if hasattr(self, "predict"):
|
1270
1273
|
# keep mypy happy
|
@@ -1272,7 +1275,7 @@ class ExtraTreesClassifier(BaseTransformer):
|
|
1272
1275
|
# For classifier, the type of predict is the same as the type of label
|
1273
1276
|
if self._sklearn_object._estimator_type == "classifier":
|
1274
1277
|
# label columns is the desired type for output
|
1275
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1278
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1276
1279
|
# rename the output columns
|
1277
1280
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1278
1281
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklea
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class ExtraTreesRegressor(BaseTransformer):
|
61
64
|
r"""An extra-trees regressor
|
62
65
|
For more details on this class, see [sklearn.ensemble.ExtraTreesRegressor]
|
@@ -579,7 +582,7 @@ class ExtraTreesRegressor(BaseTransformer):
|
|
579
582
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
580
583
|
expected_dtype = "array"
|
581
584
|
else:
|
582
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
585
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
583
586
|
# We can only infer the output types from the input types if the following two statements are true:
|
584
587
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
585
588
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1236,7 +1239,7 @@ class ExtraTreesRegressor(BaseTransformer):
|
|
1236
1239
|
|
1237
1240
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1238
1241
|
|
1239
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1242
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1240
1243
|
outputs: List[BaseFeatureSpec] = []
|
1241
1244
|
if hasattr(self, "predict"):
|
1242
1245
|
# keep mypy happy
|
@@ -1244,7 +1247,7 @@ class ExtraTreesRegressor(BaseTransformer):
|
|
1244
1247
|
# For classifier, the type of predict is the same as the type of label
|
1245
1248
|
if self._sklearn_object._estimator_type == "classifier":
|
1246
1249
|
# label columns is the desired type for output
|
1247
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1250
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1248
1251
|
# rename the output columns
|
1249
1252
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1250
1253
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklea
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class GradientBoostingClassifier(BaseTransformer):
|
61
64
|
r"""Gradient Boosting for classification
|
62
65
|
For more details on this class, see [sklearn.ensemble.GradientBoostingClassifier]
|
@@ -598,7 +601,7 @@ class GradientBoostingClassifier(BaseTransformer):
|
|
598
601
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
599
602
|
expected_dtype = "array"
|
600
603
|
else:
|
601
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
604
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
602
605
|
# We can only infer the output types from the input types if the following two statements are true:
|
603
606
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
604
607
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1261,7 +1264,7 @@ class GradientBoostingClassifier(BaseTransformer):
|
|
1261
1264
|
|
1262
1265
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1263
1266
|
|
1264
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1267
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1265
1268
|
outputs: List[BaseFeatureSpec] = []
|
1266
1269
|
if hasattr(self, "predict"):
|
1267
1270
|
# keep mypy happy
|
@@ -1269,7 +1272,7 @@ class GradientBoostingClassifier(BaseTransformer):
|
|
1269
1272
|
# For classifier, the type of predict is the same as the type of label
|
1270
1273
|
if self._sklearn_object._estimator_type == "classifier":
|
1271
1274
|
# label columns is the desired type for output
|
1272
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1275
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1273
1276
|
# rename the output columns
|
1274
1277
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1275
1278
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklea
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class GradientBoostingRegressor(BaseTransformer):
|
61
64
|
r"""Gradient Boosting for regression
|
62
65
|
For more details on this class, see [sklearn.ensemble.GradientBoostingRegressor]
|
@@ -607,7 +610,7 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
607
610
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
608
611
|
expected_dtype = "array"
|
609
612
|
else:
|
610
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
613
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
611
614
|
# We can only infer the output types from the input types if the following two statements are true:
|
612
615
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
613
616
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1264,7 +1267,7 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
1264
1267
|
|
1265
1268
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1266
1269
|
|
1267
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1270
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1268
1271
|
outputs: List[BaseFeatureSpec] = []
|
1269
1272
|
if hasattr(self, "predict"):
|
1270
1273
|
# keep mypy happy
|
@@ -1272,7 +1275,7 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
1272
1275
|
# For classifier, the type of predict is the same as the type of label
|
1273
1276
|
if self._sklearn_object._estimator_type == "classifier":
|
1274
1277
|
# label columns is the desired type for output
|
1275
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1278
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1276
1279
|
# rename the output columns
|
1277
1280
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1278
1281
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklea
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class HistGradientBoostingClassifier(BaseTransformer):
|
61
64
|
r"""Histogram-based Gradient Boosting Classification Tree
|
62
65
|
For more details on this class, see [sklearn.ensemble.HistGradientBoostingClassifier]
|
@@ -590,7 +593,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
|
|
590
593
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
591
594
|
expected_dtype = "array"
|
592
595
|
else:
|
593
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
596
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
594
597
|
# We can only infer the output types from the input types if the following two statements are true:
|
595
598
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
596
599
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1253,7 +1256,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
|
|
1253
1256
|
|
1254
1257
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1255
1258
|
|
1256
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1259
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1257
1260
|
outputs: List[BaseFeatureSpec] = []
|
1258
1261
|
if hasattr(self, "predict"):
|
1259
1262
|
# keep mypy happy
|
@@ -1261,7 +1264,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
|
|
1261
1264
|
# For classifier, the type of predict is the same as the type of label
|
1262
1265
|
if self._sklearn_object._estimator_type == "classifier":
|
1263
1266
|
# label columns is the desired type for output
|
1264
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1267
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1265
1268
|
# rename the output columns
|
1266
1269
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1267
1270
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklea
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class HistGradientBoostingRegressor(BaseTransformer):
|
61
64
|
r"""Histogram-based Gradient Boosting Regression Tree
|
62
65
|
For more details on this class, see [sklearn.ensemble.HistGradientBoostingRegressor]
|
@@ -578,7 +581,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
|
|
578
581
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
579
582
|
expected_dtype = "array"
|
580
583
|
else:
|
581
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
584
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
582
585
|
# We can only infer the output types from the input types if the following two statements are true:
|
583
586
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
584
587
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1235,7 +1238,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
|
|
1235
1238
|
|
1236
1239
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1237
1240
|
|
1238
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1241
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1239
1242
|
outputs: List[BaseFeatureSpec] = []
|
1240
1243
|
if hasattr(self, "predict"):
|
1241
1244
|
# keep mypy happy
|
@@ -1243,7 +1246,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
|
|
1243
1246
|
# For classifier, the type of predict is the same as the type of label
|
1244
1247
|
if self._sklearn_object._estimator_type == "classifier":
|
1245
1248
|
# label columns is the desired type for output
|
1246
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1249
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1247
1250
|
# rename the output columns
|
1248
1251
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1249
1252
|
self._model_signature_dict["predict"] = ModelSignature(
|
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklea
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class IsolationForest(BaseTransformer):
|
61
64
|
r"""Isolation Forest Algorithm
|
62
65
|
For more details on this class, see [sklearn.ensemble.IsolationForest]
|
@@ -470,7 +473,7 @@ class IsolationForest(BaseTransformer):
|
|
470
473
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
471
474
|
expected_dtype = "array"
|
472
475
|
else:
|
473
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
476
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
474
477
|
# We can only infer the output types from the input types if the following two statements are true:
|
475
478
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
476
479
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1131,7 +1134,7 @@ class IsolationForest(BaseTransformer):
|
|
1131
1134
|
|
1132
1135
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1133
1136
|
|
1134
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1137
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1135
1138
|
outputs: List[BaseFeatureSpec] = []
|
1136
1139
|
if hasattr(self, "predict"):
|
1137
1140
|
# keep mypy happy
|
@@ -1139,7 +1142,7 @@ class IsolationForest(BaseTransformer):
|
|
1139
1142
|
# For classifier, the type of predict is the same as the type of label
|
1140
1143
|
if self._sklearn_object._estimator_type == "classifier":
|
1141
1144
|
# label columns is the desired type for output
|
1142
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1145
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1143
1146
|
# rename the output columns
|
1144
1147
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1145
1148
|
self._model_signature_dict["predict"] = ModelSignature(
|